diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,53458 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 889485, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999460361894804e-05, + "loss": 5.56, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 4.998898238868559e-05, + "loss": 4.9623, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 4.9983361158423133e-05, + "loss": 4.7587, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 4.997773992816068e-05, + "loss": 4.6227, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 4.9972118697898226e-05, + "loss": 4.4943, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.996649746763577e-05, + "loss": 4.4251, + "step": 600 + }, + { + "epoch": 0.0, + "learning_rate": 4.996087623737332e-05, + "loss": 4.4039, + "step": 700 + }, + { + "epoch": 0.0, + "learning_rate": 4.995525500711086e-05, + "loss": 4.3345, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9949633776848404e-05, + "loss": 4.2333, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 4.994401254658595e-05, + "loss": 4.1646, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 4.993839131632349e-05, + "loss": 4.0956, + "step": 1100 + }, + { + "epoch": 0.01, + "learning_rate": 4.9932770086061036e-05, + "loss": 4.0783, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 4.992714885579858e-05, + "loss": 4.0371, + "step": 1300 + }, + { + "epoch": 0.01, + "learning_rate": 4.992152762553613e-05, + "loss": 3.9386, + "step": 1400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9915962607576297e-05, + "loss": 3.8893, + "step": 1500 + }, + { + "epoch": 0.01, + "learning_rate": 4.991034137731384e-05, + "loss": 3.8706, + "step": 1600 + }, + { + "epoch": 0.01, + "learning_rate": 4.990472014705139e-05, + "loss": 3.8605, + "step": 1700 + }, + { + "epoch": 0.01, + "learning_rate": 4.9899098916788935e-05, + "loss": 3.8019, + "step": 1800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9893477686526475e-05, + "loss": 3.7531, + "step": 1900 + }, + { + "epoch": 0.01, + "learning_rate": 4.988785645626402e-05, + "loss": 3.7156, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 4.988223522600157e-05, + "loss": 3.7135, + "step": 2100 + }, + { + "epoch": 0.01, + "learning_rate": 4.987661399573911e-05, + "loss": 3.6643, + "step": 2200 + }, + { + "epoch": 0.01, + "learning_rate": 4.987099276547665e-05, + "loss": 3.5797, + "step": 2300 + }, + { + "epoch": 0.01, + "learning_rate": 4.98653715352142e-05, + "loss": 3.5802, + "step": 2400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9859750304951746e-05, + "loss": 3.5381, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9854129074689285e-05, + "loss": 3.5357, + "step": 2600 + }, + { + "epoch": 0.02, + "learning_rate": 4.984850784442683e-05, + "loss": 3.4414, + "step": 2700 + }, + { + "epoch": 0.02, + "learning_rate": 4.984288661416438e-05, + "loss": 3.4507, + "step": 2800 + }, + { + "epoch": 0.02, + "learning_rate": 4.9837265383901924e-05, + "loss": 3.3674, + "step": 2900 + }, + { + "epoch": 0.02, + "learning_rate": 4.983164415363947e-05, + "loss": 3.3073, + "step": 3000 + }, + { + "epoch": 0.02, + "learning_rate": 4.982602292337702e-05, + "loss": 3.3416, + "step": 3100 + }, + { + "epoch": 0.02, + "learning_rate": 4.982040169311456e-05, + "loss": 3.267, + "step": 3200 + }, + { + "epoch": 0.02, + "learning_rate": 4.98147804628521e-05, + "loss": 3.2396, + "step": 3300 + }, + { + "epoch": 0.02, + "learning_rate": 4.980915923258965e-05, + "loss": 3.2016, + "step": 3400 + }, + { + "epoch": 0.02, + "learning_rate": 4.9803538002327195e-05, + "loss": 3.1567, + "step": 3500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9797916772064734e-05, + "loss": 3.1444, + "step": 3600 + }, + { + "epoch": 0.02, + "learning_rate": 4.979229554180228e-05, + "loss": 3.097, + "step": 3700 + }, + { + "epoch": 0.02, + "learning_rate": 4.978667431153983e-05, + "loss": 3.0428, + "step": 3800 + }, + { + "epoch": 0.02, + "learning_rate": 4.978105308127737e-05, + "loss": 3.0852, + "step": 3900 + }, + { + "epoch": 0.02, + "learning_rate": 4.977543185101491e-05, + "loss": 2.9581, + "step": 4000 + }, + { + "epoch": 0.02, + "learning_rate": 4.976981062075246e-05, + "loss": 2.9466, + "step": 4100 + }, + { + "epoch": 0.02, + "learning_rate": 4.9764189390490005e-05, + "loss": 2.958, + "step": 4200 + }, + { + "epoch": 0.02, + "learning_rate": 4.9758568160227545e-05, + "loss": 2.9198, + "step": 4300 + }, + { + "epoch": 0.02, + "learning_rate": 4.975294692996509e-05, + "loss": 2.9059, + "step": 4400 + }, + { + "epoch": 0.03, + "learning_rate": 4.974732569970264e-05, + "loss": 2.8533, + "step": 4500 + }, + { + "epoch": 0.03, + "learning_rate": 4.9741704469440184e-05, + "loss": 2.8261, + "step": 4600 + }, + { + "epoch": 0.03, + "learning_rate": 4.973608323917773e-05, + "loss": 2.7826, + "step": 4700 + }, + { + "epoch": 0.03, + "learning_rate": 4.9730462008915276e-05, + "loss": 2.7884, + "step": 4800 + }, + { + "epoch": 0.03, + "learning_rate": 4.972484077865282e-05, + "loss": 2.7279, + "step": 4900 + }, + { + "epoch": 0.03, + "learning_rate": 4.971921954839036e-05, + "loss": 2.6892, + "step": 5000 + }, + { + "epoch": 0.03, + "learning_rate": 4.971359831812791e-05, + "loss": 2.7282, + "step": 5100 + }, + { + "epoch": 0.03, + "learning_rate": 4.9707977087865455e-05, + "loss": 2.7126, + "step": 5200 + }, + { + "epoch": 0.03, + "learning_rate": 4.9702355857602994e-05, + "loss": 2.6314, + "step": 5300 + }, + { + "epoch": 0.03, + "learning_rate": 4.969673462734054e-05, + "loss": 2.6234, + "step": 5400 + }, + { + "epoch": 0.03, + "learning_rate": 4.969111339707809e-05, + "loss": 2.6051, + "step": 5500 + }, + { + "epoch": 0.03, + "learning_rate": 4.968549216681563e-05, + "loss": 2.6092, + "step": 5600 + }, + { + "epoch": 0.03, + "learning_rate": 4.967987093655317e-05, + "loss": 2.5187, + "step": 5700 + }, + { + "epoch": 0.03, + "learning_rate": 4.967424970629072e-05, + "loss": 2.5295, + "step": 5800 + }, + { + "epoch": 0.03, + "learning_rate": 4.9668628476028265e-05, + "loss": 2.4636, + "step": 5900 + }, + { + "epoch": 0.03, + "learning_rate": 4.966300724576581e-05, + "loss": 2.4679, + "step": 6000 + }, + { + "epoch": 0.03, + "learning_rate": 4.965738601550336e-05, + "loss": 2.4281, + "step": 6100 + }, + { + "epoch": 0.03, + "learning_rate": 4.9651764785240904e-05, + "loss": 2.4284, + "step": 6200 + }, + { + "epoch": 0.04, + "learning_rate": 4.964614355497845e-05, + "loss": 2.3777, + "step": 6300 + }, + { + "epoch": 0.04, + "learning_rate": 4.964052232471599e-05, + "loss": 2.4207, + "step": 6400 + }, + { + "epoch": 0.04, + "learning_rate": 4.9634901094453536e-05, + "loss": 2.3493, + "step": 6500 + }, + { + "epoch": 0.04, + "learning_rate": 4.962927986419108e-05, + "loss": 2.3188, + "step": 6600 + }, + { + "epoch": 0.04, + "learning_rate": 4.962365863392862e-05, + "loss": 2.3228, + "step": 6700 + }, + { + "epoch": 0.04, + "learning_rate": 4.961803740366617e-05, + "loss": 2.2783, + "step": 6800 + }, + { + "epoch": 0.04, + "learning_rate": 4.9612416173403714e-05, + "loss": 2.2501, + "step": 6900 + }, + { + "epoch": 0.04, + "learning_rate": 4.960679494314126e-05, + "loss": 2.2354, + "step": 7000 + }, + { + "epoch": 0.04, + "learning_rate": 4.96011737128788e-05, + "loss": 2.228, + "step": 7100 + }, + { + "epoch": 0.04, + "learning_rate": 4.9595552482616346e-05, + "loss": 2.1995, + "step": 7200 + }, + { + "epoch": 0.04, + "learning_rate": 4.958993125235389e-05, + "loss": 2.1941, + "step": 7300 + }, + { + "epoch": 0.04, + "learning_rate": 4.958431002209143e-05, + "loss": 2.1656, + "step": 7400 + }, + { + "epoch": 0.04, + "learning_rate": 4.957868879182898e-05, + "loss": 2.1499, + "step": 7500 + }, + { + "epoch": 0.04, + "learning_rate": 4.9573067561566525e-05, + "loss": 2.1266, + "step": 7600 + }, + { + "epoch": 0.04, + "learning_rate": 4.956744633130407e-05, + "loss": 2.1084, + "step": 7700 + }, + { + "epoch": 0.04, + "learning_rate": 4.956182510104162e-05, + "loss": 2.09, + "step": 7800 + }, + { + "epoch": 0.04, + "learning_rate": 4.9556203870779163e-05, + "loss": 2.0773, + "step": 7900 + }, + { + "epoch": 0.04, + "learning_rate": 4.955058264051671e-05, + "loss": 2.0747, + "step": 8000 + }, + { + "epoch": 0.05, + "learning_rate": 4.954496141025425e-05, + "loss": 2.0388, + "step": 8100 + }, + { + "epoch": 0.05, + "learning_rate": 4.9539340179991795e-05, + "loss": 2.0333, + "step": 8200 + }, + { + "epoch": 0.05, + "learning_rate": 4.953371894972934e-05, + "loss": 2.0312, + "step": 8300 + }, + { + "epoch": 0.05, + "learning_rate": 4.952809771946689e-05, + "loss": 1.9971, + "step": 8400 + }, + { + "epoch": 0.05, + "learning_rate": 4.952247648920443e-05, + "loss": 2.0001, + "step": 8500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9516855258941974e-05, + "loss": 1.9648, + "step": 8600 + }, + { + "epoch": 0.05, + "learning_rate": 4.951123402867952e-05, + "loss": 1.951, + "step": 8700 + }, + { + "epoch": 0.05, + "learning_rate": 4.950561279841706e-05, + "loss": 1.923, + "step": 8800 + }, + { + "epoch": 0.05, + "learning_rate": 4.9499991568154606e-05, + "loss": 1.9315, + "step": 8900 + }, + { + "epoch": 0.05, + "learning_rate": 4.949437033789215e-05, + "loss": 1.8978, + "step": 9000 + }, + { + "epoch": 0.05, + "learning_rate": 4.94887491076297e-05, + "loss": 1.8886, + "step": 9100 + }, + { + "epoch": 0.05, + "learning_rate": 4.9483127877367245e-05, + "loss": 1.8667, + "step": 9200 + }, + { + "epoch": 0.05, + "learning_rate": 4.947750664710479e-05, + "loss": 1.8254, + "step": 9300 + }, + { + "epoch": 0.05, + "learning_rate": 4.947188541684234e-05, + "loss": 1.8372, + "step": 9400 + }, + { + "epoch": 0.05, + "learning_rate": 4.946626418657988e-05, + "loss": 1.8009, + "step": 9500 + }, + { + "epoch": 0.05, + "learning_rate": 4.946064295631742e-05, + "loss": 1.8007, + "step": 9600 + }, + { + "epoch": 0.05, + "learning_rate": 4.945502172605497e-05, + "loss": 1.7914, + "step": 9700 + }, + { + "epoch": 0.06, + "learning_rate": 4.944945670809514e-05, + "loss": 1.7598, + "step": 9800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9443835477832676e-05, + "loss": 1.7665, + "step": 9900 + }, + { + "epoch": 0.06, + "learning_rate": 4.943821424757022e-05, + "loss": 1.7547, + "step": 10000 + }, + { + "epoch": 0.06, + "learning_rate": 4.943259301730777e-05, + "loss": 1.7238, + "step": 10100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9426971787045315e-05, + "loss": 1.7135, + "step": 10200 + }, + { + "epoch": 0.06, + "learning_rate": 4.942135055678286e-05, + "loss": 1.7042, + "step": 10300 + }, + { + "epoch": 0.06, + "learning_rate": 4.941572932652041e-05, + "loss": 1.694, + "step": 10400 + }, + { + "epoch": 0.06, + "learning_rate": 4.9410108096257954e-05, + "loss": 1.6789, + "step": 10500 + }, + { + "epoch": 0.06, + "learning_rate": 4.9404486865995494e-05, + "loss": 1.6663, + "step": 10600 + }, + { + "epoch": 0.06, + "learning_rate": 4.939886563573304e-05, + "loss": 1.6806, + "step": 10700 + }, + { + "epoch": 0.06, + "learning_rate": 4.9393244405470586e-05, + "loss": 1.662, + "step": 10800 + }, + { + "epoch": 0.06, + "learning_rate": 4.938762317520813e-05, + "loss": 1.6356, + "step": 10900 + }, + { + "epoch": 0.06, + "learning_rate": 4.938200194494567e-05, + "loss": 1.6223, + "step": 11000 + }, + { + "epoch": 0.06, + "learning_rate": 4.937638071468322e-05, + "loss": 1.625, + "step": 11100 + }, + { + "epoch": 0.06, + "learning_rate": 4.9370759484420764e-05, + "loss": 1.6213, + "step": 11200 + }, + { + "epoch": 0.06, + "learning_rate": 4.9365138254158304e-05, + "loss": 1.6011, + "step": 11300 + }, + { + "epoch": 0.06, + "learning_rate": 4.935951702389585e-05, + "loss": 1.584, + "step": 11400 + }, + { + "epoch": 0.06, + "learning_rate": 4.9353895793633397e-05, + "loss": 1.5671, + "step": 11500 + }, + { + "epoch": 0.07, + "learning_rate": 4.934827456337094e-05, + "loss": 1.5545, + "step": 11600 + }, + { + "epoch": 0.07, + "learning_rate": 4.934265333310848e-05, + "loss": 1.5518, + "step": 11700 + }, + { + "epoch": 0.07, + "learning_rate": 4.933703210284603e-05, + "loss": 1.5334, + "step": 11800 + }, + { + "epoch": 0.07, + "learning_rate": 4.9331410872583575e-05, + "loss": 1.5476, + "step": 11900 + }, + { + "epoch": 0.07, + "learning_rate": 4.932578964232112e-05, + "loss": 1.5173, + "step": 12000 + }, + { + "epoch": 0.07, + "learning_rate": 4.932016841205867e-05, + "loss": 1.4909, + "step": 12100 + }, + { + "epoch": 0.07, + "learning_rate": 4.9314547181796214e-05, + "loss": 1.4898, + "step": 12200 + }, + { + "epoch": 0.07, + "learning_rate": 4.930892595153376e-05, + "loss": 1.4866, + "step": 12300 + }, + { + "epoch": 0.07, + "learning_rate": 4.93033047212713e-05, + "loss": 1.4586, + "step": 12400 + }, + { + "epoch": 0.07, + "learning_rate": 4.9297683491008846e-05, + "loss": 1.4521, + "step": 12500 + }, + { + "epoch": 0.07, + "learning_rate": 4.929206226074639e-05, + "loss": 1.444, + "step": 12600 + }, + { + "epoch": 0.07, + "learning_rate": 4.928649724278656e-05, + "loss": 1.4528, + "step": 12700 + }, + { + "epoch": 0.07, + "learning_rate": 4.92808760125241e-05, + "loss": 1.4409, + "step": 12800 + }, + { + "epoch": 0.07, + "learning_rate": 4.9275254782261645e-05, + "loss": 1.4285, + "step": 12900 + }, + { + "epoch": 0.07, + "learning_rate": 4.926963355199919e-05, + "loss": 1.4242, + "step": 13000 + }, + { + "epoch": 0.07, + "learning_rate": 4.926401232173674e-05, + "loss": 1.3913, + "step": 13100 + }, + { + "epoch": 0.07, + "learning_rate": 4.9258391091474284e-05, + "loss": 1.3986, + "step": 13200 + }, + { + "epoch": 0.07, + "learning_rate": 4.925276986121183e-05, + "loss": 1.3921, + "step": 13300 + }, + { + "epoch": 0.08, + "learning_rate": 4.924714863094938e-05, + "loss": 1.3849, + "step": 13400 + }, + { + "epoch": 0.08, + "learning_rate": 4.9241527400686916e-05, + "loss": 1.3758, + "step": 13500 + }, + { + "epoch": 0.08, + "learning_rate": 4.923590617042446e-05, + "loss": 1.3645, + "step": 13600 + }, + { + "epoch": 0.08, + "learning_rate": 4.923028494016201e-05, + "loss": 1.3521, + "step": 13700 + }, + { + "epoch": 0.08, + "learning_rate": 4.922466370989955e-05, + "loss": 1.3568, + "step": 13800 + }, + { + "epoch": 0.08, + "learning_rate": 4.9219042479637095e-05, + "loss": 1.3397, + "step": 13900 + }, + { + "epoch": 0.08, + "learning_rate": 4.921342124937464e-05, + "loss": 1.3331, + "step": 14000 + }, + { + "epoch": 0.08, + "learning_rate": 4.920780001911219e-05, + "loss": 1.314, + "step": 14100 + }, + { + "epoch": 0.08, + "learning_rate": 4.920217878884973e-05, + "loss": 1.2971, + "step": 14200 + }, + { + "epoch": 0.08, + "learning_rate": 4.919655755858727e-05, + "loss": 1.3133, + "step": 14300 + }, + { + "epoch": 0.08, + "learning_rate": 4.919093632832482e-05, + "loss": 1.284, + "step": 14400 + }, + { + "epoch": 0.08, + "learning_rate": 4.9185315098062366e-05, + "loss": 1.2809, + "step": 14500 + }, + { + "epoch": 0.08, + "learning_rate": 4.917969386779991e-05, + "loss": 1.2683, + "step": 14600 + }, + { + "epoch": 0.08, + "learning_rate": 4.917407263753746e-05, + "loss": 1.2821, + "step": 14700 + }, + { + "epoch": 0.08, + "learning_rate": 4.9168451407275004e-05, + "loss": 1.2753, + "step": 14800 + }, + { + "epoch": 0.08, + "learning_rate": 4.9162830177012544e-05, + "loss": 1.275, + "step": 14900 + }, + { + "epoch": 0.08, + "learning_rate": 4.915720894675009e-05, + "loss": 1.2367, + "step": 15000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9151587716487636e-05, + "loss": 1.2333, + "step": 15100 + }, + { + "epoch": 0.09, + "learning_rate": 4.9145966486225176e-05, + "loss": 1.2463, + "step": 15200 + }, + { + "epoch": 0.09, + "learning_rate": 4.914034525596272e-05, + "loss": 1.2498, + "step": 15300 + }, + { + "epoch": 0.09, + "learning_rate": 4.913472402570027e-05, + "loss": 1.2156, + "step": 15400 + }, + { + "epoch": 0.09, + "learning_rate": 4.912910279543781e-05, + "loss": 1.2191, + "step": 15500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9123481565175354e-05, + "loss": 1.2084, + "step": 15600 + }, + { + "epoch": 0.09, + "learning_rate": 4.91178603349129e-05, + "loss": 1.2092, + "step": 15700 + }, + { + "epoch": 0.09, + "learning_rate": 4.911223910465045e-05, + "loss": 1.2076, + "step": 15800 + }, + { + "epoch": 0.09, + "learning_rate": 4.9106617874387986e-05, + "loss": 1.2094, + "step": 15900 + }, + { + "epoch": 0.09, + "learning_rate": 4.910099664412553e-05, + "loss": 1.1816, + "step": 16000 + }, + { + "epoch": 0.09, + "learning_rate": 4.909537541386308e-05, + "loss": 1.1782, + "step": 16100 + }, + { + "epoch": 0.09, + "learning_rate": 4.908981039590325e-05, + "loss": 1.1858, + "step": 16200 + }, + { + "epoch": 0.09, + "learning_rate": 4.908418916564079e-05, + "loss": 1.1676, + "step": 16300 + }, + { + "epoch": 0.09, + "learning_rate": 4.907856793537834e-05, + "loss": 1.1634, + "step": 16400 + }, + { + "epoch": 0.09, + "learning_rate": 4.9072946705115885e-05, + "loss": 1.1367, + "step": 16500 + }, + { + "epoch": 0.09, + "learning_rate": 4.9067325474853425e-05, + "loss": 1.1347, + "step": 16600 + }, + { + "epoch": 0.09, + "learning_rate": 4.906170424459097e-05, + "loss": 1.1344, + "step": 16700 + }, + { + "epoch": 0.09, + "learning_rate": 4.905608301432852e-05, + "loss": 1.1463, + "step": 16800 + }, + { + "epoch": 0.09, + "learning_rate": 4.9050461784066064e-05, + "loss": 1.1195, + "step": 16900 + }, + { + "epoch": 0.1, + "learning_rate": 4.90448405538036e-05, + "loss": 1.1326, + "step": 17000 + }, + { + "epoch": 0.1, + "learning_rate": 4.903921932354115e-05, + "loss": 1.1155, + "step": 17100 + }, + { + "epoch": 0.1, + "learning_rate": 4.9033598093278696e-05, + "loss": 1.1083, + "step": 17200 + }, + { + "epoch": 0.1, + "learning_rate": 4.902797686301624e-05, + "loss": 1.0998, + "step": 17300 + }, + { + "epoch": 0.1, + "learning_rate": 4.902235563275379e-05, + "loss": 1.0986, + "step": 17400 + }, + { + "epoch": 0.1, + "learning_rate": 4.9016734402491335e-05, + "loss": 1.0992, + "step": 17500 + }, + { + "epoch": 0.1, + "learning_rate": 4.901111317222888e-05, + "loss": 1.0889, + "step": 17600 + }, + { + "epoch": 0.1, + "learning_rate": 4.900549194196642e-05, + "loss": 1.0869, + "step": 17700 + }, + { + "epoch": 0.1, + "learning_rate": 4.8999870711703967e-05, + "loss": 1.0851, + "step": 17800 + }, + { + "epoch": 0.1, + "learning_rate": 4.899424948144151e-05, + "loss": 1.0744, + "step": 17900 + }, + { + "epoch": 0.1, + "learning_rate": 4.898862825117905e-05, + "loss": 1.0647, + "step": 18000 + }, + { + "epoch": 0.1, + "learning_rate": 4.89830070209166e-05, + "loss": 1.0615, + "step": 18100 + }, + { + "epoch": 0.1, + "learning_rate": 4.8977385790654145e-05, + "loss": 1.0458, + "step": 18200 + }, + { + "epoch": 0.1, + "learning_rate": 4.897176456039169e-05, + "loss": 1.0597, + "step": 18300 + }, + { + "epoch": 0.1, + "learning_rate": 4.896614333012923e-05, + "loss": 1.0508, + "step": 18400 + }, + { + "epoch": 0.1, + "learning_rate": 4.896052209986678e-05, + "loss": 1.0611, + "step": 18500 + }, + { + "epoch": 0.1, + "learning_rate": 4.895490086960432e-05, + "loss": 1.0327, + "step": 18600 + }, + { + "epoch": 0.11, + "learning_rate": 4.894927963934187e-05, + "loss": 1.048, + "step": 18700 + }, + { + "epoch": 0.11, + "learning_rate": 4.8943658409079416e-05, + "loss": 1.0348, + "step": 18800 + }, + { + "epoch": 0.11, + "learning_rate": 4.893803717881696e-05, + "loss": 1.0399, + "step": 18900 + }, + { + "epoch": 0.11, + "learning_rate": 4.893241594855451e-05, + "loss": 1.0029, + "step": 19000 + }, + { + "epoch": 0.11, + "learning_rate": 4.892679471829205e-05, + "loss": 1.0157, + "step": 19100 + }, + { + "epoch": 0.11, + "learning_rate": 4.8921173488029594e-05, + "loss": 1.0069, + "step": 19200 + }, + { + "epoch": 0.11, + "learning_rate": 4.891555225776714e-05, + "loss": 1.0066, + "step": 19300 + }, + { + "epoch": 0.11, + "learning_rate": 4.890993102750468e-05, + "loss": 1.0058, + "step": 19400 + }, + { + "epoch": 0.11, + "learning_rate": 4.8904309797242226e-05, + "loss": 0.9922, + "step": 19500 + }, + { + "epoch": 0.11, + "learning_rate": 4.889868856697977e-05, + "loss": 0.9727, + "step": 19600 + }, + { + "epoch": 0.11, + "learning_rate": 4.889306733671732e-05, + "loss": 0.9964, + "step": 19700 + }, + { + "epoch": 0.11, + "learning_rate": 4.888744610645486e-05, + "loss": 0.9842, + "step": 19800 + }, + { + "epoch": 0.11, + "learning_rate": 4.8881824876192405e-05, + "loss": 0.9823, + "step": 19900 + }, + { + "epoch": 0.11, + "learning_rate": 4.887620364592995e-05, + "loss": 0.9619, + "step": 20000 + }, + { + "epoch": 0.11, + "learning_rate": 4.887058241566749e-05, + "loss": 0.9757, + "step": 20100 + }, + { + "epoch": 0.11, + "learning_rate": 4.8864961185405037e-05, + "loss": 0.9651, + "step": 20200 + }, + { + "epoch": 0.11, + "learning_rate": 4.885933995514258e-05, + "loss": 0.9723, + "step": 20300 + }, + { + "epoch": 0.11, + "learning_rate": 4.885371872488013e-05, + "loss": 0.9634, + "step": 20400 + }, + { + "epoch": 0.12, + "learning_rate": 4.8848097494617675e-05, + "loss": 0.9711, + "step": 20500 + }, + { + "epoch": 0.12, + "learning_rate": 4.884247626435522e-05, + "loss": 0.9605, + "step": 20600 + }, + { + "epoch": 0.12, + "learning_rate": 4.883691124639539e-05, + "loss": 0.9569, + "step": 20700 + }, + { + "epoch": 0.12, + "learning_rate": 4.8831290016132936e-05, + "loss": 0.9378, + "step": 20800 + }, + { + "epoch": 0.12, + "learning_rate": 4.8825668785870475e-05, + "loss": 0.9427, + "step": 20900 + }, + { + "epoch": 0.12, + "learning_rate": 4.882004755560802e-05, + "loss": 0.947, + "step": 21000 + }, + { + "epoch": 0.12, + "learning_rate": 4.881442632534557e-05, + "loss": 0.9455, + "step": 21100 + }, + { + "epoch": 0.12, + "learning_rate": 4.880880509508311e-05, + "loss": 0.9236, + "step": 21200 + }, + { + "epoch": 0.12, + "learning_rate": 4.8803183864820653e-05, + "loss": 0.9209, + "step": 21300 + }, + { + "epoch": 0.12, + "learning_rate": 4.87975626345582e-05, + "loss": 0.9187, + "step": 21400 + }, + { + "epoch": 0.12, + "learning_rate": 4.8791941404295746e-05, + "loss": 0.9161, + "step": 21500 + }, + { + "epoch": 0.12, + "learning_rate": 4.878632017403329e-05, + "loss": 0.9342, + "step": 21600 + }, + { + "epoch": 0.12, + "learning_rate": 4.878069894377084e-05, + "loss": 0.9072, + "step": 21700 + }, + { + "epoch": 0.12, + "learning_rate": 4.8775077713508385e-05, + "loss": 0.9067, + "step": 21800 + }, + { + "epoch": 0.12, + "learning_rate": 4.8769456483245924e-05, + "loss": 0.9052, + "step": 21900 + }, + { + "epoch": 0.12, + "learning_rate": 4.876383525298347e-05, + "loss": 0.8974, + "step": 22000 + }, + { + "epoch": 0.12, + "learning_rate": 4.875821402272102e-05, + "loss": 0.9138, + "step": 22100 + }, + { + "epoch": 0.12, + "learning_rate": 4.875259279245856e-05, + "loss": 0.8994, + "step": 22200 + }, + { + "epoch": 0.13, + "learning_rate": 4.87469715621961e-05, + "loss": 0.8976, + "step": 22300 + }, + { + "epoch": 0.13, + "learning_rate": 4.874135033193365e-05, + "loss": 0.8928, + "step": 22400 + }, + { + "epoch": 0.13, + "learning_rate": 4.8735729101671195e-05, + "loss": 0.8826, + "step": 22500 + }, + { + "epoch": 0.13, + "learning_rate": 4.8730107871408735e-05, + "loss": 0.8848, + "step": 22600 + }, + { + "epoch": 0.13, + "learning_rate": 4.872448664114628e-05, + "loss": 0.8941, + "step": 22700 + }, + { + "epoch": 0.13, + "learning_rate": 4.871886541088383e-05, + "loss": 0.8812, + "step": 22800 + }, + { + "epoch": 0.13, + "learning_rate": 4.8713244180621374e-05, + "loss": 0.879, + "step": 22900 + }, + { + "epoch": 0.13, + "learning_rate": 4.870762295035892e-05, + "loss": 0.8627, + "step": 23000 + }, + { + "epoch": 0.13, + "learning_rate": 4.8702001720096466e-05, + "loss": 0.8801, + "step": 23100 + }, + { + "epoch": 0.13, + "learning_rate": 4.869638048983401e-05, + "loss": 0.8576, + "step": 23200 + }, + { + "epoch": 0.13, + "learning_rate": 4.869075925957155e-05, + "loss": 0.8827, + "step": 23300 + }, + { + "epoch": 0.13, + "learning_rate": 4.86851380293091e-05, + "loss": 0.8744, + "step": 23400 + }, + { + "epoch": 0.13, + "learning_rate": 4.8679516799046644e-05, + "loss": 0.8578, + "step": 23500 + }, + { + "epoch": 0.13, + "learning_rate": 4.867389556878419e-05, + "loss": 0.8575, + "step": 23600 + }, + { + "epoch": 0.13, + "learning_rate": 4.866827433852173e-05, + "loss": 0.8612, + "step": 23700 + }, + { + "epoch": 0.13, + "learning_rate": 4.8662653108259276e-05, + "loss": 0.8579, + "step": 23800 + }, + { + "epoch": 0.13, + "learning_rate": 4.865703187799682e-05, + "loss": 0.8384, + "step": 23900 + }, + { + "epoch": 0.13, + "learning_rate": 4.865141064773436e-05, + "loss": 0.8358, + "step": 24000 + }, + { + "epoch": 0.14, + "learning_rate": 4.864578941747191e-05, + "loss": 0.8398, + "step": 24100 + }, + { + "epoch": 0.14, + "learning_rate": 4.8640168187209455e-05, + "loss": 0.8357, + "step": 24200 + }, + { + "epoch": 0.14, + "learning_rate": 4.8634546956947e-05, + "loss": 0.83, + "step": 24300 + }, + { + "epoch": 0.14, + "learning_rate": 4.862892572668454e-05, + "loss": 0.8276, + "step": 24400 + }, + { + "epoch": 0.14, + "learning_rate": 4.862330449642209e-05, + "loss": 0.8418, + "step": 24500 + }, + { + "epoch": 0.14, + "learning_rate": 4.861768326615963e-05, + "loss": 0.8304, + "step": 24600 + }, + { + "epoch": 0.14, + "learning_rate": 4.861206203589718e-05, + "loss": 0.8269, + "step": 24700 + }, + { + "epoch": 0.14, + "learning_rate": 4.8606440805634726e-05, + "loss": 0.8117, + "step": 24800 + }, + { + "epoch": 0.14, + "learning_rate": 4.860081957537227e-05, + "loss": 0.8232, + "step": 24900 + }, + { + "epoch": 0.14, + "learning_rate": 4.859525455741244e-05, + "loss": 0.8096, + "step": 25000 + }, + { + "epoch": 0.14, + "learning_rate": 4.858963332714998e-05, + "loss": 0.8123, + "step": 25100 + }, + { + "epoch": 0.14, + "learning_rate": 4.8584012096887525e-05, + "loss": 0.8072, + "step": 25200 + }, + { + "epoch": 0.14, + "learning_rate": 4.857839086662507e-05, + "loss": 0.8109, + "step": 25300 + }, + { + "epoch": 0.14, + "learning_rate": 4.857276963636261e-05, + "loss": 0.8017, + "step": 25400 + }, + { + "epoch": 0.14, + "learning_rate": 4.856714840610016e-05, + "loss": 0.7964, + "step": 25500 + }, + { + "epoch": 0.14, + "learning_rate": 4.8561527175837704e-05, + "loss": 0.8116, + "step": 25600 + }, + { + "epoch": 0.14, + "learning_rate": 4.855590594557525e-05, + "loss": 0.8038, + "step": 25700 + }, + { + "epoch": 0.15, + "learning_rate": 4.8550284715312796e-05, + "loss": 0.7844, + "step": 25800 + }, + { + "epoch": 0.15, + "learning_rate": 4.854466348505034e-05, + "loss": 0.8023, + "step": 25900 + }, + { + "epoch": 0.15, + "learning_rate": 4.853904225478789e-05, + "loss": 0.7929, + "step": 26000 + }, + { + "epoch": 0.15, + "learning_rate": 4.853342102452543e-05, + "loss": 0.7934, + "step": 26100 + }, + { + "epoch": 0.15, + "learning_rate": 4.8527799794262975e-05, + "loss": 0.7867, + "step": 26200 + }, + { + "epoch": 0.15, + "learning_rate": 4.852217856400052e-05, + "loss": 0.774, + "step": 26300 + }, + { + "epoch": 0.15, + "learning_rate": 4.851655733373807e-05, + "loss": 0.7856, + "step": 26400 + }, + { + "epoch": 0.15, + "learning_rate": 4.851093610347561e-05, + "loss": 0.7868, + "step": 26500 + }, + { + "epoch": 0.15, + "learning_rate": 4.850531487321315e-05, + "loss": 0.7751, + "step": 26600 + }, + { + "epoch": 0.15, + "learning_rate": 4.84996936429507e-05, + "loss": 0.7983, + "step": 26700 + }, + { + "epoch": 0.15, + "learning_rate": 4.849407241268824e-05, + "loss": 0.773, + "step": 26800 + }, + { + "epoch": 0.15, + "learning_rate": 4.8488451182425785e-05, + "loss": 0.7706, + "step": 26900 + }, + { + "epoch": 0.15, + "learning_rate": 4.848288616446596e-05, + "loss": 0.765, + "step": 27000 + }, + { + "epoch": 0.15, + "learning_rate": 4.8477264934203506e-05, + "loss": 0.7641, + "step": 27100 + }, + { + "epoch": 0.15, + "learning_rate": 4.8471643703941045e-05, + "loss": 0.7598, + "step": 27200 + }, + { + "epoch": 0.15, + "learning_rate": 4.846602247367859e-05, + "loss": 0.7558, + "step": 27300 + }, + { + "epoch": 0.15, + "learning_rate": 4.846040124341614e-05, + "loss": 0.7799, + "step": 27400 + }, + { + "epoch": 0.15, + "learning_rate": 4.8454780013153684e-05, + "loss": 0.7501, + "step": 27500 + }, + { + "epoch": 0.16, + "learning_rate": 4.8449158782891224e-05, + "loss": 0.7588, + "step": 27600 + }, + { + "epoch": 0.16, + "learning_rate": 4.844359376493139e-05, + "loss": 0.75, + "step": 27700 + }, + { + "epoch": 0.16, + "learning_rate": 4.843797253466894e-05, + "loss": 0.7576, + "step": 27800 + }, + { + "epoch": 0.16, + "learning_rate": 4.8432351304406484e-05, + "loss": 0.7513, + "step": 27900 + }, + { + "epoch": 0.16, + "learning_rate": 4.842678628644665e-05, + "loss": 0.7486, + "step": 28000 + }, + { + "epoch": 0.16, + "learning_rate": 4.84211650561842e-05, + "loss": 0.7464, + "step": 28100 + }, + { + "epoch": 0.16, + "learning_rate": 4.8415543825921744e-05, + "loss": 0.7427, + "step": 28200 + }, + { + "epoch": 0.16, + "learning_rate": 4.840992259565929e-05, + "loss": 0.7552, + "step": 28300 + }, + { + "epoch": 0.16, + "learning_rate": 4.840430136539683e-05, + "loss": 0.7422, + "step": 28400 + }, + { + "epoch": 0.16, + "learning_rate": 4.8398680135134376e-05, + "loss": 0.7318, + "step": 28500 + }, + { + "epoch": 0.16, + "learning_rate": 4.839305890487192e-05, + "loss": 0.7442, + "step": 28600 + }, + { + "epoch": 0.16, + "learning_rate": 4.838743767460946e-05, + "loss": 0.7439, + "step": 28700 + }, + { + "epoch": 0.16, + "learning_rate": 4.838181644434701e-05, + "loss": 0.7482, + "step": 28800 + }, + { + "epoch": 0.16, + "learning_rate": 4.8376195214084554e-05, + "loss": 0.7368, + "step": 28900 + }, + { + "epoch": 0.16, + "learning_rate": 4.83705739838221e-05, + "loss": 0.7318, + "step": 29000 + }, + { + "epoch": 0.16, + "learning_rate": 4.836495275355965e-05, + "loss": 0.7371, + "step": 29100 + }, + { + "epoch": 0.16, + "learning_rate": 4.835933152329719e-05, + "loss": 0.7123, + "step": 29200 + }, + { + "epoch": 0.16, + "learning_rate": 4.835371029303474e-05, + "loss": 0.7162, + "step": 29300 + }, + { + "epoch": 0.17, + "learning_rate": 4.834808906277228e-05, + "loss": 0.7219, + "step": 29400 + }, + { + "epoch": 0.17, + "learning_rate": 4.8342467832509825e-05, + "loss": 0.7159, + "step": 29500 + }, + { + "epoch": 0.17, + "learning_rate": 4.833684660224737e-05, + "loss": 0.7184, + "step": 29600 + }, + { + "epoch": 0.17, + "learning_rate": 4.833122537198492e-05, + "loss": 0.7137, + "step": 29700 + }, + { + "epoch": 0.17, + "learning_rate": 4.832560414172246e-05, + "loss": 0.7229, + "step": 29800 + }, + { + "epoch": 0.17, + "learning_rate": 4.8319982911460003e-05, + "loss": 0.7195, + "step": 29900 + }, + { + "epoch": 0.17, + "learning_rate": 4.831436168119755e-05, + "loss": 0.6938, + "step": 30000 + }, + { + "epoch": 0.17, + "learning_rate": 4.830874045093509e-05, + "loss": 0.7061, + "step": 30100 + }, + { + "epoch": 0.17, + "learning_rate": 4.8303119220672636e-05, + "loss": 0.7044, + "step": 30200 + }, + { + "epoch": 0.17, + "learning_rate": 4.829749799041018e-05, + "loss": 0.7225, + "step": 30300 + }, + { + "epoch": 0.17, + "learning_rate": 4.829187676014773e-05, + "loss": 0.7052, + "step": 30400 + }, + { + "epoch": 0.17, + "learning_rate": 4.8286311742187896e-05, + "loss": 0.7071, + "step": 30500 + }, + { + "epoch": 0.17, + "learning_rate": 4.828069051192544e-05, + "loss": 0.6939, + "step": 30600 + }, + { + "epoch": 0.17, + "learning_rate": 4.827506928166299e-05, + "loss": 0.7071, + "step": 30700 + }, + { + "epoch": 0.17, + "learning_rate": 4.8269448051400535e-05, + "loss": 0.6979, + "step": 30800 + }, + { + "epoch": 0.17, + "learning_rate": 4.8263826821138074e-05, + "loss": 0.6989, + "step": 30900 + }, + { + "epoch": 0.17, + "learning_rate": 4.825820559087562e-05, + "loss": 0.7005, + "step": 31000 + }, + { + "epoch": 0.17, + "learning_rate": 4.8252584360613167e-05, + "loss": 0.7155, + "step": 31100 + }, + { + "epoch": 0.18, + "learning_rate": 4.8246963130350706e-05, + "loss": 0.6853, + "step": 31200 + }, + { + "epoch": 0.18, + "learning_rate": 4.824134190008825e-05, + "loss": 0.6985, + "step": 31300 + }, + { + "epoch": 0.18, + "learning_rate": 4.82357206698258e-05, + "loss": 0.6883, + "step": 31400 + }, + { + "epoch": 0.18, + "learning_rate": 4.8230099439563345e-05, + "loss": 0.6888, + "step": 31500 + }, + { + "epoch": 0.18, + "learning_rate": 4.822447820930089e-05, + "loss": 0.6849, + "step": 31600 + }, + { + "epoch": 0.18, + "learning_rate": 4.821885697903844e-05, + "loss": 0.6787, + "step": 31700 + }, + { + "epoch": 0.18, + "learning_rate": 4.8213235748775984e-05, + "loss": 0.6801, + "step": 31800 + }, + { + "epoch": 0.18, + "learning_rate": 4.820761451851352e-05, + "loss": 0.6794, + "step": 31900 + }, + { + "epoch": 0.18, + "learning_rate": 4.820199328825107e-05, + "loss": 0.6825, + "step": 32000 + }, + { + "epoch": 0.18, + "learning_rate": 4.8196372057988616e-05, + "loss": 0.6663, + "step": 32100 + }, + { + "epoch": 0.18, + "learning_rate": 4.8190807040028783e-05, + "loss": 0.6741, + "step": 32200 + }, + { + "epoch": 0.18, + "learning_rate": 4.818518580976632e-05, + "loss": 0.6689, + "step": 32300 + }, + { + "epoch": 0.18, + "learning_rate": 4.817956457950387e-05, + "loss": 0.6635, + "step": 32400 + }, + { + "epoch": 0.18, + "learning_rate": 4.8173943349241416e-05, + "loss": 0.6652, + "step": 32500 + }, + { + "epoch": 0.18, + "learning_rate": 4.816832211897896e-05, + "loss": 0.6667, + "step": 32600 + }, + { + "epoch": 0.18, + "learning_rate": 4.816270088871651e-05, + "loss": 0.6679, + "step": 32700 + }, + { + "epoch": 0.18, + "learning_rate": 4.8157079658454054e-05, + "loss": 0.6754, + "step": 32800 + }, + { + "epoch": 0.18, + "learning_rate": 4.81514584281916e-05, + "loss": 0.6697, + "step": 32900 + }, + { + "epoch": 0.19, + "learning_rate": 4.814583719792914e-05, + "loss": 0.6604, + "step": 33000 + }, + { + "epoch": 0.19, + "learning_rate": 4.8140215967666686e-05, + "loss": 0.6722, + "step": 33100 + }, + { + "epoch": 0.19, + "learning_rate": 4.813459473740423e-05, + "loss": 0.6607, + "step": 33200 + }, + { + "epoch": 0.19, + "learning_rate": 4.812897350714178e-05, + "loss": 0.6645, + "step": 33300 + }, + { + "epoch": 0.19, + "learning_rate": 4.812335227687932e-05, + "loss": 0.6651, + "step": 33400 + }, + { + "epoch": 0.19, + "learning_rate": 4.8117731046616865e-05, + "loss": 0.6565, + "step": 33500 + }, + { + "epoch": 0.19, + "learning_rate": 4.811210981635441e-05, + "loss": 0.6602, + "step": 33600 + }, + { + "epoch": 0.19, + "learning_rate": 4.810648858609195e-05, + "loss": 0.6573, + "step": 33700 + }, + { + "epoch": 0.19, + "learning_rate": 4.81008673558295e-05, + "loss": 0.646, + "step": 33800 + }, + { + "epoch": 0.19, + "learning_rate": 4.809524612556704e-05, + "loss": 0.657, + "step": 33900 + }, + { + "epoch": 0.19, + "learning_rate": 4.808962489530459e-05, + "loss": 0.65, + "step": 34000 + }, + { + "epoch": 0.19, + "learning_rate": 4.808400366504213e-05, + "loss": 0.643, + "step": 34100 + }, + { + "epoch": 0.19, + "learning_rate": 4.807838243477968e-05, + "loss": 0.6582, + "step": 34200 + }, + { + "epoch": 0.19, + "learning_rate": 4.807276120451723e-05, + "loss": 0.6506, + "step": 34300 + }, + { + "epoch": 0.19, + "learning_rate": 4.806713997425477e-05, + "loss": 0.6557, + "step": 34400 + }, + { + "epoch": 0.19, + "learning_rate": 4.8061518743992314e-05, + "loss": 0.6447, + "step": 34500 + }, + { + "epoch": 0.19, + "learning_rate": 4.805589751372986e-05, + "loss": 0.6404, + "step": 34600 + }, + { + "epoch": 0.2, + "learning_rate": 4.8050276283467407e-05, + "loss": 0.6528, + "step": 34700 + }, + { + "epoch": 0.2, + "learning_rate": 4.8044655053204946e-05, + "loss": 0.6402, + "step": 34800 + }, + { + "epoch": 0.2, + "learning_rate": 4.803903382294249e-05, + "loss": 0.6516, + "step": 34900 + }, + { + "epoch": 0.2, + "learning_rate": 4.803341259268004e-05, + "loss": 0.6389, + "step": 35000 + }, + { + "epoch": 0.2, + "learning_rate": 4.802779136241758e-05, + "loss": 0.6196, + "step": 35100 + }, + { + "epoch": 0.2, + "learning_rate": 4.8022170132155124e-05, + "loss": 0.645, + "step": 35200 + }, + { + "epoch": 0.2, + "learning_rate": 4.801654890189267e-05, + "loss": 0.6388, + "step": 35300 + }, + { + "epoch": 0.2, + "learning_rate": 4.801092767163021e-05, + "loss": 0.6307, + "step": 35400 + }, + { + "epoch": 0.2, + "learning_rate": 4.8005306441367756e-05, + "loss": 0.6394, + "step": 35500 + }, + { + "epoch": 0.2, + "learning_rate": 4.79996852111053e-05, + "loss": 0.6264, + "step": 35600 + }, + { + "epoch": 0.2, + "learning_rate": 4.799406398084285e-05, + "loss": 0.638, + "step": 35700 + }, + { + "epoch": 0.2, + "learning_rate": 4.7988442750580395e-05, + "loss": 0.6363, + "step": 35800 + }, + { + "epoch": 0.2, + "learning_rate": 4.798282152031794e-05, + "loss": 0.6343, + "step": 35900 + }, + { + "epoch": 0.2, + "learning_rate": 4.797720029005549e-05, + "loss": 0.6424, + "step": 36000 + }, + { + "epoch": 0.2, + "learning_rate": 4.797157905979303e-05, + "loss": 0.6313, + "step": 36100 + }, + { + "epoch": 0.2, + "learning_rate": 4.7965957829530574e-05, + "loss": 0.6223, + "step": 36200 + }, + { + "epoch": 0.2, + "learning_rate": 4.796033659926812e-05, + "loss": 0.6152, + "step": 36300 + }, + { + "epoch": 0.2, + "learning_rate": 4.7954715369005666e-05, + "loss": 0.6199, + "step": 36400 + }, + { + "epoch": 0.21, + "learning_rate": 4.7949094138743206e-05, + "loss": 0.618, + "step": 36500 + }, + { + "epoch": 0.21, + "learning_rate": 4.794347290848075e-05, + "loss": 0.6154, + "step": 36600 + }, + { + "epoch": 0.21, + "learning_rate": 4.79378516782183e-05, + "loss": 0.624, + "step": 36700 + }, + { + "epoch": 0.21, + "learning_rate": 4.793223044795584e-05, + "loss": 0.6217, + "step": 36800 + }, + { + "epoch": 0.21, + "learning_rate": 4.7926609217693384e-05, + "loss": 0.6219, + "step": 36900 + }, + { + "epoch": 0.21, + "learning_rate": 4.792098798743093e-05, + "loss": 0.6203, + "step": 37000 + }, + { + "epoch": 0.21, + "learning_rate": 4.7915366757168476e-05, + "loss": 0.6171, + "step": 37100 + }, + { + "epoch": 0.21, + "learning_rate": 4.7909745526906016e-05, + "loss": 0.6213, + "step": 37200 + }, + { + "epoch": 0.21, + "learning_rate": 4.790412429664356e-05, + "loss": 0.606, + "step": 37300 + }, + { + "epoch": 0.21, + "learning_rate": 4.789850306638111e-05, + "loss": 0.6017, + "step": 37400 + }, + { + "epoch": 0.21, + "learning_rate": 4.7892881836118655e-05, + "loss": 0.6235, + "step": 37500 + }, + { + "epoch": 0.21, + "learning_rate": 4.78872606058562e-05, + "loss": 0.6087, + "step": 37600 + }, + { + "epoch": 0.21, + "learning_rate": 4.788163937559375e-05, + "loss": 0.6125, + "step": 37700 + }, + { + "epoch": 0.21, + "learning_rate": 4.7876018145331294e-05, + "loss": 0.6126, + "step": 37800 + }, + { + "epoch": 0.21, + "learning_rate": 4.787039691506883e-05, + "loss": 0.5987, + "step": 37900 + }, + { + "epoch": 0.21, + "learning_rate": 4.786477568480638e-05, + "loss": 0.6213, + "step": 38000 + }, + { + "epoch": 0.21, + "learning_rate": 4.7859154454543926e-05, + "loss": 0.614, + "step": 38100 + }, + { + "epoch": 0.21, + "learning_rate": 4.7853533224281465e-05, + "loss": 0.5892, + "step": 38200 + }, + { + "epoch": 0.22, + "learning_rate": 4.784791199401901e-05, + "loss": 0.6065, + "step": 38300 + }, + { + "epoch": 0.22, + "learning_rate": 4.784229076375656e-05, + "loss": 0.5908, + "step": 38400 + }, + { + "epoch": 0.22, + "learning_rate": 4.7836669533494104e-05, + "loss": 0.5908, + "step": 38500 + }, + { + "epoch": 0.22, + "learning_rate": 4.7831048303231644e-05, + "loss": 0.5955, + "step": 38600 + }, + { + "epoch": 0.22, + "learning_rate": 4.782542707296919e-05, + "loss": 0.5827, + "step": 38700 + }, + { + "epoch": 0.22, + "learning_rate": 4.7819805842706736e-05, + "loss": 0.5941, + "step": 38800 + }, + { + "epoch": 0.22, + "learning_rate": 4.781418461244428e-05, + "loss": 0.6, + "step": 38900 + }, + { + "epoch": 0.22, + "learning_rate": 4.780856338218183e-05, + "loss": 0.6139, + "step": 39000 + }, + { + "epoch": 0.22, + "learning_rate": 4.7802942151919375e-05, + "loss": 0.5912, + "step": 39100 + }, + { + "epoch": 0.22, + "learning_rate": 4.779732092165692e-05, + "loss": 0.5866, + "step": 39200 + }, + { + "epoch": 0.22, + "learning_rate": 4.779169969139446e-05, + "loss": 0.5968, + "step": 39300 + }, + { + "epoch": 0.22, + "learning_rate": 4.778607846113201e-05, + "loss": 0.5908, + "step": 39400 + }, + { + "epoch": 0.22, + "learning_rate": 4.778045723086955e-05, + "loss": 0.5863, + "step": 39500 + }, + { + "epoch": 0.22, + "learning_rate": 4.777483600060709e-05, + "loss": 0.5903, + "step": 39600 + }, + { + "epoch": 0.22, + "learning_rate": 4.776921477034464e-05, + "loss": 0.5915, + "step": 39700 + }, + { + "epoch": 0.22, + "learning_rate": 4.776364975238481e-05, + "loss": 0.5993, + "step": 39800 + }, + { + "epoch": 0.22, + "learning_rate": 4.775802852212235e-05, + "loss": 0.5862, + "step": 39900 + }, + { + "epoch": 0.22, + "learning_rate": 4.77524072918599e-05, + "loss": 0.5937, + "step": 40000 + }, + { + "epoch": 0.23, + "learning_rate": 4.7746786061597446e-05, + "loss": 0.5916, + "step": 40100 + }, + { + "epoch": 0.23, + "learning_rate": 4.774116483133499e-05, + "loss": 0.588, + "step": 40200 + }, + { + "epoch": 0.23, + "learning_rate": 4.773554360107254e-05, + "loss": 0.5916, + "step": 40300 + }, + { + "epoch": 0.23, + "learning_rate": 4.77299785831127e-05, + "loss": 0.5878, + "step": 40400 + }, + { + "epoch": 0.23, + "learning_rate": 4.7724357352850245e-05, + "loss": 0.5806, + "step": 40500 + }, + { + "epoch": 0.23, + "learning_rate": 4.771873612258779e-05, + "loss": 0.5764, + "step": 40600 + }, + { + "epoch": 0.23, + "learning_rate": 4.771311489232534e-05, + "loss": 0.5806, + "step": 40700 + }, + { + "epoch": 0.23, + "learning_rate": 4.770749366206288e-05, + "loss": 0.5707, + "step": 40800 + }, + { + "epoch": 0.23, + "learning_rate": 4.7701872431800424e-05, + "loss": 0.5792, + "step": 40900 + }, + { + "epoch": 0.23, + "learning_rate": 4.769625120153797e-05, + "loss": 0.5717, + "step": 41000 + }, + { + "epoch": 0.23, + "learning_rate": 4.7690629971275516e-05, + "loss": 0.5671, + "step": 41100 + }, + { + "epoch": 0.23, + "learning_rate": 4.768500874101306e-05, + "loss": 0.5795, + "step": 41200 + }, + { + "epoch": 0.23, + "learning_rate": 4.767938751075061e-05, + "loss": 0.5838, + "step": 41300 + }, + { + "epoch": 0.23, + "learning_rate": 4.7673766280488155e-05, + "loss": 0.5716, + "step": 41400 + }, + { + "epoch": 0.23, + "learning_rate": 4.7668145050225694e-05, + "loss": 0.5705, + "step": 41500 + }, + { + "epoch": 0.23, + "learning_rate": 4.766252381996324e-05, + "loss": 0.5736, + "step": 41600 + }, + { + "epoch": 0.23, + "learning_rate": 4.765690258970079e-05, + "loss": 0.5746, + "step": 41700 + }, + { + "epoch": 0.23, + "learning_rate": 4.7651281359438326e-05, + "loss": 0.5777, + "step": 41800 + }, + { + "epoch": 0.24, + "learning_rate": 4.764566012917587e-05, + "loss": 0.5647, + "step": 41900 + }, + { + "epoch": 0.24, + "learning_rate": 4.764003889891342e-05, + "loss": 0.5699, + "step": 42000 + }, + { + "epoch": 0.24, + "learning_rate": 4.7634417668650965e-05, + "loss": 0.5739, + "step": 42100 + }, + { + "epoch": 0.24, + "learning_rate": 4.7628796438388505e-05, + "loss": 0.5734, + "step": 42200 + }, + { + "epoch": 0.24, + "learning_rate": 4.762317520812605e-05, + "loss": 0.5632, + "step": 42300 + }, + { + "epoch": 0.24, + "learning_rate": 4.76175539778636e-05, + "loss": 0.5627, + "step": 42400 + }, + { + "epoch": 0.24, + "learning_rate": 4.761193274760114e-05, + "loss": 0.5612, + "step": 42500 + }, + { + "epoch": 0.24, + "learning_rate": 4.760631151733868e-05, + "loss": 0.5699, + "step": 42600 + }, + { + "epoch": 0.24, + "learning_rate": 4.760069028707623e-05, + "loss": 0.5696, + "step": 42700 + }, + { + "epoch": 0.24, + "learning_rate": 4.759506905681378e-05, + "loss": 0.5643, + "step": 42800 + }, + { + "epoch": 0.24, + "learning_rate": 4.758944782655132e-05, + "loss": 0.5644, + "step": 42900 + }, + { + "epoch": 0.24, + "learning_rate": 4.758382659628887e-05, + "loss": 0.5591, + "step": 43000 + }, + { + "epoch": 0.24, + "learning_rate": 4.7578205366026415e-05, + "loss": 0.5659, + "step": 43100 + }, + { + "epoch": 0.24, + "learning_rate": 4.7572584135763954e-05, + "loss": 0.5691, + "step": 43200 + }, + { + "epoch": 0.24, + "learning_rate": 4.75669629055015e-05, + "loss": 0.5496, + "step": 43300 + }, + { + "epoch": 0.24, + "learning_rate": 4.7561341675239047e-05, + "loss": 0.5467, + "step": 43400 + }, + { + "epoch": 0.24, + "learning_rate": 4.755572044497659e-05, + "loss": 0.5464, + "step": 43500 + }, + { + "epoch": 0.25, + "learning_rate": 4.755009921471413e-05, + "loss": 0.5653, + "step": 43600 + }, + { + "epoch": 0.25, + "learning_rate": 4.754447798445168e-05, + "loss": 0.5556, + "step": 43700 + }, + { + "epoch": 0.25, + "learning_rate": 4.7538856754189225e-05, + "loss": 0.5611, + "step": 43800 + }, + { + "epoch": 0.25, + "learning_rate": 4.7533235523926764e-05, + "loss": 0.5461, + "step": 43900 + }, + { + "epoch": 0.25, + "learning_rate": 4.752761429366431e-05, + "loss": 0.5605, + "step": 44000 + }, + { + "epoch": 0.25, + "learning_rate": 4.752199306340186e-05, + "loss": 0.5608, + "step": 44100 + }, + { + "epoch": 0.25, + "learning_rate": 4.75163718331394e-05, + "loss": 0.5509, + "step": 44200 + }, + { + "epoch": 0.25, + "learning_rate": 4.751075060287695e-05, + "loss": 0.5537, + "step": 44300 + }, + { + "epoch": 0.25, + "learning_rate": 4.7505129372614496e-05, + "loss": 0.5621, + "step": 44400 + }, + { + "epoch": 0.25, + "learning_rate": 4.749950814235204e-05, + "loss": 0.5529, + "step": 44500 + }, + { + "epoch": 0.25, + "learning_rate": 4.749388691208958e-05, + "loss": 0.5398, + "step": 44600 + }, + { + "epoch": 0.25, + "learning_rate": 4.748826568182713e-05, + "loss": 0.5467, + "step": 44700 + }, + { + "epoch": 0.25, + "learning_rate": 4.7482644451564674e-05, + "loss": 0.5558, + "step": 44800 + }, + { + "epoch": 0.25, + "learning_rate": 4.747702322130222e-05, + "loss": 0.5417, + "step": 44900 + }, + { + "epoch": 0.25, + "learning_rate": 4.747140199103976e-05, + "loss": 0.5424, + "step": 45000 + }, + { + "epoch": 0.25, + "learning_rate": 4.7465780760777306e-05, + "loss": 0.5427, + "step": 45100 + }, + { + "epoch": 0.25, + "learning_rate": 4.7460215742817474e-05, + "loss": 0.5467, + "step": 45200 + }, + { + "epoch": 0.25, + "learning_rate": 4.745459451255502e-05, + "loss": 0.5575, + "step": 45300 + }, + { + "epoch": 0.26, + "learning_rate": 4.7448973282292566e-05, + "loss": 0.5457, + "step": 45400 + }, + { + "epoch": 0.26, + "learning_rate": 4.744335205203011e-05, + "loss": 0.547, + "step": 45500 + }, + { + "epoch": 0.26, + "learning_rate": 4.743773082176766e-05, + "loss": 0.5439, + "step": 45600 + }, + { + "epoch": 0.26, + "learning_rate": 4.743216580380782e-05, + "loss": 0.5453, + "step": 45700 + }, + { + "epoch": 0.26, + "learning_rate": 4.7426544573545366e-05, + "loss": 0.5481, + "step": 45800 + }, + { + "epoch": 0.26, + "learning_rate": 4.742092334328291e-05, + "loss": 0.5355, + "step": 45900 + }, + { + "epoch": 0.26, + "learning_rate": 4.741530211302046e-05, + "loss": 0.5395, + "step": 46000 + }, + { + "epoch": 0.26, + "learning_rate": 4.7409680882758e-05, + "loss": 0.5314, + "step": 46100 + }, + { + "epoch": 0.26, + "learning_rate": 4.7404059652495544e-05, + "loss": 0.5525, + "step": 46200 + }, + { + "epoch": 0.26, + "learning_rate": 4.739843842223309e-05, + "loss": 0.5336, + "step": 46300 + }, + { + "epoch": 0.26, + "learning_rate": 4.7392873404273265e-05, + "loss": 0.5406, + "step": 46400 + }, + { + "epoch": 0.26, + "learning_rate": 4.7387252174010805e-05, + "loss": 0.5479, + "step": 46500 + }, + { + "epoch": 0.26, + "learning_rate": 4.738163094374835e-05, + "loss": 0.5409, + "step": 46600 + }, + { + "epoch": 0.26, + "learning_rate": 4.73760097134859e-05, + "loss": 0.5355, + "step": 46700 + }, + { + "epoch": 0.26, + "learning_rate": 4.737038848322344e-05, + "loss": 0.5427, + "step": 46800 + }, + { + "epoch": 0.26, + "learning_rate": 4.736476725296098e-05, + "loss": 0.5488, + "step": 46900 + }, + { + "epoch": 0.26, + "learning_rate": 4.735914602269853e-05, + "loss": 0.5335, + "step": 47000 + }, + { + "epoch": 0.26, + "learning_rate": 4.7353524792436075e-05, + "loss": 0.5394, + "step": 47100 + }, + { + "epoch": 0.27, + "learning_rate": 4.7347903562173615e-05, + "loss": 0.5269, + "step": 47200 + }, + { + "epoch": 0.27, + "learning_rate": 4.734228233191116e-05, + "loss": 0.5407, + "step": 47300 + }, + { + "epoch": 0.27, + "learning_rate": 4.733666110164871e-05, + "loss": 0.5285, + "step": 47400 + }, + { + "epoch": 0.27, + "learning_rate": 4.7331039871386254e-05, + "loss": 0.5359, + "step": 47500 + }, + { + "epoch": 0.27, + "learning_rate": 4.73254186411238e-05, + "loss": 0.5205, + "step": 47600 + }, + { + "epoch": 0.27, + "learning_rate": 4.7319797410861346e-05, + "loss": 0.5269, + "step": 47700 + }, + { + "epoch": 0.27, + "learning_rate": 4.731417618059889e-05, + "loss": 0.5353, + "step": 47800 + }, + { + "epoch": 0.27, + "learning_rate": 4.730855495033643e-05, + "loss": 0.5351, + "step": 47900 + }, + { + "epoch": 0.27, + "learning_rate": 4.730293372007398e-05, + "loss": 0.5202, + "step": 48000 + }, + { + "epoch": 0.27, + "learning_rate": 4.7297312489811525e-05, + "loss": 0.5287, + "step": 48100 + }, + { + "epoch": 0.27, + "learning_rate": 4.7291691259549064e-05, + "loss": 0.5241, + "step": 48200 + }, + { + "epoch": 0.27, + "learning_rate": 4.728607002928661e-05, + "loss": 0.5315, + "step": 48300 + }, + { + "epoch": 0.27, + "learning_rate": 4.728044879902416e-05, + "loss": 0.5169, + "step": 48400 + }, + { + "epoch": 0.27, + "learning_rate": 4.72748275687617e-05, + "loss": 0.5283, + "step": 48500 + }, + { + "epoch": 0.27, + "learning_rate": 4.726920633849924e-05, + "loss": 0.5113, + "step": 48600 + }, + { + "epoch": 0.27, + "learning_rate": 4.726358510823679e-05, + "loss": 0.531, + "step": 48700 + }, + { + "epoch": 0.27, + "learning_rate": 4.7257963877974335e-05, + "loss": 0.5341, + "step": 48800 + }, + { + "epoch": 0.27, + "learning_rate": 4.725234264771188e-05, + "loss": 0.5159, + "step": 48900 + }, + { + "epoch": 0.28, + "learning_rate": 4.724672141744943e-05, + "loss": 0.5132, + "step": 49000 + }, + { + "epoch": 0.28, + "learning_rate": 4.7241100187186974e-05, + "loss": 0.5142, + "step": 49100 + }, + { + "epoch": 0.28, + "learning_rate": 4.723547895692452e-05, + "loss": 0.5249, + "step": 49200 + }, + { + "epoch": 0.28, + "learning_rate": 4.722985772666206e-05, + "loss": 0.5125, + "step": 49300 + }, + { + "epoch": 0.28, + "learning_rate": 4.7224236496399606e-05, + "loss": 0.5165, + "step": 49400 + }, + { + "epoch": 0.28, + "learning_rate": 4.721861526613715e-05, + "loss": 0.5082, + "step": 49500 + }, + { + "epoch": 0.28, + "learning_rate": 4.721299403587469e-05, + "loss": 0.5204, + "step": 49600 + }, + { + "epoch": 0.28, + "learning_rate": 4.720737280561224e-05, + "loss": 0.5191, + "step": 49700 + }, + { + "epoch": 0.28, + "learning_rate": 4.7201751575349784e-05, + "loss": 0.516, + "step": 49800 + }, + { + "epoch": 0.28, + "learning_rate": 4.719613034508733e-05, + "loss": 0.5196, + "step": 49900 + }, + { + "epoch": 0.28, + "learning_rate": 4.719050911482487e-05, + "loss": 0.5218, + "step": 50000 + }, + { + "epoch": 0.28, + "learning_rate": 4.7184887884562416e-05, + "loss": 0.5144, + "step": 50100 + }, + { + "epoch": 0.28, + "learning_rate": 4.717926665429996e-05, + "loss": 0.5042, + "step": 50200 + }, + { + "epoch": 0.28, + "learning_rate": 4.71736454240375e-05, + "loss": 0.4984, + "step": 50300 + }, + { + "epoch": 0.28, + "learning_rate": 4.716802419377505e-05, + "loss": 0.523, + "step": 50400 + }, + { + "epoch": 0.28, + "learning_rate": 4.7162402963512595e-05, + "loss": 0.5104, + "step": 50500 + }, + { + "epoch": 0.28, + "learning_rate": 4.715683794555277e-05, + "loss": 0.5157, + "step": 50600 + }, + { + "epoch": 0.28, + "learning_rate": 4.715121671529031e-05, + "loss": 0.5152, + "step": 50700 + }, + { + "epoch": 0.29, + "learning_rate": 4.7145595485027855e-05, + "loss": 0.5171, + "step": 50800 + }, + { + "epoch": 0.29, + "learning_rate": 4.71399742547654e-05, + "loss": 0.5099, + "step": 50900 + }, + { + "epoch": 0.29, + "learning_rate": 4.713435302450295e-05, + "loss": 0.5175, + "step": 51000 + }, + { + "epoch": 0.29, + "learning_rate": 4.712873179424049e-05, + "loss": 0.5084, + "step": 51100 + }, + { + "epoch": 0.29, + "learning_rate": 4.712311056397803e-05, + "loss": 0.5163, + "step": 51200 + }, + { + "epoch": 0.29, + "learning_rate": 4.711748933371558e-05, + "loss": 0.5148, + "step": 51300 + }, + { + "epoch": 0.29, + "learning_rate": 4.711186810345312e-05, + "loss": 0.5055, + "step": 51400 + }, + { + "epoch": 0.29, + "learning_rate": 4.710630308549329e-05, + "loss": 0.5077, + "step": 51500 + }, + { + "epoch": 0.29, + "learning_rate": 4.710068185523084e-05, + "loss": 0.5028, + "step": 51600 + }, + { + "epoch": 0.29, + "learning_rate": 4.7095060624968386e-05, + "loss": 0.4996, + "step": 51700 + }, + { + "epoch": 0.29, + "learning_rate": 4.7089439394705925e-05, + "loss": 0.4939, + "step": 51800 + }, + { + "epoch": 0.29, + "learning_rate": 4.708381816444347e-05, + "loss": 0.4991, + "step": 51900 + }, + { + "epoch": 0.29, + "learning_rate": 4.707819693418102e-05, + "loss": 0.4987, + "step": 52000 + }, + { + "epoch": 0.29, + "learning_rate": 4.7072575703918564e-05, + "loss": 0.5071, + "step": 52100 + }, + { + "epoch": 0.29, + "learning_rate": 4.7066954473656104e-05, + "loss": 0.5012, + "step": 52200 + }, + { + "epoch": 0.29, + "learning_rate": 4.706133324339365e-05, + "loss": 0.5066, + "step": 52300 + }, + { + "epoch": 0.29, + "learning_rate": 4.7055712013131196e-05, + "loss": 0.51, + "step": 52400 + }, + { + "epoch": 0.3, + "learning_rate": 4.7050090782868736e-05, + "loss": 0.5056, + "step": 52500 + }, + { + "epoch": 0.3, + "learning_rate": 4.704446955260628e-05, + "loss": 0.5013, + "step": 52600 + }, + { + "epoch": 0.3, + "learning_rate": 4.703884832234383e-05, + "loss": 0.4858, + "step": 52700 + }, + { + "epoch": 0.3, + "learning_rate": 4.7033227092081375e-05, + "loss": 0.4944, + "step": 52800 + }, + { + "epoch": 0.3, + "learning_rate": 4.702760586181892e-05, + "loss": 0.4991, + "step": 52900 + }, + { + "epoch": 0.3, + "learning_rate": 4.702198463155647e-05, + "loss": 0.4931, + "step": 53000 + }, + { + "epoch": 0.3, + "learning_rate": 4.7016363401294013e-05, + "loss": 0.4889, + "step": 53100 + }, + { + "epoch": 0.3, + "learning_rate": 4.701074217103155e-05, + "loss": 0.491, + "step": 53200 + }, + { + "epoch": 0.3, + "learning_rate": 4.70051209407691e-05, + "loss": 0.4891, + "step": 53300 + }, + { + "epoch": 0.3, + "learning_rate": 4.6999499710506646e-05, + "loss": 0.4994, + "step": 53400 + }, + { + "epoch": 0.3, + "learning_rate": 4.699387848024419e-05, + "loss": 0.4999, + "step": 53500 + }, + { + "epoch": 0.3, + "learning_rate": 4.698825724998173e-05, + "loss": 0.495, + "step": 53600 + }, + { + "epoch": 0.3, + "learning_rate": 4.698263601971928e-05, + "loss": 0.4947, + "step": 53700 + }, + { + "epoch": 0.3, + "learning_rate": 4.6977014789456824e-05, + "loss": 0.4921, + "step": 53800 + }, + { + "epoch": 0.3, + "learning_rate": 4.697139355919436e-05, + "loss": 0.492, + "step": 53900 + }, + { + "epoch": 0.3, + "learning_rate": 4.696577232893191e-05, + "loss": 0.4883, + "step": 54000 + }, + { + "epoch": 0.3, + "learning_rate": 4.6960151098669456e-05, + "loss": 0.4949, + "step": 54100 + }, + { + "epoch": 0.3, + "learning_rate": 4.6954529868407e-05, + "loss": 0.4899, + "step": 54200 + }, + { + "epoch": 0.31, + "learning_rate": 4.694890863814455e-05, + "loss": 0.4936, + "step": 54300 + }, + { + "epoch": 0.31, + "learning_rate": 4.6943287407882095e-05, + "loss": 0.4996, + "step": 54400 + }, + { + "epoch": 0.31, + "learning_rate": 4.693766617761964e-05, + "loss": 0.4875, + "step": 54500 + }, + { + "epoch": 0.31, + "learning_rate": 4.693204494735718e-05, + "loss": 0.488, + "step": 54600 + }, + { + "epoch": 0.31, + "learning_rate": 4.692642371709473e-05, + "loss": 0.4932, + "step": 54700 + }, + { + "epoch": 0.31, + "learning_rate": 4.6920858699134894e-05, + "loss": 0.4859, + "step": 54800 + }, + { + "epoch": 0.31, + "learning_rate": 4.691523746887244e-05, + "loss": 0.4888, + "step": 54900 + }, + { + "epoch": 0.31, + "learning_rate": 4.690961623860998e-05, + "loss": 0.5022, + "step": 55000 + }, + { + "epoch": 0.31, + "learning_rate": 4.6903995008347526e-05, + "loss": 0.4857, + "step": 55100 + }, + { + "epoch": 0.31, + "learning_rate": 4.689837377808507e-05, + "loss": 0.4905, + "step": 55200 + }, + { + "epoch": 0.31, + "learning_rate": 4.689275254782262e-05, + "loss": 0.4887, + "step": 55300 + }, + { + "epoch": 0.31, + "learning_rate": 4.6887131317560165e-05, + "loss": 0.4915, + "step": 55400 + }, + { + "epoch": 0.31, + "learning_rate": 4.688151008729771e-05, + "loss": 0.4805, + "step": 55500 + }, + { + "epoch": 0.31, + "learning_rate": 4.687588885703526e-05, + "loss": 0.4791, + "step": 55600 + }, + { + "epoch": 0.31, + "learning_rate": 4.68702676267728e-05, + "loss": 0.4871, + "step": 55700 + }, + { + "epoch": 0.31, + "learning_rate": 4.6864646396510344e-05, + "loss": 0.4924, + "step": 55800 + }, + { + "epoch": 0.31, + "learning_rate": 4.685902516624789e-05, + "loss": 0.493, + "step": 55900 + }, + { + "epoch": 0.31, + "learning_rate": 4.685340393598543e-05, + "loss": 0.4799, + "step": 56000 + }, + { + "epoch": 0.32, + "learning_rate": 4.6847782705722976e-05, + "loss": 0.4861, + "step": 56100 + }, + { + "epoch": 0.32, + "learning_rate": 4.684216147546052e-05, + "loss": 0.483, + "step": 56200 + }, + { + "epoch": 0.32, + "learning_rate": 4.683654024519807e-05, + "loss": 0.4819, + "step": 56300 + }, + { + "epoch": 0.32, + "learning_rate": 4.683091901493561e-05, + "loss": 0.4866, + "step": 56400 + }, + { + "epoch": 0.32, + "learning_rate": 4.6825297784673154e-05, + "loss": 0.4866, + "step": 56500 + }, + { + "epoch": 0.32, + "learning_rate": 4.68196765544107e-05, + "loss": 0.4952, + "step": 56600 + }, + { + "epoch": 0.32, + "learning_rate": 4.681405532414824e-05, + "loss": 0.4756, + "step": 56700 + }, + { + "epoch": 0.32, + "learning_rate": 4.6808434093885786e-05, + "loss": 0.4787, + "step": 56800 + }, + { + "epoch": 0.32, + "learning_rate": 4.680281286362333e-05, + "loss": 0.475, + "step": 56900 + }, + { + "epoch": 0.32, + "learning_rate": 4.679719163336088e-05, + "loss": 0.4722, + "step": 57000 + }, + { + "epoch": 0.32, + "learning_rate": 4.6791570403098425e-05, + "loss": 0.4806, + "step": 57100 + }, + { + "epoch": 0.32, + "learning_rate": 4.678594917283597e-05, + "loss": 0.4799, + "step": 57200 + }, + { + "epoch": 0.32, + "learning_rate": 4.678032794257352e-05, + "loss": 0.4839, + "step": 57300 + }, + { + "epoch": 0.32, + "learning_rate": 4.677470671231106e-05, + "loss": 0.477, + "step": 57400 + }, + { + "epoch": 0.32, + "learning_rate": 4.67690854820486e-05, + "loss": 0.4852, + "step": 57500 + }, + { + "epoch": 0.32, + "learning_rate": 4.676346425178615e-05, + "loss": 0.4754, + "step": 57600 + }, + { + "epoch": 0.32, + "learning_rate": 4.6757843021523696e-05, + "loss": 0.4777, + "step": 57700 + }, + { + "epoch": 0.32, + "learning_rate": 4.6752221791261235e-05, + "loss": 0.4838, + "step": 57800 + }, + { + "epoch": 0.33, + "learning_rate": 4.674660056099878e-05, + "loss": 0.4802, + "step": 57900 + }, + { + "epoch": 0.33, + "learning_rate": 4.674097933073633e-05, + "loss": 0.4778, + "step": 58000 + }, + { + "epoch": 0.33, + "learning_rate": 4.673535810047387e-05, + "loss": 0.4722, + "step": 58100 + }, + { + "epoch": 0.33, + "learning_rate": 4.6729736870211414e-05, + "loss": 0.467, + "step": 58200 + }, + { + "epoch": 0.33, + "learning_rate": 4.672411563994896e-05, + "loss": 0.4714, + "step": 58300 + }, + { + "epoch": 0.33, + "learning_rate": 4.6718494409686506e-05, + "loss": 0.4773, + "step": 58400 + }, + { + "epoch": 0.33, + "learning_rate": 4.671287317942405e-05, + "loss": 0.4726, + "step": 58500 + }, + { + "epoch": 0.33, + "learning_rate": 4.67072519491616e-05, + "loss": 0.4723, + "step": 58600 + }, + { + "epoch": 0.33, + "learning_rate": 4.6701630718899145e-05, + "loss": 0.4675, + "step": 58700 + }, + { + "epoch": 0.33, + "learning_rate": 4.6696009488636685e-05, + "loss": 0.463, + "step": 58800 + }, + { + "epoch": 0.33, + "learning_rate": 4.669038825837423e-05, + "loss": 0.4659, + "step": 58900 + }, + { + "epoch": 0.33, + "learning_rate": 4.668476702811178e-05, + "loss": 0.4716, + "step": 59000 + }, + { + "epoch": 0.33, + "learning_rate": 4.667914579784932e-05, + "loss": 0.478, + "step": 59100 + }, + { + "epoch": 0.33, + "learning_rate": 4.667352456758686e-05, + "loss": 0.4893, + "step": 59200 + }, + { + "epoch": 0.33, + "learning_rate": 4.666790333732441e-05, + "loss": 0.4669, + "step": 59300 + }, + { + "epoch": 0.33, + "learning_rate": 4.6662282107061955e-05, + "loss": 0.4741, + "step": 59400 + }, + { + "epoch": 0.33, + "learning_rate": 4.6656660876799495e-05, + "loss": 0.4697, + "step": 59500 + }, + { + "epoch": 0.34, + "learning_rate": 4.665103964653704e-05, + "loss": 0.4627, + "step": 59600 + }, + { + "epoch": 0.34, + "learning_rate": 4.664541841627459e-05, + "loss": 0.4728, + "step": 59700 + }, + { + "epoch": 0.34, + "learning_rate": 4.6639797186012134e-05, + "loss": 0.462, + "step": 59800 + }, + { + "epoch": 0.34, + "learning_rate": 4.663417595574967e-05, + "loss": 0.4705, + "step": 59900 + }, + { + "epoch": 0.34, + "learning_rate": 4.662855472548722e-05, + "loss": 0.4764, + "step": 60000 + }, + { + "epoch": 0.34, + "learning_rate": 4.6622933495224766e-05, + "loss": 0.4716, + "step": 60100 + }, + { + "epoch": 0.34, + "learning_rate": 4.661731226496231e-05, + "loss": 0.4594, + "step": 60200 + }, + { + "epoch": 0.34, + "learning_rate": 4.661169103469986e-05, + "loss": 0.4697, + "step": 60300 + }, + { + "epoch": 0.34, + "learning_rate": 4.6606069804437405e-05, + "loss": 0.4805, + "step": 60400 + }, + { + "epoch": 0.34, + "learning_rate": 4.660044857417495e-05, + "loss": 0.4526, + "step": 60500 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482734391249e-05, + "loss": 0.4647, + "step": 60600 + }, + { + "epoch": 0.34, + "learning_rate": 4.658920611365004e-05, + "loss": 0.4632, + "step": 60700 + }, + { + "epoch": 0.34, + "learning_rate": 4.658358488338758e-05, + "loss": 0.4716, + "step": 60800 + }, + { + "epoch": 0.34, + "learning_rate": 4.657796365312512e-05, + "loss": 0.4562, + "step": 60900 + }, + { + "epoch": 0.34, + "learning_rate": 4.657234242286267e-05, + "loss": 0.4647, + "step": 61000 + }, + { + "epoch": 0.34, + "learning_rate": 4.6566721192600215e-05, + "loss": 0.458, + "step": 61100 + }, + { + "epoch": 0.34, + "learning_rate": 4.656109996233776e-05, + "loss": 0.4512, + "step": 61200 + }, + { + "epoch": 0.34, + "learning_rate": 4.65554787320753e-05, + "loss": 0.4568, + "step": 61300 + }, + { + "epoch": 0.35, + "learning_rate": 4.654985750181285e-05, + "loss": 0.473, + "step": 61400 + }, + { + "epoch": 0.35, + "learning_rate": 4.654423627155039e-05, + "loss": 0.4656, + "step": 61500 + }, + { + "epoch": 0.35, + "learning_rate": 4.653861504128794e-05, + "loss": 0.4524, + "step": 61600 + }, + { + "epoch": 0.35, + "learning_rate": 4.6532993811025486e-05, + "loss": 0.4547, + "step": 61700 + }, + { + "epoch": 0.35, + "learning_rate": 4.652737258076303e-05, + "loss": 0.4603, + "step": 61800 + }, + { + "epoch": 0.35, + "learning_rate": 4.652175135050058e-05, + "loss": 0.4546, + "step": 61900 + }, + { + "epoch": 0.35, + "learning_rate": 4.651613012023812e-05, + "loss": 0.4588, + "step": 62000 + }, + { + "epoch": 0.35, + "learning_rate": 4.6510508889975664e-05, + "loss": 0.4516, + "step": 62100 + }, + { + "epoch": 0.35, + "learning_rate": 4.650488765971321e-05, + "loss": 0.4684, + "step": 62200 + }, + { + "epoch": 0.35, + "learning_rate": 4.649926642945075e-05, + "loss": 0.4686, + "step": 62300 + }, + { + "epoch": 0.35, + "learning_rate": 4.649370141149092e-05, + "loss": 0.4605, + "step": 62400 + }, + { + "epoch": 0.35, + "learning_rate": 4.6488080181228464e-05, + "loss": 0.4632, + "step": 62500 + }, + { + "epoch": 0.35, + "learning_rate": 4.648251516326864e-05, + "loss": 0.4625, + "step": 62600 + }, + { + "epoch": 0.35, + "learning_rate": 4.6476893933006185e-05, + "loss": 0.4693, + "step": 62700 + }, + { + "epoch": 0.35, + "learning_rate": 4.6471272702743724e-05, + "loss": 0.466, + "step": 62800 + }, + { + "epoch": 0.35, + "learning_rate": 4.646565147248127e-05, + "loss": 0.4563, + "step": 62900 + }, + { + "epoch": 0.35, + "learning_rate": 4.646003024221882e-05, + "loss": 0.4606, + "step": 63000 + }, + { + "epoch": 0.35, + "learning_rate": 4.6454409011956356e-05, + "loss": 0.4536, + "step": 63100 + }, + { + "epoch": 0.36, + "learning_rate": 4.64487877816939e-05, + "loss": 0.4657, + "step": 63200 + }, + { + "epoch": 0.36, + "learning_rate": 4.644316655143145e-05, + "loss": 0.4547, + "step": 63300 + }, + { + "epoch": 0.36, + "learning_rate": 4.6437545321168995e-05, + "loss": 0.4559, + "step": 63400 + }, + { + "epoch": 0.36, + "learning_rate": 4.6431924090906534e-05, + "loss": 0.4501, + "step": 63500 + }, + { + "epoch": 0.36, + "learning_rate": 4.642630286064408e-05, + "loss": 0.4473, + "step": 63600 + }, + { + "epoch": 0.36, + "learning_rate": 4.642068163038163e-05, + "loss": 0.4531, + "step": 63700 + }, + { + "epoch": 0.36, + "learning_rate": 4.641506040011917e-05, + "loss": 0.4516, + "step": 63800 + }, + { + "epoch": 0.36, + "learning_rate": 4.640943916985672e-05, + "loss": 0.4521, + "step": 63900 + }, + { + "epoch": 0.36, + "learning_rate": 4.6403817939594266e-05, + "loss": 0.4437, + "step": 64000 + }, + { + "epoch": 0.36, + "learning_rate": 4.639819670933181e-05, + "loss": 0.4515, + "step": 64100 + }, + { + "epoch": 0.36, + "learning_rate": 4.639257547906935e-05, + "loss": 0.4556, + "step": 64200 + }, + { + "epoch": 0.36, + "learning_rate": 4.63869542488069e-05, + "loss": 0.4506, + "step": 64300 + }, + { + "epoch": 0.36, + "learning_rate": 4.6381333018544444e-05, + "loss": 0.4499, + "step": 64400 + }, + { + "epoch": 0.36, + "learning_rate": 4.6375711788281984e-05, + "loss": 0.4517, + "step": 64500 + }, + { + "epoch": 0.36, + "learning_rate": 4.637009055801953e-05, + "loss": 0.4442, + "step": 64600 + }, + { + "epoch": 0.36, + "learning_rate": 4.6364469327757076e-05, + "loss": 0.4413, + "step": 64700 + }, + { + "epoch": 0.36, + "learning_rate": 4.635884809749462e-05, + "loss": 0.4487, + "step": 64800 + }, + { + "epoch": 0.36, + "learning_rate": 4.635322686723216e-05, + "loss": 0.4499, + "step": 64900 + }, + { + "epoch": 0.37, + "learning_rate": 4.634760563696971e-05, + "loss": 0.451, + "step": 65000 + }, + { + "epoch": 0.37, + "learning_rate": 4.6341984406707255e-05, + "loss": 0.4427, + "step": 65100 + }, + { + "epoch": 0.37, + "learning_rate": 4.6336363176444794e-05, + "loss": 0.4438, + "step": 65200 + }, + { + "epoch": 0.37, + "learning_rate": 4.633074194618234e-05, + "loss": 0.445, + "step": 65300 + }, + { + "epoch": 0.37, + "learning_rate": 4.632512071591989e-05, + "loss": 0.4445, + "step": 65400 + }, + { + "epoch": 0.37, + "learning_rate": 4.631949948565743e-05, + "loss": 0.4469, + "step": 65500 + }, + { + "epoch": 0.37, + "learning_rate": 4.631387825539498e-05, + "loss": 0.4627, + "step": 65600 + }, + { + "epoch": 0.37, + "learning_rate": 4.6308257025132525e-05, + "loss": 0.4422, + "step": 65700 + }, + { + "epoch": 0.37, + "learning_rate": 4.630263579487007e-05, + "loss": 0.4461, + "step": 65800 + }, + { + "epoch": 0.37, + "learning_rate": 4.629701456460761e-05, + "loss": 0.4455, + "step": 65900 + }, + { + "epoch": 0.37, + "learning_rate": 4.629144954664778e-05, + "loss": 0.4481, + "step": 66000 + }, + { + "epoch": 0.37, + "learning_rate": 4.6285828316385325e-05, + "loss": 0.4432, + "step": 66100 + }, + { + "epoch": 0.37, + "learning_rate": 4.62802632984255e-05, + "loss": 0.455, + "step": 66200 + }, + { + "epoch": 0.37, + "learning_rate": 4.627464206816304e-05, + "loss": 0.4409, + "step": 66300 + }, + { + "epoch": 0.37, + "learning_rate": 4.6269020837900585e-05, + "loss": 0.4439, + "step": 66400 + }, + { + "epoch": 0.37, + "learning_rate": 4.626339960763813e-05, + "loss": 0.4386, + "step": 66500 + }, + { + "epoch": 0.37, + "learning_rate": 4.625777837737568e-05, + "loss": 0.4552, + "step": 66600 + }, + { + "epoch": 0.37, + "learning_rate": 4.625215714711322e-05, + "loss": 0.4465, + "step": 66700 + }, + { + "epoch": 0.38, + "learning_rate": 4.6246535916850764e-05, + "loss": 0.4331, + "step": 66800 + }, + { + "epoch": 0.38, + "learning_rate": 4.624091468658831e-05, + "loss": 0.4369, + "step": 66900 + }, + { + "epoch": 0.38, + "learning_rate": 4.623529345632585e-05, + "loss": 0.4469, + "step": 67000 + }, + { + "epoch": 0.38, + "learning_rate": 4.6229672226063396e-05, + "loss": 0.4464, + "step": 67100 + }, + { + "epoch": 0.38, + "learning_rate": 4.622405099580094e-05, + "loss": 0.4334, + "step": 67200 + }, + { + "epoch": 0.38, + "learning_rate": 4.621842976553849e-05, + "loss": 0.4467, + "step": 67300 + }, + { + "epoch": 0.38, + "learning_rate": 4.621280853527603e-05, + "loss": 0.4427, + "step": 67400 + }, + { + "epoch": 0.38, + "learning_rate": 4.6207187305013574e-05, + "loss": 0.4348, + "step": 67500 + }, + { + "epoch": 0.38, + "learning_rate": 4.620156607475112e-05, + "loss": 0.4385, + "step": 67600 + }, + { + "epoch": 0.38, + "learning_rate": 4.619594484448867e-05, + "loss": 0.4439, + "step": 67700 + }, + { + "epoch": 0.38, + "learning_rate": 4.619032361422621e-05, + "loss": 0.4281, + "step": 67800 + }, + { + "epoch": 0.38, + "learning_rate": 4.618470238396376e-05, + "loss": 0.445, + "step": 67900 + }, + { + "epoch": 0.38, + "learning_rate": 4.6179081153701305e-05, + "loss": 0.4379, + "step": 68000 + }, + { + "epoch": 0.38, + "learning_rate": 4.6173459923438845e-05, + "loss": 0.4353, + "step": 68100 + }, + { + "epoch": 0.38, + "learning_rate": 4.616783869317639e-05, + "loss": 0.434, + "step": 68200 + }, + { + "epoch": 0.38, + "learning_rate": 4.616221746291394e-05, + "loss": 0.4357, + "step": 68300 + }, + { + "epoch": 0.38, + "learning_rate": 4.615659623265148e-05, + "loss": 0.438, + "step": 68400 + }, + { + "epoch": 0.39, + "learning_rate": 4.615097500238902e-05, + "loss": 0.434, + "step": 68500 + }, + { + "epoch": 0.39, + "learning_rate": 4.614535377212657e-05, + "loss": 0.4348, + "step": 68600 + }, + { + "epoch": 0.39, + "learning_rate": 4.6139732541864116e-05, + "loss": 0.4327, + "step": 68700 + }, + { + "epoch": 0.39, + "learning_rate": 4.6134111311601655e-05, + "loss": 0.4405, + "step": 68800 + }, + { + "epoch": 0.39, + "learning_rate": 4.61284900813392e-05, + "loss": 0.433, + "step": 68900 + }, + { + "epoch": 0.39, + "learning_rate": 4.612286885107675e-05, + "loss": 0.4385, + "step": 69000 + }, + { + "epoch": 0.39, + "learning_rate": 4.6117247620814294e-05, + "loss": 0.4342, + "step": 69100 + }, + { + "epoch": 0.39, + "learning_rate": 4.611162639055184e-05, + "loss": 0.4351, + "step": 69200 + }, + { + "epoch": 0.39, + "learning_rate": 4.610600516028939e-05, + "loss": 0.4251, + "step": 69300 + }, + { + "epoch": 0.39, + "learning_rate": 4.610038393002693e-05, + "loss": 0.4417, + "step": 69400 + }, + { + "epoch": 0.39, + "learning_rate": 4.609476269976447e-05, + "loss": 0.4244, + "step": 69500 + }, + { + "epoch": 0.39, + "learning_rate": 4.608914146950202e-05, + "loss": 0.4421, + "step": 69600 + }, + { + "epoch": 0.39, + "learning_rate": 4.6083520239239565e-05, + "loss": 0.4315, + "step": 69700 + }, + { + "epoch": 0.39, + "learning_rate": 4.6077899008977105e-05, + "loss": 0.4381, + "step": 69800 + }, + { + "epoch": 0.39, + "learning_rate": 4.607227777871465e-05, + "loss": 0.4286, + "step": 69900 + }, + { + "epoch": 0.39, + "learning_rate": 4.60666565484522e-05, + "loss": 0.4386, + "step": 70000 + }, + { + "epoch": 0.39, + "learning_rate": 4.6061035318189743e-05, + "loss": 0.4371, + "step": 70100 + }, + { + "epoch": 0.39, + "learning_rate": 4.605541408792728e-05, + "loss": 0.4308, + "step": 70200 + }, + { + "epoch": 0.4, + "learning_rate": 4.604979285766483e-05, + "loss": 0.4405, + "step": 70300 + }, + { + "epoch": 0.4, + "learning_rate": 4.6044171627402375e-05, + "loss": 0.4317, + "step": 70400 + }, + { + "epoch": 0.4, + "learning_rate": 4.6038550397139915e-05, + "loss": 0.4277, + "step": 70500 + }, + { + "epoch": 0.4, + "learning_rate": 4.603292916687746e-05, + "loss": 0.4246, + "step": 70600 + }, + { + "epoch": 0.4, + "learning_rate": 4.602730793661501e-05, + "loss": 0.4378, + "step": 70700 + }, + { + "epoch": 0.4, + "learning_rate": 4.6021686706352554e-05, + "loss": 0.4236, + "step": 70800 + }, + { + "epoch": 0.4, + "learning_rate": 4.60160654760901e-05, + "loss": 0.4345, + "step": 70900 + }, + { + "epoch": 0.4, + "learning_rate": 4.6010444245827646e-05, + "loss": 0.4259, + "step": 71000 + }, + { + "epoch": 0.4, + "learning_rate": 4.600482301556519e-05, + "loss": 0.4212, + "step": 71100 + }, + { + "epoch": 0.4, + "learning_rate": 4.599920178530273e-05, + "loss": 0.4217, + "step": 71200 + }, + { + "epoch": 0.4, + "learning_rate": 4.59936367673429e-05, + "loss": 0.4275, + "step": 71300 + }, + { + "epoch": 0.4, + "learning_rate": 4.5988015537080446e-05, + "loss": 0.4295, + "step": 71400 + }, + { + "epoch": 0.4, + "learning_rate": 4.598239430681799e-05, + "loss": 0.438, + "step": 71500 + }, + { + "epoch": 0.4, + "learning_rate": 4.597677307655553e-05, + "loss": 0.4344, + "step": 71600 + }, + { + "epoch": 0.4, + "learning_rate": 4.597115184629308e-05, + "loss": 0.4427, + "step": 71700 + }, + { + "epoch": 0.4, + "learning_rate": 4.5965530616030624e-05, + "loss": 0.4231, + "step": 71800 + }, + { + "epoch": 0.4, + "learning_rate": 4.595990938576817e-05, + "loss": 0.4262, + "step": 71900 + }, + { + "epoch": 0.4, + "learning_rate": 4.595428815550572e-05, + "loss": 0.4209, + "step": 72000 + }, + { + "epoch": 0.41, + "learning_rate": 4.594866692524326e-05, + "loss": 0.418, + "step": 72100 + }, + { + "epoch": 0.41, + "learning_rate": 4.594304569498081e-05, + "loss": 0.4331, + "step": 72200 + }, + { + "epoch": 0.41, + "learning_rate": 4.593742446471835e-05, + "loss": 0.4315, + "step": 72300 + }, + { + "epoch": 0.41, + "learning_rate": 4.5931803234455895e-05, + "loss": 0.4144, + "step": 72400 + }, + { + "epoch": 0.41, + "learning_rate": 4.592618200419344e-05, + "loss": 0.4295, + "step": 72500 + }, + { + "epoch": 0.41, + "learning_rate": 4.592056077393099e-05, + "loss": 0.4257, + "step": 72600 + }, + { + "epoch": 0.41, + "learning_rate": 4.591493954366853e-05, + "loss": 0.4214, + "step": 72700 + }, + { + "epoch": 0.41, + "learning_rate": 4.5909318313406074e-05, + "loss": 0.4137, + "step": 72800 + }, + { + "epoch": 0.41, + "learning_rate": 4.590369708314362e-05, + "loss": 0.4205, + "step": 72900 + }, + { + "epoch": 0.41, + "learning_rate": 4.589807585288116e-05, + "loss": 0.4298, + "step": 73000 + }, + { + "epoch": 0.41, + "learning_rate": 4.5892454622618706e-05, + "loss": 0.4227, + "step": 73100 + }, + { + "epoch": 0.41, + "learning_rate": 4.588683339235625e-05, + "loss": 0.4216, + "step": 73200 + }, + { + "epoch": 0.41, + "learning_rate": 4.58812121620938e-05, + "loss": 0.4173, + "step": 73300 + }, + { + "epoch": 0.41, + "learning_rate": 4.5875590931831344e-05, + "loss": 0.4108, + "step": 73400 + }, + { + "epoch": 0.41, + "learning_rate": 4.586996970156889e-05, + "loss": 0.4276, + "step": 73500 + }, + { + "epoch": 0.41, + "learning_rate": 4.586434847130644e-05, + "loss": 0.4279, + "step": 73600 + }, + { + "epoch": 0.41, + "learning_rate": 4.5858727241043977e-05, + "loss": 0.4337, + "step": 73700 + }, + { + "epoch": 0.41, + "learning_rate": 4.585310601078152e-05, + "loss": 0.4205, + "step": 73800 + }, + { + "epoch": 0.42, + "learning_rate": 4.584748478051907e-05, + "loss": 0.4231, + "step": 73900 + }, + { + "epoch": 0.42, + "learning_rate": 4.5841863550256615e-05, + "loss": 0.4204, + "step": 74000 + }, + { + "epoch": 0.42, + "learning_rate": 4.5836242319994155e-05, + "loss": 0.4261, + "step": 74100 + }, + { + "epoch": 0.42, + "learning_rate": 4.58306210897317e-05, + "loss": 0.4137, + "step": 74200 + }, + { + "epoch": 0.42, + "learning_rate": 4.582499985946925e-05, + "loss": 0.4252, + "step": 74300 + }, + { + "epoch": 0.42, + "learning_rate": 4.581937862920679e-05, + "loss": 0.4241, + "step": 74400 + }, + { + "epoch": 0.42, + "learning_rate": 4.581375739894433e-05, + "loss": 0.4216, + "step": 74500 + }, + { + "epoch": 0.42, + "learning_rate": 4.580813616868188e-05, + "loss": 0.4132, + "step": 74600 + }, + { + "epoch": 0.42, + "learning_rate": 4.5802514938419426e-05, + "loss": 0.4148, + "step": 74700 + }, + { + "epoch": 0.42, + "learning_rate": 4.5796893708156965e-05, + "loss": 0.4217, + "step": 74800 + }, + { + "epoch": 0.42, + "learning_rate": 4.579127247789451e-05, + "loss": 0.4199, + "step": 74900 + }, + { + "epoch": 0.42, + "learning_rate": 4.578565124763206e-05, + "loss": 0.4204, + "step": 75000 + }, + { + "epoch": 0.42, + "learning_rate": 4.5780030017369604e-05, + "loss": 0.424, + "step": 75100 + }, + { + "epoch": 0.42, + "learning_rate": 4.577440878710715e-05, + "loss": 0.4222, + "step": 75200 + }, + { + "epoch": 0.42, + "learning_rate": 4.57687875568447e-05, + "loss": 0.4139, + "step": 75300 + }, + { + "epoch": 0.42, + "learning_rate": 4.5763166326582236e-05, + "loss": 0.4214, + "step": 75400 + }, + { + "epoch": 0.42, + "learning_rate": 4.575754509631978e-05, + "loss": 0.418, + "step": 75500 + }, + { + "epoch": 0.42, + "learning_rate": 4.575192386605733e-05, + "loss": 0.4099, + "step": 75600 + }, + { + "epoch": 0.43, + "learning_rate": 4.5746302635794875e-05, + "loss": 0.4275, + "step": 75700 + }, + { + "epoch": 0.43, + "learning_rate": 4.5740681405532414e-05, + "loss": 0.4121, + "step": 75800 + }, + { + "epoch": 0.43, + "learning_rate": 4.573506017526996e-05, + "loss": 0.4234, + "step": 75900 + }, + { + "epoch": 0.43, + "learning_rate": 4.572943894500751e-05, + "loss": 0.4195, + "step": 76000 + }, + { + "epoch": 0.43, + "learning_rate": 4.5723817714745047e-05, + "loss": 0.4145, + "step": 76100 + }, + { + "epoch": 0.43, + "learning_rate": 4.571819648448259e-05, + "loss": 0.4125, + "step": 76200 + }, + { + "epoch": 0.43, + "learning_rate": 4.571257525422014e-05, + "loss": 0.4148, + "step": 76300 + }, + { + "epoch": 0.43, + "learning_rate": 4.5706954023957685e-05, + "loss": 0.408, + "step": 76400 + }, + { + "epoch": 0.43, + "learning_rate": 4.570138900599785e-05, + "loss": 0.412, + "step": 76500 + }, + { + "epoch": 0.43, + "learning_rate": 4.56957677757354e-05, + "loss": 0.4158, + "step": 76600 + }, + { + "epoch": 0.43, + "learning_rate": 4.5690146545472946e-05, + "loss": 0.4132, + "step": 76700 + }, + { + "epoch": 0.43, + "learning_rate": 4.568452531521049e-05, + "loss": 0.4073, + "step": 76800 + }, + { + "epoch": 0.43, + "learning_rate": 4.567890408494803e-05, + "loss": 0.4129, + "step": 76900 + }, + { + "epoch": 0.43, + "learning_rate": 4.567328285468558e-05, + "loss": 0.4103, + "step": 77000 + }, + { + "epoch": 0.43, + "learning_rate": 4.5667661624423124e-05, + "loss": 0.4244, + "step": 77100 + }, + { + "epoch": 0.43, + "learning_rate": 4.566204039416066e-05, + "loss": 0.4111, + "step": 77200 + }, + { + "epoch": 0.43, + "learning_rate": 4.565641916389821e-05, + "loss": 0.4124, + "step": 77300 + }, + { + "epoch": 0.44, + "learning_rate": 4.5650797933635756e-05, + "loss": 0.4209, + "step": 77400 + }, + { + "epoch": 0.44, + "learning_rate": 4.56451767033733e-05, + "loss": 0.4105, + "step": 77500 + }, + { + "epoch": 0.44, + "learning_rate": 4.563955547311085e-05, + "loss": 0.4103, + "step": 77600 + }, + { + "epoch": 0.44, + "learning_rate": 4.5633934242848395e-05, + "loss": 0.4164, + "step": 77700 + }, + { + "epoch": 0.44, + "learning_rate": 4.562831301258594e-05, + "loss": 0.4123, + "step": 77800 + }, + { + "epoch": 0.44, + "learning_rate": 4.562269178232348e-05, + "loss": 0.411, + "step": 77900 + }, + { + "epoch": 0.44, + "learning_rate": 4.561707055206103e-05, + "loss": 0.4174, + "step": 78000 + }, + { + "epoch": 0.44, + "learning_rate": 4.561144932179857e-05, + "loss": 0.4092, + "step": 78100 + }, + { + "epoch": 0.44, + "learning_rate": 4.560582809153612e-05, + "loss": 0.4112, + "step": 78200 + }, + { + "epoch": 0.44, + "learning_rate": 4.560020686127366e-05, + "loss": 0.4089, + "step": 78300 + }, + { + "epoch": 0.44, + "learning_rate": 4.5594585631011205e-05, + "loss": 0.4066, + "step": 78400 + }, + { + "epoch": 0.44, + "learning_rate": 4.558896440074875e-05, + "loss": 0.414, + "step": 78500 + }, + { + "epoch": 0.44, + "learning_rate": 4.558334317048629e-05, + "loss": 0.4095, + "step": 78600 + }, + { + "epoch": 0.44, + "learning_rate": 4.557772194022384e-05, + "loss": 0.4193, + "step": 78700 + }, + { + "epoch": 0.44, + "learning_rate": 4.5572100709961383e-05, + "loss": 0.4097, + "step": 78800 + }, + { + "epoch": 0.44, + "learning_rate": 4.556647947969893e-05, + "loss": 0.4193, + "step": 78900 + }, + { + "epoch": 0.44, + "learning_rate": 4.556085824943647e-05, + "loss": 0.4059, + "step": 79000 + }, + { + "epoch": 0.44, + "learning_rate": 4.5555237019174016e-05, + "loss": 0.4143, + "step": 79100 + }, + { + "epoch": 0.45, + "learning_rate": 4.554961578891156e-05, + "loss": 0.4096, + "step": 79200 + }, + { + "epoch": 0.45, + "learning_rate": 4.554399455864911e-05, + "loss": 0.4046, + "step": 79300 + }, + { + "epoch": 0.45, + "learning_rate": 4.5538373328386654e-05, + "loss": 0.3962, + "step": 79400 + }, + { + "epoch": 0.45, + "learning_rate": 4.55327520981242e-05, + "loss": 0.4028, + "step": 79500 + }, + { + "epoch": 0.45, + "learning_rate": 4.552713086786175e-05, + "loss": 0.4048, + "step": 79600 + }, + { + "epoch": 0.45, + "learning_rate": 4.5521509637599286e-05, + "loss": 0.4197, + "step": 79700 + }, + { + "epoch": 0.45, + "learning_rate": 4.551588840733683e-05, + "loss": 0.3987, + "step": 79800 + }, + { + "epoch": 0.45, + "learning_rate": 4.551026717707438e-05, + "loss": 0.4029, + "step": 79900 + }, + { + "epoch": 0.45, + "learning_rate": 4.550464594681192e-05, + "loss": 0.4082, + "step": 80000 + }, + { + "epoch": 0.45, + "learning_rate": 4.5499024716549465e-05, + "loss": 0.4073, + "step": 80100 + }, + { + "epoch": 0.45, + "learning_rate": 4.549340348628701e-05, + "loss": 0.4171, + "step": 80200 + }, + { + "epoch": 0.45, + "learning_rate": 4.548778225602456e-05, + "loss": 0.4008, + "step": 80300 + }, + { + "epoch": 0.45, + "learning_rate": 4.54821610257621e-05, + "loss": 0.4024, + "step": 80400 + }, + { + "epoch": 0.45, + "learning_rate": 4.547653979549964e-05, + "loss": 0.4013, + "step": 80500 + }, + { + "epoch": 0.45, + "learning_rate": 4.547091856523719e-05, + "loss": 0.4018, + "step": 80600 + }, + { + "epoch": 0.45, + "learning_rate": 4.5465297334974736e-05, + "loss": 0.4012, + "step": 80700 + }, + { + "epoch": 0.45, + "learning_rate": 4.545967610471228e-05, + "loss": 0.404, + "step": 80800 + }, + { + "epoch": 0.45, + "learning_rate": 4.545405487444983e-05, + "loss": 0.4079, + "step": 80900 + }, + { + "epoch": 0.46, + "learning_rate": 4.5448433644187374e-05, + "loss": 0.4086, + "step": 81000 + }, + { + "epoch": 0.46, + "learning_rate": 4.5442812413924914e-05, + "loss": 0.4128, + "step": 81100 + }, + { + "epoch": 0.46, + "learning_rate": 4.543719118366246e-05, + "loss": 0.4105, + "step": 81200 + }, + { + "epoch": 0.46, + "learning_rate": 4.5431569953400007e-05, + "loss": 0.4054, + "step": 81300 + }, + { + "epoch": 0.46, + "learning_rate": 4.5425948723137546e-05, + "loss": 0.4084, + "step": 81400 + }, + { + "epoch": 0.46, + "learning_rate": 4.542032749287509e-05, + "loss": 0.402, + "step": 81500 + }, + { + "epoch": 0.46, + "learning_rate": 4.541470626261264e-05, + "loss": 0.398, + "step": 81600 + }, + { + "epoch": 0.46, + "learning_rate": 4.5409085032350185e-05, + "loss": 0.3975, + "step": 81700 + }, + { + "epoch": 0.46, + "learning_rate": 4.5403463802087724e-05, + "loss": 0.4029, + "step": 81800 + }, + { + "epoch": 0.46, + "learning_rate": 4.539784257182527e-05, + "loss": 0.3996, + "step": 81900 + }, + { + "epoch": 0.46, + "learning_rate": 4.539222134156282e-05, + "loss": 0.398, + "step": 82000 + }, + { + "epoch": 0.46, + "learning_rate": 4.5386600111300356e-05, + "loss": 0.4076, + "step": 82100 + }, + { + "epoch": 0.46, + "learning_rate": 4.53809788810379e-05, + "loss": 0.3962, + "step": 82200 + }, + { + "epoch": 0.46, + "learning_rate": 4.537535765077545e-05, + "loss": 0.4029, + "step": 82300 + }, + { + "epoch": 0.46, + "learning_rate": 4.5369736420512995e-05, + "loss": 0.409, + "step": 82400 + }, + { + "epoch": 0.46, + "learning_rate": 4.536417140255316e-05, + "loss": 0.3991, + "step": 82500 + }, + { + "epoch": 0.46, + "learning_rate": 4.535860638459333e-05, + "loss": 0.4067, + "step": 82600 + }, + { + "epoch": 0.46, + "learning_rate": 4.535298515433088e-05, + "loss": 0.3977, + "step": 82700 + }, + { + "epoch": 0.47, + "learning_rate": 4.534736392406842e-05, + "loss": 0.4063, + "step": 82800 + }, + { + "epoch": 0.47, + "learning_rate": 4.534174269380597e-05, + "loss": 0.4032, + "step": 82900 + }, + { + "epoch": 0.47, + "learning_rate": 4.5336121463543516e-05, + "loss": 0.3986, + "step": 83000 + }, + { + "epoch": 0.47, + "learning_rate": 4.533050023328106e-05, + "loss": 0.4043, + "step": 83100 + }, + { + "epoch": 0.47, + "learning_rate": 4.532487900301861e-05, + "loss": 0.4041, + "step": 83200 + }, + { + "epoch": 0.47, + "learning_rate": 4.531925777275615e-05, + "loss": 0.3911, + "step": 83300 + }, + { + "epoch": 0.47, + "learning_rate": 4.5313692754796315e-05, + "loss": 0.4016, + "step": 83400 + }, + { + "epoch": 0.47, + "learning_rate": 4.530807152453386e-05, + "loss": 0.3922, + "step": 83500 + }, + { + "epoch": 0.47, + "learning_rate": 4.530245029427141e-05, + "loss": 0.3918, + "step": 83600 + }, + { + "epoch": 0.47, + "learning_rate": 4.529682906400895e-05, + "loss": 0.4082, + "step": 83700 + }, + { + "epoch": 0.47, + "learning_rate": 4.5291207833746494e-05, + "loss": 0.3864, + "step": 83800 + }, + { + "epoch": 0.47, + "learning_rate": 4.528558660348404e-05, + "loss": 0.3952, + "step": 83900 + }, + { + "epoch": 0.47, + "learning_rate": 4.5279965373221586e-05, + "loss": 0.4032, + "step": 84000 + }, + { + "epoch": 0.47, + "learning_rate": 4.527434414295913e-05, + "loss": 0.4039, + "step": 84100 + }, + { + "epoch": 0.47, + "learning_rate": 4.526872291269668e-05, + "loss": 0.397, + "step": 84200 + }, + { + "epoch": 0.47, + "learning_rate": 4.5263101682434225e-05, + "loss": 0.3981, + "step": 84300 + }, + { + "epoch": 0.47, + "learning_rate": 4.5257480452171764e-05, + "loss": 0.3955, + "step": 84400 + }, + { + "epoch": 0.47, + "learning_rate": 4.525185922190931e-05, + "loss": 0.3864, + "step": 84500 + }, + { + "epoch": 0.48, + "learning_rate": 4.524623799164686e-05, + "loss": 0.4063, + "step": 84600 + }, + { + "epoch": 0.48, + "learning_rate": 4.5240616761384397e-05, + "loss": 0.4034, + "step": 84700 + }, + { + "epoch": 0.48, + "learning_rate": 4.523499553112194e-05, + "loss": 0.3911, + "step": 84800 + }, + { + "epoch": 0.48, + "learning_rate": 4.522937430085949e-05, + "loss": 0.396, + "step": 84900 + }, + { + "epoch": 0.48, + "learning_rate": 4.522375307059703e-05, + "loss": 0.4057, + "step": 85000 + }, + { + "epoch": 0.48, + "learning_rate": 4.5218131840334575e-05, + "loss": 0.4053, + "step": 85100 + }, + { + "epoch": 0.48, + "learning_rate": 4.521251061007212e-05, + "loss": 0.396, + "step": 85200 + }, + { + "epoch": 0.48, + "learning_rate": 4.520688937980967e-05, + "loss": 0.394, + "step": 85300 + }, + { + "epoch": 0.48, + "learning_rate": 4.520126814954721e-05, + "loss": 0.4032, + "step": 85400 + }, + { + "epoch": 0.48, + "learning_rate": 4.519564691928475e-05, + "loss": 0.3956, + "step": 85500 + }, + { + "epoch": 0.48, + "learning_rate": 4.519008190132493e-05, + "loss": 0.3831, + "step": 85600 + }, + { + "epoch": 0.48, + "learning_rate": 4.5184460671062474e-05, + "loss": 0.3957, + "step": 85700 + }, + { + "epoch": 0.48, + "learning_rate": 4.5178839440800013e-05, + "loss": 0.3915, + "step": 85800 + }, + { + "epoch": 0.48, + "learning_rate": 4.517321821053756e-05, + "loss": 0.3864, + "step": 85900 + }, + { + "epoch": 0.48, + "learning_rate": 4.5167596980275106e-05, + "loss": 0.4007, + "step": 86000 + }, + { + "epoch": 0.48, + "learning_rate": 4.5161975750012645e-05, + "loss": 0.4016, + "step": 86100 + }, + { + "epoch": 0.48, + "learning_rate": 4.515635451975019e-05, + "loss": 0.3911, + "step": 86200 + }, + { + "epoch": 0.49, + "learning_rate": 4.515073328948774e-05, + "loss": 0.4034, + "step": 86300 + }, + { + "epoch": 0.49, + "learning_rate": 4.5145112059225284e-05, + "loss": 0.3978, + "step": 86400 + }, + { + "epoch": 0.49, + "learning_rate": 4.5139490828962824e-05, + "loss": 0.3952, + "step": 86500 + }, + { + "epoch": 0.49, + "learning_rate": 4.513386959870038e-05, + "loss": 0.3855, + "step": 86600 + }, + { + "epoch": 0.49, + "learning_rate": 4.512824836843792e-05, + "loss": 0.3987, + "step": 86700 + }, + { + "epoch": 0.49, + "learning_rate": 4.512262713817546e-05, + "loss": 0.3935, + "step": 86800 + }, + { + "epoch": 0.49, + "learning_rate": 4.511700590791301e-05, + "loss": 0.3926, + "step": 86900 + }, + { + "epoch": 0.49, + "learning_rate": 4.5111384677650555e-05, + "loss": 0.3953, + "step": 87000 + }, + { + "epoch": 0.49, + "learning_rate": 4.51057634473881e-05, + "loss": 0.4058, + "step": 87100 + }, + { + "epoch": 0.49, + "learning_rate": 4.510014221712564e-05, + "loss": 0.3933, + "step": 87200 + }, + { + "epoch": 0.49, + "learning_rate": 4.509452098686319e-05, + "loss": 0.3961, + "step": 87300 + }, + { + "epoch": 0.49, + "learning_rate": 4.5088899756600733e-05, + "loss": 0.3976, + "step": 87400 + }, + { + "epoch": 0.49, + "learning_rate": 4.508327852633827e-05, + "loss": 0.3842, + "step": 87500 + }, + { + "epoch": 0.49, + "learning_rate": 4.507765729607582e-05, + "loss": 0.3939, + "step": 87600 + }, + { + "epoch": 0.49, + "learning_rate": 4.5072036065813366e-05, + "loss": 0.4024, + "step": 87700 + }, + { + "epoch": 0.49, + "learning_rate": 4.506641483555091e-05, + "loss": 0.3787, + "step": 87800 + }, + { + "epoch": 0.49, + "learning_rate": 4.506084981759108e-05, + "loss": 0.3899, + "step": 87900 + }, + { + "epoch": 0.49, + "learning_rate": 4.5055228587328626e-05, + "loss": 0.3893, + "step": 88000 + }, + { + "epoch": 0.5, + "learning_rate": 4.504960735706617e-05, + "loss": 0.3995, + "step": 88100 + }, + { + "epoch": 0.5, + "learning_rate": 4.504398612680372e-05, + "loss": 0.3848, + "step": 88200 + }, + { + "epoch": 0.5, + "learning_rate": 4.503836489654126e-05, + "loss": 0.3919, + "step": 88300 + }, + { + "epoch": 0.5, + "learning_rate": 4.5032743666278804e-05, + "loss": 0.3883, + "step": 88400 + }, + { + "epoch": 0.5, + "learning_rate": 4.502712243601635e-05, + "loss": 0.3879, + "step": 88500 + }, + { + "epoch": 0.5, + "learning_rate": 4.502150120575389e-05, + "loss": 0.4023, + "step": 88600 + }, + { + "epoch": 0.5, + "learning_rate": 4.5015879975491436e-05, + "loss": 0.3925, + "step": 88700 + }, + { + "epoch": 0.5, + "learning_rate": 4.501025874522898e-05, + "loss": 0.3939, + "step": 88800 + }, + { + "epoch": 0.5, + "learning_rate": 4.500463751496653e-05, + "loss": 0.3865, + "step": 88900 + }, + { + "epoch": 0.5, + "learning_rate": 4.499901628470407e-05, + "loss": 0.3903, + "step": 89000 + }, + { + "epoch": 0.5, + "learning_rate": 4.4993395054441614e-05, + "loss": 0.3945, + "step": 89100 + }, + { + "epoch": 0.5, + "learning_rate": 4.498783003648179e-05, + "loss": 0.3892, + "step": 89200 + }, + { + "epoch": 0.5, + "learning_rate": 4.4982208806219335e-05, + "loss": 0.3886, + "step": 89300 + }, + { + "epoch": 0.5, + "learning_rate": 4.4976587575956875e-05, + "loss": 0.3819, + "step": 89400 + }, + { + "epoch": 0.5, + "learning_rate": 4.497096634569442e-05, + "loss": 0.3897, + "step": 89500 + }, + { + "epoch": 0.5, + "learning_rate": 4.496534511543197e-05, + "loss": 0.3738, + "step": 89600 + }, + { + "epoch": 0.5, + "learning_rate": 4.495972388516951e-05, + "loss": 0.3918, + "step": 89700 + }, + { + "epoch": 0.5, + "learning_rate": 4.495410265490705e-05, + "loss": 0.3992, + "step": 89800 + }, + { + "epoch": 0.51, + "learning_rate": 4.49484814246446e-05, + "loss": 0.3863, + "step": 89900 + }, + { + "epoch": 0.51, + "learning_rate": 4.4942860194382146e-05, + "loss": 0.3942, + "step": 90000 + }, + { + "epoch": 0.51, + "learning_rate": 4.493729517642231e-05, + "loss": 0.3784, + "step": 90100 + }, + { + "epoch": 0.51, + "learning_rate": 4.493167394615986e-05, + "loss": 0.3935, + "step": 90200 + }, + { + "epoch": 0.51, + "learning_rate": 4.4926052715897406e-05, + "loss": 0.3869, + "step": 90300 + }, + { + "epoch": 0.51, + "learning_rate": 4.492043148563495e-05, + "loss": 0.3901, + "step": 90400 + }, + { + "epoch": 0.51, + "learning_rate": 4.491481025537249e-05, + "loss": 0.3868, + "step": 90500 + }, + { + "epoch": 0.51, + "learning_rate": 4.490918902511004e-05, + "loss": 0.3753, + "step": 90600 + }, + { + "epoch": 0.51, + "learning_rate": 4.4903567794847584e-05, + "loss": 0.3794, + "step": 90700 + }, + { + "epoch": 0.51, + "learning_rate": 4.4897946564585124e-05, + "loss": 0.393, + "step": 90800 + }, + { + "epoch": 0.51, + "learning_rate": 4.489232533432267e-05, + "loss": 0.3867, + "step": 90900 + }, + { + "epoch": 0.51, + "learning_rate": 4.4886704104060216e-05, + "loss": 0.3908, + "step": 91000 + }, + { + "epoch": 0.51, + "learning_rate": 4.488108287379776e-05, + "loss": 0.3859, + "step": 91100 + }, + { + "epoch": 0.51, + "learning_rate": 4.48754616435353e-05, + "loss": 0.3838, + "step": 91200 + }, + { + "epoch": 0.51, + "learning_rate": 4.486984041327285e-05, + "loss": 0.3861, + "step": 91300 + }, + { + "epoch": 0.51, + "learning_rate": 4.4864219183010394e-05, + "loss": 0.4011, + "step": 91400 + }, + { + "epoch": 0.51, + "learning_rate": 4.485859795274794e-05, + "loss": 0.3825, + "step": 91500 + }, + { + "epoch": 0.51, + "learning_rate": 4.485297672248549e-05, + "loss": 0.3919, + "step": 91600 + }, + { + "epoch": 0.52, + "learning_rate": 4.484735549222303e-05, + "loss": 0.3752, + "step": 91700 + }, + { + "epoch": 0.52, + "learning_rate": 4.484173426196058e-05, + "loss": 0.3754, + "step": 91800 + }, + { + "epoch": 0.52, + "learning_rate": 4.483611303169812e-05, + "loss": 0.3903, + "step": 91900 + }, + { + "epoch": 0.52, + "learning_rate": 4.4830491801435665e-05, + "loss": 0.3906, + "step": 92000 + }, + { + "epoch": 0.52, + "learning_rate": 4.482487057117321e-05, + "loss": 0.3713, + "step": 92100 + }, + { + "epoch": 0.52, + "learning_rate": 4.481924934091075e-05, + "loss": 0.3835, + "step": 92200 + }, + { + "epoch": 0.52, + "learning_rate": 4.48136281106483e-05, + "loss": 0.3925, + "step": 92300 + }, + { + "epoch": 0.52, + "learning_rate": 4.4808006880385844e-05, + "loss": 0.3793, + "step": 92400 + }, + { + "epoch": 0.52, + "learning_rate": 4.480238565012339e-05, + "loss": 0.3809, + "step": 92500 + }, + { + "epoch": 0.52, + "learning_rate": 4.479676441986093e-05, + "loss": 0.3763, + "step": 92600 + }, + { + "epoch": 0.52, + "learning_rate": 4.4791143189598476e-05, + "loss": 0.3846, + "step": 92700 + }, + { + "epoch": 0.52, + "learning_rate": 4.478552195933602e-05, + "loss": 0.3815, + "step": 92800 + }, + { + "epoch": 0.52, + "learning_rate": 4.477990072907357e-05, + "loss": 0.3811, + "step": 92900 + }, + { + "epoch": 0.52, + "learning_rate": 4.4774279498811115e-05, + "loss": 0.3862, + "step": 93000 + }, + { + "epoch": 0.52, + "learning_rate": 4.476865826854866e-05, + "loss": 0.3752, + "step": 93100 + }, + { + "epoch": 0.52, + "learning_rate": 4.476303703828621e-05, + "loss": 0.376, + "step": 93200 + }, + { + "epoch": 0.52, + "learning_rate": 4.4757415808023747e-05, + "loss": 0.38, + "step": 93300 + }, + { + "epoch": 0.53, + "learning_rate": 4.475179457776129e-05, + "loss": 0.3714, + "step": 93400 + }, + { + "epoch": 0.53, + "learning_rate": 4.474617334749884e-05, + "loss": 0.374, + "step": 93500 + }, + { + "epoch": 0.53, + "learning_rate": 4.474055211723638e-05, + "loss": 0.3832, + "step": 93600 + }, + { + "epoch": 0.53, + "learning_rate": 4.4734930886973925e-05, + "loss": 0.373, + "step": 93700 + }, + { + "epoch": 0.53, + "learning_rate": 4.472930965671147e-05, + "loss": 0.3801, + "step": 93800 + }, + { + "epoch": 0.53, + "learning_rate": 4.472368842644902e-05, + "loss": 0.383, + "step": 93900 + }, + { + "epoch": 0.53, + "learning_rate": 4.471806719618656e-05, + "loss": 0.3805, + "step": 94000 + }, + { + "epoch": 0.53, + "learning_rate": 4.47124459659241e-05, + "loss": 0.3795, + "step": 94100 + }, + { + "epoch": 0.53, + "learning_rate": 4.470682473566165e-05, + "loss": 0.3773, + "step": 94200 + }, + { + "epoch": 0.53, + "learning_rate": 4.470120350539919e-05, + "loss": 0.3767, + "step": 94300 + }, + { + "epoch": 0.53, + "learning_rate": 4.4695582275136735e-05, + "loss": 0.3762, + "step": 94400 + }, + { + "epoch": 0.53, + "learning_rate": 4.468996104487428e-05, + "loss": 0.3764, + "step": 94500 + }, + { + "epoch": 0.53, + "learning_rate": 4.468433981461183e-05, + "loss": 0.3771, + "step": 94600 + }, + { + "epoch": 0.53, + "learning_rate": 4.4678718584349374e-05, + "loss": 0.3781, + "step": 94700 + }, + { + "epoch": 0.53, + "learning_rate": 4.467309735408692e-05, + "loss": 0.3699, + "step": 94800 + }, + { + "epoch": 0.53, + "learning_rate": 4.466747612382447e-05, + "loss": 0.3763, + "step": 94900 + }, + { + "epoch": 0.53, + "learning_rate": 4.466191110586463e-05, + "loss": 0.3853, + "step": 95000 + }, + { + "epoch": 0.53, + "learning_rate": 4.4656289875602174e-05, + "loss": 0.3851, + "step": 95100 + }, + { + "epoch": 0.54, + "learning_rate": 4.465066864533972e-05, + "loss": 0.3761, + "step": 95200 + }, + { + "epoch": 0.54, + "learning_rate": 4.4645047415077266e-05, + "loss": 0.375, + "step": 95300 + }, + { + "epoch": 0.54, + "learning_rate": 4.4639426184814806e-05, + "loss": 0.3679, + "step": 95400 + }, + { + "epoch": 0.54, + "learning_rate": 4.463380495455235e-05, + "loss": 0.3827, + "step": 95500 + }, + { + "epoch": 0.54, + "learning_rate": 4.46281837242899e-05, + "loss": 0.3676, + "step": 95600 + }, + { + "epoch": 0.54, + "learning_rate": 4.4622562494027445e-05, + "loss": 0.3824, + "step": 95700 + }, + { + "epoch": 0.54, + "learning_rate": 4.461694126376499e-05, + "loss": 0.3812, + "step": 95800 + }, + { + "epoch": 0.54, + "learning_rate": 4.461132003350254e-05, + "loss": 0.376, + "step": 95900 + }, + { + "epoch": 0.54, + "learning_rate": 4.4605698803240084e-05, + "loss": 0.3842, + "step": 96000 + }, + { + "epoch": 0.54, + "learning_rate": 4.460007757297762e-05, + "loss": 0.3765, + "step": 96100 + }, + { + "epoch": 0.54, + "learning_rate": 4.459445634271517e-05, + "loss": 0.381, + "step": 96200 + }, + { + "epoch": 0.54, + "learning_rate": 4.4588835112452716e-05, + "loss": 0.377, + "step": 96300 + }, + { + "epoch": 0.54, + "learning_rate": 4.4583213882190255e-05, + "loss": 0.3812, + "step": 96400 + }, + { + "epoch": 0.54, + "learning_rate": 4.45775926519278e-05, + "loss": 0.3758, + "step": 96500 + }, + { + "epoch": 0.54, + "learning_rate": 4.457197142166535e-05, + "loss": 0.3731, + "step": 96600 + }, + { + "epoch": 0.54, + "learning_rate": 4.4566350191402894e-05, + "loss": 0.378, + "step": 96700 + }, + { + "epoch": 0.54, + "learning_rate": 4.4560728961140433e-05, + "loss": 0.3776, + "step": 96800 + }, + { + "epoch": 0.54, + "learning_rate": 4.455510773087798e-05, + "loss": 0.3781, + "step": 96900 + }, + { + "epoch": 0.55, + "learning_rate": 4.4549486500615526e-05, + "loss": 0.379, + "step": 97000 + }, + { + "epoch": 0.55, + "learning_rate": 4.454386527035307e-05, + "loss": 0.3747, + "step": 97100 + }, + { + "epoch": 0.55, + "learning_rate": 4.453824404009062e-05, + "loss": 0.3598, + "step": 97200 + }, + { + "epoch": 0.55, + "learning_rate": 4.4532622809828165e-05, + "loss": 0.3821, + "step": 97300 + }, + { + "epoch": 0.55, + "learning_rate": 4.452705779186833e-05, + "loss": 0.3837, + "step": 97400 + }, + { + "epoch": 0.55, + "learning_rate": 4.452143656160587e-05, + "loss": 0.3714, + "step": 97500 + }, + { + "epoch": 0.55, + "learning_rate": 4.451581533134342e-05, + "loss": 0.3694, + "step": 97600 + }, + { + "epoch": 0.55, + "learning_rate": 4.4510194101080965e-05, + "loss": 0.3728, + "step": 97700 + }, + { + "epoch": 0.55, + "learning_rate": 4.450457287081851e-05, + "loss": 0.3767, + "step": 97800 + }, + { + "epoch": 0.55, + "learning_rate": 4.449895164055605e-05, + "loss": 0.3666, + "step": 97900 + }, + { + "epoch": 0.55, + "learning_rate": 4.4493330410293597e-05, + "loss": 0.3803, + "step": 98000 + }, + { + "epoch": 0.55, + "learning_rate": 4.448770918003114e-05, + "loss": 0.3693, + "step": 98100 + }, + { + "epoch": 0.55, + "learning_rate": 4.448208794976869e-05, + "loss": 0.3818, + "step": 98200 + }, + { + "epoch": 0.55, + "learning_rate": 4.4476466719506235e-05, + "loss": 0.3752, + "step": 98300 + }, + { + "epoch": 0.55, + "learning_rate": 4.447084548924378e-05, + "loss": 0.3716, + "step": 98400 + }, + { + "epoch": 0.55, + "learning_rate": 4.446522425898133e-05, + "loss": 0.3697, + "step": 98500 + }, + { + "epoch": 0.55, + "learning_rate": 4.445960302871887e-05, + "loss": 0.3741, + "step": 98600 + }, + { + "epoch": 0.55, + "learning_rate": 4.4453981798456414e-05, + "loss": 0.3774, + "step": 98700 + }, + { + "epoch": 0.56, + "learning_rate": 4.444836056819396e-05, + "loss": 0.3637, + "step": 98800 + }, + { + "epoch": 0.56, + "learning_rate": 4.44427393379315e-05, + "loss": 0.3748, + "step": 98900 + }, + { + "epoch": 0.56, + "learning_rate": 4.4437118107669046e-05, + "loss": 0.3756, + "step": 99000 + }, + { + "epoch": 0.56, + "learning_rate": 4.443149687740659e-05, + "loss": 0.3713, + "step": 99100 + }, + { + "epoch": 0.56, + "learning_rate": 4.442587564714414e-05, + "loss": 0.3794, + "step": 99200 + }, + { + "epoch": 0.56, + "learning_rate": 4.442025441688168e-05, + "loss": 0.3647, + "step": 99300 + }, + { + "epoch": 0.56, + "learning_rate": 4.4414633186619224e-05, + "loss": 0.3762, + "step": 99400 + }, + { + "epoch": 0.56, + "learning_rate": 4.440901195635677e-05, + "loss": 0.3655, + "step": 99500 + }, + { + "epoch": 0.56, + "learning_rate": 4.440339072609431e-05, + "loss": 0.3762, + "step": 99600 + }, + { + "epoch": 0.56, + "learning_rate": 4.4397769495831856e-05, + "loss": 0.3643, + "step": 99700 + }, + { + "epoch": 0.56, + "learning_rate": 4.43921482655694e-05, + "loss": 0.3744, + "step": 99800 + }, + { + "epoch": 0.56, + "learning_rate": 4.438652703530695e-05, + "loss": 0.3697, + "step": 99900 + }, + { + "epoch": 0.56, + "learning_rate": 4.4380905805044495e-05, + "loss": 0.375, + "step": 100000 + }, + { + "epoch": 0.56, + "learning_rate": 4.437528457478204e-05, + "loss": 0.3759, + "step": 100100 + }, + { + "epoch": 0.56, + "learning_rate": 4.436966334451959e-05, + "loss": 0.3705, + "step": 100200 + }, + { + "epoch": 0.56, + "learning_rate": 4.436404211425713e-05, + "loss": 0.3684, + "step": 100300 + }, + { + "epoch": 0.56, + "learning_rate": 4.435842088399467e-05, + "loss": 0.3733, + "step": 100400 + }, + { + "epoch": 0.56, + "learning_rate": 4.435279965373222e-05, + "loss": 0.3791, + "step": 100500 + }, + { + "epoch": 0.57, + "learning_rate": 4.4347178423469766e-05, + "loss": 0.3708, + "step": 100600 + }, + { + "epoch": 0.57, + "learning_rate": 4.4341557193207305e-05, + "loss": 0.3655, + "step": 100700 + }, + { + "epoch": 0.57, + "learning_rate": 4.433593596294485e-05, + "loss": 0.3764, + "step": 100800 + }, + { + "epoch": 0.57, + "learning_rate": 4.43303147326824e-05, + "loss": 0.3794, + "step": 100900 + }, + { + "epoch": 0.57, + "learning_rate": 4.432469350241994e-05, + "loss": 0.3646, + "step": 101000 + }, + { + "epoch": 0.57, + "learning_rate": 4.4319072272157484e-05, + "loss": 0.3682, + "step": 101100 + }, + { + "epoch": 0.57, + "learning_rate": 4.431345104189503e-05, + "loss": 0.3701, + "step": 101200 + }, + { + "epoch": 0.57, + "learning_rate": 4.4307829811632576e-05, + "loss": 0.3687, + "step": 101300 + }, + { + "epoch": 0.57, + "learning_rate": 4.430220858137012e-05, + "loss": 0.3653, + "step": 101400 + }, + { + "epoch": 0.57, + "learning_rate": 4.429658735110767e-05, + "loss": 0.3593, + "step": 101500 + }, + { + "epoch": 0.57, + "learning_rate": 4.4290966120845215e-05, + "loss": 0.3754, + "step": 101600 + }, + { + "epoch": 0.57, + "learning_rate": 4.4285344890582755e-05, + "loss": 0.3658, + "step": 101700 + }, + { + "epoch": 0.57, + "learning_rate": 4.42797236603203e-05, + "loss": 0.3705, + "step": 101800 + }, + { + "epoch": 0.57, + "learning_rate": 4.427410243005785e-05, + "loss": 0.3659, + "step": 101900 + }, + { + "epoch": 0.57, + "learning_rate": 4.4268481199795393e-05, + "loss": 0.358, + "step": 102000 + }, + { + "epoch": 0.57, + "learning_rate": 4.426285996953293e-05, + "loss": 0.3584, + "step": 102100 + }, + { + "epoch": 0.57, + "learning_rate": 4.425723873927048e-05, + "loss": 0.3656, + "step": 102200 + }, + { + "epoch": 0.58, + "learning_rate": 4.4251617509008025e-05, + "loss": 0.3595, + "step": 102300 + }, + { + "epoch": 0.58, + "learning_rate": 4.4245996278745565e-05, + "loss": 0.3651, + "step": 102400 + }, + { + "epoch": 0.58, + "learning_rate": 4.424037504848311e-05, + "loss": 0.3589, + "step": 102500 + }, + { + "epoch": 0.58, + "learning_rate": 4.423475381822066e-05, + "loss": 0.3677, + "step": 102600 + }, + { + "epoch": 0.58, + "learning_rate": 4.4229132587958204e-05, + "loss": 0.3654, + "step": 102700 + }, + { + "epoch": 0.58, + "learning_rate": 4.422351135769574e-05, + "loss": 0.367, + "step": 102800 + }, + { + "epoch": 0.58, + "learning_rate": 4.421789012743329e-05, + "loss": 0.3565, + "step": 102900 + }, + { + "epoch": 0.58, + "learning_rate": 4.4212268897170836e-05, + "loss": 0.3613, + "step": 103000 + }, + { + "epoch": 0.58, + "learning_rate": 4.420664766690838e-05, + "loss": 0.3681, + "step": 103100 + }, + { + "epoch": 0.58, + "learning_rate": 4.420102643664593e-05, + "loss": 0.3757, + "step": 103200 + }, + { + "epoch": 0.58, + "learning_rate": 4.4195405206383475e-05, + "loss": 0.3704, + "step": 103300 + }, + { + "epoch": 0.58, + "learning_rate": 4.418978397612102e-05, + "loss": 0.3582, + "step": 103400 + }, + { + "epoch": 0.58, + "learning_rate": 4.418416274585856e-05, + "loss": 0.3719, + "step": 103500 + }, + { + "epoch": 0.58, + "learning_rate": 4.417854151559611e-05, + "loss": 0.3696, + "step": 103600 + }, + { + "epoch": 0.58, + "learning_rate": 4.417292028533365e-05, + "loss": 0.3537, + "step": 103700 + }, + { + "epoch": 0.58, + "learning_rate": 4.416729905507119e-05, + "loss": 0.364, + "step": 103800 + }, + { + "epoch": 0.58, + "learning_rate": 4.416167782480874e-05, + "loss": 0.3716, + "step": 103900 + }, + { + "epoch": 0.58, + "learning_rate": 4.4156056594546285e-05, + "loss": 0.3623, + "step": 104000 + }, + { + "epoch": 0.59, + "learning_rate": 4.415043536428383e-05, + "loss": 0.358, + "step": 104100 + }, + { + "epoch": 0.59, + "learning_rate": 4.414481413402137e-05, + "loss": 0.3691, + "step": 104200 + }, + { + "epoch": 0.59, + "learning_rate": 4.413919290375892e-05, + "loss": 0.3629, + "step": 104300 + }, + { + "epoch": 0.59, + "learning_rate": 4.4133571673496463e-05, + "loss": 0.372, + "step": 104400 + }, + { + "epoch": 0.59, + "learning_rate": 4.4127950443234e-05, + "loss": 0.3583, + "step": 104500 + }, + { + "epoch": 0.59, + "learning_rate": 4.4122329212971556e-05, + "loss": 0.3695, + "step": 104600 + }, + { + "epoch": 0.59, + "learning_rate": 4.41167079827091e-05, + "loss": 0.3648, + "step": 104700 + }, + { + "epoch": 0.59, + "learning_rate": 4.411108675244665e-05, + "loss": 0.3688, + "step": 104800 + }, + { + "epoch": 0.59, + "learning_rate": 4.410546552218419e-05, + "loss": 0.3637, + "step": 104900 + }, + { + "epoch": 0.59, + "learning_rate": 4.4099844291921734e-05, + "loss": 0.3569, + "step": 105000 + }, + { + "epoch": 0.59, + "learning_rate": 4.409422306165928e-05, + "loss": 0.3601, + "step": 105100 + }, + { + "epoch": 0.59, + "learning_rate": 4.408860183139682e-05, + "loss": 0.3607, + "step": 105200 + }, + { + "epoch": 0.59, + "learning_rate": 4.4082980601134366e-05, + "loss": 0.3626, + "step": 105300 + }, + { + "epoch": 0.59, + "learning_rate": 4.407735937087191e-05, + "loss": 0.3644, + "step": 105400 + }, + { + "epoch": 0.59, + "learning_rate": 4.407173814060945e-05, + "loss": 0.3607, + "step": 105500 + }, + { + "epoch": 0.59, + "learning_rate": 4.4066173122649627e-05, + "loss": 0.3715, + "step": 105600 + }, + { + "epoch": 0.59, + "learning_rate": 4.406055189238717e-05, + "loss": 0.3562, + "step": 105700 + }, + { + "epoch": 0.59, + "learning_rate": 4.405493066212472e-05, + "loss": 0.3494, + "step": 105800 + }, + { + "epoch": 0.6, + "learning_rate": 4.404930943186226e-05, + "loss": 0.3573, + "step": 105900 + }, + { + "epoch": 0.6, + "learning_rate": 4.4043688201599805e-05, + "loss": 0.3628, + "step": 106000 + }, + { + "epoch": 0.6, + "learning_rate": 4.403806697133735e-05, + "loss": 0.3572, + "step": 106100 + }, + { + "epoch": 0.6, + "learning_rate": 4.40324457410749e-05, + "loss": 0.3681, + "step": 106200 + }, + { + "epoch": 0.6, + "learning_rate": 4.402688072311506e-05, + "loss": 0.3614, + "step": 106300 + }, + { + "epoch": 0.6, + "learning_rate": 4.4021259492852605e-05, + "loss": 0.3619, + "step": 106400 + }, + { + "epoch": 0.6, + "learning_rate": 4.401563826259015e-05, + "loss": 0.3644, + "step": 106500 + }, + { + "epoch": 0.6, + "learning_rate": 4.40100170323277e-05, + "loss": 0.363, + "step": 106600 + }, + { + "epoch": 0.6, + "learning_rate": 4.4004395802065243e-05, + "loss": 0.3593, + "step": 106700 + }, + { + "epoch": 0.6, + "learning_rate": 4.399877457180279e-05, + "loss": 0.3642, + "step": 106800 + }, + { + "epoch": 0.6, + "learning_rate": 4.3993153341540336e-05, + "loss": 0.362, + "step": 106900 + }, + { + "epoch": 0.6, + "learning_rate": 4.3987532111277875e-05, + "loss": 0.3646, + "step": 107000 + }, + { + "epoch": 0.6, + "learning_rate": 4.398191088101542e-05, + "loss": 0.3577, + "step": 107100 + }, + { + "epoch": 0.6, + "learning_rate": 4.397628965075297e-05, + "loss": 0.3555, + "step": 107200 + }, + { + "epoch": 0.6, + "learning_rate": 4.3970668420490514e-05, + "loss": 0.3641, + "step": 107300 + }, + { + "epoch": 0.6, + "learning_rate": 4.3965047190228054e-05, + "loss": 0.3537, + "step": 107400 + }, + { + "epoch": 0.6, + "learning_rate": 4.39594259599656e-05, + "loss": 0.3582, + "step": 107500 + }, + { + "epoch": 0.6, + "learning_rate": 4.3953804729703146e-05, + "loss": 0.359, + "step": 107600 + }, + { + "epoch": 0.61, + "learning_rate": 4.3948183499440686e-05, + "loss": 0.3665, + "step": 107700 + }, + { + "epoch": 0.61, + "learning_rate": 4.394256226917823e-05, + "loss": 0.3615, + "step": 107800 + }, + { + "epoch": 0.61, + "learning_rate": 4.393694103891578e-05, + "loss": 0.3653, + "step": 107900 + }, + { + "epoch": 0.61, + "learning_rate": 4.3931319808653325e-05, + "loss": 0.3588, + "step": 108000 + }, + { + "epoch": 0.61, + "learning_rate": 4.3925698578390864e-05, + "loss": 0.3648, + "step": 108100 + }, + { + "epoch": 0.61, + "learning_rate": 4.392007734812841e-05, + "loss": 0.3551, + "step": 108200 + }, + { + "epoch": 0.61, + "learning_rate": 4.391445611786596e-05, + "loss": 0.3581, + "step": 108300 + }, + { + "epoch": 0.61, + "learning_rate": 4.39088348876035e-05, + "loss": 0.3519, + "step": 108400 + }, + { + "epoch": 0.61, + "learning_rate": 4.390326986964367e-05, + "loss": 0.3532, + "step": 108500 + }, + { + "epoch": 0.61, + "learning_rate": 4.389764863938122e-05, + "loss": 0.3555, + "step": 108600 + }, + { + "epoch": 0.61, + "learning_rate": 4.389202740911876e-05, + "loss": 0.3519, + "step": 108700 + }, + { + "epoch": 0.61, + "learning_rate": 4.38864061788563e-05, + "loss": 0.3562, + "step": 108800 + }, + { + "epoch": 0.61, + "learning_rate": 4.388078494859385e-05, + "loss": 0.3502, + "step": 108900 + }, + { + "epoch": 0.61, + "learning_rate": 4.3875163718331395e-05, + "loss": 0.3544, + "step": 109000 + }, + { + "epoch": 0.61, + "learning_rate": 4.386954248806894e-05, + "loss": 0.3599, + "step": 109100 + }, + { + "epoch": 0.61, + "learning_rate": 4.386392125780648e-05, + "loss": 0.3595, + "step": 109200 + }, + { + "epoch": 0.61, + "learning_rate": 4.385830002754403e-05, + "loss": 0.3606, + "step": 109300 + }, + { + "epoch": 0.61, + "learning_rate": 4.3852678797281574e-05, + "loss": 0.3586, + "step": 109400 + }, + { + "epoch": 0.62, + "learning_rate": 4.384711377932175e-05, + "loss": 0.3639, + "step": 109500 + }, + { + "epoch": 0.62, + "learning_rate": 4.384149254905929e-05, + "loss": 0.3576, + "step": 109600 + }, + { + "epoch": 0.62, + "learning_rate": 4.3835871318796834e-05, + "loss": 0.3542, + "step": 109700 + }, + { + "epoch": 0.62, + "learning_rate": 4.383025008853438e-05, + "loss": 0.3622, + "step": 109800 + }, + { + "epoch": 0.62, + "learning_rate": 4.382462885827192e-05, + "loss": 0.3677, + "step": 109900 + }, + { + "epoch": 0.62, + "learning_rate": 4.3819007628009466e-05, + "loss": 0.3556, + "step": 110000 + }, + { + "epoch": 0.62, + "learning_rate": 4.381338639774701e-05, + "loss": 0.3512, + "step": 110100 + }, + { + "epoch": 0.62, + "learning_rate": 4.3807821379787186e-05, + "loss": 0.3553, + "step": 110200 + }, + { + "epoch": 0.62, + "learning_rate": 4.3802200149524726e-05, + "loss": 0.355, + "step": 110300 + }, + { + "epoch": 0.62, + "learning_rate": 4.379657891926227e-05, + "loss": 0.3479, + "step": 110400 + }, + { + "epoch": 0.62, + "learning_rate": 4.379101390130244e-05, + "loss": 0.3513, + "step": 110500 + }, + { + "epoch": 0.62, + "learning_rate": 4.3785392671039986e-05, + "loss": 0.3509, + "step": 110600 + }, + { + "epoch": 0.62, + "learning_rate": 4.3779771440777526e-05, + "loss": 0.3499, + "step": 110700 + }, + { + "epoch": 0.62, + "learning_rate": 4.377415021051507e-05, + "loss": 0.3526, + "step": 110800 + }, + { + "epoch": 0.62, + "learning_rate": 4.376852898025262e-05, + "loss": 0.3636, + "step": 110900 + }, + { + "epoch": 0.62, + "learning_rate": 4.3762907749990165e-05, + "loss": 0.3453, + "step": 111000 + }, + { + "epoch": 0.62, + "learning_rate": 4.375728651972771e-05, + "loss": 0.3463, + "step": 111100 + }, + { + "epoch": 0.63, + "learning_rate": 4.375166528946526e-05, + "loss": 0.3647, + "step": 111200 + }, + { + "epoch": 0.63, + "learning_rate": 4.37460440592028e-05, + "loss": 0.3533, + "step": 111300 + }, + { + "epoch": 0.63, + "learning_rate": 4.374042282894034e-05, + "loss": 0.3477, + "step": 111400 + }, + { + "epoch": 0.63, + "learning_rate": 4.373480159867789e-05, + "loss": 0.351, + "step": 111500 + }, + { + "epoch": 0.63, + "learning_rate": 4.3729180368415435e-05, + "loss": 0.3571, + "step": 111600 + }, + { + "epoch": 0.63, + "learning_rate": 4.372355913815298e-05, + "loss": 0.3505, + "step": 111700 + }, + { + "epoch": 0.63, + "learning_rate": 4.371793790789052e-05, + "loss": 0.3574, + "step": 111800 + }, + { + "epoch": 0.63, + "learning_rate": 4.371231667762807e-05, + "loss": 0.341, + "step": 111900 + }, + { + "epoch": 0.63, + "learning_rate": 4.3706695447365614e-05, + "loss": 0.3523, + "step": 112000 + }, + { + "epoch": 0.63, + "learning_rate": 4.370107421710315e-05, + "loss": 0.347, + "step": 112100 + }, + { + "epoch": 0.63, + "learning_rate": 4.36954529868407e-05, + "loss": 0.3608, + "step": 112200 + }, + { + "epoch": 0.63, + "learning_rate": 4.3689831756578246e-05, + "loss": 0.36, + "step": 112300 + }, + { + "epoch": 0.63, + "learning_rate": 4.368421052631579e-05, + "loss": 0.3526, + "step": 112400 + }, + { + "epoch": 0.63, + "learning_rate": 4.367858929605333e-05, + "loss": 0.3534, + "step": 112500 + }, + { + "epoch": 0.63, + "learning_rate": 4.3672968065790885e-05, + "loss": 0.3602, + "step": 112600 + }, + { + "epoch": 0.63, + "learning_rate": 4.366734683552843e-05, + "loss": 0.3528, + "step": 112700 + }, + { + "epoch": 0.63, + "learning_rate": 4.366172560526597e-05, + "loss": 0.3475, + "step": 112800 + }, + { + "epoch": 0.63, + "learning_rate": 4.365610437500352e-05, + "loss": 0.3558, + "step": 112900 + }, + { + "epoch": 0.64, + "learning_rate": 4.365048314474106e-05, + "loss": 0.3528, + "step": 113000 + }, + { + "epoch": 0.64, + "learning_rate": 4.364486191447861e-05, + "loss": 0.3506, + "step": 113100 + }, + { + "epoch": 0.64, + "learning_rate": 4.363924068421615e-05, + "loss": 0.3514, + "step": 113200 + }, + { + "epoch": 0.64, + "learning_rate": 4.3633619453953695e-05, + "loss": 0.3579, + "step": 113300 + }, + { + "epoch": 0.64, + "learning_rate": 4.362799822369124e-05, + "loss": 0.352, + "step": 113400 + }, + { + "epoch": 0.64, + "learning_rate": 4.362237699342878e-05, + "loss": 0.3467, + "step": 113500 + }, + { + "epoch": 0.64, + "learning_rate": 4.361675576316633e-05, + "loss": 0.3551, + "step": 113600 + }, + { + "epoch": 0.64, + "learning_rate": 4.361113453290387e-05, + "loss": 0.3525, + "step": 113700 + }, + { + "epoch": 0.64, + "learning_rate": 4.360551330264142e-05, + "loss": 0.3523, + "step": 113800 + }, + { + "epoch": 0.64, + "learning_rate": 4.359989207237896e-05, + "loss": 0.3458, + "step": 113900 + }, + { + "epoch": 0.64, + "learning_rate": 4.3594270842116505e-05, + "loss": 0.3494, + "step": 114000 + }, + { + "epoch": 0.64, + "learning_rate": 4.358864961185405e-05, + "loss": 0.3562, + "step": 114100 + }, + { + "epoch": 0.64, + "learning_rate": 4.35830283815916e-05, + "loss": 0.3537, + "step": 114200 + }, + { + "epoch": 0.64, + "learning_rate": 4.3577407151329144e-05, + "loss": 0.346, + "step": 114300 + }, + { + "epoch": 0.64, + "learning_rate": 4.357178592106669e-05, + "loss": 0.3409, + "step": 114400 + }, + { + "epoch": 0.64, + "learning_rate": 4.356616469080424e-05, + "loss": 0.3479, + "step": 114500 + }, + { + "epoch": 0.64, + "learning_rate": 4.3560543460541776e-05, + "loss": 0.3468, + "step": 114600 + }, + { + "epoch": 0.64, + "learning_rate": 4.355492223027932e-05, + "loss": 0.3493, + "step": 114700 + }, + { + "epoch": 0.65, + "learning_rate": 4.354930100001687e-05, + "loss": 0.3539, + "step": 114800 + }, + { + "epoch": 0.65, + "learning_rate": 4.354367976975441e-05, + "loss": 0.3429, + "step": 114900 + }, + { + "epoch": 0.65, + "learning_rate": 4.3538058539491955e-05, + "loss": 0.351, + "step": 115000 + }, + { + "epoch": 0.65, + "learning_rate": 4.35324373092295e-05, + "loss": 0.3555, + "step": 115100 + }, + { + "epoch": 0.65, + "learning_rate": 4.352681607896704e-05, + "loss": 0.3376, + "step": 115200 + }, + { + "epoch": 0.65, + "learning_rate": 4.352119484870459e-05, + "loss": 0.3434, + "step": 115300 + }, + { + "epoch": 0.65, + "learning_rate": 4.351557361844213e-05, + "loss": 0.3473, + "step": 115400 + }, + { + "epoch": 0.65, + "learning_rate": 4.350995238817968e-05, + "loss": 0.3501, + "step": 115500 + }, + { + "epoch": 0.65, + "learning_rate": 4.350433115791722e-05, + "loss": 0.3468, + "step": 115600 + }, + { + "epoch": 0.65, + "learning_rate": 4.3498709927654765e-05, + "loss": 0.3444, + "step": 115700 + }, + { + "epoch": 0.65, + "learning_rate": 4.349308869739231e-05, + "loss": 0.3502, + "step": 115800 + }, + { + "epoch": 0.65, + "learning_rate": 4.348746746712986e-05, + "loss": 0.3477, + "step": 115900 + }, + { + "epoch": 0.65, + "learning_rate": 4.3481846236867404e-05, + "loss": 0.3475, + "step": 116000 + }, + { + "epoch": 0.65, + "learning_rate": 4.347622500660495e-05, + "loss": 0.3537, + "step": 116100 + }, + { + "epoch": 0.65, + "learning_rate": 4.3470603776342496e-05, + "loss": 0.3423, + "step": 116200 + }, + { + "epoch": 0.65, + "learning_rate": 4.3464982546080036e-05, + "loss": 0.3438, + "step": 116300 + }, + { + "epoch": 0.65, + "learning_rate": 4.345936131581758e-05, + "loss": 0.3458, + "step": 116400 + }, + { + "epoch": 0.65, + "learning_rate": 4.345374008555513e-05, + "loss": 0.3398, + "step": 116500 + }, + { + "epoch": 0.66, + "learning_rate": 4.344811885529267e-05, + "loss": 0.3426, + "step": 116600 + }, + { + "epoch": 0.66, + "learning_rate": 4.3442497625030214e-05, + "loss": 0.3487, + "step": 116700 + }, + { + "epoch": 0.66, + "learning_rate": 4.343687639476776e-05, + "loss": 0.3398, + "step": 116800 + }, + { + "epoch": 0.66, + "learning_rate": 4.343125516450531e-05, + "loss": 0.3475, + "step": 116900 + }, + { + "epoch": 0.66, + "learning_rate": 4.3425633934242846e-05, + "loss": 0.3444, + "step": 117000 + }, + { + "epoch": 0.66, + "learning_rate": 4.342001270398039e-05, + "loss": 0.3456, + "step": 117100 + }, + { + "epoch": 0.66, + "learning_rate": 4.341439147371794e-05, + "loss": 0.3578, + "step": 117200 + }, + { + "epoch": 0.66, + "learning_rate": 4.3408770243455485e-05, + "loss": 0.3421, + "step": 117300 + }, + { + "epoch": 0.66, + "learning_rate": 4.340314901319303e-05, + "loss": 0.3474, + "step": 117400 + }, + { + "epoch": 0.66, + "learning_rate": 4.339752778293058e-05, + "loss": 0.3477, + "step": 117500 + }, + { + "epoch": 0.66, + "learning_rate": 4.3391906552668124e-05, + "loss": 0.3502, + "step": 117600 + }, + { + "epoch": 0.66, + "learning_rate": 4.3386285322405663e-05, + "loss": 0.3441, + "step": 117700 + }, + { + "epoch": 0.66, + "learning_rate": 4.338066409214321e-05, + "loss": 0.3409, + "step": 117800 + }, + { + "epoch": 0.66, + "learning_rate": 4.3375042861880756e-05, + "loss": 0.339, + "step": 117900 + }, + { + "epoch": 0.66, + "learning_rate": 4.3369421631618295e-05, + "loss": 0.3435, + "step": 118000 + }, + { + "epoch": 0.66, + "learning_rate": 4.336380040135584e-05, + "loss": 0.3441, + "step": 118100 + }, + { + "epoch": 0.66, + "learning_rate": 4.335817917109339e-05, + "loss": 0.3443, + "step": 118200 + }, + { + "epoch": 0.66, + "learning_rate": 4.3352557940830934e-05, + "loss": 0.345, + "step": 118300 + }, + { + "epoch": 0.67, + "learning_rate": 4.3346936710568474e-05, + "loss": 0.3517, + "step": 118400 + }, + { + "epoch": 0.67, + "learning_rate": 4.334131548030602e-05, + "loss": 0.3485, + "step": 118500 + }, + { + "epoch": 0.67, + "learning_rate": 4.3335694250043566e-05, + "loss": 0.3437, + "step": 118600 + }, + { + "epoch": 0.67, + "learning_rate": 4.3330073019781106e-05, + "loss": 0.3456, + "step": 118700 + }, + { + "epoch": 0.67, + "learning_rate": 4.332445178951865e-05, + "loss": 0.3412, + "step": 118800 + }, + { + "epoch": 0.67, + "learning_rate": 4.33188305592562e-05, + "loss": 0.3447, + "step": 118900 + }, + { + "epoch": 0.67, + "learning_rate": 4.3313209328993745e-05, + "loss": 0.3557, + "step": 119000 + }, + { + "epoch": 0.67, + "learning_rate": 4.330758809873129e-05, + "loss": 0.3389, + "step": 119100 + }, + { + "epoch": 0.67, + "learning_rate": 4.330196686846884e-05, + "loss": 0.3366, + "step": 119200 + }, + { + "epoch": 0.67, + "learning_rate": 4.3296345638206384e-05, + "loss": 0.3379, + "step": 119300 + }, + { + "epoch": 0.67, + "learning_rate": 4.329072440794392e-05, + "loss": 0.34, + "step": 119400 + }, + { + "epoch": 0.67, + "learning_rate": 4.328510317768147e-05, + "loss": 0.3384, + "step": 119500 + }, + { + "epoch": 0.67, + "learning_rate": 4.3279481947419016e-05, + "loss": 0.3498, + "step": 119600 + }, + { + "epoch": 0.67, + "learning_rate": 4.327386071715656e-05, + "loss": 0.3439, + "step": 119700 + }, + { + "epoch": 0.67, + "learning_rate": 4.32682394868941e-05, + "loss": 0.3456, + "step": 119800 + }, + { + "epoch": 0.67, + "learning_rate": 4.326261825663165e-05, + "loss": 0.3453, + "step": 119900 + }, + { + "epoch": 0.67, + "learning_rate": 4.3256997026369194e-05, + "loss": 0.3428, + "step": 120000 + }, + { + "epoch": 0.68, + "learning_rate": 4.3251375796106733e-05, + "loss": 0.342, + "step": 120100 + }, + { + "epoch": 0.68, + "learning_rate": 4.324575456584428e-05, + "loss": 0.3443, + "step": 120200 + }, + { + "epoch": 0.68, + "learning_rate": 4.3240133335581826e-05, + "loss": 0.3374, + "step": 120300 + }, + { + "epoch": 0.68, + "learning_rate": 4.323451210531937e-05, + "loss": 0.34, + "step": 120400 + }, + { + "epoch": 0.68, + "learning_rate": 4.322889087505692e-05, + "loss": 0.3435, + "step": 120500 + }, + { + "epoch": 0.68, + "learning_rate": 4.3223325857097086e-05, + "loss": 0.3439, + "step": 120600 + }, + { + "epoch": 0.68, + "learning_rate": 4.3217760839137254e-05, + "loss": 0.3405, + "step": 120700 + }, + { + "epoch": 0.68, + "learning_rate": 4.32121396088748e-05, + "loss": 0.3414, + "step": 120800 + }, + { + "epoch": 0.68, + "learning_rate": 4.320651837861234e-05, + "loss": 0.3361, + "step": 120900 + }, + { + "epoch": 0.68, + "learning_rate": 4.3200897148349886e-05, + "loss": 0.343, + "step": 121000 + }, + { + "epoch": 0.68, + "learning_rate": 4.319527591808744e-05, + "loss": 0.3488, + "step": 121100 + }, + { + "epoch": 0.68, + "learning_rate": 4.3189654687824985e-05, + "loss": 0.3365, + "step": 121200 + }, + { + "epoch": 0.68, + "learning_rate": 4.3184033457562525e-05, + "loss": 0.3493, + "step": 121300 + }, + { + "epoch": 0.68, + "learning_rate": 4.317841222730007e-05, + "loss": 0.3376, + "step": 121400 + }, + { + "epoch": 0.68, + "learning_rate": 4.317279099703762e-05, + "loss": 0.3513, + "step": 121500 + }, + { + "epoch": 0.68, + "learning_rate": 4.316716976677516e-05, + "loss": 0.3471, + "step": 121600 + }, + { + "epoch": 0.68, + "learning_rate": 4.31615485365127e-05, + "loss": 0.3441, + "step": 121700 + }, + { + "epoch": 0.68, + "learning_rate": 4.315592730625025e-05, + "loss": 0.3504, + "step": 121800 + }, + { + "epoch": 0.69, + "learning_rate": 4.3150306075987796e-05, + "loss": 0.3491, + "step": 121900 + }, + { + "epoch": 0.69, + "learning_rate": 4.3144684845725335e-05, + "loss": 0.3413, + "step": 122000 + }, + { + "epoch": 0.69, + "learning_rate": 4.313906361546288e-05, + "loss": 0.3367, + "step": 122100 + }, + { + "epoch": 0.69, + "learning_rate": 4.313344238520043e-05, + "loss": 0.3429, + "step": 122200 + }, + { + "epoch": 0.69, + "learning_rate": 4.312782115493797e-05, + "loss": 0.3401, + "step": 122300 + }, + { + "epoch": 0.69, + "learning_rate": 4.3122199924675513e-05, + "loss": 0.3377, + "step": 122400 + }, + { + "epoch": 0.69, + "learning_rate": 4.311657869441306e-05, + "loss": 0.3321, + "step": 122500 + }, + { + "epoch": 0.69, + "learning_rate": 4.3110957464150606e-05, + "loss": 0.3469, + "step": 122600 + }, + { + "epoch": 0.69, + "learning_rate": 4.310533623388815e-05, + "loss": 0.348, + "step": 122700 + }, + { + "epoch": 0.69, + "learning_rate": 4.30997150036257e-05, + "loss": 0.3361, + "step": 122800 + }, + { + "epoch": 0.69, + "learning_rate": 4.3094093773363245e-05, + "loss": 0.342, + "step": 122900 + }, + { + "epoch": 0.69, + "learning_rate": 4.3088472543100784e-05, + "loss": 0.3407, + "step": 123000 + }, + { + "epoch": 0.69, + "learning_rate": 4.308285131283833e-05, + "loss": 0.3421, + "step": 123100 + }, + { + "epoch": 0.69, + "learning_rate": 4.307723008257588e-05, + "loss": 0.3407, + "step": 123200 + }, + { + "epoch": 0.69, + "learning_rate": 4.307160885231342e-05, + "loss": 0.3419, + "step": 123300 + }, + { + "epoch": 0.69, + "learning_rate": 4.306598762205096e-05, + "loss": 0.3477, + "step": 123400 + }, + { + "epoch": 0.69, + "learning_rate": 4.306042260409113e-05, + "loss": 0.3413, + "step": 123500 + }, + { + "epoch": 0.69, + "learning_rate": 4.3054801373828677e-05, + "loss": 0.3489, + "step": 123600 + }, + { + "epoch": 0.7, + "learning_rate": 4.304918014356622e-05, + "loss": 0.3377, + "step": 123700 + }, + { + "epoch": 0.7, + "learning_rate": 4.304355891330377e-05, + "loss": 0.3399, + "step": 123800 + }, + { + "epoch": 0.7, + "learning_rate": 4.3037937683041315e-05, + "loss": 0.3462, + "step": 123900 + }, + { + "epoch": 0.7, + "learning_rate": 4.303231645277886e-05, + "loss": 0.3372, + "step": 124000 + }, + { + "epoch": 0.7, + "learning_rate": 4.30266952225164e-05, + "loss": 0.3437, + "step": 124100 + }, + { + "epoch": 0.7, + "learning_rate": 4.302107399225395e-05, + "loss": 0.3354, + "step": 124200 + }, + { + "epoch": 0.7, + "learning_rate": 4.3015452761991494e-05, + "loss": 0.3396, + "step": 124300 + }, + { + "epoch": 0.7, + "learning_rate": 4.300983153172904e-05, + "loss": 0.3395, + "step": 124400 + }, + { + "epoch": 0.7, + "learning_rate": 4.30042665137692e-05, + "loss": 0.3368, + "step": 124500 + }, + { + "epoch": 0.7, + "learning_rate": 4.299864528350675e-05, + "loss": 0.3395, + "step": 124600 + }, + { + "epoch": 0.7, + "learning_rate": 4.299302405324429e-05, + "loss": 0.3379, + "step": 124700 + }, + { + "epoch": 0.7, + "learning_rate": 4.298740282298184e-05, + "loss": 0.3436, + "step": 124800 + }, + { + "epoch": 0.7, + "learning_rate": 4.2981781592719386e-05, + "loss": 0.3424, + "step": 124900 + }, + { + "epoch": 0.7, + "learning_rate": 4.297616036245693e-05, + "loss": 0.3456, + "step": 125000 + }, + { + "epoch": 0.7, + "learning_rate": 4.297053913219448e-05, + "loss": 0.3356, + "step": 125100 + }, + { + "epoch": 0.7, + "learning_rate": 4.296491790193202e-05, + "loss": 0.3398, + "step": 125200 + }, + { + "epoch": 0.7, + "learning_rate": 4.2959296671669564e-05, + "loss": 0.3482, + "step": 125300 + }, + { + "epoch": 0.7, + "learning_rate": 4.295367544140711e-05, + "loss": 0.3391, + "step": 125400 + }, + { + "epoch": 0.71, + "learning_rate": 4.294805421114465e-05, + "loss": 0.3461, + "step": 125500 + }, + { + "epoch": 0.71, + "learning_rate": 4.2942432980882196e-05, + "loss": 0.3406, + "step": 125600 + }, + { + "epoch": 0.71, + "learning_rate": 4.293681175061974e-05, + "loss": 0.34, + "step": 125700 + }, + { + "epoch": 0.71, + "learning_rate": 4.293119052035729e-05, + "loss": 0.3382, + "step": 125800 + }, + { + "epoch": 0.71, + "learning_rate": 4.292556929009483e-05, + "loss": 0.3326, + "step": 125900 + }, + { + "epoch": 0.71, + "learning_rate": 4.2919948059832375e-05, + "loss": 0.3215, + "step": 126000 + }, + { + "epoch": 0.71, + "learning_rate": 4.291432682956992e-05, + "loss": 0.3347, + "step": 126100 + }, + { + "epoch": 0.71, + "learning_rate": 4.290870559930746e-05, + "loss": 0.3429, + "step": 126200 + }, + { + "epoch": 0.71, + "learning_rate": 4.2903084369045013e-05, + "loss": 0.3298, + "step": 126300 + }, + { + "epoch": 0.71, + "learning_rate": 4.289746313878256e-05, + "loss": 0.3425, + "step": 126400 + }, + { + "epoch": 0.71, + "learning_rate": 4.2891841908520106e-05, + "loss": 0.3315, + "step": 126500 + }, + { + "epoch": 0.71, + "learning_rate": 4.2886220678257646e-05, + "loss": 0.3422, + "step": 126600 + }, + { + "epoch": 0.71, + "learning_rate": 4.288059944799519e-05, + "loss": 0.3332, + "step": 126700 + }, + { + "epoch": 0.71, + "learning_rate": 4.287497821773274e-05, + "loss": 0.3401, + "step": 126800 + }, + { + "epoch": 0.71, + "learning_rate": 4.286935698747028e-05, + "loss": 0.331, + "step": 126900 + }, + { + "epoch": 0.71, + "learning_rate": 4.2863735757207824e-05, + "loss": 0.34, + "step": 127000 + }, + { + "epoch": 0.71, + "learning_rate": 4.285811452694537e-05, + "loss": 0.3364, + "step": 127100 + }, + { + "epoch": 0.72, + "learning_rate": 4.2852493296682916e-05, + "loss": 0.3343, + "step": 127200 + }, + { + "epoch": 0.72, + "learning_rate": 4.2846872066420456e-05, + "loss": 0.3312, + "step": 127300 + }, + { + "epoch": 0.72, + "learning_rate": 4.2841250836158e-05, + "loss": 0.3395, + "step": 127400 + }, + { + "epoch": 0.72, + "learning_rate": 4.283562960589555e-05, + "loss": 0.3333, + "step": 127500 + }, + { + "epoch": 0.72, + "learning_rate": 4.283000837563309e-05, + "loss": 0.3443, + "step": 127600 + }, + { + "epoch": 0.72, + "learning_rate": 4.2824387145370634e-05, + "loss": 0.3233, + "step": 127700 + }, + { + "epoch": 0.72, + "learning_rate": 4.281876591510818e-05, + "loss": 0.3436, + "step": 127800 + }, + { + "epoch": 0.72, + "learning_rate": 4.281314468484573e-05, + "loss": 0.3394, + "step": 127900 + }, + { + "epoch": 0.72, + "learning_rate": 4.2807579666885894e-05, + "loss": 0.3298, + "step": 128000 + }, + { + "epoch": 0.72, + "learning_rate": 4.280195843662344e-05, + "loss": 0.3325, + "step": 128100 + }, + { + "epoch": 0.72, + "learning_rate": 4.279633720636099e-05, + "loss": 0.3477, + "step": 128200 + }, + { + "epoch": 0.72, + "learning_rate": 4.279071597609853e-05, + "loss": 0.3452, + "step": 128300 + }, + { + "epoch": 0.72, + "learning_rate": 4.278509474583607e-05, + "loss": 0.3423, + "step": 128400 + }, + { + "epoch": 0.72, + "learning_rate": 4.277947351557362e-05, + "loss": 0.3298, + "step": 128500 + }, + { + "epoch": 0.72, + "learning_rate": 4.2773852285311165e-05, + "loss": 0.3338, + "step": 128600 + }, + { + "epoch": 0.72, + "learning_rate": 4.2768231055048705e-05, + "loss": 0.3289, + "step": 128700 + }, + { + "epoch": 0.72, + "learning_rate": 4.276260982478625e-05, + "loss": 0.3326, + "step": 128800 + }, + { + "epoch": 0.72, + "learning_rate": 4.27569885945238e-05, + "loss": 0.3273, + "step": 128900 + }, + { + "epoch": 0.73, + "learning_rate": 4.2751367364261344e-05, + "loss": 0.3452, + "step": 129000 + }, + { + "epoch": 0.73, + "learning_rate": 4.274574613399889e-05, + "loss": 0.331, + "step": 129100 + }, + { + "epoch": 0.73, + "learning_rate": 4.2740124903736436e-05, + "loss": 0.3276, + "step": 129200 + }, + { + "epoch": 0.73, + "learning_rate": 4.273450367347398e-05, + "loss": 0.3299, + "step": 129300 + }, + { + "epoch": 0.73, + "learning_rate": 4.272888244321152e-05, + "loss": 0.3374, + "step": 129400 + }, + { + "epoch": 0.73, + "learning_rate": 4.272326121294907e-05, + "loss": 0.3364, + "step": 129500 + }, + { + "epoch": 0.73, + "learning_rate": 4.2717639982686615e-05, + "loss": 0.3362, + "step": 129600 + }, + { + "epoch": 0.73, + "learning_rate": 4.271201875242416e-05, + "loss": 0.3289, + "step": 129700 + }, + { + "epoch": 0.73, + "learning_rate": 4.27063975221617e-05, + "loss": 0.338, + "step": 129800 + }, + { + "epoch": 0.73, + "learning_rate": 4.2700776291899247e-05, + "loss": 0.3362, + "step": 129900 + }, + { + "epoch": 0.73, + "learning_rate": 4.269515506163679e-05, + "loss": 0.3308, + "step": 130000 + }, + { + "epoch": 0.73, + "learning_rate": 4.268953383137433e-05, + "loss": 0.3345, + "step": 130100 + }, + { + "epoch": 0.73, + "learning_rate": 4.268391260111188e-05, + "loss": 0.3348, + "step": 130200 + }, + { + "epoch": 0.73, + "learning_rate": 4.2678291370849425e-05, + "loss": 0.3312, + "step": 130300 + }, + { + "epoch": 0.73, + "learning_rate": 4.267267014058697e-05, + "loss": 0.3332, + "step": 130400 + }, + { + "epoch": 0.73, + "learning_rate": 4.266704891032452e-05, + "loss": 0.3343, + "step": 130500 + }, + { + "epoch": 0.73, + "learning_rate": 4.2661427680062064e-05, + "loss": 0.3245, + "step": 130600 + }, + { + "epoch": 0.73, + "learning_rate": 4.265580644979961e-05, + "loss": 0.3295, + "step": 130700 + }, + { + "epoch": 0.74, + "learning_rate": 4.265018521953715e-05, + "loss": 0.3364, + "step": 130800 + }, + { + "epoch": 0.74, + "learning_rate": 4.2644563989274696e-05, + "loss": 0.3302, + "step": 130900 + }, + { + "epoch": 0.74, + "learning_rate": 4.263894275901224e-05, + "loss": 0.3276, + "step": 131000 + }, + { + "epoch": 0.74, + "learning_rate": 4.263332152874979e-05, + "loss": 0.3304, + "step": 131100 + }, + { + "epoch": 0.74, + "learning_rate": 4.262781272309258e-05, + "loss": 0.3227, + "step": 131200 + }, + { + "epoch": 0.74, + "learning_rate": 4.2622247705132745e-05, + "loss": 0.3405, + "step": 131300 + }, + { + "epoch": 0.74, + "learning_rate": 4.261662647487029e-05, + "loss": 0.3311, + "step": 131400 + }, + { + "epoch": 0.74, + "learning_rate": 4.261100524460784e-05, + "loss": 0.3346, + "step": 131500 + }, + { + "epoch": 0.74, + "learning_rate": 4.2605384014345384e-05, + "loss": 0.3286, + "step": 131600 + }, + { + "epoch": 0.74, + "learning_rate": 4.259976278408292e-05, + "loss": 0.3327, + "step": 131700 + }, + { + "epoch": 0.74, + "learning_rate": 4.259414155382047e-05, + "loss": 0.3367, + "step": 131800 + }, + { + "epoch": 0.74, + "learning_rate": 4.2588520323558016e-05, + "loss": 0.3284, + "step": 131900 + }, + { + "epoch": 0.74, + "learning_rate": 4.2582899093295555e-05, + "loss": 0.3297, + "step": 132000 + }, + { + "epoch": 0.74, + "learning_rate": 4.25772778630331e-05, + "loss": 0.331, + "step": 132100 + }, + { + "epoch": 0.74, + "learning_rate": 4.257165663277065e-05, + "loss": 0.3278, + "step": 132200 + }, + { + "epoch": 0.74, + "learning_rate": 4.2566035402508194e-05, + "loss": 0.3317, + "step": 132300 + }, + { + "epoch": 0.74, + "learning_rate": 4.256041417224574e-05, + "loss": 0.3338, + "step": 132400 + }, + { + "epoch": 0.74, + "learning_rate": 4.255479294198329e-05, + "loss": 0.3362, + "step": 132500 + }, + { + "epoch": 0.75, + "learning_rate": 4.254917171172083e-05, + "loss": 0.33, + "step": 132600 + }, + { + "epoch": 0.75, + "learning_rate": 4.254355048145837e-05, + "loss": 0.3237, + "step": 132700 + }, + { + "epoch": 0.75, + "learning_rate": 4.253792925119592e-05, + "loss": 0.3394, + "step": 132800 + }, + { + "epoch": 0.75, + "learning_rate": 4.2532308020933465e-05, + "loss": 0.3332, + "step": 132900 + }, + { + "epoch": 0.75, + "learning_rate": 4.252668679067101e-05, + "loss": 0.3209, + "step": 133000 + }, + { + "epoch": 0.75, + "learning_rate": 4.252106556040855e-05, + "loss": 0.3358, + "step": 133100 + }, + { + "epoch": 0.75, + "learning_rate": 4.25154443301461e-05, + "loss": 0.3326, + "step": 133200 + }, + { + "epoch": 0.75, + "learning_rate": 4.2509823099883643e-05, + "loss": 0.3217, + "step": 133300 + }, + { + "epoch": 0.75, + "learning_rate": 4.250420186962118e-05, + "loss": 0.3323, + "step": 133400 + }, + { + "epoch": 0.75, + "learning_rate": 4.249858063935873e-05, + "loss": 0.3241, + "step": 133500 + }, + { + "epoch": 0.75, + "learning_rate": 4.2492959409096275e-05, + "loss": 0.3352, + "step": 133600 + }, + { + "epoch": 0.75, + "learning_rate": 4.248733817883382e-05, + "loss": 0.3366, + "step": 133700 + }, + { + "epoch": 0.75, + "learning_rate": 4.248171694857137e-05, + "loss": 0.3395, + "step": 133800 + }, + { + "epoch": 0.75, + "learning_rate": 4.2476151930611536e-05, + "loss": 0.3299, + "step": 133900 + }, + { + "epoch": 0.75, + "learning_rate": 4.247053070034908e-05, + "loss": 0.3262, + "step": 134000 + }, + { + "epoch": 0.75, + "learning_rate": 4.246490947008663e-05, + "loss": 0.335, + "step": 134100 + }, + { + "epoch": 0.75, + "learning_rate": 4.245928823982417e-05, + "loss": 0.3356, + "step": 134200 + }, + { + "epoch": 0.75, + "learning_rate": 4.2453667009561714e-05, + "loss": 0.3264, + "step": 134300 + }, + { + "epoch": 0.76, + "learning_rate": 4.244804577929926e-05, + "loss": 0.3208, + "step": 134400 + }, + { + "epoch": 0.76, + "learning_rate": 4.24424245490368e-05, + "loss": 0.324, + "step": 134500 + }, + { + "epoch": 0.76, + "learning_rate": 4.2436803318774346e-05, + "loss": 0.3382, + "step": 134600 + }, + { + "epoch": 0.76, + "learning_rate": 4.243118208851189e-05, + "loss": 0.3339, + "step": 134700 + }, + { + "epoch": 0.76, + "learning_rate": 4.242556085824944e-05, + "loss": 0.3336, + "step": 134800 + }, + { + "epoch": 0.76, + "learning_rate": 4.2419939627986985e-05, + "loss": 0.3287, + "step": 134900 + }, + { + "epoch": 0.76, + "learning_rate": 4.241431839772453e-05, + "loss": 0.333, + "step": 135000 + }, + { + "epoch": 0.76, + "learning_rate": 4.240869716746208e-05, + "loss": 0.324, + "step": 135100 + }, + { + "epoch": 0.76, + "learning_rate": 4.240307593719962e-05, + "loss": 0.3289, + "step": 135200 + }, + { + "epoch": 0.76, + "learning_rate": 4.239745470693716e-05, + "loss": 0.327, + "step": 135300 + }, + { + "epoch": 0.76, + "learning_rate": 4.239183347667471e-05, + "loss": 0.3261, + "step": 135400 + }, + { + "epoch": 0.76, + "learning_rate": 4.238621224641225e-05, + "loss": 0.3213, + "step": 135500 + }, + { + "epoch": 0.76, + "learning_rate": 4.2380591016149795e-05, + "loss": 0.3367, + "step": 135600 + }, + { + "epoch": 0.76, + "learning_rate": 4.237496978588734e-05, + "loss": 0.3272, + "step": 135700 + }, + { + "epoch": 0.76, + "learning_rate": 4.236934855562489e-05, + "loss": 0.331, + "step": 135800 + }, + { + "epoch": 0.76, + "learning_rate": 4.236372732536243e-05, + "loss": 0.3278, + "step": 135900 + }, + { + "epoch": 0.76, + "learning_rate": 4.2358106095099974e-05, + "loss": 0.3227, + "step": 136000 + }, + { + "epoch": 0.77, + "learning_rate": 4.235248486483752e-05, + "loss": 0.3291, + "step": 136100 + }, + { + "epoch": 0.77, + "learning_rate": 4.234686363457506e-05, + "loss": 0.3321, + "step": 136200 + }, + { + "epoch": 0.77, + "learning_rate": 4.2341242404312606e-05, + "loss": 0.3211, + "step": 136300 + }, + { + "epoch": 0.77, + "learning_rate": 4.233562117405015e-05, + "loss": 0.3348, + "step": 136400 + }, + { + "epoch": 0.77, + "learning_rate": 4.23299999437877e-05, + "loss": 0.328, + "step": 136500 + }, + { + "epoch": 0.77, + "learning_rate": 4.2324378713525244e-05, + "loss": 0.3288, + "step": 136600 + }, + { + "epoch": 0.77, + "learning_rate": 4.231875748326279e-05, + "loss": 0.3227, + "step": 136700 + }, + { + "epoch": 0.77, + "learning_rate": 4.231313625300034e-05, + "loss": 0.3297, + "step": 136800 + }, + { + "epoch": 0.77, + "learning_rate": 4.2307515022737877e-05, + "loss": 0.3348, + "step": 136900 + }, + { + "epoch": 0.77, + "learning_rate": 4.230189379247542e-05, + "loss": 0.3298, + "step": 137000 + }, + { + "epoch": 0.77, + "learning_rate": 4.229627256221297e-05, + "loss": 0.331, + "step": 137100 + }, + { + "epoch": 0.77, + "learning_rate": 4.2290651331950515e-05, + "loss": 0.3297, + "step": 137200 + }, + { + "epoch": 0.77, + "learning_rate": 4.2285030101688055e-05, + "loss": 0.3338, + "step": 137300 + }, + { + "epoch": 0.77, + "learning_rate": 4.22794088714256e-05, + "loss": 0.322, + "step": 137400 + }, + { + "epoch": 0.77, + "learning_rate": 4.227378764116315e-05, + "loss": 0.319, + "step": 137500 + }, + { + "epoch": 0.77, + "learning_rate": 4.226816641090069e-05, + "loss": 0.3226, + "step": 137600 + }, + { + "epoch": 0.77, + "learning_rate": 4.226254518063823e-05, + "loss": 0.3256, + "step": 137700 + }, + { + "epoch": 0.77, + "learning_rate": 4.225692395037578e-05, + "loss": 0.3251, + "step": 137800 + }, + { + "epoch": 0.78, + "learning_rate": 4.2251302720113326e-05, + "loss": 0.3249, + "step": 137900 + }, + { + "epoch": 0.78, + "learning_rate": 4.224568148985087e-05, + "loss": 0.327, + "step": 138000 + }, + { + "epoch": 0.78, + "learning_rate": 4.224006025958842e-05, + "loss": 0.3255, + "step": 138100 + }, + { + "epoch": 0.78, + "learning_rate": 4.2234495241628586e-05, + "loss": 0.3156, + "step": 138200 + }, + { + "epoch": 0.78, + "learning_rate": 4.222887401136613e-05, + "loss": 0.3351, + "step": 138300 + }, + { + "epoch": 0.78, + "learning_rate": 4.222325278110367e-05, + "loss": 0.3214, + "step": 138400 + }, + { + "epoch": 0.78, + "learning_rate": 4.221763155084122e-05, + "loss": 0.3266, + "step": 138500 + }, + { + "epoch": 0.78, + "learning_rate": 4.2212010320578764e-05, + "loss": 0.3271, + "step": 138600 + }, + { + "epoch": 0.78, + "learning_rate": 4.2206389090316304e-05, + "loss": 0.323, + "step": 138700 + }, + { + "epoch": 0.78, + "learning_rate": 4.220076786005385e-05, + "loss": 0.3318, + "step": 138800 + }, + { + "epoch": 0.78, + "learning_rate": 4.2195146629791396e-05, + "loss": 0.3335, + "step": 138900 + }, + { + "epoch": 0.78, + "learning_rate": 4.218952539952894e-05, + "loss": 0.3266, + "step": 139000 + }, + { + "epoch": 0.78, + "learning_rate": 4.218390416926649e-05, + "loss": 0.3273, + "step": 139100 + }, + { + "epoch": 0.78, + "learning_rate": 4.2178282939004035e-05, + "loss": 0.3272, + "step": 139200 + }, + { + "epoch": 0.78, + "learning_rate": 4.217266170874158e-05, + "loss": 0.3309, + "step": 139300 + }, + { + "epoch": 0.78, + "learning_rate": 4.216704047847912e-05, + "loss": 0.3185, + "step": 139400 + }, + { + "epoch": 0.78, + "learning_rate": 4.216141924821667e-05, + "loss": 0.325, + "step": 139500 + }, + { + "epoch": 0.78, + "learning_rate": 4.2155798017954213e-05, + "loss": 0.3273, + "step": 139600 + }, + { + "epoch": 0.79, + "learning_rate": 4.215017678769176e-05, + "loss": 0.3236, + "step": 139700 + }, + { + "epoch": 0.79, + "learning_rate": 4.21445555574293e-05, + "loss": 0.3197, + "step": 139800 + }, + { + "epoch": 0.79, + "learning_rate": 4.2138934327166846e-05, + "loss": 0.3213, + "step": 139900 + }, + { + "epoch": 0.79, + "learning_rate": 4.213331309690439e-05, + "loss": 0.3273, + "step": 140000 + }, + { + "epoch": 0.79, + "learning_rate": 4.212769186664193e-05, + "loss": 0.326, + "step": 140100 + }, + { + "epoch": 0.79, + "learning_rate": 4.212207063637948e-05, + "loss": 0.3236, + "step": 140200 + }, + { + "epoch": 0.79, + "learning_rate": 4.2116449406117024e-05, + "loss": 0.3254, + "step": 140300 + }, + { + "epoch": 0.79, + "learning_rate": 4.211082817585457e-05, + "loss": 0.3272, + "step": 140400 + }, + { + "epoch": 0.79, + "learning_rate": 4.210520694559211e-05, + "loss": 0.3215, + "step": 140500 + }, + { + "epoch": 0.79, + "learning_rate": 4.2099585715329656e-05, + "loss": 0.3243, + "step": 140600 + }, + { + "epoch": 0.79, + "learning_rate": 4.20939644850672e-05, + "loss": 0.3252, + "step": 140700 + }, + { + "epoch": 0.79, + "learning_rate": 4.208834325480475e-05, + "loss": 0.3256, + "step": 140800 + }, + { + "epoch": 0.79, + "learning_rate": 4.2082722024542295e-05, + "loss": 0.3226, + "step": 140900 + }, + { + "epoch": 0.79, + "learning_rate": 4.207710079427984e-05, + "loss": 0.3273, + "step": 141000 + }, + { + "epoch": 0.79, + "learning_rate": 4.207147956401739e-05, + "loss": 0.3333, + "step": 141100 + }, + { + "epoch": 0.79, + "learning_rate": 4.206585833375493e-05, + "loss": 0.3321, + "step": 141200 + }, + { + "epoch": 0.79, + "learning_rate": 4.206023710349247e-05, + "loss": 0.3284, + "step": 141300 + }, + { + "epoch": 0.79, + "learning_rate": 4.205461587323002e-05, + "loss": 0.3278, + "step": 141400 + }, + { + "epoch": 0.8, + "learning_rate": 4.204899464296756e-05, + "loss": 0.3261, + "step": 141500 + }, + { + "epoch": 0.8, + "learning_rate": 4.2043373412705105e-05, + "loss": 0.3258, + "step": 141600 + }, + { + "epoch": 0.8, + "learning_rate": 4.203775218244265e-05, + "loss": 0.3258, + "step": 141700 + }, + { + "epoch": 0.8, + "learning_rate": 4.20321309521802e-05, + "loss": 0.3271, + "step": 141800 + }, + { + "epoch": 0.8, + "learning_rate": 4.202650972191774e-05, + "loss": 0.3261, + "step": 141900 + }, + { + "epoch": 0.8, + "learning_rate": 4.2020888491655283e-05, + "loss": 0.323, + "step": 142000 + }, + { + "epoch": 0.8, + "learning_rate": 4.201526726139283e-05, + "loss": 0.3207, + "step": 142100 + }, + { + "epoch": 0.8, + "learning_rate": 4.2009646031130376e-05, + "loss": 0.3253, + "step": 142200 + }, + { + "epoch": 0.8, + "learning_rate": 4.200402480086792e-05, + "loss": 0.3261, + "step": 142300 + }, + { + "epoch": 0.8, + "learning_rate": 4.199840357060547e-05, + "loss": 0.3185, + "step": 142400 + }, + { + "epoch": 0.8, + "learning_rate": 4.1992782340343015e-05, + "loss": 0.3198, + "step": 142500 + }, + { + "epoch": 0.8, + "learning_rate": 4.1987161110080554e-05, + "loss": 0.319, + "step": 142600 + }, + { + "epoch": 0.8, + "learning_rate": 4.19815398798181e-05, + "loss": 0.3204, + "step": 142700 + }, + { + "epoch": 0.8, + "learning_rate": 4.197591864955565e-05, + "loss": 0.3199, + "step": 142800 + }, + { + "epoch": 0.8, + "learning_rate": 4.1970297419293186e-05, + "loss": 0.3203, + "step": 142900 + }, + { + "epoch": 0.8, + "learning_rate": 4.196467618903073e-05, + "loss": 0.3241, + "step": 143000 + }, + { + "epoch": 0.8, + "learning_rate": 4.195905495876828e-05, + "loss": 0.3238, + "step": 143100 + }, + { + "epoch": 0.8, + "learning_rate": 4.1953433728505825e-05, + "loss": 0.3321, + "step": 143200 + }, + { + "epoch": 0.81, + "learning_rate": 4.1947812498243365e-05, + "loss": 0.3206, + "step": 143300 + }, + { + "epoch": 0.81, + "learning_rate": 4.194219126798091e-05, + "loss": 0.3262, + "step": 143400 + }, + { + "epoch": 0.81, + "learning_rate": 4.193657003771846e-05, + "loss": 0.3166, + "step": 143500 + }, + { + "epoch": 0.81, + "learning_rate": 4.1930948807456e-05, + "loss": 0.3251, + "step": 143600 + }, + { + "epoch": 0.81, + "learning_rate": 4.192532757719354e-05, + "loss": 0.3242, + "step": 143700 + }, + { + "epoch": 0.81, + "learning_rate": 4.191970634693109e-05, + "loss": 0.3181, + "step": 143800 + }, + { + "epoch": 0.81, + "learning_rate": 4.1914085116668636e-05, + "loss": 0.333, + "step": 143900 + }, + { + "epoch": 0.81, + "learning_rate": 4.190846388640618e-05, + "loss": 0.3202, + "step": 144000 + }, + { + "epoch": 0.81, + "learning_rate": 4.190284265614373e-05, + "loss": 0.3258, + "step": 144100 + }, + { + "epoch": 0.81, + "learning_rate": 4.1897221425881274e-05, + "loss": 0.3206, + "step": 144200 + }, + { + "epoch": 0.81, + "learning_rate": 4.1891600195618814e-05, + "loss": 0.3171, + "step": 144300 + }, + { + "epoch": 0.81, + "learning_rate": 4.188597896535636e-05, + "loss": 0.3269, + "step": 144400 + }, + { + "epoch": 0.81, + "learning_rate": 4.1880357735093907e-05, + "loss": 0.3154, + "step": 144500 + }, + { + "epoch": 0.81, + "learning_rate": 4.187473650483145e-05, + "loss": 0.3145, + "step": 144600 + }, + { + "epoch": 0.81, + "learning_rate": 4.1869171486871614e-05, + "loss": 0.324, + "step": 144700 + }, + { + "epoch": 0.81, + "learning_rate": 4.186355025660916e-05, + "loss": 0.3212, + "step": 144800 + }, + { + "epoch": 0.81, + "learning_rate": 4.1857929026346706e-05, + "loss": 0.317, + "step": 144900 + }, + { + "epoch": 0.82, + "learning_rate": 4.185230779608425e-05, + "loss": 0.3184, + "step": 145000 + }, + { + "epoch": 0.82, + "learning_rate": 4.18466865658218e-05, + "loss": 0.3268, + "step": 145100 + }, + { + "epoch": 0.82, + "learning_rate": 4.1841065335559345e-05, + "loss": 0.3273, + "step": 145200 + }, + { + "epoch": 0.82, + "learning_rate": 4.183544410529689e-05, + "loss": 0.3234, + "step": 145300 + }, + { + "epoch": 0.82, + "learning_rate": 4.182987908733705e-05, + "loss": 0.3202, + "step": 145400 + }, + { + "epoch": 0.82, + "learning_rate": 4.18242578570746e-05, + "loss": 0.3237, + "step": 145500 + }, + { + "epoch": 0.82, + "learning_rate": 4.1818636626812145e-05, + "loss": 0.3203, + "step": 145600 + }, + { + "epoch": 0.82, + "learning_rate": 4.181301539654969e-05, + "loss": 0.3145, + "step": 145700 + }, + { + "epoch": 0.82, + "learning_rate": 4.180739416628723e-05, + "loss": 0.3236, + "step": 145800 + }, + { + "epoch": 0.82, + "learning_rate": 4.180177293602478e-05, + "loss": 0.3183, + "step": 145900 + }, + { + "epoch": 0.82, + "learning_rate": 4.179615170576232e-05, + "loss": 0.3176, + "step": 146000 + }, + { + "epoch": 0.82, + "learning_rate": 4.179053047549987e-05, + "loss": 0.3248, + "step": 146100 + }, + { + "epoch": 0.82, + "learning_rate": 4.1784909245237416e-05, + "loss": 0.3201, + "step": 146200 + }, + { + "epoch": 0.82, + "learning_rate": 4.177928801497496e-05, + "loss": 0.3197, + "step": 146300 + }, + { + "epoch": 0.82, + "learning_rate": 4.177366678471251e-05, + "loss": 0.3284, + "step": 146400 + }, + { + "epoch": 0.82, + "learning_rate": 4.176804555445005e-05, + "loss": 0.3115, + "step": 146500 + }, + { + "epoch": 0.82, + "learning_rate": 4.1762424324187594e-05, + "loss": 0.3218, + "step": 146600 + }, + { + "epoch": 0.82, + "learning_rate": 4.175680309392514e-05, + "loss": 0.3137, + "step": 146700 + }, + { + "epoch": 0.83, + "learning_rate": 4.175118186366268e-05, + "loss": 0.3255, + "step": 146800 + }, + { + "epoch": 0.83, + "learning_rate": 4.1745560633400226e-05, + "loss": 0.3236, + "step": 146900 + }, + { + "epoch": 0.83, + "learning_rate": 4.173993940313777e-05, + "loss": 0.3162, + "step": 147000 + }, + { + "epoch": 0.83, + "learning_rate": 4.173431817287532e-05, + "loss": 0.3225, + "step": 147100 + }, + { + "epoch": 0.83, + "learning_rate": 4.172869694261286e-05, + "loss": 0.3126, + "step": 147200 + }, + { + "epoch": 0.83, + "learning_rate": 4.1723075712350404e-05, + "loss": 0.3202, + "step": 147300 + }, + { + "epoch": 0.83, + "learning_rate": 4.171745448208795e-05, + "loss": 0.3198, + "step": 147400 + }, + { + "epoch": 0.83, + "learning_rate": 4.17118332518255e-05, + "loss": 0.3205, + "step": 147500 + }, + { + "epoch": 0.83, + "learning_rate": 4.170621202156304e-05, + "loss": 0.3131, + "step": 147600 + }, + { + "epoch": 0.83, + "learning_rate": 4.170059079130059e-05, + "loss": 0.3103, + "step": 147700 + }, + { + "epoch": 0.83, + "learning_rate": 4.1694969561038136e-05, + "loss": 0.3165, + "step": 147800 + }, + { + "epoch": 0.83, + "learning_rate": 4.1689348330775675e-05, + "loss": 0.3117, + "step": 147900 + }, + { + "epoch": 0.83, + "learning_rate": 4.168372710051322e-05, + "loss": 0.3179, + "step": 148000 + }, + { + "epoch": 0.83, + "learning_rate": 4.167810587025077e-05, + "loss": 0.3159, + "step": 148100 + }, + { + "epoch": 0.83, + "learning_rate": 4.167248463998831e-05, + "loss": 0.3249, + "step": 148200 + }, + { + "epoch": 0.83, + "learning_rate": 4.1666863409725854e-05, + "loss": 0.309, + "step": 148300 + }, + { + "epoch": 0.83, + "learning_rate": 4.16612421794634e-05, + "loss": 0.3195, + "step": 148400 + }, + { + "epoch": 0.83, + "learning_rate": 4.1655620949200946e-05, + "loss": 0.3216, + "step": 148500 + }, + { + "epoch": 0.84, + "learning_rate": 4.1649999718938486e-05, + "loss": 0.3126, + "step": 148600 + }, + { + "epoch": 0.84, + "learning_rate": 4.164437848867603e-05, + "loss": 0.3118, + "step": 148700 + }, + { + "epoch": 0.84, + "learning_rate": 4.1638813470716206e-05, + "loss": 0.32, + "step": 148800 + }, + { + "epoch": 0.84, + "learning_rate": 4.163319224045375e-05, + "loss": 0.3136, + "step": 148900 + }, + { + "epoch": 0.84, + "learning_rate": 4.162757101019129e-05, + "loss": 0.3166, + "step": 149000 + }, + { + "epoch": 0.84, + "learning_rate": 4.162194977992884e-05, + "loss": 0.3172, + "step": 149100 + }, + { + "epoch": 0.84, + "learning_rate": 4.1616328549666385e-05, + "loss": 0.3067, + "step": 149200 + }, + { + "epoch": 0.84, + "learning_rate": 4.1610707319403924e-05, + "loss": 0.3125, + "step": 149300 + }, + { + "epoch": 0.84, + "learning_rate": 4.160508608914147e-05, + "loss": 0.3172, + "step": 149400 + }, + { + "epoch": 0.84, + "learning_rate": 4.159946485887902e-05, + "loss": 0.3221, + "step": 149500 + }, + { + "epoch": 0.84, + "learning_rate": 4.159384362861656e-05, + "loss": 0.3164, + "step": 149600 + }, + { + "epoch": 0.84, + "learning_rate": 4.15882223983541e-05, + "loss": 0.3138, + "step": 149700 + }, + { + "epoch": 0.84, + "learning_rate": 4.158260116809165e-05, + "loss": 0.3164, + "step": 149800 + }, + { + "epoch": 0.84, + "learning_rate": 4.1576979937829195e-05, + "loss": 0.3113, + "step": 149900 + }, + { + "epoch": 0.84, + "learning_rate": 4.1571358707566735e-05, + "loss": 0.3116, + "step": 150000 + }, + { + "epoch": 0.84, + "learning_rate": 4.156573747730428e-05, + "loss": 0.3182, + "step": 150100 + }, + { + "epoch": 0.84, + "learning_rate": 4.156011624704183e-05, + "loss": 0.3136, + "step": 150200 + }, + { + "epoch": 0.84, + "learning_rate": 4.155449501677937e-05, + "loss": 0.3072, + "step": 150300 + }, + { + "epoch": 0.85, + "learning_rate": 4.154887378651692e-05, + "loss": 0.311, + "step": 150400 + }, + { + "epoch": 0.85, + "learning_rate": 4.1543252556254466e-05, + "loss": 0.3104, + "step": 150500 + }, + { + "epoch": 0.85, + "learning_rate": 4.153763132599201e-05, + "loss": 0.3065, + "step": 150600 + }, + { + "epoch": 0.85, + "learning_rate": 4.153201009572955e-05, + "loss": 0.3189, + "step": 150700 + }, + { + "epoch": 0.85, + "learning_rate": 4.15263888654671e-05, + "loss": 0.3161, + "step": 150800 + }, + { + "epoch": 0.85, + "learning_rate": 4.1520767635204644e-05, + "loss": 0.3194, + "step": 150900 + }, + { + "epoch": 0.85, + "learning_rate": 4.151514640494219e-05, + "loss": 0.3179, + "step": 151000 + }, + { + "epoch": 0.85, + "learning_rate": 4.150952517467973e-05, + "loss": 0.3203, + "step": 151100 + }, + { + "epoch": 0.85, + "learning_rate": 4.1503903944417276e-05, + "loss": 0.3189, + "step": 151200 + }, + { + "epoch": 0.85, + "learning_rate": 4.149828271415482e-05, + "loss": 0.3156, + "step": 151300 + }, + { + "epoch": 0.85, + "learning_rate": 4.149266148389236e-05, + "loss": 0.3114, + "step": 151400 + }, + { + "epoch": 0.85, + "learning_rate": 4.148704025362991e-05, + "loss": 0.3108, + "step": 151500 + }, + { + "epoch": 0.85, + "learning_rate": 4.1481419023367455e-05, + "loss": 0.3097, + "step": 151600 + }, + { + "epoch": 0.85, + "learning_rate": 4.1475797793105e-05, + "loss": 0.324, + "step": 151700 + }, + { + "epoch": 0.85, + "learning_rate": 4.147017656284255e-05, + "loss": 0.3139, + "step": 151800 + }, + { + "epoch": 0.85, + "learning_rate": 4.1464555332580093e-05, + "loss": 0.3124, + "step": 151900 + }, + { + "epoch": 0.85, + "learning_rate": 4.145893410231764e-05, + "loss": 0.318, + "step": 152000 + }, + { + "epoch": 0.85, + "learning_rate": 4.145331287205518e-05, + "loss": 0.3108, + "step": 152100 + }, + { + "epoch": 0.86, + "learning_rate": 4.1447691641792726e-05, + "loss": 0.319, + "step": 152200 + }, + { + "epoch": 0.86, + "learning_rate": 4.144207041153027e-05, + "loss": 0.3191, + "step": 152300 + }, + { + "epoch": 0.86, + "learning_rate": 4.143644918126782e-05, + "loss": 0.3115, + "step": 152400 + }, + { + "epoch": 0.86, + "learning_rate": 4.143082795100536e-05, + "loss": 0.3116, + "step": 152500 + }, + { + "epoch": 0.86, + "learning_rate": 4.1425206720742904e-05, + "loss": 0.3197, + "step": 152600 + }, + { + "epoch": 0.86, + "learning_rate": 4.141958549048045e-05, + "loss": 0.3138, + "step": 152700 + }, + { + "epoch": 0.86, + "learning_rate": 4.141396426021799e-05, + "loss": 0.3171, + "step": 152800 + }, + { + "epoch": 0.86, + "learning_rate": 4.1408343029955536e-05, + "loss": 0.3194, + "step": 152900 + }, + { + "epoch": 0.86, + "learning_rate": 4.140272179969308e-05, + "loss": 0.3109, + "step": 153000 + }, + { + "epoch": 0.86, + "learning_rate": 4.139710056943063e-05, + "loss": 0.3133, + "step": 153100 + }, + { + "epoch": 0.86, + "learning_rate": 4.139147933916817e-05, + "loss": 0.3119, + "step": 153200 + }, + { + "epoch": 0.86, + "learning_rate": 4.1385858108905714e-05, + "loss": 0.3038, + "step": 153300 + }, + { + "epoch": 0.86, + "learning_rate": 4.138023687864326e-05, + "loss": 0.3085, + "step": 153400 + }, + { + "epoch": 0.86, + "learning_rate": 4.137461564838081e-05, + "loss": 0.3147, + "step": 153500 + }, + { + "epoch": 0.86, + "learning_rate": 4.136899441811835e-05, + "loss": 0.3172, + "step": 153600 + }, + { + "epoch": 0.86, + "learning_rate": 4.13633731878559e-05, + "loss": 0.3064, + "step": 153700 + }, + { + "epoch": 0.86, + "learning_rate": 4.1357751957593446e-05, + "loss": 0.3089, + "step": 153800 + }, + { + "epoch": 0.87, + "learning_rate": 4.1352186939633606e-05, + "loss": 0.3108, + "step": 153900 + }, + { + "epoch": 0.87, + "learning_rate": 4.134656570937115e-05, + "loss": 0.3099, + "step": 154000 + }, + { + "epoch": 0.87, + "learning_rate": 4.13409444791087e-05, + "loss": 0.3197, + "step": 154100 + }, + { + "epoch": 0.87, + "learning_rate": 4.1335323248846245e-05, + "loss": 0.3101, + "step": 154200 + }, + { + "epoch": 0.87, + "learning_rate": 4.1329702018583785e-05, + "loss": 0.316, + "step": 154300 + }, + { + "epoch": 0.87, + "learning_rate": 4.132408078832133e-05, + "loss": 0.3225, + "step": 154400 + }, + { + "epoch": 0.87, + "learning_rate": 4.131845955805888e-05, + "loss": 0.3074, + "step": 154500 + }, + { + "epoch": 0.87, + "learning_rate": 4.1312838327796424e-05, + "loss": 0.3193, + "step": 154600 + }, + { + "epoch": 0.87, + "learning_rate": 4.130721709753397e-05, + "loss": 0.3122, + "step": 154700 + }, + { + "epoch": 0.87, + "learning_rate": 4.1301595867271516e-05, + "loss": 0.3126, + "step": 154800 + }, + { + "epoch": 0.87, + "learning_rate": 4.129597463700906e-05, + "loss": 0.3118, + "step": 154900 + }, + { + "epoch": 0.87, + "learning_rate": 4.12903534067466e-05, + "loss": 0.3097, + "step": 155000 + }, + { + "epoch": 0.87, + "learning_rate": 4.128473217648415e-05, + "loss": 0.3091, + "step": 155100 + }, + { + "epoch": 0.87, + "learning_rate": 4.1279110946221695e-05, + "loss": 0.3091, + "step": 155200 + }, + { + "epoch": 0.87, + "learning_rate": 4.1273489715959234e-05, + "loss": 0.3102, + "step": 155300 + }, + { + "epoch": 0.87, + "learning_rate": 4.126786848569678e-05, + "loss": 0.3094, + "step": 155400 + }, + { + "epoch": 0.87, + "learning_rate": 4.1262247255434327e-05, + "loss": 0.3121, + "step": 155500 + }, + { + "epoch": 0.87, + "learning_rate": 4.1256626025171866e-05, + "loss": 0.3135, + "step": 155600 + }, + { + "epoch": 0.88, + "learning_rate": 4.125100479490941e-05, + "loss": 0.3175, + "step": 155700 + }, + { + "epoch": 0.88, + "learning_rate": 4.124538356464696e-05, + "loss": 0.3198, + "step": 155800 + }, + { + "epoch": 0.88, + "learning_rate": 4.1239762334384505e-05, + "loss": 0.3069, + "step": 155900 + }, + { + "epoch": 0.88, + "learning_rate": 4.123414110412205e-05, + "loss": 0.3121, + "step": 156000 + }, + { + "epoch": 0.88, + "learning_rate": 4.12285198738596e-05, + "loss": 0.3082, + "step": 156100 + }, + { + "epoch": 0.88, + "learning_rate": 4.1222954855899765e-05, + "loss": 0.3108, + "step": 156200 + }, + { + "epoch": 0.88, + "learning_rate": 4.121733362563731e-05, + "loss": 0.3136, + "step": 156300 + }, + { + "epoch": 0.88, + "learning_rate": 4.121171239537485e-05, + "loss": 0.3043, + "step": 156400 + }, + { + "epoch": 0.88, + "learning_rate": 4.12060911651124e-05, + "loss": 0.3053, + "step": 156500 + }, + { + "epoch": 0.88, + "learning_rate": 4.1200469934849943e-05, + "loss": 0.3173, + "step": 156600 + }, + { + "epoch": 0.88, + "learning_rate": 4.119484870458748e-05, + "loss": 0.3136, + "step": 156700 + }, + { + "epoch": 0.88, + "learning_rate": 4.118922747432503e-05, + "loss": 0.3135, + "step": 156800 + }, + { + "epoch": 0.88, + "learning_rate": 4.1183606244062575e-05, + "loss": 0.3194, + "step": 156900 + }, + { + "epoch": 0.88, + "learning_rate": 4.117798501380012e-05, + "loss": 0.3088, + "step": 157000 + }, + { + "epoch": 0.88, + "learning_rate": 4.117236378353767e-05, + "loss": 0.312, + "step": 157100 + }, + { + "epoch": 0.88, + "learning_rate": 4.1166742553275214e-05, + "loss": 0.3082, + "step": 157200 + }, + { + "epoch": 0.88, + "learning_rate": 4.116112132301276e-05, + "loss": 0.3051, + "step": 157300 + }, + { + "epoch": 0.88, + "learning_rate": 4.11555000927503e-05, + "loss": 0.3092, + "step": 157400 + }, + { + "epoch": 0.89, + "learning_rate": 4.1149878862487846e-05, + "loss": 0.3192, + "step": 157500 + }, + { + "epoch": 0.89, + "learning_rate": 4.114425763222539e-05, + "loss": 0.3201, + "step": 157600 + }, + { + "epoch": 0.89, + "learning_rate": 4.113863640196294e-05, + "loss": 0.3141, + "step": 157700 + }, + { + "epoch": 0.89, + "learning_rate": 4.113301517170048e-05, + "loss": 0.3059, + "step": 157800 + }, + { + "epoch": 0.89, + "learning_rate": 4.1127393941438025e-05, + "loss": 0.3091, + "step": 157900 + }, + { + "epoch": 0.89, + "learning_rate": 4.112177271117557e-05, + "loss": 0.313, + "step": 158000 + }, + { + "epoch": 0.89, + "learning_rate": 4.111615148091311e-05, + "loss": 0.3086, + "step": 158100 + }, + { + "epoch": 0.89, + "learning_rate": 4.111053025065066e-05, + "loss": 0.313, + "step": 158200 + }, + { + "epoch": 0.89, + "learning_rate": 4.11049090203882e-05, + "loss": 0.3101, + "step": 158300 + }, + { + "epoch": 0.89, + "learning_rate": 4.109928779012575e-05, + "loss": 0.313, + "step": 158400 + }, + { + "epoch": 0.89, + "learning_rate": 4.109372277216592e-05, + "loss": 0.3153, + "step": 158500 + }, + { + "epoch": 0.89, + "learning_rate": 4.108810154190346e-05, + "loss": 0.3104, + "step": 158600 + }, + { + "epoch": 0.89, + "learning_rate": 4.108248031164101e-05, + "loss": 0.3086, + "step": 158700 + }, + { + "epoch": 0.89, + "learning_rate": 4.1076859081378556e-05, + "loss": 0.319, + "step": 158800 + }, + { + "epoch": 0.89, + "learning_rate": 4.1071237851116095e-05, + "loss": 0.3078, + "step": 158900 + }, + { + "epoch": 0.89, + "learning_rate": 4.106561662085364e-05, + "loss": 0.3147, + "step": 159000 + }, + { + "epoch": 0.89, + "learning_rate": 4.105999539059119e-05, + "loss": 0.3103, + "step": 159100 + }, + { + "epoch": 0.89, + "learning_rate": 4.105437416032873e-05, + "loss": 0.31, + "step": 159200 + }, + { + "epoch": 0.9, + "learning_rate": 4.1048752930066274e-05, + "loss": 0.3041, + "step": 159300 + }, + { + "epoch": 0.9, + "learning_rate": 4.104313169980382e-05, + "loss": 0.3064, + "step": 159400 + }, + { + "epoch": 0.9, + "learning_rate": 4.1037510469541366e-05, + "loss": 0.3106, + "step": 159500 + }, + { + "epoch": 0.9, + "learning_rate": 4.1031889239278906e-05, + "loss": 0.3125, + "step": 159600 + }, + { + "epoch": 0.9, + "learning_rate": 4.102626800901645e-05, + "loss": 0.3072, + "step": 159700 + }, + { + "epoch": 0.9, + "learning_rate": 4.1020646778754005e-05, + "loss": 0.312, + "step": 159800 + }, + { + "epoch": 0.9, + "learning_rate": 4.1015025548491544e-05, + "loss": 0.3226, + "step": 159900 + }, + { + "epoch": 0.9, + "learning_rate": 4.100940431822909e-05, + "loss": 0.311, + "step": 160000 + }, + { + "epoch": 0.9, + "learning_rate": 4.100378308796664e-05, + "loss": 0.3071, + "step": 160100 + }, + { + "epoch": 0.9, + "learning_rate": 4.099816185770418e-05, + "loss": 0.3092, + "step": 160200 + }, + { + "epoch": 0.9, + "learning_rate": 4.099254062744172e-05, + "loss": 0.3109, + "step": 160300 + }, + { + "epoch": 0.9, + "learning_rate": 4.098691939717927e-05, + "loss": 0.3072, + "step": 160400 + }, + { + "epoch": 0.9, + "learning_rate": 4.098135437921944e-05, + "loss": 0.2962, + "step": 160500 + }, + { + "epoch": 0.9, + "learning_rate": 4.097573314895698e-05, + "loss": 0.2932, + "step": 160600 + }, + { + "epoch": 0.9, + "learning_rate": 4.097011191869453e-05, + "loss": 0.3108, + "step": 160700 + }, + { + "epoch": 0.9, + "learning_rate": 4.0964490688432076e-05, + "loss": 0.297, + "step": 160800 + }, + { + "epoch": 0.9, + "learning_rate": 4.095886945816962e-05, + "loss": 0.3126, + "step": 160900 + }, + { + "epoch": 0.91, + "learning_rate": 4.095324822790716e-05, + "loss": 0.3036, + "step": 161000 + }, + { + "epoch": 0.91, + "learning_rate": 4.094762699764471e-05, + "loss": 0.3062, + "step": 161100 + }, + { + "epoch": 0.91, + "learning_rate": 4.0942005767382254e-05, + "loss": 0.3134, + "step": 161200 + }, + { + "epoch": 0.91, + "learning_rate": 4.09363845371198e-05, + "loss": 0.3054, + "step": 161300 + }, + { + "epoch": 0.91, + "learning_rate": 4.093076330685734e-05, + "loss": 0.303, + "step": 161400 + }, + { + "epoch": 0.91, + "learning_rate": 4.0925142076594886e-05, + "loss": 0.3109, + "step": 161500 + }, + { + "epoch": 0.91, + "learning_rate": 4.091952084633243e-05, + "loss": 0.3111, + "step": 161600 + }, + { + "epoch": 0.91, + "learning_rate": 4.091389961606997e-05, + "loss": 0.3098, + "step": 161700 + }, + { + "epoch": 0.91, + "learning_rate": 4.090827838580752e-05, + "loss": 0.3082, + "step": 161800 + }, + { + "epoch": 0.91, + "learning_rate": 4.0902657155545064e-05, + "loss": 0.3076, + "step": 161900 + }, + { + "epoch": 0.91, + "learning_rate": 4.089703592528261e-05, + "loss": 0.309, + "step": 162000 + }, + { + "epoch": 0.91, + "learning_rate": 4.089141469502015e-05, + "loss": 0.3028, + "step": 162100 + }, + { + "epoch": 0.91, + "learning_rate": 4.0885793464757696e-05, + "loss": 0.3135, + "step": 162200 + }, + { + "epoch": 0.91, + "learning_rate": 4.088022844679787e-05, + "loss": 0.3023, + "step": 162300 + }, + { + "epoch": 0.91, + "learning_rate": 4.087460721653542e-05, + "loss": 0.3119, + "step": 162400 + }, + { + "epoch": 0.91, + "learning_rate": 4.0868985986272957e-05, + "loss": 0.3161, + "step": 162500 + }, + { + "epoch": 0.91, + "learning_rate": 4.08633647560105e-05, + "loss": 0.3108, + "step": 162600 + }, + { + "epoch": 0.91, + "learning_rate": 4.085774352574805e-05, + "loss": 0.3113, + "step": 162700 + }, + { + "epoch": 0.92, + "learning_rate": 4.085212229548559e-05, + "loss": 0.309, + "step": 162800 + }, + { + "epoch": 0.92, + "learning_rate": 4.0846501065223135e-05, + "loss": 0.307, + "step": 162900 + }, + { + "epoch": 0.92, + "learning_rate": 4.084087983496068e-05, + "loss": 0.3095, + "step": 163000 + }, + { + "epoch": 0.92, + "learning_rate": 4.083525860469823e-05, + "loss": 0.3005, + "step": 163100 + }, + { + "epoch": 0.92, + "learning_rate": 4.082963737443577e-05, + "loss": 0.3105, + "step": 163200 + }, + { + "epoch": 0.92, + "learning_rate": 4.082401614417331e-05, + "loss": 0.3021, + "step": 163300 + }, + { + "epoch": 0.92, + "learning_rate": 4.081839491391086e-05, + "loss": 0.2954, + "step": 163400 + }, + { + "epoch": 0.92, + "learning_rate": 4.0812773683648406e-05, + "loss": 0.301, + "step": 163500 + }, + { + "epoch": 0.92, + "learning_rate": 4.080715245338595e-05, + "loss": 0.3, + "step": 163600 + }, + { + "epoch": 0.92, + "learning_rate": 4.08015312231235e-05, + "loss": 0.3089, + "step": 163700 + }, + { + "epoch": 0.92, + "learning_rate": 4.0795909992861045e-05, + "loss": 0.3157, + "step": 163800 + }, + { + "epoch": 0.92, + "learning_rate": 4.0790288762598584e-05, + "loss": 0.3038, + "step": 163900 + }, + { + "epoch": 0.92, + "learning_rate": 4.078466753233613e-05, + "loss": 0.3129, + "step": 164000 + }, + { + "epoch": 0.92, + "learning_rate": 4.077904630207368e-05, + "loss": 0.3095, + "step": 164100 + }, + { + "epoch": 0.92, + "learning_rate": 4.0773425071811216e-05, + "loss": 0.3007, + "step": 164200 + }, + { + "epoch": 0.92, + "learning_rate": 4.076780384154876e-05, + "loss": 0.2909, + "step": 164300 + }, + { + "epoch": 0.92, + "learning_rate": 4.076218261128631e-05, + "loss": 0.3108, + "step": 164400 + }, + { + "epoch": 0.92, + "learning_rate": 4.0756561381023855e-05, + "loss": 0.3043, + "step": 164500 + }, + { + "epoch": 0.93, + "learning_rate": 4.0750940150761394e-05, + "loss": 0.3085, + "step": 164600 + }, + { + "epoch": 0.93, + "learning_rate": 4.074531892049894e-05, + "loss": 0.3072, + "step": 164700 + }, + { + "epoch": 0.93, + "learning_rate": 4.073969769023649e-05, + "loss": 0.3033, + "step": 164800 + }, + { + "epoch": 0.93, + "learning_rate": 4.0734076459974027e-05, + "loss": 0.3106, + "step": 164900 + }, + { + "epoch": 0.93, + "learning_rate": 4.072845522971158e-05, + "loss": 0.3089, + "step": 165000 + }, + { + "epoch": 0.93, + "learning_rate": 4.0722833999449126e-05, + "loss": 0.3056, + "step": 165100 + }, + { + "epoch": 0.93, + "learning_rate": 4.0717212769186665e-05, + "loss": 0.3034, + "step": 165200 + }, + { + "epoch": 0.93, + "learning_rate": 4.071159153892421e-05, + "loss": 0.2933, + "step": 165300 + }, + { + "epoch": 0.93, + "learning_rate": 4.070597030866176e-05, + "loss": 0.302, + "step": 165400 + }, + { + "epoch": 0.93, + "learning_rate": 4.0700349078399304e-05, + "loss": 0.3056, + "step": 165500 + }, + { + "epoch": 0.93, + "learning_rate": 4.0694727848136844e-05, + "loss": 0.307, + "step": 165600 + }, + { + "epoch": 0.93, + "learning_rate": 4.068910661787439e-05, + "loss": 0.3023, + "step": 165700 + }, + { + "epoch": 0.93, + "learning_rate": 4.0683485387611936e-05, + "loss": 0.3057, + "step": 165800 + }, + { + "epoch": 0.93, + "learning_rate": 4.0677864157349476e-05, + "loss": 0.2957, + "step": 165900 + }, + { + "epoch": 0.93, + "learning_rate": 4.067229913938965e-05, + "loss": 0.303, + "step": 166000 + }, + { + "epoch": 0.93, + "learning_rate": 4.0666677909127196e-05, + "loss": 0.3088, + "step": 166100 + }, + { + "epoch": 0.93, + "learning_rate": 4.066105667886474e-05, + "loss": 0.3007, + "step": 166200 + }, + { + "epoch": 0.93, + "learning_rate": 4.065543544860228e-05, + "loss": 0.3011, + "step": 166300 + }, + { + "epoch": 0.94, + "learning_rate": 4.064981421833983e-05, + "loss": 0.3102, + "step": 166400 + }, + { + "epoch": 0.94, + "learning_rate": 4.0644192988077375e-05, + "loss": 0.302, + "step": 166500 + }, + { + "epoch": 0.94, + "learning_rate": 4.063857175781492e-05, + "loss": 0.2988, + "step": 166600 + }, + { + "epoch": 0.94, + "learning_rate": 4.063295052755246e-05, + "loss": 0.3044, + "step": 166700 + }, + { + "epoch": 0.94, + "learning_rate": 4.062732929729001e-05, + "loss": 0.3064, + "step": 166800 + }, + { + "epoch": 0.94, + "learning_rate": 4.062170806702755e-05, + "loss": 0.3075, + "step": 166900 + }, + { + "epoch": 0.94, + "learning_rate": 4.061608683676509e-05, + "loss": 0.3013, + "step": 167000 + }, + { + "epoch": 0.94, + "learning_rate": 4.061046560650264e-05, + "loss": 0.3094, + "step": 167100 + }, + { + "epoch": 0.94, + "learning_rate": 4.0604844376240185e-05, + "loss": 0.3011, + "step": 167200 + }, + { + "epoch": 0.94, + "learning_rate": 4.059922314597773e-05, + "loss": 0.3003, + "step": 167300 + }, + { + "epoch": 0.94, + "learning_rate": 4.059360191571527e-05, + "loss": 0.3008, + "step": 167400 + }, + { + "epoch": 0.94, + "learning_rate": 4.058798068545282e-05, + "loss": 0.3021, + "step": 167500 + }, + { + "epoch": 0.94, + "learning_rate": 4.0582359455190363e-05, + "loss": 0.306, + "step": 167600 + }, + { + "epoch": 0.94, + "learning_rate": 4.057673822492791e-05, + "loss": 0.3079, + "step": 167700 + }, + { + "epoch": 0.94, + "learning_rate": 4.0571116994665456e-05, + "loss": 0.3028, + "step": 167800 + }, + { + "epoch": 0.94, + "learning_rate": 4.0565495764403e-05, + "loss": 0.302, + "step": 167900 + }, + { + "epoch": 0.94, + "learning_rate": 4.055987453414055e-05, + "loss": 0.3083, + "step": 168000 + }, + { + "epoch": 0.94, + "learning_rate": 4.055425330387809e-05, + "loss": 0.3092, + "step": 168100 + }, + { + "epoch": 0.95, + "learning_rate": 4.0548632073615634e-05, + "loss": 0.3064, + "step": 168200 + }, + { + "epoch": 0.95, + "learning_rate": 4.054301084335318e-05, + "loss": 0.2982, + "step": 168300 + }, + { + "epoch": 0.95, + "learning_rate": 4.053738961309072e-05, + "loss": 0.3057, + "step": 168400 + }, + { + "epoch": 0.95, + "learning_rate": 4.0531768382828266e-05, + "loss": 0.3014, + "step": 168500 + }, + { + "epoch": 0.95, + "learning_rate": 4.052614715256581e-05, + "loss": 0.2918, + "step": 168600 + }, + { + "epoch": 0.95, + "learning_rate": 4.052052592230336e-05, + "loss": 0.2942, + "step": 168700 + }, + { + "epoch": 0.95, + "learning_rate": 4.05149046920409e-05, + "loss": 0.3079, + "step": 168800 + }, + { + "epoch": 0.95, + "learning_rate": 4.0509283461778445e-05, + "loss": 0.2968, + "step": 168900 + }, + { + "epoch": 0.95, + "learning_rate": 4.050366223151599e-05, + "loss": 0.3043, + "step": 169000 + }, + { + "epoch": 0.95, + "learning_rate": 4.049804100125353e-05, + "loss": 0.3036, + "step": 169100 + }, + { + "epoch": 0.95, + "learning_rate": 4.0492419770991084e-05, + "loss": 0.2986, + "step": 169200 + }, + { + "epoch": 0.95, + "learning_rate": 4.048679854072863e-05, + "loss": 0.2994, + "step": 169300 + }, + { + "epoch": 0.95, + "learning_rate": 4.0481177310466176e-05, + "loss": 0.3039, + "step": 169400 + }, + { + "epoch": 0.95, + "learning_rate": 4.0475556080203716e-05, + "loss": 0.3091, + "step": 169500 + }, + { + "epoch": 0.95, + "learning_rate": 4.046993484994126e-05, + "loss": 0.3051, + "step": 169600 + }, + { + "epoch": 0.95, + "learning_rate": 4.046431361967881e-05, + "loss": 0.3023, + "step": 169700 + }, + { + "epoch": 0.95, + "learning_rate": 4.045869238941635e-05, + "loss": 0.2971, + "step": 169800 + }, + { + "epoch": 0.96, + "learning_rate": 4.0453071159153894e-05, + "loss": 0.3035, + "step": 169900 + }, + { + "epoch": 0.96, + "learning_rate": 4.044744992889144e-05, + "loss": 0.3042, + "step": 170000 + }, + { + "epoch": 0.96, + "learning_rate": 4.0441828698628987e-05, + "loss": 0.304, + "step": 170100 + }, + { + "epoch": 0.96, + "learning_rate": 4.0436207468366526e-05, + "loss": 0.3095, + "step": 170200 + }, + { + "epoch": 0.96, + "learning_rate": 4.04306424504067e-05, + "loss": 0.3093, + "step": 170300 + }, + { + "epoch": 0.96, + "learning_rate": 4.042502122014425e-05, + "loss": 0.3008, + "step": 170400 + }, + { + "epoch": 0.96, + "learning_rate": 4.041939998988179e-05, + "loss": 0.2969, + "step": 170500 + }, + { + "epoch": 0.96, + "learning_rate": 4.041377875961933e-05, + "loss": 0.2978, + "step": 170600 + }, + { + "epoch": 0.96, + "learning_rate": 4.040815752935688e-05, + "loss": 0.3068, + "step": 170700 + }, + { + "epoch": 0.96, + "learning_rate": 4.0402536299094425e-05, + "loss": 0.3022, + "step": 170800 + }, + { + "epoch": 0.96, + "learning_rate": 4.0396915068831965e-05, + "loss": 0.3008, + "step": 170900 + }, + { + "epoch": 0.96, + "learning_rate": 4.039129383856951e-05, + "loss": 0.3055, + "step": 171000 + }, + { + "epoch": 0.96, + "learning_rate": 4.038567260830706e-05, + "loss": 0.3112, + "step": 171100 + }, + { + "epoch": 0.96, + "learning_rate": 4.03800513780446e-05, + "loss": 0.2976, + "step": 171200 + }, + { + "epoch": 0.96, + "learning_rate": 4.037443014778214e-05, + "loss": 0.2935, + "step": 171300 + }, + { + "epoch": 0.96, + "learning_rate": 4.036880891751969e-05, + "loss": 0.2915, + "step": 171400 + }, + { + "epoch": 0.96, + "learning_rate": 4.0363187687257235e-05, + "loss": 0.2984, + "step": 171500 + }, + { + "epoch": 0.96, + "learning_rate": 4.0357566456994775e-05, + "loss": 0.305, + "step": 171600 + }, + { + "epoch": 0.97, + "learning_rate": 4.035194522673232e-05, + "loss": 0.3036, + "step": 171700 + }, + { + "epoch": 0.97, + "learning_rate": 4.034632399646987e-05, + "loss": 0.3045, + "step": 171800 + }, + { + "epoch": 0.97, + "learning_rate": 4.0340702766207414e-05, + "loss": 0.3038, + "step": 171900 + }, + { + "epoch": 0.97, + "learning_rate": 4.033508153594496e-05, + "loss": 0.2917, + "step": 172000 + }, + { + "epoch": 0.97, + "learning_rate": 4.0329460305682506e-05, + "loss": 0.2975, + "step": 172100 + }, + { + "epoch": 0.97, + "learning_rate": 4.032383907542005e-05, + "loss": 0.3013, + "step": 172200 + }, + { + "epoch": 0.97, + "learning_rate": 4.031821784515759e-05, + "loss": 0.2962, + "step": 172300 + }, + { + "epoch": 0.97, + "learning_rate": 4.031265282719776e-05, + "loss": 0.2981, + "step": 172400 + }, + { + "epoch": 0.97, + "learning_rate": 4.0307031596935306e-05, + "loss": 0.3077, + "step": 172500 + }, + { + "epoch": 0.97, + "learning_rate": 4.030146657897548e-05, + "loss": 0.3012, + "step": 172600 + }, + { + "epoch": 0.97, + "learning_rate": 4.029590156101565e-05, + "loss": 0.3029, + "step": 172700 + }, + { + "epoch": 0.97, + "learning_rate": 4.029028033075319e-05, + "loss": 0.2985, + "step": 172800 + }, + { + "epoch": 0.97, + "learning_rate": 4.0284659100490734e-05, + "loss": 0.3039, + "step": 172900 + }, + { + "epoch": 0.97, + "learning_rate": 4.027903787022828e-05, + "loss": 0.3025, + "step": 173000 + }, + { + "epoch": 0.97, + "learning_rate": 4.0273416639965826e-05, + "loss": 0.3046, + "step": 173100 + }, + { + "epoch": 0.97, + "learning_rate": 4.0267795409703366e-05, + "loss": 0.2997, + "step": 173200 + }, + { + "epoch": 0.97, + "learning_rate": 4.026217417944091e-05, + "loss": 0.3003, + "step": 173300 + }, + { + "epoch": 0.97, + "learning_rate": 4.025655294917846e-05, + "loss": 0.2917, + "step": 173400 + }, + { + "epoch": 0.98, + "learning_rate": 4.0250931718916005e-05, + "loss": 0.3015, + "step": 173500 + }, + { + "epoch": 0.98, + "learning_rate": 4.024531048865355e-05, + "loss": 0.2909, + "step": 173600 + }, + { + "epoch": 0.98, + "learning_rate": 4.02396892583911e-05, + "loss": 0.2998, + "step": 173700 + }, + { + "epoch": 0.98, + "learning_rate": 4.0234068028128644e-05, + "loss": 0.307, + "step": 173800 + }, + { + "epoch": 0.98, + "learning_rate": 4.022844679786618e-05, + "loss": 0.3, + "step": 173900 + }, + { + "epoch": 0.98, + "learning_rate": 4.022282556760373e-05, + "loss": 0.3041, + "step": 174000 + }, + { + "epoch": 0.98, + "learning_rate": 4.0217204337341276e-05, + "loss": 0.3053, + "step": 174100 + }, + { + "epoch": 0.98, + "learning_rate": 4.0211583107078815e-05, + "loss": 0.2979, + "step": 174200 + }, + { + "epoch": 0.98, + "learning_rate": 4.020596187681636e-05, + "loss": 0.2983, + "step": 174300 + }, + { + "epoch": 0.98, + "learning_rate": 4.020034064655391e-05, + "loss": 0.2972, + "step": 174400 + }, + { + "epoch": 0.98, + "learning_rate": 4.0194719416291454e-05, + "loss": 0.3068, + "step": 174500 + }, + { + "epoch": 0.98, + "learning_rate": 4.0189098186028993e-05, + "loss": 0.3, + "step": 174600 + }, + { + "epoch": 0.98, + "learning_rate": 4.018347695576654e-05, + "loss": 0.3026, + "step": 174700 + }, + { + "epoch": 0.98, + "learning_rate": 4.0177855725504086e-05, + "loss": 0.3062, + "step": 174800 + }, + { + "epoch": 0.98, + "learning_rate": 4.0172234495241625e-05, + "loss": 0.2984, + "step": 174900 + }, + { + "epoch": 0.98, + "learning_rate": 4.016661326497917e-05, + "loss": 0.2965, + "step": 175000 + }, + { + "epoch": 0.98, + "learning_rate": 4.016099203471672e-05, + "loss": 0.305, + "step": 175100 + }, + { + "epoch": 0.98, + "learning_rate": 4.0155370804454264e-05, + "loss": 0.3041, + "step": 175200 + }, + { + "epoch": 0.99, + "learning_rate": 4.014974957419181e-05, + "loss": 0.2992, + "step": 175300 + }, + { + "epoch": 0.99, + "learning_rate": 4.014412834392936e-05, + "loss": 0.2906, + "step": 175400 + }, + { + "epoch": 0.99, + "learning_rate": 4.01385071136669e-05, + "loss": 0.2925, + "step": 175500 + }, + { + "epoch": 0.99, + "learning_rate": 4.013288588340444e-05, + "loss": 0.2885, + "step": 175600 + }, + { + "epoch": 0.99, + "learning_rate": 4.012726465314199e-05, + "loss": 0.2931, + "step": 175700 + }, + { + "epoch": 0.99, + "learning_rate": 4.0121643422879535e-05, + "loss": 0.3027, + "step": 175800 + }, + { + "epoch": 0.99, + "learning_rate": 4.0116022192617075e-05, + "loss": 0.2992, + "step": 175900 + }, + { + "epoch": 0.99, + "learning_rate": 4.011040096235462e-05, + "loss": 0.3015, + "step": 176000 + }, + { + "epoch": 0.99, + "learning_rate": 4.010477973209217e-05, + "loss": 0.2976, + "step": 176100 + }, + { + "epoch": 0.99, + "learning_rate": 4.0099158501829714e-05, + "loss": 0.3109, + "step": 176200 + }, + { + "epoch": 0.99, + "learning_rate": 4.009353727156725e-05, + "loss": 0.3013, + "step": 176300 + }, + { + "epoch": 0.99, + "learning_rate": 4.00879160413048e-05, + "loss": 0.2935, + "step": 176400 + }, + { + "epoch": 0.99, + "learning_rate": 4.0082294811042346e-05, + "loss": 0.2984, + "step": 176500 + }, + { + "epoch": 0.99, + "learning_rate": 4.007672979308252e-05, + "loss": 0.3015, + "step": 176600 + }, + { + "epoch": 0.99, + "learning_rate": 4.007110856282006e-05, + "loss": 0.3105, + "step": 176700 + }, + { + "epoch": 0.99, + "learning_rate": 4.0065487332557606e-05, + "loss": 0.3098, + "step": 176800 + }, + { + "epoch": 0.99, + "learning_rate": 4.005986610229515e-05, + "loss": 0.2978, + "step": 176900 + }, + { + "epoch": 0.99, + "learning_rate": 4.005424487203269e-05, + "loss": 0.2867, + "step": 177000 + }, + { + "epoch": 1.0, + "learning_rate": 4.004862364177024e-05, + "loss": 0.2984, + "step": 177100 + }, + { + "epoch": 1.0, + "learning_rate": 4.0043002411507784e-05, + "loss": 0.2934, + "step": 177200 + }, + { + "epoch": 1.0, + "learning_rate": 4.003738118124533e-05, + "loss": 0.2944, + "step": 177300 + }, + { + "epoch": 1.0, + "learning_rate": 4.003175995098287e-05, + "loss": 0.2971, + "step": 177400 + }, + { + "epoch": 1.0, + "learning_rate": 4.0026138720720416e-05, + "loss": 0.2925, + "step": 177500 + }, + { + "epoch": 1.0, + "learning_rate": 4.002051749045796e-05, + "loss": 0.299, + "step": 177600 + }, + { + "epoch": 1.0, + "learning_rate": 4.001489626019551e-05, + "loss": 0.302, + "step": 177700 + }, + { + "epoch": 1.0, + "learning_rate": 4.0009275029933055e-05, + "loss": 0.2972, + "step": 177800 + }, + { + "epoch": 1.0, + "eval_bleu": 73.3024, + "eval_cer": 2.836, + "eval_chrF": 94.14025518241344, + "eval_gen_len": 16.705368, + "eval_loss": 0.5591091513633728, + "eval_runtime": 7040.9934, + "eval_samples_per_second": 35.506, + "eval_steps_per_second": 0.555, + "eval_wer": 14.9407, + "step": 177897 + }, + { + "epoch": 1.0, + "learning_rate": 4.00036537996706e-05, + "loss": 0.3027, + "step": 177900 + }, + { + "epoch": 1.0, + "learning_rate": 3.999803256940815e-05, + "loss": 0.2924, + "step": 178000 + }, + { + "epoch": 1.0, + "learning_rate": 3.999241133914569e-05, + "loss": 0.2863, + "step": 178100 + }, + { + "epoch": 1.0, + "learning_rate": 3.998679010888323e-05, + "loss": 0.2799, + "step": 178200 + }, + { + "epoch": 1.0, + "learning_rate": 3.998116887862078e-05, + "loss": 0.2742, + "step": 178300 + }, + { + "epoch": 1.0, + "learning_rate": 3.997554764835832e-05, + "loss": 0.2857, + "step": 178400 + }, + { + "epoch": 1.0, + "learning_rate": 3.9969926418095865e-05, + "loss": 0.2903, + "step": 178500 + }, + { + "epoch": 1.0, + "learning_rate": 3.996430518783341e-05, + "loss": 0.2842, + "step": 178600 + }, + { + "epoch": 1.0, + "learning_rate": 3.995868395757096e-05, + "loss": 0.2902, + "step": 178700 + }, + { + "epoch": 1.01, + "learning_rate": 3.99530627273085e-05, + "loss": 0.284, + "step": 178800 + }, + { + "epoch": 1.01, + "learning_rate": 3.9947441497046044e-05, + "loss": 0.2848, + "step": 178900 + }, + { + "epoch": 1.01, + "learning_rate": 3.994182026678359e-05, + "loss": 0.2899, + "step": 179000 + }, + { + "epoch": 1.01, + "learning_rate": 3.993619903652113e-05, + "loss": 0.285, + "step": 179100 + }, + { + "epoch": 1.01, + "learning_rate": 3.9930577806258676e-05, + "loss": 0.2824, + "step": 179200 + }, + { + "epoch": 1.01, + "learning_rate": 3.992495657599622e-05, + "loss": 0.2885, + "step": 179300 + }, + { + "epoch": 1.01, + "learning_rate": 3.991933534573377e-05, + "loss": 0.2879, + "step": 179400 + }, + { + "epoch": 1.01, + "learning_rate": 3.9913714115471315e-05, + "loss": 0.29, + "step": 179500 + }, + { + "epoch": 1.01, + "learning_rate": 3.990809288520886e-05, + "loss": 0.2791, + "step": 179600 + }, + { + "epoch": 1.01, + "learning_rate": 3.990247165494641e-05, + "loss": 0.2746, + "step": 179700 + }, + { + "epoch": 1.01, + "learning_rate": 3.989685042468395e-05, + "loss": 0.2774, + "step": 179800 + }, + { + "epoch": 1.01, + "learning_rate": 3.989122919442149e-05, + "loss": 0.2824, + "step": 179900 + }, + { + "epoch": 1.01, + "learning_rate": 3.988560796415904e-05, + "loss": 0.2811, + "step": 180000 + }, + { + "epoch": 1.01, + "learning_rate": 3.9879986733896585e-05, + "loss": 0.2759, + "step": 180100 + }, + { + "epoch": 1.01, + "learning_rate": 3.9874365503634125e-05, + "loss": 0.2748, + "step": 180200 + }, + { + "epoch": 1.01, + "learning_rate": 3.986874427337167e-05, + "loss": 0.2849, + "step": 180300 + }, + { + "epoch": 1.01, + "learning_rate": 3.986312304310922e-05, + "loss": 0.2896, + "step": 180400 + }, + { + "epoch": 1.01, + "learning_rate": 3.985750181284676e-05, + "loss": 0.282, + "step": 180500 + }, + { + "epoch": 1.02, + "learning_rate": 3.98518805825843e-05, + "loss": 0.2777, + "step": 180600 + }, + { + "epoch": 1.02, + "learning_rate": 3.984625935232185e-05, + "loss": 0.2771, + "step": 180700 + }, + { + "epoch": 1.02, + "learning_rate": 3.9840638122059396e-05, + "loss": 0.2809, + "step": 180800 + }, + { + "epoch": 1.02, + "learning_rate": 3.9835073104099563e-05, + "loss": 0.2821, + "step": 180900 + }, + { + "epoch": 1.02, + "learning_rate": 3.982945187383711e-05, + "loss": 0.2863, + "step": 181000 + }, + { + "epoch": 1.02, + "learning_rate": 3.9823830643574656e-05, + "loss": 0.2818, + "step": 181100 + }, + { + "epoch": 1.02, + "learning_rate": 3.98182094133122e-05, + "loss": 0.2838, + "step": 181200 + }, + { + "epoch": 1.02, + "learning_rate": 3.981258818304974e-05, + "loss": 0.285, + "step": 181300 + }, + { + "epoch": 1.02, + "learning_rate": 3.980696695278729e-05, + "loss": 0.2736, + "step": 181400 + }, + { + "epoch": 1.02, + "learning_rate": 3.9801345722524834e-05, + "loss": 0.2821, + "step": 181500 + }, + { + "epoch": 1.02, + "learning_rate": 3.9795724492262374e-05, + "loss": 0.2817, + "step": 181600 + }, + { + "epoch": 1.02, + "learning_rate": 3.979010326199992e-05, + "loss": 0.2905, + "step": 181700 + }, + { + "epoch": 1.02, + "learning_rate": 3.9784482031737466e-05, + "loss": 0.2865, + "step": 181800 + }, + { + "epoch": 1.02, + "learning_rate": 3.977886080147501e-05, + "loss": 0.2769, + "step": 181900 + }, + { + "epoch": 1.02, + "learning_rate": 3.977323957121256e-05, + "loss": 0.2862, + "step": 182000 + }, + { + "epoch": 1.02, + "learning_rate": 3.9767618340950105e-05, + "loss": 0.2845, + "step": 182100 + }, + { + "epoch": 1.02, + "learning_rate": 3.976199711068765e-05, + "loss": 0.2818, + "step": 182200 + }, + { + "epoch": 1.02, + "learning_rate": 3.975637588042519e-05, + "loss": 0.2781, + "step": 182300 + }, + { + "epoch": 1.03, + "learning_rate": 3.975075465016274e-05, + "loss": 0.2883, + "step": 182400 + }, + { + "epoch": 1.03, + "learning_rate": 3.9745189632202905e-05, + "loss": 0.2735, + "step": 182500 + }, + { + "epoch": 1.03, + "learning_rate": 3.973956840194045e-05, + "loss": 0.2864, + "step": 182600 + }, + { + "epoch": 1.03, + "learning_rate": 3.973394717167799e-05, + "loss": 0.2758, + "step": 182700 + }, + { + "epoch": 1.03, + "learning_rate": 3.972832594141554e-05, + "loss": 0.2924, + "step": 182800 + }, + { + "epoch": 1.03, + "learning_rate": 3.972270471115308e-05, + "loss": 0.2872, + "step": 182900 + }, + { + "epoch": 1.03, + "learning_rate": 3.971708348089063e-05, + "loss": 0.2796, + "step": 183000 + }, + { + "epoch": 1.03, + "learning_rate": 3.9711462250628176e-05, + "loss": 0.2797, + "step": 183100 + }, + { + "epoch": 1.03, + "learning_rate": 3.9705897232668343e-05, + "loss": 0.2843, + "step": 183200 + }, + { + "epoch": 1.03, + "learning_rate": 3.970027600240589e-05, + "loss": 0.2785, + "step": 183300 + }, + { + "epoch": 1.03, + "learning_rate": 3.9694654772143436e-05, + "loss": 0.2867, + "step": 183400 + }, + { + "epoch": 1.03, + "learning_rate": 3.9689033541880976e-05, + "loss": 0.2883, + "step": 183500 + }, + { + "epoch": 1.03, + "learning_rate": 3.968341231161852e-05, + "loss": 0.2757, + "step": 183600 + }, + { + "epoch": 1.03, + "learning_rate": 3.967779108135607e-05, + "loss": 0.2897, + "step": 183700 + }, + { + "epoch": 1.03, + "learning_rate": 3.967216985109361e-05, + "loss": 0.2756, + "step": 183800 + }, + { + "epoch": 1.03, + "learning_rate": 3.9666548620831154e-05, + "loss": 0.2814, + "step": 183900 + }, + { + "epoch": 1.03, + "learning_rate": 3.96609273905687e-05, + "loss": 0.2811, + "step": 184000 + }, + { + "epoch": 1.03, + "learning_rate": 3.9655306160306246e-05, + "loss": 0.2859, + "step": 184100 + }, + { + "epoch": 1.04, + "learning_rate": 3.964968493004379e-05, + "loss": 0.2804, + "step": 184200 + }, + { + "epoch": 1.04, + "learning_rate": 3.964406369978134e-05, + "loss": 0.285, + "step": 184300 + }, + { + "epoch": 1.04, + "learning_rate": 3.9638442469518885e-05, + "loss": 0.2808, + "step": 184400 + }, + { + "epoch": 1.04, + "learning_rate": 3.9632821239256425e-05, + "loss": 0.2852, + "step": 184500 + }, + { + "epoch": 1.04, + "learning_rate": 3.962720000899397e-05, + "loss": 0.2849, + "step": 184600 + }, + { + "epoch": 1.04, + "learning_rate": 3.962157877873152e-05, + "loss": 0.2803, + "step": 184700 + }, + { + "epoch": 1.04, + "learning_rate": 3.9615957548469064e-05, + "loss": 0.2839, + "step": 184800 + }, + { + "epoch": 1.04, + "learning_rate": 3.96103363182066e-05, + "loss": 0.2885, + "step": 184900 + }, + { + "epoch": 1.04, + "learning_rate": 3.960471508794415e-05, + "loss": 0.2711, + "step": 185000 + }, + { + "epoch": 1.04, + "learning_rate": 3.9599093857681696e-05, + "loss": 0.2886, + "step": 185100 + }, + { + "epoch": 1.04, + "learning_rate": 3.9593472627419235e-05, + "loss": 0.2865, + "step": 185200 + }, + { + "epoch": 1.04, + "learning_rate": 3.958790760945941e-05, + "loss": 0.2875, + "step": 185300 + }, + { + "epoch": 1.04, + "learning_rate": 3.9582286379196956e-05, + "loss": 0.2854, + "step": 185400 + }, + { + "epoch": 1.04, + "learning_rate": 3.95766651489345e-05, + "loss": 0.2813, + "step": 185500 + }, + { + "epoch": 1.04, + "learning_rate": 3.957104391867204e-05, + "loss": 0.2836, + "step": 185600 + }, + { + "epoch": 1.04, + "learning_rate": 3.956542268840959e-05, + "loss": 0.2871, + "step": 185700 + }, + { + "epoch": 1.04, + "learning_rate": 3.9559801458147134e-05, + "loss": 0.2872, + "step": 185800 + }, + { + "epoch": 1.04, + "learning_rate": 3.9554180227884674e-05, + "loss": 0.2797, + "step": 185900 + }, + { + "epoch": 1.05, + "learning_rate": 3.954855899762222e-05, + "loss": 0.2819, + "step": 186000 + }, + { + "epoch": 1.05, + "learning_rate": 3.9542937767359766e-05, + "loss": 0.2818, + "step": 186100 + }, + { + "epoch": 1.05, + "learning_rate": 3.953731653709731e-05, + "loss": 0.2821, + "step": 186200 + }, + { + "epoch": 1.05, + "learning_rate": 3.953169530683485e-05, + "loss": 0.2777, + "step": 186300 + }, + { + "epoch": 1.05, + "learning_rate": 3.95260740765724e-05, + "loss": 0.2802, + "step": 186400 + }, + { + "epoch": 1.05, + "learning_rate": 3.9520452846309945e-05, + "loss": 0.282, + "step": 186500 + }, + { + "epoch": 1.05, + "learning_rate": 3.951483161604749e-05, + "loss": 0.2844, + "step": 186600 + }, + { + "epoch": 1.05, + "learning_rate": 3.950921038578504e-05, + "loss": 0.2829, + "step": 186700 + }, + { + "epoch": 1.05, + "learning_rate": 3.950358915552258e-05, + "loss": 0.2778, + "step": 186800 + }, + { + "epoch": 1.05, + "learning_rate": 3.949796792526013e-05, + "loss": 0.2861, + "step": 186900 + }, + { + "epoch": 1.05, + "learning_rate": 3.949234669499767e-05, + "loss": 0.2835, + "step": 187000 + }, + { + "epoch": 1.05, + "learning_rate": 3.9486725464735215e-05, + "loss": 0.2827, + "step": 187100 + }, + { + "epoch": 1.05, + "learning_rate": 3.948110423447276e-05, + "loss": 0.2886, + "step": 187200 + }, + { + "epoch": 1.05, + "learning_rate": 3.94754830042103e-05, + "loss": 0.2764, + "step": 187300 + }, + { + "epoch": 1.05, + "learning_rate": 3.946986177394785e-05, + "loss": 0.2807, + "step": 187400 + }, + { + "epoch": 1.05, + "learning_rate": 3.9464240543685394e-05, + "loss": 0.2849, + "step": 187500 + }, + { + "epoch": 1.05, + "learning_rate": 3.945861931342294e-05, + "loss": 0.2846, + "step": 187600 + }, + { + "epoch": 1.06, + "learning_rate": 3.945299808316048e-05, + "loss": 0.2836, + "step": 187700 + }, + { + "epoch": 1.06, + "learning_rate": 3.9447376852898026e-05, + "loss": 0.2829, + "step": 187800 + }, + { + "epoch": 1.06, + "learning_rate": 3.944175562263557e-05, + "loss": 0.2847, + "step": 187900 + }, + { + "epoch": 1.06, + "learning_rate": 3.943613439237311e-05, + "loss": 0.2825, + "step": 188000 + }, + { + "epoch": 1.06, + "learning_rate": 3.943051316211066e-05, + "loss": 0.2894, + "step": 188100 + }, + { + "epoch": 1.06, + "learning_rate": 3.9424891931848204e-05, + "loss": 0.2843, + "step": 188200 + }, + { + "epoch": 1.06, + "learning_rate": 3.941927070158575e-05, + "loss": 0.2798, + "step": 188300 + }, + { + "epoch": 1.06, + "learning_rate": 3.94136494713233e-05, + "loss": 0.2863, + "step": 188400 + }, + { + "epoch": 1.06, + "learning_rate": 3.940802824106084e-05, + "loss": 0.2782, + "step": 188500 + }, + { + "epoch": 1.06, + "learning_rate": 3.940240701079839e-05, + "loss": 0.2852, + "step": 188600 + }, + { + "epoch": 1.06, + "learning_rate": 3.939678578053593e-05, + "loss": 0.2784, + "step": 188700 + }, + { + "epoch": 1.06, + "learning_rate": 3.9391164550273475e-05, + "loss": 0.2853, + "step": 188800 + }, + { + "epoch": 1.06, + "learning_rate": 3.938554332001102e-05, + "loss": 0.278, + "step": 188900 + }, + { + "epoch": 1.06, + "learning_rate": 3.937992208974857e-05, + "loss": 0.2874, + "step": 189000 + }, + { + "epoch": 1.06, + "learning_rate": 3.937430085948611e-05, + "loss": 0.2824, + "step": 189100 + }, + { + "epoch": 1.06, + "learning_rate": 3.936867962922365e-05, + "loss": 0.2816, + "step": 189200 + }, + { + "epoch": 1.06, + "learning_rate": 3.93630583989612e-05, + "loss": 0.2794, + "step": 189300 + }, + { + "epoch": 1.06, + "learning_rate": 3.935743716869874e-05, + "loss": 0.2841, + "step": 189400 + }, + { + "epoch": 1.07, + "learning_rate": 3.9351815938436285e-05, + "loss": 0.279, + "step": 189500 + }, + { + "epoch": 1.07, + "learning_rate": 3.934619470817383e-05, + "loss": 0.2812, + "step": 189600 + }, + { + "epoch": 1.07, + "learning_rate": 3.934057347791138e-05, + "loss": 0.273, + "step": 189700 + }, + { + "epoch": 1.07, + "learning_rate": 3.933495224764892e-05, + "loss": 0.2898, + "step": 189800 + }, + { + "epoch": 1.07, + "learning_rate": 3.9329331017386464e-05, + "loss": 0.2819, + "step": 189900 + }, + { + "epoch": 1.07, + "learning_rate": 3.932370978712402e-05, + "loss": 0.2775, + "step": 190000 + }, + { + "epoch": 1.07, + "learning_rate": 3.9318088556861556e-05, + "loss": 0.2866, + "step": 190100 + }, + { + "epoch": 1.07, + "learning_rate": 3.93124673265991e-05, + "loss": 0.2874, + "step": 190200 + }, + { + "epoch": 1.07, + "learning_rate": 3.930684609633665e-05, + "loss": 0.2833, + "step": 190300 + }, + { + "epoch": 1.07, + "learning_rate": 3.9301224866074195e-05, + "loss": 0.2823, + "step": 190400 + }, + { + "epoch": 1.07, + "learning_rate": 3.9295603635811735e-05, + "loss": 0.2851, + "step": 190500 + }, + { + "epoch": 1.07, + "learning_rate": 3.928998240554928e-05, + "loss": 0.2856, + "step": 190600 + }, + { + "epoch": 1.07, + "learning_rate": 3.928436117528683e-05, + "loss": 0.2784, + "step": 190700 + }, + { + "epoch": 1.07, + "learning_rate": 3.927873994502437e-05, + "loss": 0.2831, + "step": 190800 + }, + { + "epoch": 1.07, + "learning_rate": 3.927311871476191e-05, + "loss": 0.2805, + "step": 190900 + }, + { + "epoch": 1.07, + "learning_rate": 3.926749748449946e-05, + "loss": 0.2806, + "step": 191000 + }, + { + "epoch": 1.07, + "learning_rate": 3.9261876254237005e-05, + "loss": 0.2799, + "step": 191100 + }, + { + "epoch": 1.07, + "learning_rate": 3.9256255023974545e-05, + "loss": 0.282, + "step": 191200 + }, + { + "epoch": 1.08, + "learning_rate": 3.925063379371209e-05, + "loss": 0.2842, + "step": 191300 + }, + { + "epoch": 1.08, + "learning_rate": 3.924501256344964e-05, + "loss": 0.2793, + "step": 191400 + }, + { + "epoch": 1.08, + "learning_rate": 3.9239391333187184e-05, + "loss": 0.2864, + "step": 191500 + }, + { + "epoch": 1.08, + "learning_rate": 3.923377010292473e-05, + "loss": 0.2811, + "step": 191600 + }, + { + "epoch": 1.08, + "learning_rate": 3.9228148872662276e-05, + "loss": 0.2871, + "step": 191700 + }, + { + "epoch": 1.08, + "learning_rate": 3.922252764239982e-05, + "loss": 0.2762, + "step": 191800 + }, + { + "epoch": 1.08, + "learning_rate": 3.921690641213736e-05, + "loss": 0.2763, + "step": 191900 + }, + { + "epoch": 1.08, + "learning_rate": 3.921128518187491e-05, + "loss": 0.2758, + "step": 192000 + }, + { + "epoch": 1.08, + "learning_rate": 3.9205663951612455e-05, + "loss": 0.2815, + "step": 192100 + }, + { + "epoch": 1.08, + "learning_rate": 3.9200042721349994e-05, + "loss": 0.2806, + "step": 192200 + }, + { + "epoch": 1.08, + "learning_rate": 3.919442149108754e-05, + "loss": 0.2847, + "step": 192300 + }, + { + "epoch": 1.08, + "learning_rate": 3.918880026082509e-05, + "loss": 0.2811, + "step": 192400 + }, + { + "epoch": 1.08, + "learning_rate": 3.918317903056263e-05, + "loss": 0.2897, + "step": 192500 + }, + { + "epoch": 1.08, + "learning_rate": 3.917755780030017e-05, + "loss": 0.2782, + "step": 192600 + }, + { + "epoch": 1.08, + "learning_rate": 3.917193657003772e-05, + "loss": 0.2783, + "step": 192700 + }, + { + "epoch": 1.08, + "learning_rate": 3.9166315339775265e-05, + "loss": 0.2786, + "step": 192800 + }, + { + "epoch": 1.08, + "learning_rate": 3.9160694109512805e-05, + "loss": 0.2782, + "step": 192900 + }, + { + "epoch": 1.08, + "learning_rate": 3.915507287925035e-05, + "loss": 0.2796, + "step": 193000 + }, + { + "epoch": 1.09, + "learning_rate": 3.91494516489879e-05, + "loss": 0.2732, + "step": 193100 + }, + { + "epoch": 1.09, + "learning_rate": 3.9143830418725443e-05, + "loss": 0.2854, + "step": 193200 + }, + { + "epoch": 1.09, + "learning_rate": 3.913820918846299e-05, + "loss": 0.2881, + "step": 193300 + }, + { + "epoch": 1.09, + "learning_rate": 3.9132587958200536e-05, + "loss": 0.276, + "step": 193400 + }, + { + "epoch": 1.09, + "learning_rate": 3.912696672793808e-05, + "loss": 0.282, + "step": 193500 + }, + { + "epoch": 1.09, + "learning_rate": 3.912140170997825e-05, + "loss": 0.2811, + "step": 193600 + }, + { + "epoch": 1.09, + "learning_rate": 3.911578047971579e-05, + "loss": 0.2894, + "step": 193700 + }, + { + "epoch": 1.09, + "learning_rate": 3.9110159249453336e-05, + "loss": 0.2815, + "step": 193800 + }, + { + "epoch": 1.09, + "learning_rate": 3.910453801919088e-05, + "loss": 0.2757, + "step": 193900 + }, + { + "epoch": 1.09, + "learning_rate": 3.909891678892842e-05, + "loss": 0.2751, + "step": 194000 + }, + { + "epoch": 1.09, + "learning_rate": 3.909329555866597e-05, + "loss": 0.2815, + "step": 194100 + }, + { + "epoch": 1.09, + "learning_rate": 3.908767432840352e-05, + "loss": 0.2799, + "step": 194200 + }, + { + "epoch": 1.09, + "learning_rate": 3.908205309814107e-05, + "loss": 0.2748, + "step": 194300 + }, + { + "epoch": 1.09, + "learning_rate": 3.9076431867878607e-05, + "loss": 0.2768, + "step": 194400 + }, + { + "epoch": 1.09, + "learning_rate": 3.907081063761615e-05, + "loss": 0.2816, + "step": 194500 + }, + { + "epoch": 1.09, + "learning_rate": 3.90651894073537e-05, + "loss": 0.2813, + "step": 194600 + }, + { + "epoch": 1.09, + "learning_rate": 3.905962438939387e-05, + "loss": 0.2795, + "step": 194700 + }, + { + "epoch": 1.1, + "learning_rate": 3.9054003159131406e-05, + "loss": 0.2765, + "step": 194800 + }, + { + "epoch": 1.1, + "learning_rate": 3.904838192886895e-05, + "loss": 0.2779, + "step": 194900 + }, + { + "epoch": 1.1, + "learning_rate": 3.90427606986065e-05, + "loss": 0.2779, + "step": 195000 + }, + { + "epoch": 1.1, + "learning_rate": 3.903713946834404e-05, + "loss": 0.2783, + "step": 195100 + }, + { + "epoch": 1.1, + "learning_rate": 3.903151823808159e-05, + "loss": 0.2718, + "step": 195200 + }, + { + "epoch": 1.1, + "learning_rate": 3.902589700781914e-05, + "loss": 0.2881, + "step": 195300 + }, + { + "epoch": 1.1, + "learning_rate": 3.902027577755668e-05, + "loss": 0.2825, + "step": 195400 + }, + { + "epoch": 1.1, + "learning_rate": 3.9014654547294223e-05, + "loss": 0.278, + "step": 195500 + }, + { + "epoch": 1.1, + "learning_rate": 3.900903331703177e-05, + "loss": 0.2716, + "step": 195600 + }, + { + "epoch": 1.1, + "learning_rate": 3.9003412086769316e-05, + "loss": 0.276, + "step": 195700 + }, + { + "epoch": 1.1, + "learning_rate": 3.8997790856506855e-05, + "loss": 0.2759, + "step": 195800 + }, + { + "epoch": 1.1, + "learning_rate": 3.89921696262444e-05, + "loss": 0.2912, + "step": 195900 + }, + { + "epoch": 1.1, + "learning_rate": 3.898654839598195e-05, + "loss": 0.2797, + "step": 196000 + }, + { + "epoch": 1.1, + "learning_rate": 3.898092716571949e-05, + "loss": 0.2723, + "step": 196100 + }, + { + "epoch": 1.1, + "learning_rate": 3.8975305935457034e-05, + "loss": 0.2759, + "step": 196200 + }, + { + "epoch": 1.1, + "learning_rate": 3.896968470519458e-05, + "loss": 0.2746, + "step": 196300 + }, + { + "epoch": 1.1, + "learning_rate": 3.8964063474932126e-05, + "loss": 0.2796, + "step": 196400 + }, + { + "epoch": 1.1, + "learning_rate": 3.8958442244669666e-05, + "loss": 0.2752, + "step": 196500 + }, + { + "epoch": 1.11, + "learning_rate": 3.895282101440721e-05, + "loss": 0.278, + "step": 196600 + }, + { + "epoch": 1.11, + "learning_rate": 3.894719978414476e-05, + "loss": 0.274, + "step": 196700 + }, + { + "epoch": 1.11, + "learning_rate": 3.8941578553882305e-05, + "loss": 0.2767, + "step": 196800 + }, + { + "epoch": 1.11, + "learning_rate": 3.893595732361985e-05, + "loss": 0.2791, + "step": 196900 + }, + { + "epoch": 1.11, + "learning_rate": 3.89303360933574e-05, + "loss": 0.2781, + "step": 197000 + }, + { + "epoch": 1.11, + "learning_rate": 3.8924714863094944e-05, + "loss": 0.2882, + "step": 197100 + }, + { + "epoch": 1.11, + "learning_rate": 3.891909363283248e-05, + "loss": 0.2808, + "step": 197200 + }, + { + "epoch": 1.11, + "learning_rate": 3.891347240257003e-05, + "loss": 0.2745, + "step": 197300 + }, + { + "epoch": 1.11, + "learning_rate": 3.8907851172307576e-05, + "loss": 0.2792, + "step": 197400 + }, + { + "epoch": 1.11, + "learning_rate": 3.8902229942045115e-05, + "loss": 0.2796, + "step": 197500 + }, + { + "epoch": 1.11, + "learning_rate": 3.889660871178266e-05, + "loss": 0.2686, + "step": 197600 + }, + { + "epoch": 1.11, + "learning_rate": 3.889098748152021e-05, + "loss": 0.2774, + "step": 197700 + }, + { + "epoch": 1.11, + "learning_rate": 3.8885366251257754e-05, + "loss": 0.2822, + "step": 197800 + }, + { + "epoch": 1.11, + "learning_rate": 3.8879745020995293e-05, + "loss": 0.2792, + "step": 197900 + }, + { + "epoch": 1.11, + "learning_rate": 3.887412379073284e-05, + "loss": 0.2729, + "step": 198000 + }, + { + "epoch": 1.11, + "learning_rate": 3.8868502560470386e-05, + "loss": 0.2798, + "step": 198100 + }, + { + "epoch": 1.11, + "learning_rate": 3.8862881330207925e-05, + "loss": 0.2804, + "step": 198200 + }, + { + "epoch": 1.11, + "learning_rate": 3.885726009994547e-05, + "loss": 0.2771, + "step": 198300 + }, + { + "epoch": 1.12, + "learning_rate": 3.885163886968302e-05, + "loss": 0.2778, + "step": 198400 + }, + { + "epoch": 1.12, + "learning_rate": 3.884601763942057e-05, + "loss": 0.282, + "step": 198500 + }, + { + "epoch": 1.12, + "learning_rate": 3.884039640915811e-05, + "loss": 0.2775, + "step": 198600 + }, + { + "epoch": 1.12, + "learning_rate": 3.883477517889566e-05, + "loss": 0.2767, + "step": 198700 + }, + { + "epoch": 1.12, + "learning_rate": 3.88291539486332e-05, + "loss": 0.2742, + "step": 198800 + }, + { + "epoch": 1.12, + "learning_rate": 3.882353271837074e-05, + "loss": 0.2802, + "step": 198900 + }, + { + "epoch": 1.12, + "learning_rate": 3.881791148810829e-05, + "loss": 0.2818, + "step": 199000 + }, + { + "epoch": 1.12, + "learning_rate": 3.8812290257845835e-05, + "loss": 0.2772, + "step": 199100 + }, + { + "epoch": 1.12, + "learning_rate": 3.8806725239886e-05, + "loss": 0.2739, + "step": 199200 + }, + { + "epoch": 1.12, + "learning_rate": 3.880110400962354e-05, + "loss": 0.2774, + "step": 199300 + }, + { + "epoch": 1.12, + "learning_rate": 3.8795482779361095e-05, + "loss": 0.2772, + "step": 199400 + }, + { + "epoch": 1.12, + "learning_rate": 3.878986154909864e-05, + "loss": 0.2802, + "step": 199500 + }, + { + "epoch": 1.12, + "learning_rate": 3.878424031883619e-05, + "loss": 0.2746, + "step": 199600 + }, + { + "epoch": 1.12, + "learning_rate": 3.877861908857373e-05, + "loss": 0.2773, + "step": 199700 + }, + { + "epoch": 1.12, + "learning_rate": 3.8772997858311274e-05, + "loss": 0.2817, + "step": 199800 + }, + { + "epoch": 1.12, + "learning_rate": 3.876737662804882e-05, + "loss": 0.2814, + "step": 199900 + }, + { + "epoch": 1.12, + "learning_rate": 3.876175539778636e-05, + "loss": 0.2741, + "step": 200000 + }, + { + "epoch": 1.12, + "learning_rate": 3.8756134167523906e-05, + "loss": 0.2765, + "step": 200100 + }, + { + "epoch": 1.13, + "learning_rate": 3.875051293726145e-05, + "loss": 0.2738, + "step": 200200 + }, + { + "epoch": 1.13, + "learning_rate": 3.8744891706999e-05, + "loss": 0.2794, + "step": 200300 + }, + { + "epoch": 1.13, + "learning_rate": 3.873927047673654e-05, + "loss": 0.275, + "step": 200400 + }, + { + "epoch": 1.13, + "learning_rate": 3.8733649246474084e-05, + "loss": 0.2679, + "step": 200500 + }, + { + "epoch": 1.13, + "learning_rate": 3.872802801621163e-05, + "loss": 0.2781, + "step": 200600 + }, + { + "epoch": 1.13, + "learning_rate": 3.872240678594917e-05, + "loss": 0.2747, + "step": 200700 + }, + { + "epoch": 1.13, + "learning_rate": 3.8716785555686716e-05, + "loss": 0.2794, + "step": 200800 + }, + { + "epoch": 1.13, + "learning_rate": 3.871122053772689e-05, + "loss": 0.2838, + "step": 200900 + }, + { + "epoch": 1.13, + "learning_rate": 3.870559930746444e-05, + "loss": 0.2697, + "step": 201000 + }, + { + "epoch": 1.13, + "learning_rate": 3.8699978077201976e-05, + "loss": 0.2834, + "step": 201100 + }, + { + "epoch": 1.13, + "learning_rate": 3.869435684693952e-05, + "loss": 0.268, + "step": 201200 + }, + { + "epoch": 1.13, + "learning_rate": 3.868873561667707e-05, + "loss": 0.2834, + "step": 201300 + }, + { + "epoch": 1.13, + "learning_rate": 3.8683114386414615e-05, + "loss": 0.2749, + "step": 201400 + }, + { + "epoch": 1.13, + "learning_rate": 3.8677493156152155e-05, + "loss": 0.2797, + "step": 201500 + }, + { + "epoch": 1.13, + "learning_rate": 3.86718719258897e-05, + "loss": 0.2799, + "step": 201600 + }, + { + "epoch": 1.13, + "learning_rate": 3.866625069562725e-05, + "loss": 0.274, + "step": 201700 + }, + { + "epoch": 1.13, + "learning_rate": 3.866062946536479e-05, + "loss": 0.279, + "step": 201800 + }, + { + "epoch": 1.13, + "learning_rate": 3.865500823510233e-05, + "loss": 0.2747, + "step": 201900 + }, + { + "epoch": 1.14, + "learning_rate": 3.864938700483988e-05, + "loss": 0.2799, + "step": 202000 + }, + { + "epoch": 1.14, + "learning_rate": 3.8643765774577426e-05, + "loss": 0.2792, + "step": 202100 + }, + { + "epoch": 1.14, + "learning_rate": 3.863814454431497e-05, + "loss": 0.2707, + "step": 202200 + }, + { + "epoch": 1.14, + "learning_rate": 3.863252331405252e-05, + "loss": 0.2757, + "step": 202300 + }, + { + "epoch": 1.14, + "learning_rate": 3.8626902083790064e-05, + "loss": 0.2736, + "step": 202400 + }, + { + "epoch": 1.14, + "learning_rate": 3.8621280853527604e-05, + "loss": 0.2831, + "step": 202500 + }, + { + "epoch": 1.14, + "learning_rate": 3.861565962326515e-05, + "loss": 0.2812, + "step": 202600 + }, + { + "epoch": 1.14, + "learning_rate": 3.8610038393002696e-05, + "loss": 0.2722, + "step": 202700 + }, + { + "epoch": 1.14, + "learning_rate": 3.860441716274024e-05, + "loss": 0.2679, + "step": 202800 + }, + { + "epoch": 1.14, + "learning_rate": 3.859879593247778e-05, + "loss": 0.2818, + "step": 202900 + }, + { + "epoch": 1.14, + "learning_rate": 3.859317470221533e-05, + "loss": 0.2746, + "step": 203000 + }, + { + "epoch": 1.14, + "learning_rate": 3.8587553471952875e-05, + "loss": 0.2683, + "step": 203100 + }, + { + "epoch": 1.14, + "learning_rate": 3.8581932241690414e-05, + "loss": 0.2777, + "step": 203200 + }, + { + "epoch": 1.14, + "learning_rate": 3.857631101142796e-05, + "loss": 0.2751, + "step": 203300 + }, + { + "epoch": 1.14, + "learning_rate": 3.857068978116551e-05, + "loss": 0.2683, + "step": 203400 + }, + { + "epoch": 1.14, + "learning_rate": 3.856506855090305e-05, + "loss": 0.2828, + "step": 203500 + }, + { + "epoch": 1.14, + "learning_rate": 3.855944732064059e-05, + "loss": 0.2756, + "step": 203600 + }, + { + "epoch": 1.15, + "learning_rate": 3.8553826090378146e-05, + "loss": 0.2755, + "step": 203700 + }, + { + "epoch": 1.15, + "learning_rate": 3.854820486011569e-05, + "loss": 0.2796, + "step": 203800 + }, + { + "epoch": 1.15, + "learning_rate": 3.854258362985323e-05, + "loss": 0.269, + "step": 203900 + }, + { + "epoch": 1.15, + "learning_rate": 3.853696239959078e-05, + "loss": 0.2729, + "step": 204000 + }, + { + "epoch": 1.15, + "learning_rate": 3.8531341169328324e-05, + "loss": 0.2754, + "step": 204100 + }, + { + "epoch": 1.15, + "learning_rate": 3.852571993906587e-05, + "loss": 0.2814, + "step": 204200 + }, + { + "epoch": 1.15, + "learning_rate": 3.852009870880341e-05, + "loss": 0.2739, + "step": 204300 + }, + { + "epoch": 1.15, + "learning_rate": 3.8514477478540956e-05, + "loss": 0.28, + "step": 204400 + }, + { + "epoch": 1.15, + "learning_rate": 3.85088562482785e-05, + "loss": 0.276, + "step": 204500 + }, + { + "epoch": 1.15, + "learning_rate": 3.850323501801604e-05, + "loss": 0.2823, + "step": 204600 + }, + { + "epoch": 1.15, + "learning_rate": 3.849761378775359e-05, + "loss": 0.2703, + "step": 204700 + }, + { + "epoch": 1.15, + "learning_rate": 3.8491992557491134e-05, + "loss": 0.2764, + "step": 204800 + }, + { + "epoch": 1.15, + "learning_rate": 3.848637132722868e-05, + "loss": 0.2731, + "step": 204900 + }, + { + "epoch": 1.15, + "learning_rate": 3.848075009696622e-05, + "loss": 0.2729, + "step": 205000 + }, + { + "epoch": 1.15, + "learning_rate": 3.8475128866703766e-05, + "loss": 0.2767, + "step": 205100 + }, + { + "epoch": 1.15, + "learning_rate": 3.846956384874394e-05, + "loss": 0.279, + "step": 205200 + }, + { + "epoch": 1.15, + "learning_rate": 3.846394261848148e-05, + "loss": 0.2738, + "step": 205300 + }, + { + "epoch": 1.15, + "learning_rate": 3.8458321388219027e-05, + "loss": 0.2719, + "step": 205400 + }, + { + "epoch": 1.16, + "learning_rate": 3.845270015795657e-05, + "loss": 0.2738, + "step": 205500 + }, + { + "epoch": 1.16, + "learning_rate": 3.844707892769412e-05, + "loss": 0.2721, + "step": 205600 + }, + { + "epoch": 1.16, + "learning_rate": 3.844145769743166e-05, + "loss": 0.2765, + "step": 205700 + }, + { + "epoch": 1.16, + "learning_rate": 3.8435836467169205e-05, + "loss": 0.2705, + "step": 205800 + }, + { + "epoch": 1.16, + "learning_rate": 3.843021523690675e-05, + "loss": 0.2824, + "step": 205900 + }, + { + "epoch": 1.16, + "learning_rate": 3.842459400664429e-05, + "loss": 0.2779, + "step": 206000 + }, + { + "epoch": 1.16, + "learning_rate": 3.841897277638184e-05, + "loss": 0.2676, + "step": 206100 + }, + { + "epoch": 1.16, + "learning_rate": 3.841335154611938e-05, + "loss": 0.2688, + "step": 206200 + }, + { + "epoch": 1.16, + "learning_rate": 3.840773031585693e-05, + "loss": 0.271, + "step": 206300 + }, + { + "epoch": 1.16, + "learning_rate": 3.8402109085594476e-05, + "loss": 0.2851, + "step": 206400 + }, + { + "epoch": 1.16, + "learning_rate": 3.839648785533202e-05, + "loss": 0.2722, + "step": 206500 + }, + { + "epoch": 1.16, + "learning_rate": 3.839086662506957e-05, + "loss": 0.2709, + "step": 206600 + }, + { + "epoch": 1.16, + "learning_rate": 3.838524539480711e-05, + "loss": 0.2705, + "step": 206700 + }, + { + "epoch": 1.16, + "learning_rate": 3.8379624164544654e-05, + "loss": 0.2728, + "step": 206800 + }, + { + "epoch": 1.16, + "learning_rate": 3.83740029342822e-05, + "loss": 0.2732, + "step": 206900 + }, + { + "epoch": 1.16, + "learning_rate": 3.836838170401975e-05, + "loss": 0.28, + "step": 207000 + }, + { + "epoch": 1.16, + "learning_rate": 3.8362760473757286e-05, + "loss": 0.275, + "step": 207100 + }, + { + "epoch": 1.16, + "learning_rate": 3.835713924349483e-05, + "loss": 0.2771, + "step": 207200 + }, + { + "epoch": 1.17, + "learning_rate": 3.835151801323238e-05, + "loss": 0.2692, + "step": 207300 + }, + { + "epoch": 1.17, + "learning_rate": 3.834589678296992e-05, + "loss": 0.2747, + "step": 207400 + }, + { + "epoch": 1.17, + "learning_rate": 3.8340275552707465e-05, + "loss": 0.2742, + "step": 207500 + }, + { + "epoch": 1.17, + "learning_rate": 3.833465432244501e-05, + "loss": 0.2779, + "step": 207600 + }, + { + "epoch": 1.17, + "learning_rate": 3.832903309218256e-05, + "loss": 0.2772, + "step": 207700 + }, + { + "epoch": 1.17, + "learning_rate": 3.8323411861920097e-05, + "loss": 0.2741, + "step": 207800 + }, + { + "epoch": 1.17, + "learning_rate": 3.831779063165765e-05, + "loss": 0.2755, + "step": 207900 + }, + { + "epoch": 1.17, + "learning_rate": 3.8312169401395196e-05, + "loss": 0.2723, + "step": 208000 + }, + { + "epoch": 1.17, + "learning_rate": 3.8306548171132735e-05, + "loss": 0.2778, + "step": 208100 + }, + { + "epoch": 1.17, + "learning_rate": 3.83009831531729e-05, + "loss": 0.2775, + "step": 208200 + }, + { + "epoch": 1.17, + "learning_rate": 3.829536192291045e-05, + "loss": 0.2672, + "step": 208300 + }, + { + "epoch": 1.17, + "learning_rate": 3.8289740692647996e-05, + "loss": 0.2726, + "step": 208400 + }, + { + "epoch": 1.17, + "learning_rate": 3.8284119462385535e-05, + "loss": 0.2756, + "step": 208500 + }, + { + "epoch": 1.17, + "learning_rate": 3.827849823212308e-05, + "loss": 0.2726, + "step": 208600 + }, + { + "epoch": 1.17, + "learning_rate": 3.827287700186063e-05, + "loss": 0.2709, + "step": 208700 + }, + { + "epoch": 1.17, + "learning_rate": 3.8267255771598174e-05, + "loss": 0.2737, + "step": 208800 + }, + { + "epoch": 1.17, + "learning_rate": 3.826163454133572e-05, + "loss": 0.267, + "step": 208900 + }, + { + "epoch": 1.17, + "learning_rate": 3.8256013311073266e-05, + "loss": 0.2748, + "step": 209000 + }, + { + "epoch": 1.18, + "learning_rate": 3.825039208081081e-05, + "loss": 0.2797, + "step": 209100 + }, + { + "epoch": 1.18, + "learning_rate": 3.824477085054835e-05, + "loss": 0.2726, + "step": 209200 + }, + { + "epoch": 1.18, + "learning_rate": 3.82391496202859e-05, + "loss": 0.2735, + "step": 209300 + }, + { + "epoch": 1.18, + "learning_rate": 3.8233528390023445e-05, + "loss": 0.2807, + "step": 209400 + }, + { + "epoch": 1.18, + "learning_rate": 3.822790715976099e-05, + "loss": 0.2674, + "step": 209500 + }, + { + "epoch": 1.18, + "learning_rate": 3.822228592949853e-05, + "loss": 0.2769, + "step": 209600 + }, + { + "epoch": 1.18, + "learning_rate": 3.821666469923608e-05, + "loss": 0.2698, + "step": 209700 + }, + { + "epoch": 1.18, + "learning_rate": 3.821104346897362e-05, + "loss": 0.2708, + "step": 209800 + }, + { + "epoch": 1.18, + "learning_rate": 3.820542223871116e-05, + "loss": 0.2769, + "step": 209900 + }, + { + "epoch": 1.18, + "learning_rate": 3.819980100844871e-05, + "loss": 0.2785, + "step": 210000 + }, + { + "epoch": 1.18, + "learning_rate": 3.819423599048888e-05, + "loss": 0.2744, + "step": 210100 + }, + { + "epoch": 1.18, + "learning_rate": 3.818861476022643e-05, + "loss": 0.2811, + "step": 210200 + }, + { + "epoch": 1.18, + "learning_rate": 3.818299352996397e-05, + "loss": 0.274, + "step": 210300 + }, + { + "epoch": 1.18, + "learning_rate": 3.8177372299701515e-05, + "loss": 0.2709, + "step": 210400 + }, + { + "epoch": 1.18, + "learning_rate": 3.817175106943906e-05, + "loss": 0.2778, + "step": 210500 + }, + { + "epoch": 1.18, + "learning_rate": 3.816612983917661e-05, + "loss": 0.2695, + "step": 210600 + }, + { + "epoch": 1.18, + "learning_rate": 3.816050860891415e-05, + "loss": 0.2703, + "step": 210700 + }, + { + "epoch": 1.18, + "learning_rate": 3.8154887378651694e-05, + "loss": 0.2696, + "step": 210800 + }, + { + "epoch": 1.19, + "learning_rate": 3.814926614838924e-05, + "loss": 0.2696, + "step": 210900 + }, + { + "epoch": 1.19, + "learning_rate": 3.814364491812678e-05, + "loss": 0.2717, + "step": 211000 + }, + { + "epoch": 1.19, + "learning_rate": 3.8138023687864326e-05, + "loss": 0.2759, + "step": 211100 + }, + { + "epoch": 1.19, + "learning_rate": 3.813240245760187e-05, + "loss": 0.2667, + "step": 211200 + }, + { + "epoch": 1.19, + "learning_rate": 3.812678122733942e-05, + "loss": 0.2674, + "step": 211300 + }, + { + "epoch": 1.19, + "learning_rate": 3.812115999707696e-05, + "loss": 0.2709, + "step": 211400 + }, + { + "epoch": 1.19, + "learning_rate": 3.8115538766814504e-05, + "loss": 0.2684, + "step": 211500 + }, + { + "epoch": 1.19, + "learning_rate": 3.810991753655205e-05, + "loss": 0.2744, + "step": 211600 + }, + { + "epoch": 1.19, + "learning_rate": 3.81042963062896e-05, + "loss": 0.2756, + "step": 211700 + }, + { + "epoch": 1.19, + "learning_rate": 3.809867507602714e-05, + "loss": 0.2722, + "step": 211800 + }, + { + "epoch": 1.19, + "learning_rate": 3.809305384576469e-05, + "loss": 0.2725, + "step": 211900 + }, + { + "epoch": 1.19, + "learning_rate": 3.8087432615502236e-05, + "loss": 0.2755, + "step": 212000 + }, + { + "epoch": 1.19, + "learning_rate": 3.8081811385239775e-05, + "loss": 0.2751, + "step": 212100 + }, + { + "epoch": 1.19, + "learning_rate": 3.807619015497732e-05, + "loss": 0.2702, + "step": 212200 + }, + { + "epoch": 1.19, + "learning_rate": 3.807056892471487e-05, + "loss": 0.2659, + "step": 212300 + }, + { + "epoch": 1.19, + "learning_rate": 3.806494769445241e-05, + "loss": 0.2724, + "step": 212400 + }, + { + "epoch": 1.19, + "learning_rate": 3.805932646418995e-05, + "loss": 0.2653, + "step": 212500 + }, + { + "epoch": 1.2, + "learning_rate": 3.80537052339275e-05, + "loss": 0.2696, + "step": 212600 + }, + { + "epoch": 1.2, + "learning_rate": 3.8048084003665046e-05, + "loss": 0.2748, + "step": 212700 + }, + { + "epoch": 1.2, + "learning_rate": 3.8042462773402585e-05, + "loss": 0.2761, + "step": 212800 + }, + { + "epoch": 1.2, + "learning_rate": 3.803684154314013e-05, + "loss": 0.2695, + "step": 212900 + }, + { + "epoch": 1.2, + "learning_rate": 3.803122031287768e-05, + "loss": 0.2775, + "step": 213000 + }, + { + "epoch": 1.2, + "learning_rate": 3.8025599082615224e-05, + "loss": 0.2709, + "step": 213100 + }, + { + "epoch": 1.2, + "learning_rate": 3.801997785235277e-05, + "loss": 0.2661, + "step": 213200 + }, + { + "epoch": 1.2, + "learning_rate": 3.801435662209032e-05, + "loss": 0.2677, + "step": 213300 + }, + { + "epoch": 1.2, + "learning_rate": 3.800873539182786e-05, + "loss": 0.2683, + "step": 213400 + }, + { + "epoch": 1.2, + "learning_rate": 3.80031141615654e-05, + "loss": 0.2767, + "step": 213500 + }, + { + "epoch": 1.2, + "learning_rate": 3.799749293130295e-05, + "loss": 0.2723, + "step": 213600 + }, + { + "epoch": 1.2, + "learning_rate": 3.7991871701040495e-05, + "loss": 0.28, + "step": 213700 + }, + { + "epoch": 1.2, + "learning_rate": 3.7986250470778035e-05, + "loss": 0.2732, + "step": 213800 + }, + { + "epoch": 1.2, + "learning_rate": 3.798062924051558e-05, + "loss": 0.2735, + "step": 213900 + }, + { + "epoch": 1.2, + "learning_rate": 3.797500801025313e-05, + "loss": 0.2653, + "step": 214000 + }, + { + "epoch": 1.2, + "learning_rate": 3.7969386779990673e-05, + "loss": 0.2734, + "step": 214100 + }, + { + "epoch": 1.2, + "learning_rate": 3.796376554972821e-05, + "loss": 0.2723, + "step": 214200 + }, + { + "epoch": 1.2, + "learning_rate": 3.795814431946576e-05, + "loss": 0.28, + "step": 214300 + }, + { + "epoch": 1.21, + "learning_rate": 3.7952523089203305e-05, + "loss": 0.2746, + "step": 214400 + }, + { + "epoch": 1.21, + "learning_rate": 3.7946901858940845e-05, + "loss": 0.2669, + "step": 214500 + }, + { + "epoch": 1.21, + "learning_rate": 3.794128062867839e-05, + "loss": 0.2753, + "step": 214600 + }, + { + "epoch": 1.21, + "learning_rate": 3.793565939841594e-05, + "loss": 0.2681, + "step": 214700 + }, + { + "epoch": 1.21, + "learning_rate": 3.7930038168153484e-05, + "loss": 0.2693, + "step": 214800 + }, + { + "epoch": 1.21, + "learning_rate": 3.792441693789103e-05, + "loss": 0.2739, + "step": 214900 + }, + { + "epoch": 1.21, + "learning_rate": 3.7918795707628576e-05, + "loss": 0.2698, + "step": 215000 + }, + { + "epoch": 1.21, + "learning_rate": 3.791317447736612e-05, + "loss": 0.2755, + "step": 215100 + }, + { + "epoch": 1.21, + "learning_rate": 3.790755324710366e-05, + "loss": 0.2746, + "step": 215200 + }, + { + "epoch": 1.21, + "learning_rate": 3.790193201684121e-05, + "loss": 0.2719, + "step": 215300 + }, + { + "epoch": 1.21, + "learning_rate": 3.7896310786578755e-05, + "loss": 0.2796, + "step": 215400 + }, + { + "epoch": 1.21, + "learning_rate": 3.7890689556316294e-05, + "loss": 0.2741, + "step": 215500 + }, + { + "epoch": 1.21, + "learning_rate": 3.788506832605384e-05, + "loss": 0.2684, + "step": 215600 + }, + { + "epoch": 1.21, + "learning_rate": 3.787944709579139e-05, + "loss": 0.2702, + "step": 215700 + }, + { + "epoch": 1.21, + "learning_rate": 3.787382586552893e-05, + "loss": 0.2658, + "step": 215800 + }, + { + "epoch": 1.21, + "learning_rate": 3.786820463526647e-05, + "loss": 0.2768, + "step": 215900 + }, + { + "epoch": 1.21, + "learning_rate": 3.786258340500402e-05, + "loss": 0.2756, + "step": 216000 + }, + { + "epoch": 1.21, + "learning_rate": 3.7856962174741565e-05, + "loss": 0.276, + "step": 216100 + }, + { + "epoch": 1.22, + "learning_rate": 3.7851340944479105e-05, + "loss": 0.2694, + "step": 216200 + }, + { + "epoch": 1.22, + "learning_rate": 3.784571971421665e-05, + "loss": 0.2781, + "step": 216300 + }, + { + "epoch": 1.22, + "learning_rate": 3.78400984839542e-05, + "loss": 0.2668, + "step": 216400 + }, + { + "epoch": 1.22, + "learning_rate": 3.783453346599437e-05, + "loss": 0.2741, + "step": 216500 + }, + { + "epoch": 1.22, + "learning_rate": 3.782891223573191e-05, + "loss": 0.2682, + "step": 216600 + }, + { + "epoch": 1.22, + "learning_rate": 3.782329100546946e-05, + "loss": 0.273, + "step": 216700 + }, + { + "epoch": 1.22, + "learning_rate": 3.7817669775207004e-05, + "loss": 0.2694, + "step": 216800 + }, + { + "epoch": 1.22, + "learning_rate": 3.781204854494455e-05, + "loss": 0.2721, + "step": 216900 + }, + { + "epoch": 1.22, + "learning_rate": 3.780642731468209e-05, + "loss": 0.2658, + "step": 217000 + }, + { + "epoch": 1.22, + "learning_rate": 3.7800862296722264e-05, + "loss": 0.2699, + "step": 217100 + }, + { + "epoch": 1.22, + "learning_rate": 3.779524106645981e-05, + "loss": 0.266, + "step": 217200 + }, + { + "epoch": 1.22, + "learning_rate": 3.7789619836197356e-05, + "loss": 0.2701, + "step": 217300 + }, + { + "epoch": 1.22, + "learning_rate": 3.7783998605934896e-05, + "loss": 0.2733, + "step": 217400 + }, + { + "epoch": 1.22, + "learning_rate": 3.777837737567244e-05, + "loss": 0.2717, + "step": 217500 + }, + { + "epoch": 1.22, + "learning_rate": 3.777275614540999e-05, + "loss": 0.2775, + "step": 217600 + }, + { + "epoch": 1.22, + "learning_rate": 3.776713491514753e-05, + "loss": 0.2774, + "step": 217700 + }, + { + "epoch": 1.22, + "learning_rate": 3.7761513684885074e-05, + "loss": 0.271, + "step": 217800 + }, + { + "epoch": 1.22, + "learning_rate": 3.775589245462262e-05, + "loss": 0.2728, + "step": 217900 + }, + { + "epoch": 1.23, + "learning_rate": 3.775027122436017e-05, + "loss": 0.2701, + "step": 218000 + }, + { + "epoch": 1.23, + "learning_rate": 3.7744649994097706e-05, + "loss": 0.2731, + "step": 218100 + }, + { + "epoch": 1.23, + "learning_rate": 3.773902876383525e-05, + "loss": 0.2674, + "step": 218200 + }, + { + "epoch": 1.23, + "learning_rate": 3.77334075335728e-05, + "loss": 0.2681, + "step": 218300 + }, + { + "epoch": 1.23, + "learning_rate": 3.7727786303310345e-05, + "loss": 0.2729, + "step": 218400 + }, + { + "epoch": 1.23, + "learning_rate": 3.772216507304789e-05, + "loss": 0.2779, + "step": 218500 + }, + { + "epoch": 1.23, + "learning_rate": 3.771654384278544e-05, + "loss": 0.268, + "step": 218600 + }, + { + "epoch": 1.23, + "learning_rate": 3.7710922612522984e-05, + "loss": 0.2738, + "step": 218700 + }, + { + "epoch": 1.23, + "learning_rate": 3.7705301382260523e-05, + "loss": 0.2645, + "step": 218800 + }, + { + "epoch": 1.23, + "learning_rate": 3.769968015199807e-05, + "loss": 0.2741, + "step": 218900 + }, + { + "epoch": 1.23, + "learning_rate": 3.7694058921735616e-05, + "loss": 0.2652, + "step": 219000 + }, + { + "epoch": 1.23, + "learning_rate": 3.7688493903775784e-05, + "loss": 0.2736, + "step": 219100 + }, + { + "epoch": 1.23, + "learning_rate": 3.768287267351332e-05, + "loss": 0.2659, + "step": 219200 + }, + { + "epoch": 1.23, + "learning_rate": 3.767725144325087e-05, + "loss": 0.2686, + "step": 219300 + }, + { + "epoch": 1.23, + "learning_rate": 3.7671630212988416e-05, + "loss": 0.2737, + "step": 219400 + }, + { + "epoch": 1.23, + "learning_rate": 3.766600898272596e-05, + "loss": 0.262, + "step": 219500 + }, + { + "epoch": 1.23, + "learning_rate": 3.766038775246351e-05, + "loss": 0.2702, + "step": 219600 + }, + { + "epoch": 1.23, + "learning_rate": 3.7654766522201054e-05, + "loss": 0.2677, + "step": 219700 + }, + { + "epoch": 1.24, + "learning_rate": 3.76491452919386e-05, + "loss": 0.2695, + "step": 219800 + }, + { + "epoch": 1.24, + "learning_rate": 3.764352406167614e-05, + "loss": 0.2674, + "step": 219900 + }, + { + "epoch": 1.24, + "learning_rate": 3.7637902831413687e-05, + "loss": 0.2704, + "step": 220000 + }, + { + "epoch": 1.24, + "learning_rate": 3.763228160115123e-05, + "loss": 0.2725, + "step": 220100 + }, + { + "epoch": 1.24, + "learning_rate": 3.762666037088877e-05, + "loss": 0.2683, + "step": 220200 + }, + { + "epoch": 1.24, + "learning_rate": 3.762103914062632e-05, + "loss": 0.2773, + "step": 220300 + }, + { + "epoch": 1.24, + "learning_rate": 3.7615417910363865e-05, + "loss": 0.268, + "step": 220400 + }, + { + "epoch": 1.24, + "learning_rate": 3.760979668010141e-05, + "loss": 0.27, + "step": 220500 + }, + { + "epoch": 1.24, + "learning_rate": 3.760417544983895e-05, + "loss": 0.28, + "step": 220600 + }, + { + "epoch": 1.24, + "learning_rate": 3.75985542195765e-05, + "loss": 0.262, + "step": 220700 + }, + { + "epoch": 1.24, + "learning_rate": 3.759298920161667e-05, + "loss": 0.2692, + "step": 220800 + }, + { + "epoch": 1.24, + "learning_rate": 3.758736797135422e-05, + "loss": 0.2713, + "step": 220900 + }, + { + "epoch": 1.24, + "learning_rate": 3.758174674109176e-05, + "loss": 0.2637, + "step": 221000 + }, + { + "epoch": 1.24, + "learning_rate": 3.75761255108293e-05, + "loss": 0.273, + "step": 221100 + }, + { + "epoch": 1.24, + "learning_rate": 3.757050428056685e-05, + "loss": 0.2671, + "step": 221200 + }, + { + "epoch": 1.24, + "learning_rate": 3.756488305030439e-05, + "loss": 0.2687, + "step": 221300 + }, + { + "epoch": 1.24, + "learning_rate": 3.7559261820041935e-05, + "loss": 0.2684, + "step": 221400 + }, + { + "epoch": 1.25, + "learning_rate": 3.755364058977948e-05, + "loss": 0.2631, + "step": 221500 + }, + { + "epoch": 1.25, + "learning_rate": 3.754801935951703e-05, + "loss": 0.2704, + "step": 221600 + }, + { + "epoch": 1.25, + "learning_rate": 3.754239812925457e-05, + "loss": 0.2691, + "step": 221700 + }, + { + "epoch": 1.25, + "learning_rate": 3.7536776898992114e-05, + "loss": 0.2632, + "step": 221800 + }, + { + "epoch": 1.25, + "learning_rate": 3.753115566872966e-05, + "loss": 0.2665, + "step": 221900 + }, + { + "epoch": 1.25, + "learning_rate": 3.75255344384672e-05, + "loss": 0.2711, + "step": 222000 + }, + { + "epoch": 1.25, + "learning_rate": 3.7519913208204746e-05, + "loss": 0.2665, + "step": 222100 + }, + { + "epoch": 1.25, + "learning_rate": 3.751429197794229e-05, + "loss": 0.2712, + "step": 222200 + }, + { + "epoch": 1.25, + "learning_rate": 3.750867074767984e-05, + "loss": 0.2683, + "step": 222300 + }, + { + "epoch": 1.25, + "learning_rate": 3.7503049517417385e-05, + "loss": 0.2734, + "step": 222400 + }, + { + "epoch": 1.25, + "learning_rate": 3.749742828715493e-05, + "loss": 0.2728, + "step": 222500 + }, + { + "epoch": 1.25, + "learning_rate": 3.749180705689248e-05, + "loss": 0.2735, + "step": 222600 + }, + { + "epoch": 1.25, + "learning_rate": 3.748618582663002e-05, + "loss": 0.2708, + "step": 222700 + }, + { + "epoch": 1.25, + "learning_rate": 3.748056459636756e-05, + "loss": 0.2682, + "step": 222800 + }, + { + "epoch": 1.25, + "learning_rate": 3.747494336610511e-05, + "loss": 0.2717, + "step": 222900 + }, + { + "epoch": 1.25, + "learning_rate": 3.7469322135842656e-05, + "loss": 0.2626, + "step": 223000 + }, + { + "epoch": 1.25, + "learning_rate": 3.7463700905580195e-05, + "loss": 0.2743, + "step": 223100 + }, + { + "epoch": 1.25, + "learning_rate": 3.745807967531774e-05, + "loss": 0.2618, + "step": 223200 + }, + { + "epoch": 1.26, + "learning_rate": 3.745245844505529e-05, + "loss": 0.2702, + "step": 223300 + }, + { + "epoch": 1.26, + "learning_rate": 3.744683721479283e-05, + "loss": 0.2709, + "step": 223400 + }, + { + "epoch": 1.26, + "learning_rate": 3.744121598453037e-05, + "loss": 0.2699, + "step": 223500 + }, + { + "epoch": 1.26, + "learning_rate": 3.743565096657055e-05, + "loss": 0.2732, + "step": 223600 + }, + { + "epoch": 1.26, + "learning_rate": 3.7430029736308094e-05, + "loss": 0.2628, + "step": 223700 + }, + { + "epoch": 1.26, + "learning_rate": 3.7424408506045634e-05, + "loss": 0.2651, + "step": 223800 + }, + { + "epoch": 1.26, + "learning_rate": 3.741878727578318e-05, + "loss": 0.2756, + "step": 223900 + }, + { + "epoch": 1.26, + "learning_rate": 3.7413166045520726e-05, + "loss": 0.264, + "step": 224000 + }, + { + "epoch": 1.26, + "learning_rate": 3.740754481525827e-05, + "loss": 0.2691, + "step": 224100 + }, + { + "epoch": 1.26, + "learning_rate": 3.740192358499581e-05, + "loss": 0.2731, + "step": 224200 + }, + { + "epoch": 1.26, + "learning_rate": 3.739630235473336e-05, + "loss": 0.261, + "step": 224300 + }, + { + "epoch": 1.26, + "learning_rate": 3.7390681124470904e-05, + "loss": 0.2745, + "step": 224400 + }, + { + "epoch": 1.26, + "learning_rate": 3.7385059894208444e-05, + "loss": 0.2701, + "step": 224500 + }, + { + "epoch": 1.26, + "learning_rate": 3.737943866394599e-05, + "loss": 0.2732, + "step": 224600 + }, + { + "epoch": 1.26, + "learning_rate": 3.7373817433683536e-05, + "loss": 0.2617, + "step": 224700 + }, + { + "epoch": 1.26, + "learning_rate": 3.736819620342108e-05, + "loss": 0.273, + "step": 224800 + }, + { + "epoch": 1.26, + "learning_rate": 3.736257497315863e-05, + "loss": 0.2622, + "step": 224900 + }, + { + "epoch": 1.26, + "learning_rate": 3.7356953742896175e-05, + "loss": 0.2659, + "step": 225000 + }, + { + "epoch": 1.27, + "learning_rate": 3.735133251263372e-05, + "loss": 0.2657, + "step": 225100 + }, + { + "epoch": 1.27, + "learning_rate": 3.734571128237126e-05, + "loss": 0.2657, + "step": 225200 + }, + { + "epoch": 1.27, + "learning_rate": 3.734009005210881e-05, + "loss": 0.2721, + "step": 225300 + }, + { + "epoch": 1.27, + "learning_rate": 3.7334468821846354e-05, + "loss": 0.2735, + "step": 225400 + }, + { + "epoch": 1.27, + "learning_rate": 3.732890380388652e-05, + "loss": 0.2671, + "step": 225500 + }, + { + "epoch": 1.27, + "learning_rate": 3.732328257362406e-05, + "loss": 0.2641, + "step": 225600 + }, + { + "epoch": 1.27, + "learning_rate": 3.731766134336161e-05, + "loss": 0.2643, + "step": 225700 + }, + { + "epoch": 1.27, + "learning_rate": 3.731204011309915e-05, + "loss": 0.2714, + "step": 225800 + }, + { + "epoch": 1.27, + "learning_rate": 3.73064188828367e-05, + "loss": 0.2706, + "step": 225900 + }, + { + "epoch": 1.27, + "learning_rate": 3.7300797652574246e-05, + "loss": 0.2683, + "step": 226000 + }, + { + "epoch": 1.27, + "learning_rate": 3.729517642231179e-05, + "loss": 0.2758, + "step": 226100 + }, + { + "epoch": 1.27, + "learning_rate": 3.728955519204934e-05, + "loss": 0.272, + "step": 226200 + }, + { + "epoch": 1.27, + "learning_rate": 3.728393396178688e-05, + "loss": 0.2739, + "step": 226300 + }, + { + "epoch": 1.27, + "learning_rate": 3.7278312731524424e-05, + "loss": 0.2648, + "step": 226400 + }, + { + "epoch": 1.27, + "learning_rate": 3.727269150126197e-05, + "loss": 0.269, + "step": 226500 + }, + { + "epoch": 1.27, + "learning_rate": 3.726707027099951e-05, + "loss": 0.2596, + "step": 226600 + }, + { + "epoch": 1.27, + "learning_rate": 3.7261449040737056e-05, + "loss": 0.2685, + "step": 226700 + }, + { + "epoch": 1.27, + "learning_rate": 3.72558278104746e-05, + "loss": 0.2625, + "step": 226800 + }, + { + "epoch": 1.28, + "learning_rate": 3.725020658021215e-05, + "loss": 0.2683, + "step": 226900 + }, + { + "epoch": 1.28, + "learning_rate": 3.724458534994969e-05, + "loss": 0.2687, + "step": 227000 + }, + { + "epoch": 1.28, + "learning_rate": 3.7238964119687235e-05, + "loss": 0.2635, + "step": 227100 + }, + { + "epoch": 1.28, + "learning_rate": 3.723334288942478e-05, + "loss": 0.2708, + "step": 227200 + }, + { + "epoch": 1.28, + "learning_rate": 3.722772165916232e-05, + "loss": 0.2696, + "step": 227300 + }, + { + "epoch": 1.28, + "learning_rate": 3.722210042889987e-05, + "loss": 0.2631, + "step": 227400 + }, + { + "epoch": 1.28, + "learning_rate": 3.721647919863741e-05, + "loss": 0.2661, + "step": 227500 + }, + { + "epoch": 1.28, + "learning_rate": 3.721085796837496e-05, + "loss": 0.2724, + "step": 227600 + }, + { + "epoch": 1.28, + "learning_rate": 3.7205236738112506e-05, + "loss": 0.2718, + "step": 227700 + }, + { + "epoch": 1.28, + "learning_rate": 3.719961550785005e-05, + "loss": 0.2676, + "step": 227800 + }, + { + "epoch": 1.28, + "learning_rate": 3.71939942775876e-05, + "loss": 0.2719, + "step": 227900 + }, + { + "epoch": 1.28, + "learning_rate": 3.718837304732514e-05, + "loss": 0.2727, + "step": 228000 + }, + { + "epoch": 1.28, + "learning_rate": 3.7182751817062684e-05, + "loss": 0.2648, + "step": 228100 + }, + { + "epoch": 1.28, + "learning_rate": 3.717713058680023e-05, + "loss": 0.2683, + "step": 228200 + }, + { + "epoch": 1.28, + "learning_rate": 3.7171509356537776e-05, + "loss": 0.2627, + "step": 228300 + }, + { + "epoch": 1.28, + "learning_rate": 3.7165888126275316e-05, + "loss": 0.2649, + "step": 228400 + }, + { + "epoch": 1.28, + "learning_rate": 3.716026689601286e-05, + "loss": 0.2669, + "step": 228500 + }, + { + "epoch": 1.29, + "learning_rate": 3.715464566575041e-05, + "loss": 0.2648, + "step": 228600 + }, + { + "epoch": 1.29, + "learning_rate": 3.714902443548795e-05, + "loss": 0.2687, + "step": 228700 + }, + { + "epoch": 1.29, + "learning_rate": 3.7143403205225494e-05, + "loss": 0.2653, + "step": 228800 + }, + { + "epoch": 1.29, + "learning_rate": 3.713778197496304e-05, + "loss": 0.2689, + "step": 228900 + }, + { + "epoch": 1.29, + "learning_rate": 3.713216074470059e-05, + "loss": 0.2677, + "step": 229000 + }, + { + "epoch": 1.29, + "learning_rate": 3.712653951443813e-05, + "loss": 0.2703, + "step": 229100 + }, + { + "epoch": 1.29, + "learning_rate": 3.712091828417568e-05, + "loss": 0.2671, + "step": 229200 + }, + { + "epoch": 1.29, + "learning_rate": 3.7115297053913226e-05, + "loss": 0.2701, + "step": 229300 + }, + { + "epoch": 1.29, + "learning_rate": 3.7109675823650765e-05, + "loss": 0.2696, + "step": 229400 + }, + { + "epoch": 1.29, + "learning_rate": 3.710405459338831e-05, + "loss": 0.2633, + "step": 229500 + }, + { + "epoch": 1.29, + "learning_rate": 3.709843336312586e-05, + "loss": 0.2724, + "step": 229600 + }, + { + "epoch": 1.29, + "learning_rate": 3.7092812132863404e-05, + "loss": 0.2603, + "step": 229700 + }, + { + "epoch": 1.29, + "learning_rate": 3.7087190902600943e-05, + "loss": 0.2637, + "step": 229800 + }, + { + "epoch": 1.29, + "learning_rate": 3.708156967233849e-05, + "loss": 0.2704, + "step": 229900 + }, + { + "epoch": 1.29, + "learning_rate": 3.7075948442076036e-05, + "loss": 0.259, + "step": 230000 + }, + { + "epoch": 1.29, + "learning_rate": 3.7070327211813575e-05, + "loss": 0.2741, + "step": 230100 + }, + { + "epoch": 1.29, + "learning_rate": 3.706470598155112e-05, + "loss": 0.2771, + "step": 230200 + }, + { + "epoch": 1.29, + "learning_rate": 3.705908475128867e-05, + "loss": 0.2673, + "step": 230300 + }, + { + "epoch": 1.3, + "learning_rate": 3.7053463521026214e-05, + "loss": 0.2692, + "step": 230400 + }, + { + "epoch": 1.3, + "learning_rate": 3.7047842290763754e-05, + "loss": 0.2708, + "step": 230500 + }, + { + "epoch": 1.3, + "learning_rate": 3.70422210605013e-05, + "loss": 0.2718, + "step": 230600 + }, + { + "epoch": 1.3, + "learning_rate": 3.7036599830238846e-05, + "loss": 0.2695, + "step": 230700 + }, + { + "epoch": 1.3, + "learning_rate": 3.703097859997639e-05, + "loss": 0.2619, + "step": 230800 + }, + { + "epoch": 1.3, + "learning_rate": 3.702535736971394e-05, + "loss": 0.2672, + "step": 230900 + }, + { + "epoch": 1.3, + "learning_rate": 3.7019736139451485e-05, + "loss": 0.2721, + "step": 231000 + }, + { + "epoch": 1.3, + "learning_rate": 3.701411490918903e-05, + "loss": 0.2647, + "step": 231100 + }, + { + "epoch": 1.3, + "learning_rate": 3.700849367892657e-05, + "loss": 0.2674, + "step": 231200 + }, + { + "epoch": 1.3, + "learning_rate": 3.700287244866412e-05, + "loss": 0.2685, + "step": 231300 + }, + { + "epoch": 1.3, + "learning_rate": 3.6997251218401664e-05, + "loss": 0.2663, + "step": 231400 + }, + { + "epoch": 1.3, + "learning_rate": 3.69916299881392e-05, + "loss": 0.2666, + "step": 231500 + }, + { + "epoch": 1.3, + "learning_rate": 3.698600875787675e-05, + "loss": 0.2675, + "step": 231600 + }, + { + "epoch": 1.3, + "learning_rate": 3.6980387527614296e-05, + "loss": 0.2645, + "step": 231700 + }, + { + "epoch": 1.3, + "learning_rate": 3.697476629735184e-05, + "loss": 0.263, + "step": 231800 + }, + { + "epoch": 1.3, + "learning_rate": 3.696914506708938e-05, + "loss": 0.2688, + "step": 231900 + }, + { + "epoch": 1.3, + "learning_rate": 3.696352383682693e-05, + "loss": 0.2603, + "step": 232000 + }, + { + "epoch": 1.3, + "learning_rate": 3.6957902606564474e-05, + "loss": 0.2594, + "step": 232100 + }, + { + "epoch": 1.31, + "learning_rate": 3.695228137630202e-05, + "loss": 0.2651, + "step": 232200 + }, + { + "epoch": 1.31, + "learning_rate": 3.6946660146039566e-05, + "loss": 0.2709, + "step": 232300 + }, + { + "epoch": 1.31, + "learning_rate": 3.694103891577711e-05, + "loss": 0.2628, + "step": 232400 + }, + { + "epoch": 1.31, + "learning_rate": 3.693541768551466e-05, + "loss": 0.2658, + "step": 232500 + }, + { + "epoch": 1.31, + "learning_rate": 3.69297964552522e-05, + "loss": 0.2659, + "step": 232600 + }, + { + "epoch": 1.31, + "learning_rate": 3.6924175224989745e-05, + "loss": 0.2706, + "step": 232700 + }, + { + "epoch": 1.31, + "learning_rate": 3.691855399472729e-05, + "loss": 0.2716, + "step": 232800 + }, + { + "epoch": 1.31, + "learning_rate": 3.691293276446483e-05, + "loss": 0.2725, + "step": 232900 + }, + { + "epoch": 1.31, + "learning_rate": 3.690731153420238e-05, + "loss": 0.2668, + "step": 233000 + }, + { + "epoch": 1.31, + "learning_rate": 3.690169030393992e-05, + "loss": 0.2674, + "step": 233100 + }, + { + "epoch": 1.31, + "learning_rate": 3.689606907367747e-05, + "loss": 0.2656, + "step": 233200 + }, + { + "epoch": 1.31, + "learning_rate": 3.689050405571764e-05, + "loss": 0.2592, + "step": 233300 + }, + { + "epoch": 1.31, + "learning_rate": 3.688488282545518e-05, + "loss": 0.2704, + "step": 233400 + }, + { + "epoch": 1.31, + "learning_rate": 3.687926159519273e-05, + "loss": 0.2711, + "step": 233500 + }, + { + "epoch": 1.31, + "learning_rate": 3.6873640364930276e-05, + "loss": 0.2644, + "step": 233600 + }, + { + "epoch": 1.31, + "learning_rate": 3.6868019134667815e-05, + "loss": 0.2566, + "step": 233700 + }, + { + "epoch": 1.31, + "learning_rate": 3.686239790440536e-05, + "loss": 0.2709, + "step": 233800 + }, + { + "epoch": 1.31, + "learning_rate": 3.685677667414291e-05, + "loss": 0.2674, + "step": 233900 + }, + { + "epoch": 1.32, + "learning_rate": 3.685115544388045e-05, + "loss": 0.2683, + "step": 234000 + }, + { + "epoch": 1.32, + "learning_rate": 3.6845534213617994e-05, + "loss": 0.2678, + "step": 234100 + }, + { + "epoch": 1.32, + "learning_rate": 3.683991298335554e-05, + "loss": 0.2633, + "step": 234200 + }, + { + "epoch": 1.32, + "learning_rate": 3.6834291753093086e-05, + "loss": 0.2572, + "step": 234300 + }, + { + "epoch": 1.32, + "learning_rate": 3.6828670522830626e-05, + "loss": 0.2596, + "step": 234400 + }, + { + "epoch": 1.32, + "learning_rate": 3.682304929256817e-05, + "loss": 0.2708, + "step": 234500 + }, + { + "epoch": 1.32, + "learning_rate": 3.681742806230572e-05, + "loss": 0.2639, + "step": 234600 + }, + { + "epoch": 1.32, + "learning_rate": 3.681180683204326e-05, + "loss": 0.2723, + "step": 234700 + }, + { + "epoch": 1.32, + "learning_rate": 3.680624181408343e-05, + "loss": 0.2648, + "step": 234800 + }, + { + "epoch": 1.32, + "learning_rate": 3.680062058382098e-05, + "loss": 0.2674, + "step": 234900 + }, + { + "epoch": 1.32, + "learning_rate": 3.6794999353558525e-05, + "loss": 0.2689, + "step": 235000 + }, + { + "epoch": 1.32, + "learning_rate": 3.6789378123296064e-05, + "loss": 0.2667, + "step": 235100 + }, + { + "epoch": 1.32, + "learning_rate": 3.678375689303361e-05, + "loss": 0.2716, + "step": 235200 + }, + { + "epoch": 1.32, + "learning_rate": 3.677813566277116e-05, + "loss": 0.261, + "step": 235300 + }, + { + "epoch": 1.32, + "learning_rate": 3.6772514432508696e-05, + "loss": 0.2671, + "step": 235400 + }, + { + "epoch": 1.32, + "learning_rate": 3.676689320224624e-05, + "loss": 0.264, + "step": 235500 + }, + { + "epoch": 1.32, + "learning_rate": 3.676127197198379e-05, + "loss": 0.2676, + "step": 235600 + }, + { + "epoch": 1.32, + "learning_rate": 3.6755650741721335e-05, + "loss": 0.2671, + "step": 235700 + }, + { + "epoch": 1.33, + "learning_rate": 3.6750029511458875e-05, + "loss": 0.2653, + "step": 235800 + }, + { + "epoch": 1.33, + "learning_rate": 3.674440828119642e-05, + "loss": 0.2624, + "step": 235900 + }, + { + "epoch": 1.33, + "learning_rate": 3.673878705093397e-05, + "loss": 0.2727, + "step": 236000 + }, + { + "epoch": 1.33, + "learning_rate": 3.6733165820671514e-05, + "loss": 0.2706, + "step": 236100 + }, + { + "epoch": 1.33, + "learning_rate": 3.672754459040906e-05, + "loss": 0.2691, + "step": 236200 + }, + { + "epoch": 1.33, + "learning_rate": 3.6721923360146606e-05, + "loss": 0.2686, + "step": 236300 + }, + { + "epoch": 1.33, + "learning_rate": 3.671630212988415e-05, + "loss": 0.2714, + "step": 236400 + }, + { + "epoch": 1.33, + "learning_rate": 3.671068089962169e-05, + "loss": 0.2612, + "step": 236500 + }, + { + "epoch": 1.33, + "learning_rate": 3.670505966935924e-05, + "loss": 0.2621, + "step": 236600 + }, + { + "epoch": 1.33, + "learning_rate": 3.6699438439096784e-05, + "loss": 0.2714, + "step": 236700 + }, + { + "epoch": 1.33, + "learning_rate": 3.6693817208834324e-05, + "loss": 0.259, + "step": 236800 + }, + { + "epoch": 1.33, + "learning_rate": 3.668819597857187e-05, + "loss": 0.2591, + "step": 236900 + }, + { + "epoch": 1.33, + "learning_rate": 3.6682574748309416e-05, + "loss": 0.2652, + "step": 237000 + }, + { + "epoch": 1.33, + "learning_rate": 3.667695351804696e-05, + "loss": 0.2612, + "step": 237100 + }, + { + "epoch": 1.33, + "learning_rate": 3.66713322877845e-05, + "loss": 0.2757, + "step": 237200 + }, + { + "epoch": 1.33, + "learning_rate": 3.666571105752205e-05, + "loss": 0.2678, + "step": 237300 + }, + { + "epoch": 1.33, + "learning_rate": 3.6660089827259595e-05, + "loss": 0.2704, + "step": 237400 + }, + { + "epoch": 1.34, + "learning_rate": 3.665446859699714e-05, + "loss": 0.2663, + "step": 237500 + }, + { + "epoch": 1.34, + "learning_rate": 3.664884736673469e-05, + "loss": 0.2609, + "step": 237600 + }, + { + "epoch": 1.34, + "learning_rate": 3.6643226136472234e-05, + "loss": 0.2643, + "step": 237700 + }, + { + "epoch": 1.34, + "learning_rate": 3.663760490620978e-05, + "loss": 0.2748, + "step": 237800 + }, + { + "epoch": 1.34, + "learning_rate": 3.663198367594732e-05, + "loss": 0.2551, + "step": 237900 + }, + { + "epoch": 1.34, + "learning_rate": 3.6626362445684866e-05, + "loss": 0.2671, + "step": 238000 + }, + { + "epoch": 1.34, + "learning_rate": 3.662074121542241e-05, + "loss": 0.2663, + "step": 238100 + }, + { + "epoch": 1.34, + "learning_rate": 3.661511998515995e-05, + "loss": 0.2624, + "step": 238200 + }, + { + "epoch": 1.34, + "learning_rate": 3.66094987548975e-05, + "loss": 0.2627, + "step": 238300 + }, + { + "epoch": 1.34, + "learning_rate": 3.6603877524635044e-05, + "loss": 0.2679, + "step": 238400 + }, + { + "epoch": 1.34, + "learning_rate": 3.659825629437259e-05, + "loss": 0.267, + "step": 238500 + }, + { + "epoch": 1.34, + "learning_rate": 3.659263506411013e-05, + "loss": 0.2641, + "step": 238600 + }, + { + "epoch": 1.34, + "learning_rate": 3.6587013833847676e-05, + "loss": 0.2673, + "step": 238700 + }, + { + "epoch": 1.34, + "learning_rate": 3.658139260358522e-05, + "loss": 0.267, + "step": 238800 + }, + { + "epoch": 1.34, + "learning_rate": 3.657577137332276e-05, + "loss": 0.2596, + "step": 238900 + }, + { + "epoch": 1.34, + "learning_rate": 3.657015014306031e-05, + "loss": 0.2659, + "step": 239000 + }, + { + "epoch": 1.34, + "learning_rate": 3.6564528912797854e-05, + "loss": 0.2647, + "step": 239100 + }, + { + "epoch": 1.34, + "learning_rate": 3.65589076825354e-05, + "loss": 0.2636, + "step": 239200 + }, + { + "epoch": 1.35, + "learning_rate": 3.655328645227295e-05, + "loss": 0.2604, + "step": 239300 + }, + { + "epoch": 1.35, + "learning_rate": 3.654766522201049e-05, + "loss": 0.2616, + "step": 239400 + }, + { + "epoch": 1.35, + "learning_rate": 3.654204399174804e-05, + "loss": 0.2639, + "step": 239500 + }, + { + "epoch": 1.35, + "learning_rate": 3.653642276148558e-05, + "loss": 0.2656, + "step": 239600 + }, + { + "epoch": 1.35, + "learning_rate": 3.6530801531223125e-05, + "loss": 0.265, + "step": 239700 + }, + { + "epoch": 1.35, + "learning_rate": 3.652518030096067e-05, + "loss": 0.2671, + "step": 239800 + }, + { + "epoch": 1.35, + "learning_rate": 3.651955907069822e-05, + "loss": 0.2573, + "step": 239900 + }, + { + "epoch": 1.35, + "learning_rate": 3.651393784043576e-05, + "loss": 0.2601, + "step": 240000 + }, + { + "epoch": 1.35, + "learning_rate": 3.6508316610173304e-05, + "loss": 0.255, + "step": 240100 + }, + { + "epoch": 1.35, + "learning_rate": 3.650269537991085e-05, + "loss": 0.2675, + "step": 240200 + }, + { + "epoch": 1.35, + "learning_rate": 3.649707414964839e-05, + "loss": 0.2566, + "step": 240300 + }, + { + "epoch": 1.35, + "learning_rate": 3.6491452919385936e-05, + "loss": 0.2633, + "step": 240400 + }, + { + "epoch": 1.35, + "learning_rate": 3.648583168912348e-05, + "loss": 0.2582, + "step": 240500 + }, + { + "epoch": 1.35, + "learning_rate": 3.648021045886103e-05, + "loss": 0.258, + "step": 240600 + }, + { + "epoch": 1.35, + "learning_rate": 3.6474589228598575e-05, + "loss": 0.2637, + "step": 240700 + }, + { + "epoch": 1.35, + "learning_rate": 3.646896799833612e-05, + "loss": 0.264, + "step": 240800 + }, + { + "epoch": 1.35, + "learning_rate": 3.646340298037629e-05, + "loss": 0.2593, + "step": 240900 + }, + { + "epoch": 1.35, + "learning_rate": 3.6457781750113835e-05, + "loss": 0.2617, + "step": 241000 + }, + { + "epoch": 1.36, + "learning_rate": 3.6452160519851374e-05, + "loss": 0.263, + "step": 241100 + }, + { + "epoch": 1.36, + "learning_rate": 3.644653928958892e-05, + "loss": 0.2656, + "step": 241200 + }, + { + "epoch": 1.36, + "learning_rate": 3.644091805932647e-05, + "loss": 0.2614, + "step": 241300 + }, + { + "epoch": 1.36, + "learning_rate": 3.6435296829064006e-05, + "loss": 0.2576, + "step": 241400 + }, + { + "epoch": 1.36, + "learning_rate": 3.642967559880155e-05, + "loss": 0.2641, + "step": 241500 + }, + { + "epoch": 1.36, + "learning_rate": 3.64240543685391e-05, + "loss": 0.2615, + "step": 241600 + }, + { + "epoch": 1.36, + "learning_rate": 3.6418433138276645e-05, + "loss": 0.263, + "step": 241700 + }, + { + "epoch": 1.36, + "learning_rate": 3.641281190801419e-05, + "loss": 0.2599, + "step": 241800 + }, + { + "epoch": 1.36, + "learning_rate": 3.640719067775174e-05, + "loss": 0.265, + "step": 241900 + }, + { + "epoch": 1.36, + "learning_rate": 3.6401569447489284e-05, + "loss": 0.2592, + "step": 242000 + }, + { + "epoch": 1.36, + "learning_rate": 3.6395948217226823e-05, + "loss": 0.2578, + "step": 242100 + }, + { + "epoch": 1.36, + "learning_rate": 3.639032698696437e-05, + "loss": 0.2645, + "step": 242200 + }, + { + "epoch": 1.36, + "learning_rate": 3.6384705756701916e-05, + "loss": 0.2566, + "step": 242300 + }, + { + "epoch": 1.36, + "learning_rate": 3.6379140738742084e-05, + "loss": 0.2678, + "step": 242400 + }, + { + "epoch": 1.36, + "learning_rate": 3.637351950847962e-05, + "loss": 0.2609, + "step": 242500 + }, + { + "epoch": 1.36, + "learning_rate": 3.636789827821717e-05, + "loss": 0.257, + "step": 242600 + }, + { + "epoch": 1.36, + "learning_rate": 3.6362277047954716e-05, + "loss": 0.2578, + "step": 242700 + }, + { + "epoch": 1.36, + "learning_rate": 3.635665581769226e-05, + "loss": 0.2596, + "step": 242800 + }, + { + "epoch": 1.37, + "learning_rate": 3.635103458742981e-05, + "loss": 0.266, + "step": 242900 + }, + { + "epoch": 1.37, + "learning_rate": 3.6345413357167354e-05, + "loss": 0.2628, + "step": 243000 + }, + { + "epoch": 1.37, + "learning_rate": 3.63397921269049e-05, + "loss": 0.2582, + "step": 243100 + }, + { + "epoch": 1.37, + "learning_rate": 3.633417089664244e-05, + "loss": 0.259, + "step": 243200 + }, + { + "epoch": 1.37, + "learning_rate": 3.6328549666379987e-05, + "loss": 0.2602, + "step": 243300 + }, + { + "epoch": 1.37, + "learning_rate": 3.632292843611753e-05, + "loss": 0.2667, + "step": 243400 + }, + { + "epoch": 1.37, + "learning_rate": 3.631730720585508e-05, + "loss": 0.2608, + "step": 243500 + }, + { + "epoch": 1.37, + "learning_rate": 3.631168597559262e-05, + "loss": 0.2622, + "step": 243600 + }, + { + "epoch": 1.37, + "learning_rate": 3.6306064745330165e-05, + "loss": 0.2571, + "step": 243700 + }, + { + "epoch": 1.37, + "learning_rate": 3.630044351506771e-05, + "loss": 0.2564, + "step": 243800 + }, + { + "epoch": 1.37, + "learning_rate": 3.629482228480525e-05, + "loss": 0.2531, + "step": 243900 + }, + { + "epoch": 1.37, + "learning_rate": 3.62892010545428e-05, + "loss": 0.2694, + "step": 244000 + }, + { + "epoch": 1.37, + "learning_rate": 3.628357982428034e-05, + "loss": 0.2712, + "step": 244100 + }, + { + "epoch": 1.37, + "learning_rate": 3.627795859401789e-05, + "loss": 0.2606, + "step": 244200 + }, + { + "epoch": 1.37, + "learning_rate": 3.627233736375543e-05, + "loss": 0.2639, + "step": 244300 + }, + { + "epoch": 1.37, + "learning_rate": 3.6266716133492975e-05, + "loss": 0.257, + "step": 244400 + }, + { + "epoch": 1.37, + "learning_rate": 3.626109490323052e-05, + "loss": 0.2628, + "step": 244500 + }, + { + "epoch": 1.37, + "learning_rate": 3.625547367296807e-05, + "loss": 0.2585, + "step": 244600 + }, + { + "epoch": 1.38, + "learning_rate": 3.6249852442705614e-05, + "loss": 0.2585, + "step": 244700 + }, + { + "epoch": 1.38, + "learning_rate": 3.624423121244316e-05, + "loss": 0.2577, + "step": 244800 + }, + { + "epoch": 1.38, + "learning_rate": 3.623860998218071e-05, + "loss": 0.2573, + "step": 244900 + }, + { + "epoch": 1.38, + "learning_rate": 3.6232988751918246e-05, + "loss": 0.2592, + "step": 245000 + }, + { + "epoch": 1.38, + "learning_rate": 3.622736752165579e-05, + "loss": 0.2628, + "step": 245100 + }, + { + "epoch": 1.38, + "learning_rate": 3.622174629139334e-05, + "loss": 0.2663, + "step": 245200 + }, + { + "epoch": 1.38, + "learning_rate": 3.621612506113088e-05, + "loss": 0.2728, + "step": 245300 + }, + { + "epoch": 1.38, + "learning_rate": 3.6210503830868424e-05, + "loss": 0.2714, + "step": 245400 + }, + { + "epoch": 1.38, + "learning_rate": 3.620488260060597e-05, + "loss": 0.2633, + "step": 245500 + }, + { + "epoch": 1.38, + "learning_rate": 3.619926137034351e-05, + "loss": 0.2677, + "step": 245600 + }, + { + "epoch": 1.38, + "learning_rate": 3.6193640140081057e-05, + "loss": 0.2615, + "step": 245700 + }, + { + "epoch": 1.38, + "learning_rate": 3.61880189098186e-05, + "loss": 0.2643, + "step": 245800 + }, + { + "epoch": 1.38, + "learning_rate": 3.618239767955615e-05, + "loss": 0.267, + "step": 245900 + }, + { + "epoch": 1.38, + "learning_rate": 3.6176776449293695e-05, + "loss": 0.2601, + "step": 246000 + }, + { + "epoch": 1.38, + "learning_rate": 3.617115521903124e-05, + "loss": 0.2664, + "step": 246100 + }, + { + "epoch": 1.38, + "learning_rate": 3.616553398876879e-05, + "loss": 0.265, + "step": 246200 + }, + { + "epoch": 1.38, + "learning_rate": 3.615991275850633e-05, + "loss": 0.2555, + "step": 246300 + }, + { + "epoch": 1.39, + "learning_rate": 3.6154347740546495e-05, + "loss": 0.262, + "step": 246400 + }, + { + "epoch": 1.39, + "learning_rate": 3.614872651028404e-05, + "loss": 0.2652, + "step": 246500 + }, + { + "epoch": 1.39, + "learning_rate": 3.614310528002159e-05, + "loss": 0.2585, + "step": 246600 + }, + { + "epoch": 1.39, + "learning_rate": 3.613748404975913e-05, + "loss": 0.262, + "step": 246700 + }, + { + "epoch": 1.39, + "learning_rate": 3.613186281949667e-05, + "loss": 0.2604, + "step": 246800 + }, + { + "epoch": 1.39, + "learning_rate": 3.612624158923422e-05, + "loss": 0.2559, + "step": 246900 + }, + { + "epoch": 1.39, + "learning_rate": 3.6120620358971766e-05, + "loss": 0.2584, + "step": 247000 + }, + { + "epoch": 1.39, + "learning_rate": 3.611499912870931e-05, + "loss": 0.2578, + "step": 247100 + }, + { + "epoch": 1.39, + "learning_rate": 3.610937789844686e-05, + "loss": 0.2568, + "step": 247200 + }, + { + "epoch": 1.39, + "learning_rate": 3.6103756668184405e-05, + "loss": 0.2638, + "step": 247300 + }, + { + "epoch": 1.39, + "learning_rate": 3.6098135437921944e-05, + "loss": 0.2587, + "step": 247400 + }, + { + "epoch": 1.39, + "learning_rate": 3.609251420765949e-05, + "loss": 0.258, + "step": 247500 + }, + { + "epoch": 1.39, + "learning_rate": 3.608689297739704e-05, + "loss": 0.2598, + "step": 247600 + }, + { + "epoch": 1.39, + "learning_rate": 3.608127174713458e-05, + "loss": 0.2621, + "step": 247700 + }, + { + "epoch": 1.39, + "learning_rate": 3.607565051687212e-05, + "loss": 0.2592, + "step": 247800 + }, + { + "epoch": 1.39, + "learning_rate": 3.607002928660967e-05, + "loss": 0.2585, + "step": 247900 + }, + { + "epoch": 1.39, + "learning_rate": 3.6064408056347215e-05, + "loss": 0.2647, + "step": 248000 + }, + { + "epoch": 1.39, + "learning_rate": 3.6058786826084755e-05, + "loss": 0.2593, + "step": 248100 + }, + { + "epoch": 1.4, + "learning_rate": 3.60531655958223e-05, + "loss": 0.2599, + "step": 248200 + }, + { + "epoch": 1.4, + "learning_rate": 3.604754436555985e-05, + "loss": 0.269, + "step": 248300 + }, + { + "epoch": 1.4, + "learning_rate": 3.6041923135297393e-05, + "loss": 0.259, + "step": 248400 + }, + { + "epoch": 1.4, + "learning_rate": 3.603630190503493e-05, + "loss": 0.2596, + "step": 248500 + }, + { + "epoch": 1.4, + "learning_rate": 3.603068067477248e-05, + "loss": 0.2567, + "step": 248600 + }, + { + "epoch": 1.4, + "learning_rate": 3.6025059444510026e-05, + "loss": 0.2594, + "step": 248700 + }, + { + "epoch": 1.4, + "learning_rate": 3.601943821424757e-05, + "loss": 0.2669, + "step": 248800 + }, + { + "epoch": 1.4, + "learning_rate": 3.601381698398512e-05, + "loss": 0.2732, + "step": 248900 + }, + { + "epoch": 1.4, + "learning_rate": 3.6008195753722664e-05, + "loss": 0.2666, + "step": 249000 + }, + { + "epoch": 1.4, + "learning_rate": 3.600257452346021e-05, + "loss": 0.2607, + "step": 249100 + }, + { + "epoch": 1.4, + "learning_rate": 3.599695329319775e-05, + "loss": 0.2636, + "step": 249200 + }, + { + "epoch": 1.4, + "learning_rate": 3.5991332062935296e-05, + "loss": 0.2628, + "step": 249300 + }, + { + "epoch": 1.4, + "learning_rate": 3.598571083267284e-05, + "loss": 0.2589, + "step": 249400 + }, + { + "epoch": 1.4, + "learning_rate": 3.598008960241038e-05, + "loss": 0.2633, + "step": 249500 + }, + { + "epoch": 1.4, + "learning_rate": 3.597446837214793e-05, + "loss": 0.2577, + "step": 249600 + }, + { + "epoch": 1.4, + "learning_rate": 3.5968847141885475e-05, + "loss": 0.2595, + "step": 249700 + }, + { + "epoch": 1.4, + "learning_rate": 3.596322591162302e-05, + "loss": 0.2609, + "step": 249800 + }, + { + "epoch": 1.4, + "learning_rate": 3.595760468136056e-05, + "loss": 0.2592, + "step": 249900 + }, + { + "epoch": 1.41, + "learning_rate": 3.595198345109811e-05, + "loss": 0.2585, + "step": 250000 + }, + { + "epoch": 1.41, + "learning_rate": 3.594636222083565e-05, + "loss": 0.2531, + "step": 250100 + }, + { + "epoch": 1.41, + "learning_rate": 3.59407409905732e-05, + "loss": 0.2561, + "step": 250200 + }, + { + "epoch": 1.41, + "learning_rate": 3.5935119760310746e-05, + "loss": 0.2609, + "step": 250300 + }, + { + "epoch": 1.41, + "learning_rate": 3.592949853004829e-05, + "loss": 0.2557, + "step": 250400 + }, + { + "epoch": 1.41, + "learning_rate": 3.592387729978584e-05, + "loss": 0.2606, + "step": 250500 + }, + { + "epoch": 1.41, + "learning_rate": 3.591825606952338e-05, + "loss": 0.2686, + "step": 250600 + }, + { + "epoch": 1.41, + "learning_rate": 3.5912634839260924e-05, + "loss": 0.2629, + "step": 250700 + }, + { + "epoch": 1.41, + "learning_rate": 3.590701360899847e-05, + "loss": 0.2605, + "step": 250800 + }, + { + "epoch": 1.41, + "learning_rate": 3.590139237873601e-05, + "loss": 0.2556, + "step": 250900 + }, + { + "epoch": 1.41, + "learning_rate": 3.5895771148473556e-05, + "loss": 0.2656, + "step": 251000 + }, + { + "epoch": 1.41, + "learning_rate": 3.58901499182111e-05, + "loss": 0.2661, + "step": 251100 + }, + { + "epoch": 1.41, + "learning_rate": 3.588452868794865e-05, + "loss": 0.2577, + "step": 251200 + }, + { + "epoch": 1.41, + "learning_rate": 3.587890745768619e-05, + "loss": 0.2598, + "step": 251300 + }, + { + "epoch": 1.41, + "learning_rate": 3.5873286227423734e-05, + "loss": 0.2575, + "step": 251400 + }, + { + "epoch": 1.41, + "learning_rate": 3.586766499716128e-05, + "loss": 0.2641, + "step": 251500 + }, + { + "epoch": 1.41, + "learning_rate": 3.586204376689882e-05, + "loss": 0.2632, + "step": 251600 + }, + { + "epoch": 1.41, + "learning_rate": 3.5856422536636366e-05, + "loss": 0.258, + "step": 251700 + }, + { + "epoch": 1.42, + "learning_rate": 3.585080130637391e-05, + "loss": 0.2643, + "step": 251800 + }, + { + "epoch": 1.42, + "learning_rate": 3.584518007611146e-05, + "loss": 0.2595, + "step": 251900 + }, + { + "epoch": 1.42, + "learning_rate": 3.5839558845849005e-05, + "loss": 0.259, + "step": 252000 + }, + { + "epoch": 1.42, + "learning_rate": 3.583393761558655e-05, + "loss": 0.2546, + "step": 252100 + }, + { + "epoch": 1.42, + "learning_rate": 3.58283163853241e-05, + "loss": 0.2658, + "step": 252200 + }, + { + "epoch": 1.42, + "learning_rate": 3.582269515506164e-05, + "loss": 0.2659, + "step": 252300 + }, + { + "epoch": 1.42, + "learning_rate": 3.5817130137101805e-05, + "loss": 0.2593, + "step": 252400 + }, + { + "epoch": 1.42, + "learning_rate": 3.581150890683935e-05, + "loss": 0.254, + "step": 252500 + }, + { + "epoch": 1.42, + "learning_rate": 3.58058876765769e-05, + "loss": 0.2608, + "step": 252600 + }, + { + "epoch": 1.42, + "learning_rate": 3.580026644631444e-05, + "loss": 0.263, + "step": 252700 + }, + { + "epoch": 1.42, + "learning_rate": 3.579464521605198e-05, + "loss": 0.2632, + "step": 252800 + }, + { + "epoch": 1.42, + "learning_rate": 3.578902398578953e-05, + "loss": 0.2697, + "step": 252900 + }, + { + "epoch": 1.42, + "learning_rate": 3.5783402755527076e-05, + "loss": 0.2601, + "step": 253000 + }, + { + "epoch": 1.42, + "learning_rate": 3.577778152526462e-05, + "loss": 0.2593, + "step": 253100 + }, + { + "epoch": 1.42, + "learning_rate": 3.577216029500217e-05, + "loss": 0.2612, + "step": 253200 + }, + { + "epoch": 1.42, + "learning_rate": 3.5766539064739715e-05, + "loss": 0.2568, + "step": 253300 + }, + { + "epoch": 1.42, + "learning_rate": 3.5760917834477254e-05, + "loss": 0.2555, + "step": 253400 + }, + { + "epoch": 1.42, + "learning_rate": 3.575535281651742e-05, + "loss": 0.2647, + "step": 253500 + }, + { + "epoch": 1.43, + "learning_rate": 3.574973158625497e-05, + "loss": 0.2594, + "step": 253600 + }, + { + "epoch": 1.43, + "learning_rate": 3.5744110355992514e-05, + "loss": 0.2597, + "step": 253700 + }, + { + "epoch": 1.43, + "learning_rate": 3.5738489125730054e-05, + "loss": 0.2639, + "step": 253800 + }, + { + "epoch": 1.43, + "learning_rate": 3.57328678954676e-05, + "loss": 0.2527, + "step": 253900 + }, + { + "epoch": 1.43, + "learning_rate": 3.5727246665205146e-05, + "loss": 0.2603, + "step": 254000 + }, + { + "epoch": 1.43, + "learning_rate": 3.572162543494269e-05, + "loss": 0.2658, + "step": 254100 + }, + { + "epoch": 1.43, + "learning_rate": 3.571600420468024e-05, + "loss": 0.2563, + "step": 254200 + }, + { + "epoch": 1.43, + "learning_rate": 3.5710382974417785e-05, + "loss": 0.2687, + "step": 254300 + }, + { + "epoch": 1.43, + "learning_rate": 3.570476174415533e-05, + "loss": 0.2589, + "step": 254400 + }, + { + "epoch": 1.43, + "learning_rate": 3.569914051389287e-05, + "loss": 0.2582, + "step": 254500 + }, + { + "epoch": 1.43, + "learning_rate": 3.569351928363042e-05, + "loss": 0.2595, + "step": 254600 + }, + { + "epoch": 1.43, + "learning_rate": 3.5687898053367964e-05, + "loss": 0.2596, + "step": 254700 + }, + { + "epoch": 1.43, + "learning_rate": 3.568227682310551e-05, + "loss": 0.2545, + "step": 254800 + }, + { + "epoch": 1.43, + "learning_rate": 3.567665559284305e-05, + "loss": 0.2574, + "step": 254900 + }, + { + "epoch": 1.43, + "learning_rate": 3.5671034362580596e-05, + "loss": 0.2545, + "step": 255000 + }, + { + "epoch": 1.43, + "learning_rate": 3.566541313231814e-05, + "loss": 0.2554, + "step": 255100 + }, + { + "epoch": 1.43, + "learning_rate": 3.565979190205568e-05, + "loss": 0.2544, + "step": 255200 + }, + { + "epoch": 1.44, + "learning_rate": 3.565417067179323e-05, + "loss": 0.2631, + "step": 255300 + }, + { + "epoch": 1.44, + "learning_rate": 3.5648549441530774e-05, + "loss": 0.2672, + "step": 255400 + }, + { + "epoch": 1.44, + "learning_rate": 3.564292821126832e-05, + "loss": 0.2625, + "step": 255500 + }, + { + "epoch": 1.44, + "learning_rate": 3.5637306981005866e-05, + "loss": 0.2583, + "step": 255600 + }, + { + "epoch": 1.44, + "learning_rate": 3.563168575074341e-05, + "loss": 0.2532, + "step": 255700 + }, + { + "epoch": 1.44, + "learning_rate": 3.562606452048096e-05, + "loss": 0.258, + "step": 255800 + }, + { + "epoch": 1.44, + "learning_rate": 3.56204432902185e-05, + "loss": 0.2612, + "step": 255900 + }, + { + "epoch": 1.44, + "learning_rate": 3.5614822059956045e-05, + "loss": 0.2594, + "step": 256000 + }, + { + "epoch": 1.44, + "learning_rate": 3.560920082969359e-05, + "loss": 0.2579, + "step": 256100 + }, + { + "epoch": 1.44, + "learning_rate": 3.560357959943113e-05, + "loss": 0.2599, + "step": 256200 + }, + { + "epoch": 1.44, + "learning_rate": 3.55980145814713e-05, + "loss": 0.2597, + "step": 256300 + }, + { + "epoch": 1.44, + "learning_rate": 3.5592393351208845e-05, + "loss": 0.2614, + "step": 256400 + }, + { + "epoch": 1.44, + "learning_rate": 3.558677212094639e-05, + "loss": 0.2553, + "step": 256500 + }, + { + "epoch": 1.44, + "learning_rate": 3.558115089068394e-05, + "loss": 0.2604, + "step": 256600 + }, + { + "epoch": 1.44, + "learning_rate": 3.557552966042148e-05, + "loss": 0.256, + "step": 256700 + }, + { + "epoch": 1.44, + "learning_rate": 3.556990843015903e-05, + "loss": 0.2585, + "step": 256800 + }, + { + "epoch": 1.44, + "learning_rate": 3.5564287199896576e-05, + "loss": 0.2573, + "step": 256900 + }, + { + "epoch": 1.44, + "learning_rate": 3.5558665969634115e-05, + "loss": 0.2622, + "step": 257000 + }, + { + "epoch": 1.45, + "learning_rate": 3.555304473937166e-05, + "loss": 0.2609, + "step": 257100 + }, + { + "epoch": 1.45, + "learning_rate": 3.554742350910921e-05, + "loss": 0.2576, + "step": 257200 + }, + { + "epoch": 1.45, + "learning_rate": 3.554180227884675e-05, + "loss": 0.262, + "step": 257300 + }, + { + "epoch": 1.45, + "learning_rate": 3.5536181048584294e-05, + "loss": 0.2604, + "step": 257400 + }, + { + "epoch": 1.45, + "learning_rate": 3.553055981832184e-05, + "loss": 0.2576, + "step": 257500 + }, + { + "epoch": 1.45, + "learning_rate": 3.5524938588059386e-05, + "loss": 0.2579, + "step": 257600 + }, + { + "epoch": 1.45, + "learning_rate": 3.5519317357796926e-05, + "loss": 0.2604, + "step": 257700 + }, + { + "epoch": 1.45, + "learning_rate": 3.551369612753447e-05, + "loss": 0.2518, + "step": 257800 + }, + { + "epoch": 1.45, + "learning_rate": 3.550807489727202e-05, + "loss": 0.2555, + "step": 257900 + }, + { + "epoch": 1.45, + "learning_rate": 3.550245366700956e-05, + "loss": 0.2518, + "step": 258000 + }, + { + "epoch": 1.45, + "learning_rate": 3.5496832436747104e-05, + "loss": 0.2563, + "step": 258100 + }, + { + "epoch": 1.45, + "learning_rate": 3.549121120648465e-05, + "loss": 0.2579, + "step": 258200 + }, + { + "epoch": 1.45, + "learning_rate": 3.54855899762222e-05, + "loss": 0.2564, + "step": 258300 + }, + { + "epoch": 1.45, + "learning_rate": 3.547996874595974e-05, + "loss": 0.2618, + "step": 258400 + }, + { + "epoch": 1.45, + "learning_rate": 3.547434751569729e-05, + "loss": 0.2565, + "step": 258500 + }, + { + "epoch": 1.45, + "learning_rate": 3.546878249773746e-05, + "loss": 0.2564, + "step": 258600 + }, + { + "epoch": 1.45, + "learning_rate": 3.5463161267475e-05, + "loss": 0.2595, + "step": 258700 + }, + { + "epoch": 1.45, + "learning_rate": 3.545754003721254e-05, + "loss": 0.261, + "step": 258800 + }, + { + "epoch": 1.46, + "learning_rate": 3.545191880695009e-05, + "loss": 0.2618, + "step": 258900 + }, + { + "epoch": 1.46, + "learning_rate": 3.5446297576687635e-05, + "loss": 0.2552, + "step": 259000 + }, + { + "epoch": 1.46, + "learning_rate": 3.5440676346425175e-05, + "loss": 0.2607, + "step": 259100 + }, + { + "epoch": 1.46, + "learning_rate": 3.543505511616272e-05, + "loss": 0.244, + "step": 259200 + }, + { + "epoch": 1.46, + "learning_rate": 3.542943388590027e-05, + "loss": 0.2654, + "step": 259300 + }, + { + "epoch": 1.46, + "learning_rate": 3.542381265563782e-05, + "loss": 0.2576, + "step": 259400 + }, + { + "epoch": 1.46, + "learning_rate": 3.541819142537536e-05, + "loss": 0.2522, + "step": 259500 + }, + { + "epoch": 1.46, + "learning_rate": 3.5412570195112906e-05, + "loss": 0.2582, + "step": 259600 + }, + { + "epoch": 1.46, + "learning_rate": 3.540694896485045e-05, + "loss": 0.2612, + "step": 259700 + }, + { + "epoch": 1.46, + "learning_rate": 3.540132773458799e-05, + "loss": 0.2568, + "step": 259800 + }, + { + "epoch": 1.46, + "learning_rate": 3.539570650432554e-05, + "loss": 0.2584, + "step": 259900 + }, + { + "epoch": 1.46, + "learning_rate": 3.5390085274063084e-05, + "loss": 0.2572, + "step": 260000 + }, + { + "epoch": 1.46, + "learning_rate": 3.538446404380063e-05, + "loss": 0.2563, + "step": 260100 + }, + { + "epoch": 1.46, + "learning_rate": 3.537884281353817e-05, + "loss": 0.2604, + "step": 260200 + }, + { + "epoch": 1.46, + "learning_rate": 3.5373221583275716e-05, + "loss": 0.253, + "step": 260300 + }, + { + "epoch": 1.46, + "learning_rate": 3.536760035301326e-05, + "loss": 0.2636, + "step": 260400 + }, + { + "epoch": 1.46, + "learning_rate": 3.53619791227508e-05, + "loss": 0.2547, + "step": 260500 + }, + { + "epoch": 1.46, + "learning_rate": 3.535635789248835e-05, + "loss": 0.2626, + "step": 260600 + }, + { + "epoch": 1.47, + "learning_rate": 3.5350736662225895e-05, + "loss": 0.2614, + "step": 260700 + }, + { + "epoch": 1.47, + "learning_rate": 3.534511543196344e-05, + "loss": 0.2568, + "step": 260800 + }, + { + "epoch": 1.47, + "learning_rate": 3.533949420170099e-05, + "loss": 0.2539, + "step": 260900 + }, + { + "epoch": 1.47, + "learning_rate": 3.5333872971438534e-05, + "loss": 0.2501, + "step": 261000 + }, + { + "epoch": 1.47, + "learning_rate": 3.532825174117608e-05, + "loss": 0.2577, + "step": 261100 + }, + { + "epoch": 1.47, + "learning_rate": 3.532263051091362e-05, + "loss": 0.2554, + "step": 261200 + }, + { + "epoch": 1.47, + "learning_rate": 3.5317009280651166e-05, + "loss": 0.2571, + "step": 261300 + }, + { + "epoch": 1.47, + "learning_rate": 3.531138805038871e-05, + "loss": 0.2554, + "step": 261400 + }, + { + "epoch": 1.47, + "learning_rate": 3.530576682012626e-05, + "loss": 0.2577, + "step": 261500 + }, + { + "epoch": 1.47, + "learning_rate": 3.53001455898638e-05, + "loss": 0.2576, + "step": 261600 + }, + { + "epoch": 1.47, + "learning_rate": 3.5294524359601344e-05, + "loss": 0.2491, + "step": 261700 + }, + { + "epoch": 1.47, + "learning_rate": 3.528890312933889e-05, + "loss": 0.2637, + "step": 261800 + }, + { + "epoch": 1.47, + "learning_rate": 3.528328189907643e-05, + "loss": 0.2555, + "step": 261900 + }, + { + "epoch": 1.47, + "learning_rate": 3.5277660668813976e-05, + "loss": 0.2526, + "step": 262000 + }, + { + "epoch": 1.47, + "learning_rate": 3.527203943855152e-05, + "loss": 0.2594, + "step": 262100 + }, + { + "epoch": 1.47, + "learning_rate": 3.526641820828907e-05, + "loss": 0.2555, + "step": 262200 + }, + { + "epoch": 1.47, + "learning_rate": 3.526079697802661e-05, + "loss": 0.2531, + "step": 262300 + }, + { + "epoch": 1.48, + "learning_rate": 3.5255175747764154e-05, + "loss": 0.2608, + "step": 262400 + }, + { + "epoch": 1.48, + "learning_rate": 3.52495545175017e-05, + "loss": 0.2542, + "step": 262500 + }, + { + "epoch": 1.48, + "learning_rate": 3.524393328723925e-05, + "loss": 0.2657, + "step": 262600 + }, + { + "epoch": 1.48, + "learning_rate": 3.5238368269279415e-05, + "loss": 0.2558, + "step": 262700 + }, + { + "epoch": 1.48, + "learning_rate": 3.523274703901696e-05, + "loss": 0.2648, + "step": 262800 + }, + { + "epoch": 1.48, + "learning_rate": 3.522712580875451e-05, + "loss": 0.2605, + "step": 262900 + }, + { + "epoch": 1.48, + "learning_rate": 3.522150457849205e-05, + "loss": 0.2502, + "step": 263000 + }, + { + "epoch": 1.48, + "learning_rate": 3.521588334822959e-05, + "loss": 0.2528, + "step": 263100 + }, + { + "epoch": 1.48, + "learning_rate": 3.521026211796714e-05, + "loss": 0.2573, + "step": 263200 + }, + { + "epoch": 1.48, + "learning_rate": 3.5204640887704685e-05, + "loss": 0.2513, + "step": 263300 + }, + { + "epoch": 1.48, + "learning_rate": 3.5199019657442225e-05, + "loss": 0.2562, + "step": 263400 + }, + { + "epoch": 1.48, + "learning_rate": 3.519339842717977e-05, + "loss": 0.2506, + "step": 263500 + }, + { + "epoch": 1.48, + "learning_rate": 3.518777719691732e-05, + "loss": 0.2542, + "step": 263600 + }, + { + "epoch": 1.48, + "learning_rate": 3.5182155966654864e-05, + "loss": 0.2515, + "step": 263700 + }, + { + "epoch": 1.48, + "learning_rate": 3.517653473639241e-05, + "loss": 0.2627, + "step": 263800 + }, + { + "epoch": 1.48, + "learning_rate": 3.5170913506129956e-05, + "loss": 0.2548, + "step": 263900 + }, + { + "epoch": 1.48, + "learning_rate": 3.51652922758675e-05, + "loss": 0.2553, + "step": 264000 + }, + { + "epoch": 1.48, + "learning_rate": 3.515967104560504e-05, + "loss": 0.2591, + "step": 264100 + }, + { + "epoch": 1.49, + "learning_rate": 3.515404981534259e-05, + "loss": 0.2563, + "step": 264200 + }, + { + "epoch": 1.49, + "learning_rate": 3.5148428585080135e-05, + "loss": 0.2575, + "step": 264300 + }, + { + "epoch": 1.49, + "learning_rate": 3.5142807354817674e-05, + "loss": 0.2534, + "step": 264400 + }, + { + "epoch": 1.49, + "learning_rate": 3.513718612455522e-05, + "loss": 0.2613, + "step": 264500 + }, + { + "epoch": 1.49, + "learning_rate": 3.513156489429277e-05, + "loss": 0.2509, + "step": 264600 + }, + { + "epoch": 1.49, + "learning_rate": 3.512599987633294e-05, + "loss": 0.2547, + "step": 264700 + }, + { + "epoch": 1.49, + "learning_rate": 3.512037864607048e-05, + "loss": 0.2613, + "step": 264800 + }, + { + "epoch": 1.49, + "learning_rate": 3.511475741580803e-05, + "loss": 0.2616, + "step": 264900 + }, + { + "epoch": 1.49, + "learning_rate": 3.510913618554557e-05, + "loss": 0.2528, + "step": 265000 + }, + { + "epoch": 1.49, + "learning_rate": 3.510351495528311e-05, + "loss": 0.2598, + "step": 265100 + }, + { + "epoch": 1.49, + "learning_rate": 3.509789372502066e-05, + "loss": 0.2557, + "step": 265200 + }, + { + "epoch": 1.49, + "learning_rate": 3.5092272494758205e-05, + "loss": 0.2582, + "step": 265300 + }, + { + "epoch": 1.49, + "learning_rate": 3.508665126449575e-05, + "loss": 0.2499, + "step": 265400 + }, + { + "epoch": 1.49, + "learning_rate": 3.508103003423329e-05, + "loss": 0.2501, + "step": 265500 + }, + { + "epoch": 1.49, + "learning_rate": 3.507540880397084e-05, + "loss": 0.2542, + "step": 265600 + }, + { + "epoch": 1.49, + "learning_rate": 3.5069787573708384e-05, + "loss": 0.2572, + "step": 265700 + }, + { + "epoch": 1.49, + "learning_rate": 3.506416634344592e-05, + "loss": 0.2547, + "step": 265800 + }, + { + "epoch": 1.49, + "learning_rate": 3.505854511318347e-05, + "loss": 0.2533, + "step": 265900 + }, + { + "epoch": 1.5, + "learning_rate": 3.5052923882921016e-05, + "loss": 0.2534, + "step": 266000 + }, + { + "epoch": 1.5, + "learning_rate": 3.504730265265856e-05, + "loss": 0.2545, + "step": 266100 + }, + { + "epoch": 1.5, + "learning_rate": 3.504168142239611e-05, + "loss": 0.2531, + "step": 266200 + }, + { + "epoch": 1.5, + "learning_rate": 3.5036060192133654e-05, + "loss": 0.2562, + "step": 266300 + }, + { + "epoch": 1.5, + "learning_rate": 3.50304389618712e-05, + "loss": 0.2537, + "step": 266400 + }, + { + "epoch": 1.5, + "learning_rate": 3.502481773160874e-05, + "loss": 0.2553, + "step": 266500 + }, + { + "epoch": 1.5, + "learning_rate": 3.5019196501346287e-05, + "loss": 0.2505, + "step": 266600 + }, + { + "epoch": 1.5, + "learning_rate": 3.501357527108383e-05, + "loss": 0.2583, + "step": 266700 + }, + { + "epoch": 1.5, + "learning_rate": 3.500795404082138e-05, + "loss": 0.2475, + "step": 266800 + }, + { + "epoch": 1.5, + "learning_rate": 3.500238902286154e-05, + "loss": 0.2632, + "step": 266900 + }, + { + "epoch": 1.5, + "learning_rate": 3.4996767792599086e-05, + "loss": 0.259, + "step": 267000 + }, + { + "epoch": 1.5, + "learning_rate": 3.499114656233663e-05, + "loss": 0.2537, + "step": 267100 + }, + { + "epoch": 1.5, + "learning_rate": 3.498552533207418e-05, + "loss": 0.2529, + "step": 267200 + }, + { + "epoch": 1.5, + "learning_rate": 3.4979904101811725e-05, + "loss": 0.2503, + "step": 267300 + }, + { + "epoch": 1.5, + "learning_rate": 3.497428287154927e-05, + "loss": 0.2625, + "step": 267400 + }, + { + "epoch": 1.5, + "learning_rate": 3.496871785358944e-05, + "loss": 0.25, + "step": 267500 + }, + { + "epoch": 1.5, + "learning_rate": 3.4963096623326985e-05, + "loss": 0.2485, + "step": 267600 + }, + { + "epoch": 1.5, + "learning_rate": 3.4957475393064525e-05, + "loss": 0.2481, + "step": 267700 + }, + { + "epoch": 1.51, + "learning_rate": 3.495185416280207e-05, + "loss": 0.2568, + "step": 267800 + }, + { + "epoch": 1.51, + "learning_rate": 3.494623293253962e-05, + "loss": 0.2549, + "step": 267900 + }, + { + "epoch": 1.51, + "learning_rate": 3.494061170227716e-05, + "loss": 0.2516, + "step": 268000 + }, + { + "epoch": 1.51, + "learning_rate": 3.49349904720147e-05, + "loss": 0.2646, + "step": 268100 + }, + { + "epoch": 1.51, + "learning_rate": 3.492936924175225e-05, + "loss": 0.2534, + "step": 268200 + }, + { + "epoch": 1.51, + "learning_rate": 3.4923748011489796e-05, + "loss": 0.2581, + "step": 268300 + }, + { + "epoch": 1.51, + "learning_rate": 3.491812678122734e-05, + "loss": 0.2553, + "step": 268400 + }, + { + "epoch": 1.51, + "learning_rate": 3.491250555096489e-05, + "loss": 0.2585, + "step": 268500 + }, + { + "epoch": 1.51, + "learning_rate": 3.4906884320702434e-05, + "loss": 0.2578, + "step": 268600 + }, + { + "epoch": 1.51, + "learning_rate": 3.4901263090439974e-05, + "loss": 0.2521, + "step": 268700 + }, + { + "epoch": 1.51, + "learning_rate": 3.489564186017752e-05, + "loss": 0.253, + "step": 268800 + }, + { + "epoch": 1.51, + "learning_rate": 3.4890020629915067e-05, + "loss": 0.2537, + "step": 268900 + }, + { + "epoch": 1.51, + "learning_rate": 3.488439939965261e-05, + "loss": 0.2533, + "step": 269000 + }, + { + "epoch": 1.51, + "learning_rate": 3.487877816939015e-05, + "loss": 0.2554, + "step": 269100 + }, + { + "epoch": 1.51, + "learning_rate": 3.48731569391277e-05, + "loss": 0.2612, + "step": 269200 + }, + { + "epoch": 1.51, + "learning_rate": 3.4867535708865245e-05, + "loss": 0.2588, + "step": 269300 + }, + { + "epoch": 1.51, + "learning_rate": 3.4861914478602784e-05, + "loss": 0.2542, + "step": 269400 + }, + { + "epoch": 1.51, + "learning_rate": 3.485629324834033e-05, + "loss": 0.2548, + "step": 269500 + }, + { + "epoch": 1.52, + "learning_rate": 3.485067201807788e-05, + "loss": 0.26, + "step": 269600 + }, + { + "epoch": 1.52, + "learning_rate": 3.484505078781542e-05, + "loss": 0.2551, + "step": 269700 + }, + { + "epoch": 1.52, + "learning_rate": 3.483942955755297e-05, + "loss": 0.2541, + "step": 269800 + }, + { + "epoch": 1.52, + "learning_rate": 3.483386453959314e-05, + "loss": 0.2581, + "step": 269900 + }, + { + "epoch": 1.52, + "learning_rate": 3.482824330933068e-05, + "loss": 0.2539, + "step": 270000 + }, + { + "epoch": 1.52, + "learning_rate": 3.482262207906823e-05, + "loss": 0.2528, + "step": 270100 + }, + { + "epoch": 1.52, + "learning_rate": 3.481700084880577e-05, + "loss": 0.2546, + "step": 270200 + }, + { + "epoch": 1.52, + "learning_rate": 3.4811379618543315e-05, + "loss": 0.252, + "step": 270300 + }, + { + "epoch": 1.52, + "learning_rate": 3.480575838828086e-05, + "loss": 0.2559, + "step": 270400 + }, + { + "epoch": 1.52, + "learning_rate": 3.48001371580184e-05, + "loss": 0.2595, + "step": 270500 + }, + { + "epoch": 1.52, + "learning_rate": 3.479451592775595e-05, + "loss": 0.2555, + "step": 270600 + }, + { + "epoch": 1.52, + "learning_rate": 3.4788894697493494e-05, + "loss": 0.2518, + "step": 270700 + }, + { + "epoch": 1.52, + "learning_rate": 3.478327346723104e-05, + "loss": 0.258, + "step": 270800 + }, + { + "epoch": 1.52, + "learning_rate": 3.4777652236968586e-05, + "loss": 0.2481, + "step": 270900 + }, + { + "epoch": 1.52, + "learning_rate": 3.477203100670613e-05, + "loss": 0.2579, + "step": 271000 + }, + { + "epoch": 1.52, + "learning_rate": 3.476640977644368e-05, + "loss": 0.2572, + "step": 271100 + }, + { + "epoch": 1.52, + "learning_rate": 3.476078854618122e-05, + "loss": 0.2547, + "step": 271200 + }, + { + "epoch": 1.53, + "learning_rate": 3.4755167315918765e-05, + "loss": 0.2521, + "step": 271300 + }, + { + "epoch": 1.53, + "learning_rate": 3.474954608565631e-05, + "loss": 0.2507, + "step": 271400 + }, + { + "epoch": 1.53, + "learning_rate": 3.474392485539386e-05, + "loss": 0.2595, + "step": 271500 + }, + { + "epoch": 1.53, + "learning_rate": 3.47383036251314e-05, + "loss": 0.2581, + "step": 271600 + }, + { + "epoch": 1.53, + "learning_rate": 3.473268239486894e-05, + "loss": 0.2592, + "step": 271700 + }, + { + "epoch": 1.53, + "learning_rate": 3.472706116460649e-05, + "loss": 0.2488, + "step": 271800 + }, + { + "epoch": 1.53, + "learning_rate": 3.472143993434403e-05, + "loss": 0.2593, + "step": 271900 + }, + { + "epoch": 1.53, + "learning_rate": 3.4715818704081575e-05, + "loss": 0.2521, + "step": 272000 + }, + { + "epoch": 1.53, + "learning_rate": 3.471019747381912e-05, + "loss": 0.251, + "step": 272100 + }, + { + "epoch": 1.53, + "learning_rate": 3.470457624355667e-05, + "loss": 0.2547, + "step": 272200 + }, + { + "epoch": 1.53, + "learning_rate": 3.469895501329421e-05, + "loss": 0.256, + "step": 272300 + }, + { + "epoch": 1.53, + "learning_rate": 3.469338999533438e-05, + "loss": 0.2573, + "step": 272400 + }, + { + "epoch": 1.53, + "learning_rate": 3.468776876507193e-05, + "loss": 0.2537, + "step": 272500 + }, + { + "epoch": 1.53, + "learning_rate": 3.4682147534809474e-05, + "loss": 0.2543, + "step": 272600 + }, + { + "epoch": 1.53, + "learning_rate": 3.4676526304547014e-05, + "loss": 0.2564, + "step": 272700 + }, + { + "epoch": 1.53, + "learning_rate": 3.467090507428456e-05, + "loss": 0.2505, + "step": 272800 + }, + { + "epoch": 1.53, + "learning_rate": 3.4665283844022106e-05, + "loss": 0.2566, + "step": 272900 + }, + { + "epoch": 1.53, + "learning_rate": 3.4659662613759646e-05, + "loss": 0.2573, + "step": 273000 + }, + { + "epoch": 1.54, + "learning_rate": 3.465404138349719e-05, + "loss": 0.2546, + "step": 273100 + }, + { + "epoch": 1.54, + "learning_rate": 3.464842015323474e-05, + "loss": 0.2566, + "step": 273200 + }, + { + "epoch": 1.54, + "learning_rate": 3.4642798922972284e-05, + "loss": 0.2564, + "step": 273300 + }, + { + "epoch": 1.54, + "learning_rate": 3.4637177692709824e-05, + "loss": 0.2545, + "step": 273400 + }, + { + "epoch": 1.54, + "learning_rate": 3.463155646244737e-05, + "loss": 0.2593, + "step": 273500 + }, + { + "epoch": 1.54, + "learning_rate": 3.4625935232184916e-05, + "loss": 0.2529, + "step": 273600 + }, + { + "epoch": 1.54, + "learning_rate": 3.462031400192246e-05, + "loss": 0.2524, + "step": 273700 + }, + { + "epoch": 1.54, + "learning_rate": 3.461469277166001e-05, + "loss": 0.2533, + "step": 273800 + }, + { + "epoch": 1.54, + "learning_rate": 3.4609071541397555e-05, + "loss": 0.2545, + "step": 273900 + }, + { + "epoch": 1.54, + "learning_rate": 3.46034503111351e-05, + "loss": 0.2456, + "step": 274000 + }, + { + "epoch": 1.54, + "learning_rate": 3.459782908087264e-05, + "loss": 0.2489, + "step": 274100 + }, + { + "epoch": 1.54, + "learning_rate": 3.459220785061019e-05, + "loss": 0.2513, + "step": 274200 + }, + { + "epoch": 1.54, + "learning_rate": 3.4586586620347734e-05, + "loss": 0.2594, + "step": 274300 + }, + { + "epoch": 1.54, + "learning_rate": 3.458096539008527e-05, + "loss": 0.2549, + "step": 274400 + }, + { + "epoch": 1.54, + "learning_rate": 3.457534415982282e-05, + "loss": 0.246, + "step": 274500 + }, + { + "epoch": 1.54, + "learning_rate": 3.4569722929560366e-05, + "loss": 0.2474, + "step": 274600 + }, + { + "epoch": 1.54, + "learning_rate": 3.456410169929791e-05, + "loss": 0.2515, + "step": 274700 + }, + { + "epoch": 1.54, + "learning_rate": 3.455848046903545e-05, + "loss": 0.2555, + "step": 274800 + }, + { + "epoch": 1.55, + "learning_rate": 3.4552859238773e-05, + "loss": 0.2469, + "step": 274900 + }, + { + "epoch": 1.55, + "learning_rate": 3.454729422081317e-05, + "loss": 0.2455, + "step": 275000 + }, + { + "epoch": 1.55, + "learning_rate": 3.454167299055071e-05, + "loss": 0.251, + "step": 275100 + }, + { + "epoch": 1.55, + "learning_rate": 3.453605176028826e-05, + "loss": 0.2529, + "step": 275200 + }, + { + "epoch": 1.55, + "learning_rate": 3.4530430530025804e-05, + "loss": 0.2507, + "step": 275300 + }, + { + "epoch": 1.55, + "learning_rate": 3.452480929976335e-05, + "loss": 0.2517, + "step": 275400 + }, + { + "epoch": 1.55, + "learning_rate": 3.451918806950089e-05, + "loss": 0.2537, + "step": 275500 + }, + { + "epoch": 1.55, + "learning_rate": 3.4513566839238436e-05, + "loss": 0.2551, + "step": 275600 + }, + { + "epoch": 1.55, + "learning_rate": 3.450794560897598e-05, + "loss": 0.2583, + "step": 275700 + }, + { + "epoch": 1.55, + "learning_rate": 3.450232437871352e-05, + "loss": 0.2419, + "step": 275800 + }, + { + "epoch": 1.55, + "learning_rate": 3.449670314845107e-05, + "loss": 0.2598, + "step": 275900 + }, + { + "epoch": 1.55, + "learning_rate": 3.4491081918188615e-05, + "loss": 0.2509, + "step": 276000 + }, + { + "epoch": 1.55, + "learning_rate": 3.448546068792616e-05, + "loss": 0.2509, + "step": 276100 + }, + { + "epoch": 1.55, + "learning_rate": 3.447983945766371e-05, + "loss": 0.2493, + "step": 276200 + }, + { + "epoch": 1.55, + "learning_rate": 3.4474218227401253e-05, + "loss": 0.2522, + "step": 276300 + }, + { + "epoch": 1.55, + "learning_rate": 3.44685969971388e-05, + "loss": 0.2544, + "step": 276400 + }, + { + "epoch": 1.55, + "learning_rate": 3.446297576687634e-05, + "loss": 0.2543, + "step": 276500 + }, + { + "epoch": 1.55, + "learning_rate": 3.4457354536613885e-05, + "loss": 0.2535, + "step": 276600 + }, + { + "epoch": 1.56, + "learning_rate": 3.445173330635143e-05, + "loss": 0.2554, + "step": 276700 + }, + { + "epoch": 1.56, + "learning_rate": 3.444611207608898e-05, + "loss": 0.2542, + "step": 276800 + }, + { + "epoch": 1.56, + "learning_rate": 3.444049084582652e-05, + "loss": 0.2469, + "step": 276900 + }, + { + "epoch": 1.56, + "learning_rate": 3.4434869615564064e-05, + "loss": 0.2468, + "step": 277000 + }, + { + "epoch": 1.56, + "learning_rate": 3.442924838530161e-05, + "loss": 0.2531, + "step": 277100 + }, + { + "epoch": 1.56, + "learning_rate": 3.442362715503915e-05, + "loss": 0.2531, + "step": 277200 + }, + { + "epoch": 1.56, + "learning_rate": 3.4418005924776696e-05, + "loss": 0.2528, + "step": 277300 + }, + { + "epoch": 1.56, + "learning_rate": 3.441238469451424e-05, + "loss": 0.2501, + "step": 277400 + }, + { + "epoch": 1.56, + "learning_rate": 3.440676346425179e-05, + "loss": 0.2585, + "step": 277500 + }, + { + "epoch": 1.56, + "learning_rate": 3.440114223398933e-05, + "loss": 0.2611, + "step": 277600 + }, + { + "epoch": 1.56, + "learning_rate": 3.4395521003726874e-05, + "loss": 0.2515, + "step": 277700 + }, + { + "epoch": 1.56, + "learning_rate": 3.438989977346442e-05, + "loss": 0.2523, + "step": 277800 + }, + { + "epoch": 1.56, + "learning_rate": 3.438427854320197e-05, + "loss": 0.2522, + "step": 277900 + }, + { + "epoch": 1.56, + "learning_rate": 3.437865731293951e-05, + "loss": 0.261, + "step": 278000 + }, + { + "epoch": 1.56, + "learning_rate": 3.437303608267706e-05, + "loss": 0.2558, + "step": 278100 + }, + { + "epoch": 1.56, + "learning_rate": 3.4367414852414606e-05, + "loss": 0.2533, + "step": 278200 + }, + { + "epoch": 1.56, + "learning_rate": 3.4361793622152145e-05, + "loss": 0.252, + "step": 278300 + }, + { + "epoch": 1.56, + "learning_rate": 3.435617239188969e-05, + "loss": 0.2537, + "step": 278400 + }, + { + "epoch": 1.57, + "learning_rate": 3.435055116162724e-05, + "loss": 0.2431, + "step": 278500 + }, + { + "epoch": 1.57, + "learning_rate": 3.434492993136478e-05, + "loss": 0.2536, + "step": 278600 + }, + { + "epoch": 1.57, + "learning_rate": 3.4339308701102323e-05, + "loss": 0.2533, + "step": 278700 + }, + { + "epoch": 1.57, + "learning_rate": 3.433368747083987e-05, + "loss": 0.2483, + "step": 278800 + }, + { + "epoch": 1.57, + "learning_rate": 3.4328066240577416e-05, + "loss": 0.246, + "step": 278900 + }, + { + "epoch": 1.57, + "learning_rate": 3.4322445010314955e-05, + "loss": 0.251, + "step": 279000 + }, + { + "epoch": 1.57, + "learning_rate": 3.431687999235513e-05, + "loss": 0.2589, + "step": 279100 + }, + { + "epoch": 1.57, + "learning_rate": 3.4311258762092676e-05, + "loss": 0.2535, + "step": 279200 + }, + { + "epoch": 1.57, + "learning_rate": 3.430563753183022e-05, + "loss": 0.2511, + "step": 279300 + }, + { + "epoch": 1.57, + "learning_rate": 3.430001630156776e-05, + "loss": 0.2524, + "step": 279400 + }, + { + "epoch": 1.57, + "learning_rate": 3.429445128360793e-05, + "loss": 0.2574, + "step": 279500 + }, + { + "epoch": 1.57, + "learning_rate": 3.4288830053345476e-05, + "loss": 0.2495, + "step": 279600 + }, + { + "epoch": 1.57, + "learning_rate": 3.428320882308302e-05, + "loss": 0.2504, + "step": 279700 + }, + { + "epoch": 1.57, + "learning_rate": 3.427758759282056e-05, + "loss": 0.2474, + "step": 279800 + }, + { + "epoch": 1.57, + "learning_rate": 3.427196636255811e-05, + "loss": 0.2488, + "step": 279900 + }, + { + "epoch": 1.57, + "learning_rate": 3.4266345132295654e-05, + "loss": 0.2512, + "step": 280000 + }, + { + "epoch": 1.57, + "learning_rate": 3.42607239020332e-05, + "loss": 0.247, + "step": 280100 + }, + { + "epoch": 1.58, + "learning_rate": 3.425510267177075e-05, + "loss": 0.2563, + "step": 280200 + }, + { + "epoch": 1.58, + "learning_rate": 3.424948144150829e-05, + "loss": 0.2486, + "step": 280300 + }, + { + "epoch": 1.58, + "learning_rate": 3.424386021124584e-05, + "loss": 0.2551, + "step": 280400 + }, + { + "epoch": 1.58, + "learning_rate": 3.423823898098338e-05, + "loss": 0.2454, + "step": 280500 + }, + { + "epoch": 1.58, + "learning_rate": 3.4232617750720925e-05, + "loss": 0.2527, + "step": 280600 + }, + { + "epoch": 1.58, + "learning_rate": 3.422699652045847e-05, + "loss": 0.2517, + "step": 280700 + }, + { + "epoch": 1.58, + "learning_rate": 3.422137529019601e-05, + "loss": 0.2512, + "step": 280800 + }, + { + "epoch": 1.58, + "learning_rate": 3.421575405993356e-05, + "loss": 0.2536, + "step": 280900 + }, + { + "epoch": 1.58, + "learning_rate": 3.4210132829671103e-05, + "loss": 0.2534, + "step": 281000 + }, + { + "epoch": 1.58, + "learning_rate": 3.420451159940865e-05, + "loss": 0.245, + "step": 281100 + }, + { + "epoch": 1.58, + "learning_rate": 3.419889036914619e-05, + "loss": 0.2566, + "step": 281200 + }, + { + "epoch": 1.58, + "learning_rate": 3.4193269138883735e-05, + "loss": 0.2541, + "step": 281300 + }, + { + "epoch": 1.58, + "learning_rate": 3.418764790862128e-05, + "loss": 0.2521, + "step": 281400 + }, + { + "epoch": 1.58, + "learning_rate": 3.418202667835883e-05, + "loss": 0.2551, + "step": 281500 + }, + { + "epoch": 1.58, + "learning_rate": 3.4176405448096374e-05, + "loss": 0.2566, + "step": 281600 + }, + { + "epoch": 1.58, + "learning_rate": 3.417084043013654e-05, + "loss": 0.2462, + "step": 281700 + }, + { + "epoch": 1.58, + "learning_rate": 3.416521919987409e-05, + "loss": 0.2473, + "step": 281800 + }, + { + "epoch": 1.58, + "learning_rate": 3.415959796961163e-05, + "loss": 0.2504, + "step": 281900 + }, + { + "epoch": 1.59, + "learning_rate": 3.4153976739349174e-05, + "loss": 0.2494, + "step": 282000 + }, + { + "epoch": 1.59, + "learning_rate": 3.414835550908672e-05, + "loss": 0.2584, + "step": 282100 + }, + { + "epoch": 1.59, + "learning_rate": 3.4142734278824267e-05, + "loss": 0.2493, + "step": 282200 + }, + { + "epoch": 1.59, + "learning_rate": 3.4137113048561806e-05, + "loss": 0.2424, + "step": 282300 + }, + { + "epoch": 1.59, + "learning_rate": 3.413149181829935e-05, + "loss": 0.2505, + "step": 282400 + }, + { + "epoch": 1.59, + "learning_rate": 3.41258705880369e-05, + "loss": 0.25, + "step": 282500 + }, + { + "epoch": 1.59, + "learning_rate": 3.4120249357774445e-05, + "loss": 0.2638, + "step": 282600 + }, + { + "epoch": 1.59, + "learning_rate": 3.411462812751199e-05, + "loss": 0.2529, + "step": 282700 + }, + { + "epoch": 1.59, + "learning_rate": 3.410900689724954e-05, + "loss": 0.2542, + "step": 282800 + }, + { + "epoch": 1.59, + "learning_rate": 3.4103385666987084e-05, + "loss": 0.2529, + "step": 282900 + }, + { + "epoch": 1.59, + "learning_rate": 3.409776443672462e-05, + "loss": 0.2485, + "step": 283000 + }, + { + "epoch": 1.59, + "learning_rate": 3.409214320646217e-05, + "loss": 0.2538, + "step": 283100 + }, + { + "epoch": 1.59, + "learning_rate": 3.4086521976199716e-05, + "loss": 0.2542, + "step": 283200 + }, + { + "epoch": 1.59, + "learning_rate": 3.4080900745937255e-05, + "loss": 0.2448, + "step": 283300 + }, + { + "epoch": 1.59, + "learning_rate": 3.40752795156748e-05, + "loss": 0.2436, + "step": 283400 + }, + { + "epoch": 1.59, + "learning_rate": 3.406965828541235e-05, + "loss": 0.251, + "step": 283500 + }, + { + "epoch": 1.59, + "learning_rate": 3.4064037055149894e-05, + "loss": 0.2503, + "step": 283600 + }, + { + "epoch": 1.59, + "learning_rate": 3.4058415824887434e-05, + "loss": 0.2501, + "step": 283700 + }, + { + "epoch": 1.6, + "learning_rate": 3.405279459462498e-05, + "loss": 0.2515, + "step": 283800 + }, + { + "epoch": 1.6, + "learning_rate": 3.4047173364362526e-05, + "loss": 0.2487, + "step": 283900 + }, + { + "epoch": 1.6, + "learning_rate": 3.4041552134100066e-05, + "loss": 0.2582, + "step": 284000 + }, + { + "epoch": 1.6, + "learning_rate": 3.403593090383761e-05, + "loss": 0.2542, + "step": 284100 + }, + { + "epoch": 1.6, + "learning_rate": 3.403030967357516e-05, + "loss": 0.2492, + "step": 284200 + }, + { + "epoch": 1.6, + "learning_rate": 3.4024688443312704e-05, + "loss": 0.2503, + "step": 284300 + }, + { + "epoch": 1.6, + "learning_rate": 3.401906721305025e-05, + "loss": 0.2515, + "step": 284400 + }, + { + "epoch": 1.6, + "learning_rate": 3.40134459827878e-05, + "loss": 0.2492, + "step": 284500 + }, + { + "epoch": 1.6, + "learning_rate": 3.400782475252534e-05, + "loss": 0.2438, + "step": 284600 + }, + { + "epoch": 1.6, + "learning_rate": 3.400220352226288e-05, + "loss": 0.2495, + "step": 284700 + }, + { + "epoch": 1.6, + "learning_rate": 3.399658229200043e-05, + "loss": 0.2471, + "step": 284800 + }, + { + "epoch": 1.6, + "learning_rate": 3.3990961061737975e-05, + "loss": 0.2485, + "step": 284900 + }, + { + "epoch": 1.6, + "learning_rate": 3.3985339831475515e-05, + "loss": 0.2418, + "step": 285000 + }, + { + "epoch": 1.6, + "learning_rate": 3.397971860121306e-05, + "loss": 0.2456, + "step": 285100 + }, + { + "epoch": 1.6, + "learning_rate": 3.397409737095061e-05, + "loss": 0.2537, + "step": 285200 + }, + { + "epoch": 1.6, + "learning_rate": 3.3968476140688154e-05, + "loss": 0.2545, + "step": 285300 + }, + { + "epoch": 1.6, + "learning_rate": 3.396291112272832e-05, + "loss": 0.2486, + "step": 285400 + }, + { + "epoch": 1.6, + "learning_rate": 3.395728989246587e-05, + "loss": 0.2586, + "step": 285500 + }, + { + "epoch": 1.61, + "learning_rate": 3.3951668662203414e-05, + "loss": 0.2582, + "step": 285600 + }, + { + "epoch": 1.61, + "learning_rate": 3.394604743194096e-05, + "loss": 0.2526, + "step": 285700 + }, + { + "epoch": 1.61, + "learning_rate": 3.39404262016785e-05, + "loss": 0.2537, + "step": 285800 + }, + { + "epoch": 1.61, + "learning_rate": 3.3934804971416046e-05, + "loss": 0.255, + "step": 285900 + }, + { + "epoch": 1.61, + "learning_rate": 3.392918374115359e-05, + "loss": 0.254, + "step": 286000 + }, + { + "epoch": 1.61, + "learning_rate": 3.392356251089113e-05, + "loss": 0.249, + "step": 286100 + }, + { + "epoch": 1.61, + "learning_rate": 3.391794128062868e-05, + "loss": 0.2498, + "step": 286200 + }, + { + "epoch": 1.61, + "learning_rate": 3.3912320050366224e-05, + "loss": 0.254, + "step": 286300 + }, + { + "epoch": 1.61, + "learning_rate": 3.390669882010377e-05, + "loss": 0.2542, + "step": 286400 + }, + { + "epoch": 1.61, + "learning_rate": 3.390107758984131e-05, + "loss": 0.257, + "step": 286500 + }, + { + "epoch": 1.61, + "learning_rate": 3.3895456359578856e-05, + "loss": 0.2501, + "step": 286600 + }, + { + "epoch": 1.61, + "learning_rate": 3.38898351293164e-05, + "loss": 0.2447, + "step": 286700 + }, + { + "epoch": 1.61, + "learning_rate": 3.388421389905395e-05, + "loss": 0.254, + "step": 286800 + }, + { + "epoch": 1.61, + "learning_rate": 3.3878592668791495e-05, + "loss": 0.2475, + "step": 286900 + }, + { + "epoch": 1.61, + "learning_rate": 3.387297143852904e-05, + "loss": 0.2498, + "step": 287000 + }, + { + "epoch": 1.61, + "learning_rate": 3.386735020826659e-05, + "loss": 0.2546, + "step": 287100 + }, + { + "epoch": 1.61, + "learning_rate": 3.386172897800413e-05, + "loss": 0.2472, + "step": 287200 + }, + { + "epoch": 1.61, + "learning_rate": 3.3856107747741673e-05, + "loss": 0.2479, + "step": 287300 + }, + { + "epoch": 1.62, + "learning_rate": 3.385048651747922e-05, + "loss": 0.2477, + "step": 287400 + }, + { + "epoch": 1.62, + "learning_rate": 3.384486528721676e-05, + "loss": 0.2627, + "step": 287500 + }, + { + "epoch": 1.62, + "learning_rate": 3.3839244056954306e-05, + "loss": 0.2557, + "step": 287600 + }, + { + "epoch": 1.62, + "learning_rate": 3.383362282669185e-05, + "loss": 0.2448, + "step": 287700 + }, + { + "epoch": 1.62, + "learning_rate": 3.38280015964294e-05, + "loss": 0.2518, + "step": 287800 + }, + { + "epoch": 1.62, + "learning_rate": 3.382238036616694e-05, + "loss": 0.2528, + "step": 287900 + }, + { + "epoch": 1.62, + "learning_rate": 3.3816759135904484e-05, + "loss": 0.2455, + "step": 288000 + }, + { + "epoch": 1.62, + "learning_rate": 3.381113790564203e-05, + "loss": 0.2587, + "step": 288100 + }, + { + "epoch": 1.62, + "learning_rate": 3.380551667537957e-05, + "loss": 0.2499, + "step": 288200 + }, + { + "epoch": 1.62, + "learning_rate": 3.3799895445117116e-05, + "loss": 0.2543, + "step": 288300 + }, + { + "epoch": 1.62, + "learning_rate": 3.379427421485466e-05, + "loss": 0.2518, + "step": 288400 + }, + { + "epoch": 1.62, + "learning_rate": 3.378865298459221e-05, + "loss": 0.2505, + "step": 288500 + }, + { + "epoch": 1.62, + "learning_rate": 3.3783031754329755e-05, + "loss": 0.243, + "step": 288600 + }, + { + "epoch": 1.62, + "learning_rate": 3.37774105240673e-05, + "loss": 0.2497, + "step": 288700 + }, + { + "epoch": 1.62, + "learning_rate": 3.377178929380485e-05, + "loss": 0.2584, + "step": 288800 + }, + { + "epoch": 1.62, + "learning_rate": 3.376616806354239e-05, + "loss": 0.2473, + "step": 288900 + }, + { + "epoch": 1.62, + "learning_rate": 3.376054683327993e-05, + "loss": 0.2592, + "step": 289000 + }, + { + "epoch": 1.63, + "learning_rate": 3.37549818153201e-05, + "loss": 0.2522, + "step": 289100 + }, + { + "epoch": 1.63, + "learning_rate": 3.374936058505765e-05, + "loss": 0.251, + "step": 289200 + }, + { + "epoch": 1.63, + "learning_rate": 3.3743739354795186e-05, + "loss": 0.2462, + "step": 289300 + }, + { + "epoch": 1.63, + "learning_rate": 3.373811812453273e-05, + "loss": 0.2471, + "step": 289400 + }, + { + "epoch": 1.63, + "learning_rate": 3.373249689427028e-05, + "loss": 0.2498, + "step": 289500 + }, + { + "epoch": 1.63, + "learning_rate": 3.372687566400783e-05, + "loss": 0.2522, + "step": 289600 + }, + { + "epoch": 1.63, + "learning_rate": 3.372125443374537e-05, + "loss": 0.2574, + "step": 289700 + }, + { + "epoch": 1.63, + "learning_rate": 3.371563320348292e-05, + "loss": 0.2515, + "step": 289800 + }, + { + "epoch": 1.63, + "learning_rate": 3.3710011973220464e-05, + "loss": 0.2494, + "step": 289900 + }, + { + "epoch": 1.63, + "learning_rate": 3.3704390742958004e-05, + "loss": 0.2488, + "step": 290000 + }, + { + "epoch": 1.63, + "learning_rate": 3.369876951269555e-05, + "loss": 0.2447, + "step": 290100 + }, + { + "epoch": 1.63, + "learning_rate": 3.369320449473572e-05, + "loss": 0.2458, + "step": 290200 + }, + { + "epoch": 1.63, + "learning_rate": 3.3687583264473264e-05, + "loss": 0.2512, + "step": 290300 + }, + { + "epoch": 1.63, + "learning_rate": 3.36819620342108e-05, + "loss": 0.2434, + "step": 290400 + }, + { + "epoch": 1.63, + "learning_rate": 3.3676340803948356e-05, + "loss": 0.2471, + "step": 290500 + }, + { + "epoch": 1.63, + "learning_rate": 3.36707195736859e-05, + "loss": 0.2452, + "step": 290600 + }, + { + "epoch": 1.63, + "learning_rate": 3.366509834342345e-05, + "loss": 0.2477, + "step": 290700 + }, + { + "epoch": 1.63, + "learning_rate": 3.365947711316099e-05, + "loss": 0.2558, + "step": 290800 + }, + { + "epoch": 1.64, + "learning_rate": 3.3653855882898535e-05, + "loss": 0.2459, + "step": 290900 + }, + { + "epoch": 1.64, + "learning_rate": 3.364823465263608e-05, + "loss": 0.2487, + "step": 291000 + }, + { + "epoch": 1.64, + "learning_rate": 3.364261342237362e-05, + "loss": 0.2438, + "step": 291100 + }, + { + "epoch": 1.64, + "learning_rate": 3.363699219211117e-05, + "loss": 0.2484, + "step": 291200 + }, + { + "epoch": 1.64, + "learning_rate": 3.363137096184871e-05, + "loss": 0.2433, + "step": 291300 + }, + { + "epoch": 1.64, + "learning_rate": 3.362574973158626e-05, + "loss": 0.245, + "step": 291400 + }, + { + "epoch": 1.64, + "learning_rate": 3.36201285013238e-05, + "loss": 0.2446, + "step": 291500 + }, + { + "epoch": 1.64, + "learning_rate": 3.3614507271061345e-05, + "loss": 0.2444, + "step": 291600 + }, + { + "epoch": 1.64, + "learning_rate": 3.360888604079889e-05, + "loss": 0.2469, + "step": 291700 + }, + { + "epoch": 1.64, + "learning_rate": 3.360326481053643e-05, + "loss": 0.243, + "step": 291800 + }, + { + "epoch": 1.64, + "learning_rate": 3.359764358027398e-05, + "loss": 0.2506, + "step": 291900 + }, + { + "epoch": 1.64, + "learning_rate": 3.3592022350011523e-05, + "loss": 0.2442, + "step": 292000 + }, + { + "epoch": 1.64, + "learning_rate": 3.358640111974907e-05, + "loss": 0.2529, + "step": 292100 + }, + { + "epoch": 1.64, + "learning_rate": 3.3580779889486616e-05, + "loss": 0.258, + "step": 292200 + }, + { + "epoch": 1.64, + "learning_rate": 3.357515865922416e-05, + "loss": 0.2493, + "step": 292300 + }, + { + "epoch": 1.64, + "learning_rate": 3.356953742896171e-05, + "loss": 0.2481, + "step": 292400 + }, + { + "epoch": 1.64, + "learning_rate": 3.356391619869925e-05, + "loss": 0.2578, + "step": 292500 + }, + { + "epoch": 1.64, + "learning_rate": 3.3558294968436794e-05, + "loss": 0.2487, + "step": 292600 + }, + { + "epoch": 1.65, + "learning_rate": 3.355267373817434e-05, + "loss": 0.2526, + "step": 292700 + }, + { + "epoch": 1.65, + "learning_rate": 3.354705250791189e-05, + "loss": 0.2391, + "step": 292800 + }, + { + "epoch": 1.65, + "learning_rate": 3.3541431277649426e-05, + "loss": 0.2504, + "step": 292900 + }, + { + "epoch": 1.65, + "learning_rate": 3.353581004738697e-05, + "loss": 0.2453, + "step": 293000 + }, + { + "epoch": 1.65, + "learning_rate": 3.353018881712452e-05, + "loss": 0.2495, + "step": 293100 + }, + { + "epoch": 1.65, + "learning_rate": 3.352456758686206e-05, + "loss": 0.2492, + "step": 293200 + }, + { + "epoch": 1.65, + "learning_rate": 3.3518946356599605e-05, + "loss": 0.2564, + "step": 293300 + }, + { + "epoch": 1.65, + "learning_rate": 3.351332512633715e-05, + "loss": 0.2564, + "step": 293400 + }, + { + "epoch": 1.65, + "learning_rate": 3.35077038960747e-05, + "loss": 0.2465, + "step": 293500 + }, + { + "epoch": 1.65, + "learning_rate": 3.350208266581224e-05, + "loss": 0.2489, + "step": 293600 + }, + { + "epoch": 1.65, + "learning_rate": 3.349646143554978e-05, + "loss": 0.2502, + "step": 293700 + }, + { + "epoch": 1.65, + "learning_rate": 3.349084020528733e-05, + "loss": 0.2515, + "step": 293800 + }, + { + "epoch": 1.65, + "learning_rate": 3.3485218975024876e-05, + "loss": 0.2478, + "step": 293900 + }, + { + "epoch": 1.65, + "learning_rate": 3.347959774476242e-05, + "loss": 0.2515, + "step": 294000 + }, + { + "epoch": 1.65, + "learning_rate": 3.347397651449997e-05, + "loss": 0.2414, + "step": 294100 + }, + { + "epoch": 1.65, + "learning_rate": 3.3468355284237514e-05, + "loss": 0.2411, + "step": 294200 + }, + { + "epoch": 1.65, + "learning_rate": 3.3462734053975054e-05, + "loss": 0.2452, + "step": 294300 + }, + { + "epoch": 1.65, + "learning_rate": 3.34571128237126e-05, + "loss": 0.2463, + "step": 294400 + }, + { + "epoch": 1.66, + "learning_rate": 3.345154780575277e-05, + "loss": 0.2488, + "step": 294500 + }, + { + "epoch": 1.66, + "learning_rate": 3.3445926575490314e-05, + "loss": 0.2459, + "step": 294600 + }, + { + "epoch": 1.66, + "learning_rate": 3.3440305345227854e-05, + "loss": 0.253, + "step": 294700 + }, + { + "epoch": 1.66, + "learning_rate": 3.343468411496541e-05, + "loss": 0.2528, + "step": 294800 + }, + { + "epoch": 1.66, + "learning_rate": 3.342906288470295e-05, + "loss": 0.2441, + "step": 294900 + }, + { + "epoch": 1.66, + "learning_rate": 3.342344165444049e-05, + "loss": 0.2467, + "step": 295000 + }, + { + "epoch": 1.66, + "learning_rate": 3.341782042417804e-05, + "loss": 0.2446, + "step": 295100 + }, + { + "epoch": 1.66, + "learning_rate": 3.3412199193915585e-05, + "loss": 0.2488, + "step": 295200 + }, + { + "epoch": 1.66, + "learning_rate": 3.3406577963653124e-05, + "loss": 0.2484, + "step": 295300 + }, + { + "epoch": 1.66, + "learning_rate": 3.340095673339067e-05, + "loss": 0.2477, + "step": 295400 + }, + { + "epoch": 1.66, + "learning_rate": 3.339533550312822e-05, + "loss": 0.2553, + "step": 295500 + }, + { + "epoch": 1.66, + "learning_rate": 3.338971427286576e-05, + "loss": 0.2484, + "step": 295600 + }, + { + "epoch": 1.66, + "learning_rate": 3.33840930426033e-05, + "loss": 0.2424, + "step": 295700 + }, + { + "epoch": 1.66, + "learning_rate": 3.337847181234085e-05, + "loss": 0.2486, + "step": 295800 + }, + { + "epoch": 1.66, + "learning_rate": 3.3372850582078395e-05, + "loss": 0.2489, + "step": 295900 + }, + { + "epoch": 1.66, + "learning_rate": 3.3367229351815935e-05, + "loss": 0.245, + "step": 296000 + }, + { + "epoch": 1.66, + "learning_rate": 3.336160812155348e-05, + "loss": 0.2391, + "step": 296100 + }, + { + "epoch": 1.67, + "learning_rate": 3.335598689129103e-05, + "loss": 0.2522, + "step": 296200 + }, + { + "epoch": 1.67, + "learning_rate": 3.3350365661028574e-05, + "loss": 0.2494, + "step": 296300 + }, + { + "epoch": 1.67, + "learning_rate": 3.334474443076612e-05, + "loss": 0.2517, + "step": 296400 + }, + { + "epoch": 1.67, + "learning_rate": 3.3339123200503666e-05, + "loss": 0.2464, + "step": 296500 + }, + { + "epoch": 1.67, + "learning_rate": 3.333350197024121e-05, + "loss": 0.2498, + "step": 296600 + }, + { + "epoch": 1.67, + "learning_rate": 3.332788073997875e-05, + "loss": 0.243, + "step": 296700 + }, + { + "epoch": 1.67, + "learning_rate": 3.33222595097163e-05, + "loss": 0.2461, + "step": 296800 + }, + { + "epoch": 1.67, + "learning_rate": 3.3316638279453845e-05, + "loss": 0.2393, + "step": 296900 + }, + { + "epoch": 1.67, + "learning_rate": 3.331101704919139e-05, + "loss": 0.2476, + "step": 297000 + }, + { + "epoch": 1.67, + "learning_rate": 3.330539581892893e-05, + "loss": 0.2458, + "step": 297100 + }, + { + "epoch": 1.67, + "learning_rate": 3.329977458866648e-05, + "loss": 0.2463, + "step": 297200 + }, + { + "epoch": 1.67, + "learning_rate": 3.329415335840402e-05, + "loss": 0.2521, + "step": 297300 + }, + { + "epoch": 1.67, + "learning_rate": 3.328853212814156e-05, + "loss": 0.243, + "step": 297400 + }, + { + "epoch": 1.67, + "learning_rate": 3.328291089787911e-05, + "loss": 0.2424, + "step": 297500 + }, + { + "epoch": 1.67, + "learning_rate": 3.3277289667616655e-05, + "loss": 0.2498, + "step": 297600 + }, + { + "epoch": 1.67, + "learning_rate": 3.32716684373542e-05, + "loss": 0.2429, + "step": 297700 + }, + { + "epoch": 1.67, + "learning_rate": 3.326604720709174e-05, + "loss": 0.249, + "step": 297800 + }, + { + "epoch": 1.67, + "learning_rate": 3.326042597682929e-05, + "loss": 0.2436, + "step": 297900 + }, + { + "epoch": 1.68, + "learning_rate": 3.325480474656683e-05, + "loss": 0.2485, + "step": 298000 + }, + { + "epoch": 1.68, + "learning_rate": 3.324918351630438e-05, + "loss": 0.2501, + "step": 298100 + }, + { + "epoch": 1.68, + "learning_rate": 3.3243562286041926e-05, + "loss": 0.2481, + "step": 298200 + }, + { + "epoch": 1.68, + "learning_rate": 3.323794105577947e-05, + "loss": 0.2516, + "step": 298300 + }, + { + "epoch": 1.68, + "learning_rate": 3.323231982551702e-05, + "loss": 0.2511, + "step": 298400 + }, + { + "epoch": 1.68, + "learning_rate": 3.322669859525456e-05, + "loss": 0.2463, + "step": 298500 + }, + { + "epoch": 1.68, + "learning_rate": 3.3221133577294726e-05, + "loss": 0.2452, + "step": 298600 + }, + { + "epoch": 1.68, + "learning_rate": 3.321551234703227e-05, + "loss": 0.2531, + "step": 298700 + }, + { + "epoch": 1.68, + "learning_rate": 3.320989111676982e-05, + "loss": 0.2504, + "step": 298800 + }, + { + "epoch": 1.68, + "learning_rate": 3.320426988650736e-05, + "loss": 0.2515, + "step": 298900 + }, + { + "epoch": 1.68, + "learning_rate": 3.319864865624491e-05, + "loss": 0.241, + "step": 299000 + }, + { + "epoch": 1.68, + "learning_rate": 3.319302742598246e-05, + "loss": 0.2529, + "step": 299100 + }, + { + "epoch": 1.68, + "learning_rate": 3.3187406195719996e-05, + "loss": 0.2474, + "step": 299200 + }, + { + "epoch": 1.68, + "learning_rate": 3.318178496545754e-05, + "loss": 0.2457, + "step": 299300 + }, + { + "epoch": 1.68, + "learning_rate": 3.317616373519509e-05, + "loss": 0.2472, + "step": 299400 + }, + { + "epoch": 1.68, + "learning_rate": 3.3170542504932635e-05, + "loss": 0.2452, + "step": 299500 + }, + { + "epoch": 1.68, + "learning_rate": 3.3164921274670175e-05, + "loss": 0.2427, + "step": 299600 + }, + { + "epoch": 1.68, + "learning_rate": 3.315935625671034e-05, + "loss": 0.2434, + "step": 299700 + }, + { + "epoch": 1.69, + "learning_rate": 3.315373502644789e-05, + "loss": 0.2375, + "step": 299800 + }, + { + "epoch": 1.69, + "learning_rate": 3.3148113796185435e-05, + "loss": 0.2446, + "step": 299900 + }, + { + "epoch": 1.69, + "learning_rate": 3.314249256592298e-05, + "loss": 0.2418, + "step": 300000 + }, + { + "epoch": 1.69, + "learning_rate": 3.313687133566053e-05, + "loss": 0.2441, + "step": 300100 + }, + { + "epoch": 1.69, + "learning_rate": 3.3131250105398074e-05, + "loss": 0.2472, + "step": 300200 + }, + { + "epoch": 1.69, + "learning_rate": 3.312562887513561e-05, + "loss": 0.2522, + "step": 300300 + }, + { + "epoch": 1.69, + "learning_rate": 3.312000764487316e-05, + "loss": 0.2568, + "step": 300400 + }, + { + "epoch": 1.69, + "learning_rate": 3.3114386414610706e-05, + "loss": 0.251, + "step": 300500 + }, + { + "epoch": 1.69, + "learning_rate": 3.310876518434825e-05, + "loss": 0.2495, + "step": 300600 + }, + { + "epoch": 1.69, + "learning_rate": 3.310314395408579e-05, + "loss": 0.2388, + "step": 300700 + }, + { + "epoch": 1.69, + "learning_rate": 3.309752272382334e-05, + "loss": 0.2517, + "step": 300800 + }, + { + "epoch": 1.69, + "learning_rate": 3.3091901493560884e-05, + "loss": 0.2462, + "step": 300900 + }, + { + "epoch": 1.69, + "learning_rate": 3.3086280263298424e-05, + "loss": 0.246, + "step": 301000 + }, + { + "epoch": 1.69, + "learning_rate": 3.308065903303597e-05, + "loss": 0.2479, + "step": 301100 + }, + { + "epoch": 1.69, + "learning_rate": 3.3075037802773516e-05, + "loss": 0.2483, + "step": 301200 + }, + { + "epoch": 1.69, + "learning_rate": 3.306941657251106e-05, + "loss": 0.2491, + "step": 301300 + }, + { + "epoch": 1.69, + "learning_rate": 3.30637953422486e-05, + "loss": 0.2429, + "step": 301400 + }, + { + "epoch": 1.69, + "learning_rate": 3.305817411198615e-05, + "loss": 0.2489, + "step": 301500 + }, + { + "epoch": 1.7, + "learning_rate": 3.3052552881723695e-05, + "loss": 0.2512, + "step": 301600 + }, + { + "epoch": 1.7, + "learning_rate": 3.304693165146124e-05, + "loss": 0.2494, + "step": 301700 + }, + { + "epoch": 1.7, + "learning_rate": 3.304131042119879e-05, + "loss": 0.2465, + "step": 301800 + }, + { + "epoch": 1.7, + "learning_rate": 3.3035689190936333e-05, + "loss": 0.2536, + "step": 301900 + }, + { + "epoch": 1.7, + "learning_rate": 3.303006796067388e-05, + "loss": 0.2484, + "step": 302000 + }, + { + "epoch": 1.7, + "learning_rate": 3.302444673041142e-05, + "loss": 0.2477, + "step": 302100 + }, + { + "epoch": 1.7, + "learning_rate": 3.3018825500148965e-05, + "loss": 0.2535, + "step": 302200 + }, + { + "epoch": 1.7, + "learning_rate": 3.301320426988651e-05, + "loss": 0.2453, + "step": 302300 + }, + { + "epoch": 1.7, + "learning_rate": 3.300758303962405e-05, + "loss": 0.2396, + "step": 302400 + }, + { + "epoch": 1.7, + "learning_rate": 3.30019618093616e-05, + "loss": 0.245, + "step": 302500 + }, + { + "epoch": 1.7, + "learning_rate": 3.2996340579099144e-05, + "loss": 0.2455, + "step": 302600 + }, + { + "epoch": 1.7, + "learning_rate": 3.299071934883669e-05, + "loss": 0.244, + "step": 302700 + }, + { + "epoch": 1.7, + "learning_rate": 3.298509811857423e-05, + "loss": 0.2456, + "step": 302800 + }, + { + "epoch": 1.7, + "learning_rate": 3.2979476888311776e-05, + "loss": 0.2478, + "step": 302900 + }, + { + "epoch": 1.7, + "learning_rate": 3.297385565804932e-05, + "loss": 0.2418, + "step": 303000 + }, + { + "epoch": 1.7, + "learning_rate": 3.296823442778686e-05, + "loss": 0.2385, + "step": 303100 + }, + { + "epoch": 1.7, + "learning_rate": 3.296261319752441e-05, + "loss": 0.2424, + "step": 303200 + }, + { + "epoch": 1.7, + "learning_rate": 3.295699196726196e-05, + "loss": 0.2387, + "step": 303300 + }, + { + "epoch": 1.71, + "learning_rate": 3.295142694930213e-05, + "loss": 0.2443, + "step": 303400 + }, + { + "epoch": 1.71, + "learning_rate": 3.294580571903967e-05, + "loss": 0.2505, + "step": 303500 + }, + { + "epoch": 1.71, + "learning_rate": 3.2940184488777214e-05, + "loss": 0.2455, + "step": 303600 + }, + { + "epoch": 1.71, + "learning_rate": 3.293456325851476e-05, + "loss": 0.239, + "step": 303700 + }, + { + "epoch": 1.71, + "learning_rate": 3.292894202825231e-05, + "loss": 0.245, + "step": 303800 + }, + { + "epoch": 1.71, + "learning_rate": 3.2923320797989846e-05, + "loss": 0.2482, + "step": 303900 + }, + { + "epoch": 1.71, + "learning_rate": 3.291769956772739e-05, + "loss": 0.2423, + "step": 304000 + }, + { + "epoch": 1.71, + "learning_rate": 3.291207833746494e-05, + "loss": 0.2457, + "step": 304100 + }, + { + "epoch": 1.71, + "learning_rate": 3.2906457107202485e-05, + "loss": 0.2488, + "step": 304200 + }, + { + "epoch": 1.71, + "learning_rate": 3.290083587694003e-05, + "loss": 0.2423, + "step": 304300 + }, + { + "epoch": 1.71, + "learning_rate": 3.289521464667758e-05, + "loss": 0.245, + "step": 304400 + }, + { + "epoch": 1.71, + "learning_rate": 3.2889593416415124e-05, + "loss": 0.2458, + "step": 304500 + }, + { + "epoch": 1.71, + "learning_rate": 3.2883972186152664e-05, + "loss": 0.2491, + "step": 304600 + }, + { + "epoch": 1.71, + "learning_rate": 3.287835095589021e-05, + "loss": 0.2466, + "step": 304700 + }, + { + "epoch": 1.71, + "learning_rate": 3.2872729725627756e-05, + "loss": 0.2419, + "step": 304800 + }, + { + "epoch": 1.71, + "learning_rate": 3.2867108495365296e-05, + "loss": 0.243, + "step": 304900 + }, + { + "epoch": 1.71, + "learning_rate": 3.286148726510284e-05, + "loss": 0.2539, + "step": 305000 + }, + { + "epoch": 1.72, + "learning_rate": 3.285586603484039e-05, + "loss": 0.248, + "step": 305100 + }, + { + "epoch": 1.72, + "learning_rate": 3.285024480457793e-05, + "loss": 0.2438, + "step": 305200 + }, + { + "epoch": 1.72, + "learning_rate": 3.2844623574315474e-05, + "loss": 0.2465, + "step": 305300 + }, + { + "epoch": 1.72, + "learning_rate": 3.283900234405302e-05, + "loss": 0.2398, + "step": 305400 + }, + { + "epoch": 1.72, + "learning_rate": 3.2833381113790567e-05, + "loss": 0.2413, + "step": 305500 + }, + { + "epoch": 1.72, + "learning_rate": 3.2827759883528106e-05, + "loss": 0.2405, + "step": 305600 + }, + { + "epoch": 1.72, + "learning_rate": 3.282213865326565e-05, + "loss": 0.2368, + "step": 305700 + }, + { + "epoch": 1.72, + "learning_rate": 3.28165174230032e-05, + "loss": 0.2511, + "step": 305800 + }, + { + "epoch": 1.72, + "learning_rate": 3.2810896192740745e-05, + "loss": 0.2451, + "step": 305900 + }, + { + "epoch": 1.72, + "learning_rate": 3.280527496247829e-05, + "loss": 0.2404, + "step": 306000 + }, + { + "epoch": 1.72, + "learning_rate": 3.279965373221584e-05, + "loss": 0.2461, + "step": 306100 + }, + { + "epoch": 1.72, + "learning_rate": 3.2794032501953384e-05, + "loss": 0.2509, + "step": 306200 + }, + { + "epoch": 1.72, + "learning_rate": 3.278841127169092e-05, + "loss": 0.2383, + "step": 306300 + }, + { + "epoch": 1.72, + "learning_rate": 3.278279004142847e-05, + "loss": 0.2375, + "step": 306400 + }, + { + "epoch": 1.72, + "learning_rate": 3.2777168811166016e-05, + "loss": 0.2465, + "step": 306500 + }, + { + "epoch": 1.72, + "learning_rate": 3.2771547580903555e-05, + "loss": 0.2399, + "step": 306600 + }, + { + "epoch": 1.72, + "learning_rate": 3.276598256294372e-05, + "loss": 0.243, + "step": 306700 + }, + { + "epoch": 1.72, + "learning_rate": 3.276036133268127e-05, + "loss": 0.2461, + "step": 306800 + }, + { + "epoch": 1.73, + "learning_rate": 3.2754740102418815e-05, + "loss": 0.2501, + "step": 306900 + }, + { + "epoch": 1.73, + "learning_rate": 3.274911887215636e-05, + "loss": 0.2419, + "step": 307000 + }, + { + "epoch": 1.73, + "learning_rate": 3.274349764189391e-05, + "loss": 0.2451, + "step": 307100 + }, + { + "epoch": 1.73, + "learning_rate": 3.2737876411631454e-05, + "loss": 0.2432, + "step": 307200 + }, + { + "epoch": 1.73, + "learning_rate": 3.2732255181369e-05, + "loss": 0.2448, + "step": 307300 + }, + { + "epoch": 1.73, + "learning_rate": 3.272663395110654e-05, + "loss": 0.2473, + "step": 307400 + }, + { + "epoch": 1.73, + "learning_rate": 3.2721012720844086e-05, + "loss": 0.2417, + "step": 307500 + }, + { + "epoch": 1.73, + "learning_rate": 3.271539149058163e-05, + "loss": 0.243, + "step": 307600 + }, + { + "epoch": 1.73, + "learning_rate": 3.270977026031917e-05, + "loss": 0.2465, + "step": 307700 + }, + { + "epoch": 1.73, + "learning_rate": 3.270414903005672e-05, + "loss": 0.241, + "step": 307800 + }, + { + "epoch": 1.73, + "learning_rate": 3.2698527799794265e-05, + "loss": 0.2432, + "step": 307900 + }, + { + "epoch": 1.73, + "learning_rate": 3.269290656953181e-05, + "loss": 0.2447, + "step": 308000 + }, + { + "epoch": 1.73, + "learning_rate": 3.268728533926935e-05, + "loss": 0.2422, + "step": 308100 + }, + { + "epoch": 1.73, + "learning_rate": 3.26816641090069e-05, + "loss": 0.2409, + "step": 308200 + }, + { + "epoch": 1.73, + "learning_rate": 3.267604287874444e-05, + "loss": 0.2518, + "step": 308300 + }, + { + "epoch": 1.73, + "learning_rate": 3.267042164848198e-05, + "loss": 0.2407, + "step": 308400 + }, + { + "epoch": 1.73, + "learning_rate": 3.2664800418219536e-05, + "loss": 0.2455, + "step": 308500 + }, + { + "epoch": 1.73, + "learning_rate": 3.265917918795708e-05, + "loss": 0.247, + "step": 308600 + }, + { + "epoch": 1.74, + "learning_rate": 3.265355795769463e-05, + "loss": 0.2299, + "step": 308700 + }, + { + "epoch": 1.74, + "learning_rate": 3.264793672743217e-05, + "loss": 0.2319, + "step": 308800 + }, + { + "epoch": 1.74, + "learning_rate": 3.2642315497169714e-05, + "loss": 0.239, + "step": 308900 + }, + { + "epoch": 1.74, + "learning_rate": 3.263669426690726e-05, + "loss": 0.2405, + "step": 309000 + }, + { + "epoch": 1.74, + "learning_rate": 3.26310730366448e-05, + "loss": 0.2472, + "step": 309100 + }, + { + "epoch": 1.74, + "learning_rate": 3.2625451806382346e-05, + "loss": 0.2428, + "step": 309200 + }, + { + "epoch": 1.74, + "learning_rate": 3.261983057611989e-05, + "loss": 0.2432, + "step": 309300 + }, + { + "epoch": 1.74, + "learning_rate": 3.261420934585744e-05, + "loss": 0.2455, + "step": 309400 + }, + { + "epoch": 1.74, + "learning_rate": 3.260858811559498e-05, + "loss": 0.2432, + "step": 309500 + }, + { + "epoch": 1.74, + "learning_rate": 3.2602966885332524e-05, + "loss": 0.2468, + "step": 309600 + }, + { + "epoch": 1.74, + "learning_rate": 3.259734565507007e-05, + "loss": 0.2458, + "step": 309700 + }, + { + "epoch": 1.74, + "learning_rate": 3.259172442480761e-05, + "loss": 0.241, + "step": 309800 + }, + { + "epoch": 1.74, + "learning_rate": 3.2586103194545156e-05, + "loss": 0.2414, + "step": 309900 + }, + { + "epoch": 1.74, + "learning_rate": 3.25804819642827e-05, + "loss": 0.2475, + "step": 310000 + }, + { + "epoch": 1.74, + "learning_rate": 3.257486073402025e-05, + "loss": 0.248, + "step": 310100 + }, + { + "epoch": 1.74, + "learning_rate": 3.2569239503757795e-05, + "loss": 0.2477, + "step": 310200 + }, + { + "epoch": 1.74, + "learning_rate": 3.256361827349534e-05, + "loss": 0.2488, + "step": 310300 + }, + { + "epoch": 1.74, + "learning_rate": 3.255799704323289e-05, + "loss": 0.2498, + "step": 310400 + }, + { + "epoch": 1.75, + "learning_rate": 3.2552432025273055e-05, + "loss": 0.2472, + "step": 310500 + }, + { + "epoch": 1.75, + "learning_rate": 3.2546810795010595e-05, + "loss": 0.2346, + "step": 310600 + }, + { + "epoch": 1.75, + "learning_rate": 3.254118956474814e-05, + "loss": 0.2517, + "step": 310700 + }, + { + "epoch": 1.75, + "learning_rate": 3.253556833448569e-05, + "loss": 0.2397, + "step": 310800 + }, + { + "epoch": 1.75, + "learning_rate": 3.252994710422323e-05, + "loss": 0.2486, + "step": 310900 + }, + { + "epoch": 1.75, + "learning_rate": 3.252432587396077e-05, + "loss": 0.2441, + "step": 311000 + }, + { + "epoch": 1.75, + "learning_rate": 3.251870464369832e-05, + "loss": 0.2478, + "step": 311100 + }, + { + "epoch": 1.75, + "learning_rate": 3.2513083413435866e-05, + "loss": 0.2436, + "step": 311200 + }, + { + "epoch": 1.75, + "learning_rate": 3.250746218317341e-05, + "loss": 0.2443, + "step": 311300 + }, + { + "epoch": 1.75, + "learning_rate": 3.250184095291096e-05, + "loss": 0.2504, + "step": 311400 + }, + { + "epoch": 1.75, + "learning_rate": 3.2496219722648505e-05, + "loss": 0.2513, + "step": 311500 + }, + { + "epoch": 1.75, + "learning_rate": 3.2490598492386044e-05, + "loss": 0.2419, + "step": 311600 + }, + { + "epoch": 1.75, + "learning_rate": 3.248497726212359e-05, + "loss": 0.2398, + "step": 311700 + }, + { + "epoch": 1.75, + "learning_rate": 3.2479356031861137e-05, + "loss": 0.2496, + "step": 311800 + }, + { + "epoch": 1.75, + "learning_rate": 3.247373480159868e-05, + "loss": 0.2418, + "step": 311900 + }, + { + "epoch": 1.75, + "learning_rate": 3.246811357133622e-05, + "loss": 0.233, + "step": 312000 + }, + { + "epoch": 1.75, + "learning_rate": 3.246249234107377e-05, + "loss": 0.2455, + "step": 312100 + }, + { + "epoch": 1.75, + "learning_rate": 3.2456871110811315e-05, + "loss": 0.2394, + "step": 312200 + }, + { + "epoch": 1.76, + "learning_rate": 3.2451249880548854e-05, + "loss": 0.239, + "step": 312300 + }, + { + "epoch": 1.76, + "learning_rate": 3.24456286502864e-05, + "loss": 0.2438, + "step": 312400 + }, + { + "epoch": 1.76, + "learning_rate": 3.244000742002395e-05, + "loss": 0.2419, + "step": 312500 + }, + { + "epoch": 1.76, + "learning_rate": 3.243444240206412e-05, + "loss": 0.2428, + "step": 312600 + }, + { + "epoch": 1.76, + "learning_rate": 3.242882117180166e-05, + "loss": 0.2444, + "step": 312700 + }, + { + "epoch": 1.76, + "learning_rate": 3.242319994153921e-05, + "loss": 0.2401, + "step": 312800 + }, + { + "epoch": 1.76, + "learning_rate": 3.2417578711276753e-05, + "loss": 0.2413, + "step": 312900 + }, + { + "epoch": 1.76, + "learning_rate": 3.24119574810143e-05, + "loss": 0.2438, + "step": 313000 + }, + { + "epoch": 1.76, + "learning_rate": 3.240633625075184e-05, + "loss": 0.2424, + "step": 313100 + }, + { + "epoch": 1.76, + "learning_rate": 3.2400715020489385e-05, + "loss": 0.2439, + "step": 313200 + }, + { + "epoch": 1.76, + "learning_rate": 3.239509379022693e-05, + "loss": 0.245, + "step": 313300 + }, + { + "epoch": 1.76, + "learning_rate": 3.238947255996447e-05, + "loss": 0.2442, + "step": 313400 + }, + { + "epoch": 1.76, + "learning_rate": 3.238385132970202e-05, + "loss": 0.2485, + "step": 313500 + }, + { + "epoch": 1.76, + "learning_rate": 3.2378230099439564e-05, + "loss": 0.2457, + "step": 313600 + }, + { + "epoch": 1.76, + "learning_rate": 3.237260886917711e-05, + "loss": 0.2402, + "step": 313700 + }, + { + "epoch": 1.76, + "learning_rate": 3.2366987638914656e-05, + "loss": 0.2452, + "step": 313800 + }, + { + "epoch": 1.76, + "learning_rate": 3.23613664086522e-05, + "loss": 0.2462, + "step": 313900 + }, + { + "epoch": 1.77, + "learning_rate": 3.235574517838975e-05, + "loss": 0.2376, + "step": 314000 + }, + { + "epoch": 1.77, + "learning_rate": 3.235012394812729e-05, + "loss": 0.2429, + "step": 314100 + }, + { + "epoch": 1.77, + "learning_rate": 3.2344502717864835e-05, + "loss": 0.2476, + "step": 314200 + }, + { + "epoch": 1.77, + "learning_rate": 3.233888148760238e-05, + "loss": 0.2417, + "step": 314300 + }, + { + "epoch": 1.77, + "learning_rate": 3.233326025733993e-05, + "loss": 0.2375, + "step": 314400 + }, + { + "epoch": 1.77, + "learning_rate": 3.232763902707747e-05, + "loss": 0.2421, + "step": 314500 + }, + { + "epoch": 1.77, + "learning_rate": 3.232201779681501e-05, + "loss": 0.2514, + "step": 314600 + }, + { + "epoch": 1.77, + "learning_rate": 3.231645277885518e-05, + "loss": 0.243, + "step": 314700 + }, + { + "epoch": 1.77, + "learning_rate": 3.231083154859273e-05, + "loss": 0.2416, + "step": 314800 + }, + { + "epoch": 1.77, + "learning_rate": 3.230521031833027e-05, + "loss": 0.247, + "step": 314900 + }, + { + "epoch": 1.77, + "learning_rate": 3.229958908806782e-05, + "loss": 0.2462, + "step": 315000 + }, + { + "epoch": 1.77, + "learning_rate": 3.2293967857805366e-05, + "loss": 0.2438, + "step": 315100 + }, + { + "epoch": 1.77, + "learning_rate": 3.2288346627542905e-05, + "loss": 0.249, + "step": 315200 + }, + { + "epoch": 1.77, + "learning_rate": 3.228272539728045e-05, + "loss": 0.2318, + "step": 315300 + }, + { + "epoch": 1.77, + "learning_rate": 3.2277104167018e-05, + "loss": 0.2474, + "step": 315400 + }, + { + "epoch": 1.77, + "learning_rate": 3.227148293675554e-05, + "loss": 0.2406, + "step": 315500 + }, + { + "epoch": 1.77, + "learning_rate": 3.2265861706493084e-05, + "loss": 0.2444, + "step": 315600 + }, + { + "epoch": 1.77, + "learning_rate": 3.226024047623063e-05, + "loss": 0.2398, + "step": 315700 + }, + { + "epoch": 1.78, + "learning_rate": 3.2254619245968176e-05, + "loss": 0.2494, + "step": 315800 + }, + { + "epoch": 1.78, + "learning_rate": 3.2248998015705716e-05, + "loss": 0.2348, + "step": 315900 + }, + { + "epoch": 1.78, + "learning_rate": 3.224337678544326e-05, + "loss": 0.2388, + "step": 316000 + }, + { + "epoch": 1.78, + "learning_rate": 3.223775555518081e-05, + "loss": 0.2484, + "step": 316100 + }, + { + "epoch": 1.78, + "learning_rate": 3.223213432491835e-05, + "loss": 0.2401, + "step": 316200 + }, + { + "epoch": 1.78, + "learning_rate": 3.2226513094655894e-05, + "loss": 0.2458, + "step": 316300 + }, + { + "epoch": 1.78, + "learning_rate": 3.222089186439344e-05, + "loss": 0.2465, + "step": 316400 + }, + { + "epoch": 1.78, + "learning_rate": 3.2215270634130987e-05, + "loss": 0.2461, + "step": 316500 + }, + { + "epoch": 1.78, + "learning_rate": 3.220964940386853e-05, + "loss": 0.2431, + "step": 316600 + }, + { + "epoch": 1.78, + "learning_rate": 3.220402817360608e-05, + "loss": 0.2408, + "step": 316700 + }, + { + "epoch": 1.78, + "learning_rate": 3.2198406943343625e-05, + "loss": 0.2428, + "step": 316800 + }, + { + "epoch": 1.78, + "learning_rate": 3.2192785713081165e-05, + "loss": 0.2392, + "step": 316900 + }, + { + "epoch": 1.78, + "learning_rate": 3.218716448281871e-05, + "loss": 0.2391, + "step": 317000 + }, + { + "epoch": 1.78, + "learning_rate": 3.218154325255626e-05, + "loss": 0.2472, + "step": 317100 + }, + { + "epoch": 1.78, + "learning_rate": 3.2175922022293804e-05, + "loss": 0.2473, + "step": 317200 + }, + { + "epoch": 1.78, + "learning_rate": 3.217030079203134e-05, + "loss": 0.2449, + "step": 317300 + }, + { + "epoch": 1.78, + "learning_rate": 3.216467956176889e-05, + "loss": 0.242, + "step": 317400 + }, + { + "epoch": 1.78, + "learning_rate": 3.2159058331506436e-05, + "loss": 0.2418, + "step": 317500 + }, + { + "epoch": 1.79, + "learning_rate": 3.2153437101243975e-05, + "loss": 0.2427, + "step": 317600 + }, + { + "epoch": 1.79, + "learning_rate": 3.214781587098152e-05, + "loss": 0.2417, + "step": 317700 + }, + { + "epoch": 1.79, + "learning_rate": 3.214219464071907e-05, + "loss": 0.2397, + "step": 317800 + }, + { + "epoch": 1.79, + "learning_rate": 3.2136573410456614e-05, + "loss": 0.2365, + "step": 317900 + }, + { + "epoch": 1.79, + "learning_rate": 3.213095218019416e-05, + "loss": 0.2475, + "step": 318000 + }, + { + "epoch": 1.79, + "learning_rate": 3.212533094993171e-05, + "loss": 0.2446, + "step": 318100 + }, + { + "epoch": 1.79, + "learning_rate": 3.211970971966925e-05, + "loss": 0.2473, + "step": 318200 + }, + { + "epoch": 1.79, + "learning_rate": 3.211408848940679e-05, + "loss": 0.2431, + "step": 318300 + }, + { + "epoch": 1.79, + "learning_rate": 3.210846725914434e-05, + "loss": 0.239, + "step": 318400 + }, + { + "epoch": 1.79, + "learning_rate": 3.2102846028881885e-05, + "loss": 0.251, + "step": 318500 + }, + { + "epoch": 1.79, + "learning_rate": 3.209722479861943e-05, + "loss": 0.2393, + "step": 318600 + }, + { + "epoch": 1.79, + "learning_rate": 3.209165978065959e-05, + "loss": 0.2412, + "step": 318700 + }, + { + "epoch": 1.79, + "learning_rate": 3.208603855039714e-05, + "loss": 0.2441, + "step": 318800 + }, + { + "epoch": 1.79, + "learning_rate": 3.2080417320134685e-05, + "loss": 0.2437, + "step": 318900 + }, + { + "epoch": 1.79, + "learning_rate": 3.207479608987223e-05, + "loss": 0.2416, + "step": 319000 + }, + { + "epoch": 1.79, + "learning_rate": 3.206917485960978e-05, + "loss": 0.2448, + "step": 319100 + }, + { + "epoch": 1.79, + "learning_rate": 3.2063553629347324e-05, + "loss": 0.2363, + "step": 319200 + }, + { + "epoch": 1.79, + "learning_rate": 3.205798861138749e-05, + "loss": 0.2489, + "step": 319300 + }, + { + "epoch": 1.8, + "learning_rate": 3.205236738112504e-05, + "loss": 0.2482, + "step": 319400 + }, + { + "epoch": 1.8, + "learning_rate": 3.204674615086258e-05, + "loss": 0.2426, + "step": 319500 + }, + { + "epoch": 1.8, + "learning_rate": 3.204112492060012e-05, + "loss": 0.2373, + "step": 319600 + }, + { + "epoch": 1.8, + "learning_rate": 3.203550369033767e-05, + "loss": 0.2454, + "step": 319700 + }, + { + "epoch": 1.8, + "learning_rate": 3.202988246007521e-05, + "loss": 0.2375, + "step": 319800 + }, + { + "epoch": 1.8, + "learning_rate": 3.2024261229812755e-05, + "loss": 0.2401, + "step": 319900 + }, + { + "epoch": 1.8, + "learning_rate": 3.20186399995503e-05, + "loss": 0.236, + "step": 320000 + }, + { + "epoch": 1.8, + "learning_rate": 3.201301876928785e-05, + "loss": 0.2452, + "step": 320100 + }, + { + "epoch": 1.8, + "learning_rate": 3.2007397539025394e-05, + "loss": 0.2402, + "step": 320200 + }, + { + "epoch": 1.8, + "learning_rate": 3.200177630876294e-05, + "loss": 0.2409, + "step": 320300 + }, + { + "epoch": 1.8, + "learning_rate": 3.199615507850049e-05, + "loss": 0.2448, + "step": 320400 + }, + { + "epoch": 1.8, + "learning_rate": 3.1990533848238026e-05, + "loss": 0.2426, + "step": 320500 + }, + { + "epoch": 1.8, + "learning_rate": 3.198491261797557e-05, + "loss": 0.2441, + "step": 320600 + }, + { + "epoch": 1.8, + "learning_rate": 3.197929138771312e-05, + "loss": 0.2447, + "step": 320700 + }, + { + "epoch": 1.8, + "learning_rate": 3.1973670157450665e-05, + "loss": 0.2384, + "step": 320800 + }, + { + "epoch": 1.8, + "learning_rate": 3.1968048927188204e-05, + "loss": 0.2399, + "step": 320900 + }, + { + "epoch": 1.8, + "learning_rate": 3.196242769692575e-05, + "loss": 0.2441, + "step": 321000 + }, + { + "epoch": 1.8, + "learning_rate": 3.19568064666633e-05, + "loss": 0.2318, + "step": 321100 + }, + { + "epoch": 1.81, + "learning_rate": 3.1951185236400837e-05, + "loss": 0.24, + "step": 321200 + }, + { + "epoch": 1.81, + "learning_rate": 3.194556400613838e-05, + "loss": 0.2519, + "step": 321300 + }, + { + "epoch": 1.81, + "learning_rate": 3.193994277587593e-05, + "loss": 0.233, + "step": 321400 + }, + { + "epoch": 1.81, + "learning_rate": 3.1934321545613475e-05, + "loss": 0.2453, + "step": 321500 + }, + { + "epoch": 1.81, + "learning_rate": 3.1928700315351015e-05, + "loss": 0.2444, + "step": 321600 + }, + { + "epoch": 1.81, + "learning_rate": 3.192307908508856e-05, + "loss": 0.2463, + "step": 321700 + }, + { + "epoch": 1.81, + "learning_rate": 3.1917514067128736e-05, + "loss": 0.2427, + "step": 321800 + }, + { + "epoch": 1.81, + "learning_rate": 3.191189283686628e-05, + "loss": 0.238, + "step": 321900 + }, + { + "epoch": 1.81, + "learning_rate": 3.190627160660382e-05, + "loss": 0.2407, + "step": 322000 + }, + { + "epoch": 1.81, + "learning_rate": 3.190065037634137e-05, + "loss": 0.244, + "step": 322100 + }, + { + "epoch": 1.81, + "learning_rate": 3.1895029146078914e-05, + "loss": 0.2471, + "step": 322200 + }, + { + "epoch": 1.81, + "learning_rate": 3.188940791581645e-05, + "loss": 0.2425, + "step": 322300 + }, + { + "epoch": 1.81, + "learning_rate": 3.1883786685554e-05, + "loss": 0.2375, + "step": 322400 + }, + { + "epoch": 1.81, + "learning_rate": 3.1878165455291546e-05, + "loss": 0.241, + "step": 322500 + }, + { + "epoch": 1.81, + "learning_rate": 3.187254422502909e-05, + "loss": 0.2476, + "step": 322600 + }, + { + "epoch": 1.81, + "learning_rate": 3.186692299476663e-05, + "loss": 0.2421, + "step": 322700 + }, + { + "epoch": 1.81, + "learning_rate": 3.186130176450418e-05, + "loss": 0.2398, + "step": 322800 + }, + { + "epoch": 1.82, + "learning_rate": 3.1855680534241724e-05, + "loss": 0.2317, + "step": 322900 + }, + { + "epoch": 1.82, + "learning_rate": 3.185005930397927e-05, + "loss": 0.2417, + "step": 323000 + }, + { + "epoch": 1.82, + "learning_rate": 3.184443807371682e-05, + "loss": 0.2351, + "step": 323100 + }, + { + "epoch": 1.82, + "learning_rate": 3.183881684345436e-05, + "loss": 0.2357, + "step": 323200 + }, + { + "epoch": 1.82, + "learning_rate": 3.183319561319191e-05, + "loss": 0.2348, + "step": 323300 + }, + { + "epoch": 1.82, + "learning_rate": 3.182757438292945e-05, + "loss": 0.2462, + "step": 323400 + }, + { + "epoch": 1.82, + "learning_rate": 3.1821953152666995e-05, + "loss": 0.2406, + "step": 323500 + }, + { + "epoch": 1.82, + "learning_rate": 3.181633192240454e-05, + "loss": 0.2409, + "step": 323600 + }, + { + "epoch": 1.82, + "learning_rate": 3.181071069214208e-05, + "loss": 0.2454, + "step": 323700 + }, + { + "epoch": 1.82, + "learning_rate": 3.180508946187963e-05, + "loss": 0.2428, + "step": 323800 + }, + { + "epoch": 1.82, + "learning_rate": 3.1799468231617173e-05, + "loss": 0.2465, + "step": 323900 + }, + { + "epoch": 1.82, + "learning_rate": 3.179384700135472e-05, + "loss": 0.2442, + "step": 324000 + }, + { + "epoch": 1.82, + "learning_rate": 3.178828198339489e-05, + "loss": 0.2402, + "step": 324100 + }, + { + "epoch": 1.82, + "learning_rate": 3.1782660753132434e-05, + "loss": 0.2446, + "step": 324200 + }, + { + "epoch": 1.82, + "learning_rate": 3.177703952286998e-05, + "loss": 0.242, + "step": 324300 + }, + { + "epoch": 1.82, + "learning_rate": 3.1771418292607526e-05, + "loss": 0.2402, + "step": 324400 + }, + { + "epoch": 1.82, + "learning_rate": 3.1765797062345066e-05, + "loss": 0.2353, + "step": 324500 + }, + { + "epoch": 1.82, + "learning_rate": 3.176017583208261e-05, + "loss": 0.2441, + "step": 324600 + }, + { + "epoch": 1.83, + "learning_rate": 3.175455460182016e-05, + "loss": 0.2422, + "step": 324700 + }, + { + "epoch": 1.83, + "learning_rate": 3.17489333715577e-05, + "loss": 0.2471, + "step": 324800 + }, + { + "epoch": 1.83, + "learning_rate": 3.1743312141295244e-05, + "loss": 0.2435, + "step": 324900 + }, + { + "epoch": 1.83, + "learning_rate": 3.173769091103279e-05, + "loss": 0.2376, + "step": 325000 + }, + { + "epoch": 1.83, + "learning_rate": 3.173206968077033e-05, + "loss": 0.2459, + "step": 325100 + }, + { + "epoch": 1.83, + "learning_rate": 3.1726448450507876e-05, + "loss": 0.2419, + "step": 325200 + }, + { + "epoch": 1.83, + "learning_rate": 3.172082722024542e-05, + "loss": 0.2381, + "step": 325300 + }, + { + "epoch": 1.83, + "learning_rate": 3.171520598998297e-05, + "loss": 0.24, + "step": 325400 + }, + { + "epoch": 1.83, + "learning_rate": 3.1709584759720515e-05, + "loss": 0.2407, + "step": 325500 + }, + { + "epoch": 1.83, + "learning_rate": 3.170396352945806e-05, + "loss": 0.2388, + "step": 325600 + }, + { + "epoch": 1.83, + "learning_rate": 3.169834229919561e-05, + "loss": 0.2404, + "step": 325700 + }, + { + "epoch": 1.83, + "learning_rate": 3.169272106893315e-05, + "loss": 0.2439, + "step": 325800 + }, + { + "epoch": 1.83, + "learning_rate": 3.168709983867069e-05, + "loss": 0.2363, + "step": 325900 + }, + { + "epoch": 1.83, + "learning_rate": 3.168147860840824e-05, + "loss": 0.2385, + "step": 326000 + }, + { + "epoch": 1.83, + "learning_rate": 3.1675857378145786e-05, + "loss": 0.2368, + "step": 326100 + }, + { + "epoch": 1.83, + "learning_rate": 3.1670236147883325e-05, + "loss": 0.2389, + "step": 326200 + }, + { + "epoch": 1.83, + "learning_rate": 3.166461491762087e-05, + "loss": 0.2355, + "step": 326300 + }, + { + "epoch": 1.83, + "learning_rate": 3.165899368735842e-05, + "loss": 0.2376, + "step": 326400 + }, + { + "epoch": 1.84, + "learning_rate": 3.165337245709596e-05, + "loss": 0.2375, + "step": 326500 + }, + { + "epoch": 1.84, + "learning_rate": 3.1647751226833504e-05, + "loss": 0.2395, + "step": 326600 + }, + { + "epoch": 1.84, + "learning_rate": 3.164212999657105e-05, + "loss": 0.2432, + "step": 326700 + }, + { + "epoch": 1.84, + "learning_rate": 3.1636508766308596e-05, + "loss": 0.243, + "step": 326800 + }, + { + "epoch": 1.84, + "learning_rate": 3.1630887536046136e-05, + "loss": 0.2314, + "step": 326900 + }, + { + "epoch": 1.84, + "learning_rate": 3.162526630578368e-05, + "loss": 0.2365, + "step": 327000 + }, + { + "epoch": 1.84, + "learning_rate": 3.161964507552123e-05, + "loss": 0.2445, + "step": 327100 + }, + { + "epoch": 1.84, + "learning_rate": 3.1614023845258775e-05, + "loss": 0.2408, + "step": 327200 + }, + { + "epoch": 1.84, + "learning_rate": 3.160840261499632e-05, + "loss": 0.2406, + "step": 327300 + }, + { + "epoch": 1.84, + "learning_rate": 3.160278138473387e-05, + "loss": 0.2421, + "step": 327400 + }, + { + "epoch": 1.84, + "learning_rate": 3.1597216366774035e-05, + "loss": 0.2491, + "step": 327500 + }, + { + "epoch": 1.84, + "learning_rate": 3.1591595136511574e-05, + "loss": 0.24, + "step": 327600 + }, + { + "epoch": 1.84, + "learning_rate": 3.158597390624912e-05, + "loss": 0.2432, + "step": 327700 + }, + { + "epoch": 1.84, + "learning_rate": 3.158035267598667e-05, + "loss": 0.2345, + "step": 327800 + }, + { + "epoch": 1.84, + "learning_rate": 3.157473144572421e-05, + "loss": 0.2462, + "step": 327900 + }, + { + "epoch": 1.84, + "learning_rate": 3.156911021546175e-05, + "loss": 0.2407, + "step": 328000 + }, + { + "epoch": 1.84, + "learning_rate": 3.15634889851993e-05, + "loss": 0.238, + "step": 328100 + }, + { + "epoch": 1.84, + "learning_rate": 3.1557867754936845e-05, + "loss": 0.2331, + "step": 328200 + }, + { + "epoch": 1.85, + "learning_rate": 3.155224652467439e-05, + "loss": 0.2401, + "step": 328300 + }, + { + "epoch": 1.85, + "learning_rate": 3.154662529441194e-05, + "loss": 0.2334, + "step": 328400 + }, + { + "epoch": 1.85, + "learning_rate": 3.1541004064149484e-05, + "loss": 0.2421, + "step": 328500 + }, + { + "epoch": 1.85, + "learning_rate": 3.153538283388703e-05, + "loss": 0.2398, + "step": 328600 + }, + { + "epoch": 1.85, + "learning_rate": 3.152976160362457e-05, + "loss": 0.2377, + "step": 328700 + }, + { + "epoch": 1.85, + "learning_rate": 3.1524140373362116e-05, + "loss": 0.2382, + "step": 328800 + }, + { + "epoch": 1.85, + "learning_rate": 3.151851914309966e-05, + "loss": 0.2429, + "step": 328900 + }, + { + "epoch": 1.85, + "learning_rate": 3.15128979128372e-05, + "loss": 0.2364, + "step": 329000 + }, + { + "epoch": 1.85, + "learning_rate": 3.150727668257475e-05, + "loss": 0.2475, + "step": 329100 + }, + { + "epoch": 1.85, + "learning_rate": 3.1501655452312294e-05, + "loss": 0.2459, + "step": 329200 + }, + { + "epoch": 1.85, + "learning_rate": 3.149603422204984e-05, + "loss": 0.2438, + "step": 329300 + }, + { + "epoch": 1.85, + "learning_rate": 3.149041299178738e-05, + "loss": 0.2337, + "step": 329400 + }, + { + "epoch": 1.85, + "learning_rate": 3.1484791761524926e-05, + "loss": 0.2466, + "step": 329500 + }, + { + "epoch": 1.85, + "learning_rate": 3.14792267435651e-05, + "loss": 0.2337, + "step": 329600 + }, + { + "epoch": 1.85, + "learning_rate": 3.147360551330265e-05, + "loss": 0.2458, + "step": 329700 + }, + { + "epoch": 1.85, + "learning_rate": 3.1467984283040187e-05, + "loss": 0.2398, + "step": 329800 + }, + { + "epoch": 1.85, + "learning_rate": 3.146236305277773e-05, + "loss": 0.2402, + "step": 329900 + }, + { + "epoch": 1.86, + "learning_rate": 3.145674182251528e-05, + "loss": 0.2357, + "step": 330000 + }, + { + "epoch": 1.86, + "learning_rate": 3.145112059225282e-05, + "loss": 0.244, + "step": 330100 + }, + { + "epoch": 1.86, + "learning_rate": 3.1445499361990365e-05, + "loss": 0.2412, + "step": 330200 + }, + { + "epoch": 1.86, + "learning_rate": 3.143987813172791e-05, + "loss": 0.2404, + "step": 330300 + }, + { + "epoch": 1.86, + "learning_rate": 3.143425690146546e-05, + "loss": 0.242, + "step": 330400 + }, + { + "epoch": 1.86, + "learning_rate": 3.1428635671203e-05, + "loss": 0.2446, + "step": 330500 + }, + { + "epoch": 1.86, + "learning_rate": 3.142301444094054e-05, + "loss": 0.2394, + "step": 330600 + }, + { + "epoch": 1.86, + "learning_rate": 3.141739321067809e-05, + "loss": 0.2422, + "step": 330700 + }, + { + "epoch": 1.86, + "learning_rate": 3.1411771980415636e-05, + "loss": 0.2411, + "step": 330800 + }, + { + "epoch": 1.86, + "learning_rate": 3.140615075015318e-05, + "loss": 0.2456, + "step": 330900 + }, + { + "epoch": 1.86, + "learning_rate": 3.140052951989073e-05, + "loss": 0.2393, + "step": 331000 + }, + { + "epoch": 1.86, + "learning_rate": 3.1394908289628275e-05, + "loss": 0.2388, + "step": 331100 + }, + { + "epoch": 1.86, + "learning_rate": 3.1389287059365814e-05, + "loss": 0.2396, + "step": 331200 + }, + { + "epoch": 1.86, + "learning_rate": 3.138366582910336e-05, + "loss": 0.2377, + "step": 331300 + }, + { + "epoch": 1.86, + "learning_rate": 3.137804459884091e-05, + "loss": 0.2469, + "step": 331400 + }, + { + "epoch": 1.86, + "learning_rate": 3.1372423368578446e-05, + "loss": 0.2399, + "step": 331500 + }, + { + "epoch": 1.86, + "learning_rate": 3.136680213831599e-05, + "loss": 0.234, + "step": 331600 + }, + { + "epoch": 1.86, + "learning_rate": 3.136118090805354e-05, + "loss": 0.2499, + "step": 331700 + }, + { + "epoch": 1.87, + "learning_rate": 3.1355559677791085e-05, + "loss": 0.2381, + "step": 331800 + }, + { + "epoch": 1.87, + "learning_rate": 3.1349938447528625e-05, + "loss": 0.2405, + "step": 331900 + }, + { + "epoch": 1.87, + "learning_rate": 3.134431721726617e-05, + "loss": 0.244, + "step": 332000 + }, + { + "epoch": 1.87, + "learning_rate": 3.133869598700372e-05, + "loss": 0.2419, + "step": 332100 + }, + { + "epoch": 1.87, + "learning_rate": 3.1333074756741257e-05, + "loss": 0.2356, + "step": 332200 + }, + { + "epoch": 1.87, + "learning_rate": 3.13274535264788e-05, + "loss": 0.2418, + "step": 332300 + }, + { + "epoch": 1.87, + "learning_rate": 3.132183229621635e-05, + "loss": 0.2434, + "step": 332400 + }, + { + "epoch": 1.87, + "learning_rate": 3.1316211065953895e-05, + "loss": 0.2328, + "step": 332500 + }, + { + "epoch": 1.87, + "learning_rate": 3.131058983569144e-05, + "loss": 0.2329, + "step": 332600 + }, + { + "epoch": 1.87, + "learning_rate": 3.130496860542899e-05, + "loss": 0.236, + "step": 332700 + }, + { + "epoch": 1.87, + "learning_rate": 3.1299347375166534e-05, + "loss": 0.2387, + "step": 332800 + }, + { + "epoch": 1.87, + "learning_rate": 3.1293726144904074e-05, + "loss": 0.245, + "step": 332900 + }, + { + "epoch": 1.87, + "learning_rate": 3.128810491464162e-05, + "loss": 0.2398, + "step": 333000 + }, + { + "epoch": 1.87, + "learning_rate": 3.1282483684379166e-05, + "loss": 0.2415, + "step": 333100 + }, + { + "epoch": 1.87, + "learning_rate": 3.1276918666419334e-05, + "loss": 0.2377, + "step": 333200 + }, + { + "epoch": 1.87, + "learning_rate": 3.1271297436156873e-05, + "loss": 0.2366, + "step": 333300 + }, + { + "epoch": 1.87, + "learning_rate": 3.126567620589442e-05, + "loss": 0.2385, + "step": 333400 + }, + { + "epoch": 1.87, + "learning_rate": 3.126005497563197e-05, + "loss": 0.2369, + "step": 333500 + }, + { + "epoch": 1.88, + "learning_rate": 3.125443374536952e-05, + "loss": 0.2402, + "step": 333600 + }, + { + "epoch": 1.88, + "learning_rate": 3.124881251510706e-05, + "loss": 0.244, + "step": 333700 + }, + { + "epoch": 1.88, + "learning_rate": 3.1243191284844605e-05, + "loss": 0.2433, + "step": 333800 + }, + { + "epoch": 1.88, + "learning_rate": 3.123757005458215e-05, + "loss": 0.2436, + "step": 333900 + }, + { + "epoch": 1.88, + "learning_rate": 3.123194882431969e-05, + "loss": 0.2359, + "step": 334000 + }, + { + "epoch": 1.88, + "learning_rate": 3.122632759405724e-05, + "loss": 0.2385, + "step": 334100 + }, + { + "epoch": 1.88, + "learning_rate": 3.122070636379478e-05, + "loss": 0.2406, + "step": 334200 + }, + { + "epoch": 1.88, + "learning_rate": 3.121508513353233e-05, + "loss": 0.2355, + "step": 334300 + }, + { + "epoch": 1.88, + "learning_rate": 3.120946390326987e-05, + "loss": 0.2377, + "step": 334400 + }, + { + "epoch": 1.88, + "learning_rate": 3.1203842673007415e-05, + "loss": 0.2415, + "step": 334500 + }, + { + "epoch": 1.88, + "learning_rate": 3.119822144274496e-05, + "loss": 0.2443, + "step": 334600 + }, + { + "epoch": 1.88, + "learning_rate": 3.11926002124825e-05, + "loss": 0.2367, + "step": 334700 + }, + { + "epoch": 1.88, + "learning_rate": 3.118697898222005e-05, + "loss": 0.2384, + "step": 334800 + }, + { + "epoch": 1.88, + "learning_rate": 3.1181357751957594e-05, + "loss": 0.2317, + "step": 334900 + }, + { + "epoch": 1.88, + "learning_rate": 3.117573652169514e-05, + "loss": 0.2357, + "step": 335000 + }, + { + "epoch": 1.88, + "learning_rate": 3.1170115291432686e-05, + "loss": 0.2438, + "step": 335100 + }, + { + "epoch": 1.88, + "learning_rate": 3.116449406117023e-05, + "loss": 0.2424, + "step": 335200 + }, + { + "epoch": 1.88, + "learning_rate": 3.115887283090778e-05, + "loss": 0.2358, + "step": 335300 + }, + { + "epoch": 1.89, + "learning_rate": 3.115325160064532e-05, + "loss": 0.2369, + "step": 335400 + }, + { + "epoch": 1.89, + "learning_rate": 3.1147630370382864e-05, + "loss": 0.2396, + "step": 335500 + }, + { + "epoch": 1.89, + "learning_rate": 3.114200914012041e-05, + "loss": 0.2381, + "step": 335600 + }, + { + "epoch": 1.89, + "learning_rate": 3.113638790985795e-05, + "loss": 0.24, + "step": 335700 + }, + { + "epoch": 1.89, + "learning_rate": 3.1130766679595496e-05, + "loss": 0.2407, + "step": 335800 + }, + { + "epoch": 1.89, + "learning_rate": 3.112514544933304e-05, + "loss": 0.2377, + "step": 335900 + }, + { + "epoch": 1.89, + "learning_rate": 3.111952421907059e-05, + "loss": 0.2417, + "step": 336000 + }, + { + "epoch": 1.89, + "learning_rate": 3.111390298880813e-05, + "loss": 0.2395, + "step": 336100 + }, + { + "epoch": 1.89, + "learning_rate": 3.1108281758545675e-05, + "loss": 0.2416, + "step": 336200 + }, + { + "epoch": 1.89, + "learning_rate": 3.110266052828322e-05, + "loss": 0.2327, + "step": 336300 + }, + { + "epoch": 1.89, + "learning_rate": 3.1097095510323395e-05, + "loss": 0.2326, + "step": 336400 + }, + { + "epoch": 1.89, + "learning_rate": 3.1091474280060935e-05, + "loss": 0.2343, + "step": 336500 + }, + { + "epoch": 1.89, + "learning_rate": 3.108585304979848e-05, + "loss": 0.2395, + "step": 336600 + }, + { + "epoch": 1.89, + "learning_rate": 3.108023181953603e-05, + "loss": 0.243, + "step": 336700 + }, + { + "epoch": 1.89, + "learning_rate": 3.107461058927357e-05, + "loss": 0.2302, + "step": 336800 + }, + { + "epoch": 1.89, + "learning_rate": 3.106898935901111e-05, + "loss": 0.2313, + "step": 336900 + }, + { + "epoch": 1.89, + "learning_rate": 3.106336812874866e-05, + "loss": 0.2367, + "step": 337000 + }, + { + "epoch": 1.89, + "learning_rate": 3.1057746898486206e-05, + "loss": 0.2414, + "step": 337100 + }, + { + "epoch": 1.9, + "learning_rate": 3.1052125668223745e-05, + "loss": 0.2358, + "step": 337200 + }, + { + "epoch": 1.9, + "learning_rate": 3.104650443796129e-05, + "loss": 0.2359, + "step": 337300 + }, + { + "epoch": 1.9, + "learning_rate": 3.104088320769884e-05, + "loss": 0.2402, + "step": 337400 + }, + { + "epoch": 1.9, + "learning_rate": 3.103526197743638e-05, + "loss": 0.2379, + "step": 337500 + }, + { + "epoch": 1.9, + "learning_rate": 3.1029640747173924e-05, + "loss": 0.2404, + "step": 337600 + }, + { + "epoch": 1.9, + "learning_rate": 3.102401951691147e-05, + "loss": 0.2381, + "step": 337700 + }, + { + "epoch": 1.9, + "learning_rate": 3.101839828664902e-05, + "loss": 0.2373, + "step": 337800 + }, + { + "epoch": 1.9, + "learning_rate": 3.1012833268689184e-05, + "loss": 0.2412, + "step": 337900 + }, + { + "epoch": 1.9, + "learning_rate": 3.100721203842673e-05, + "loss": 0.2387, + "step": 338000 + }, + { + "epoch": 1.9, + "learning_rate": 3.1001590808164276e-05, + "loss": 0.2345, + "step": 338100 + }, + { + "epoch": 1.9, + "learning_rate": 3.099596957790182e-05, + "loss": 0.2372, + "step": 338200 + }, + { + "epoch": 1.9, + "learning_rate": 3.099034834763936e-05, + "loss": 0.2416, + "step": 338300 + }, + { + "epoch": 1.9, + "learning_rate": 3.098472711737691e-05, + "loss": 0.2343, + "step": 338400 + }, + { + "epoch": 1.9, + "learning_rate": 3.0979105887114455e-05, + "loss": 0.2375, + "step": 338500 + }, + { + "epoch": 1.9, + "learning_rate": 3.0973484656851994e-05, + "loss": 0.2392, + "step": 338600 + }, + { + "epoch": 1.9, + "learning_rate": 3.096786342658955e-05, + "loss": 0.2438, + "step": 338700 + }, + { + "epoch": 1.9, + "learning_rate": 3.0962242196327094e-05, + "loss": 0.245, + "step": 338800 + }, + { + "epoch": 1.91, + "learning_rate": 3.095662096606464e-05, + "loss": 0.2371, + "step": 338900 + }, + { + "epoch": 1.91, + "learning_rate": 3.095099973580218e-05, + "loss": 0.2441, + "step": 339000 + }, + { + "epoch": 1.91, + "learning_rate": 3.0945378505539726e-05, + "loss": 0.239, + "step": 339100 + }, + { + "epoch": 1.91, + "learning_rate": 3.093975727527727e-05, + "loss": 0.2433, + "step": 339200 + }, + { + "epoch": 1.91, + "learning_rate": 3.093413604501481e-05, + "loss": 0.2446, + "step": 339300 + }, + { + "epoch": 1.91, + "learning_rate": 3.092851481475236e-05, + "loss": 0.2373, + "step": 339400 + }, + { + "epoch": 1.91, + "learning_rate": 3.0922893584489904e-05, + "loss": 0.2321, + "step": 339500 + }, + { + "epoch": 1.91, + "learning_rate": 3.091727235422745e-05, + "loss": 0.234, + "step": 339600 + }, + { + "epoch": 1.91, + "learning_rate": 3.091165112396499e-05, + "loss": 0.2369, + "step": 339700 + }, + { + "epoch": 1.91, + "learning_rate": 3.0906029893702536e-05, + "loss": 0.238, + "step": 339800 + }, + { + "epoch": 1.91, + "learning_rate": 3.090040866344008e-05, + "loss": 0.2322, + "step": 339900 + }, + { + "epoch": 1.91, + "learning_rate": 3.089478743317762e-05, + "loss": 0.2351, + "step": 340000 + }, + { + "epoch": 1.91, + "learning_rate": 3.088916620291517e-05, + "loss": 0.2356, + "step": 340100 + }, + { + "epoch": 1.91, + "learning_rate": 3.088360118495534e-05, + "loss": 0.2408, + "step": 340200 + }, + { + "epoch": 1.91, + "learning_rate": 3.087797995469289e-05, + "loss": 0.2354, + "step": 340300 + }, + { + "epoch": 1.91, + "learning_rate": 3.087235872443043e-05, + "loss": 0.2369, + "step": 340400 + }, + { + "epoch": 1.91, + "learning_rate": 3.0866737494167975e-05, + "loss": 0.2328, + "step": 340500 + }, + { + "epoch": 1.91, + "learning_rate": 3.086111626390552e-05, + "loss": 0.229, + "step": 340600 + }, + { + "epoch": 1.92, + "learning_rate": 3.085549503364307e-05, + "loss": 0.2349, + "step": 340700 + }, + { + "epoch": 1.92, + "learning_rate": 3.0849873803380607e-05, + "loss": 0.2318, + "step": 340800 + }, + { + "epoch": 1.92, + "learning_rate": 3.084425257311815e-05, + "loss": 0.2417, + "step": 340900 + }, + { + "epoch": 1.92, + "learning_rate": 3.08386313428557e-05, + "loss": 0.2436, + "step": 341000 + }, + { + "epoch": 1.92, + "learning_rate": 3.083301011259324e-05, + "loss": 0.2423, + "step": 341100 + }, + { + "epoch": 1.92, + "learning_rate": 3.0827388882330785e-05, + "loss": 0.2379, + "step": 341200 + }, + { + "epoch": 1.92, + "learning_rate": 3.082176765206833e-05, + "loss": 0.236, + "step": 341300 + }, + { + "epoch": 1.92, + "learning_rate": 3.081614642180588e-05, + "loss": 0.2332, + "step": 341400 + }, + { + "epoch": 1.92, + "learning_rate": 3.0810525191543424e-05, + "loss": 0.2377, + "step": 341500 + }, + { + "epoch": 1.92, + "learning_rate": 3.080490396128097e-05, + "loss": 0.2409, + "step": 341600 + }, + { + "epoch": 1.92, + "learning_rate": 3.0799282731018516e-05, + "loss": 0.2491, + "step": 341700 + }, + { + "epoch": 1.92, + "learning_rate": 3.0793661500756056e-05, + "loss": 0.2367, + "step": 341800 + }, + { + "epoch": 1.92, + "learning_rate": 3.07880402704936e-05, + "loss": 0.2399, + "step": 341900 + }, + { + "epoch": 1.92, + "learning_rate": 3.078241904023115e-05, + "loss": 0.2336, + "step": 342000 + }, + { + "epoch": 1.92, + "learning_rate": 3.0776797809968695e-05, + "loss": 0.2356, + "step": 342100 + }, + { + "epoch": 1.92, + "learning_rate": 3.0771176579706234e-05, + "loss": 0.2417, + "step": 342200 + }, + { + "epoch": 1.92, + "learning_rate": 3.076555534944378e-05, + "loss": 0.2341, + "step": 342300 + }, + { + "epoch": 1.92, + "learning_rate": 3.075993411918133e-05, + "loss": 0.2436, + "step": 342400 + }, + { + "epoch": 1.93, + "learning_rate": 3.0754312888918866e-05, + "loss": 0.2366, + "step": 342500 + }, + { + "epoch": 1.93, + "learning_rate": 3.074869165865641e-05, + "loss": 0.2398, + "step": 342600 + }, + { + "epoch": 1.93, + "learning_rate": 3.074307042839396e-05, + "loss": 0.2338, + "step": 342700 + }, + { + "epoch": 1.93, + "learning_rate": 3.0737449198131505e-05, + "loss": 0.2365, + "step": 342800 + }, + { + "epoch": 1.93, + "learning_rate": 3.073182796786905e-05, + "loss": 0.2303, + "step": 342900 + }, + { + "epoch": 1.93, + "learning_rate": 3.07262067376066e-05, + "loss": 0.2297, + "step": 343000 + }, + { + "epoch": 1.93, + "learning_rate": 3.0720585507344144e-05, + "loss": 0.2402, + "step": 343100 + }, + { + "epoch": 1.93, + "learning_rate": 3.071496427708168e-05, + "loss": 0.2325, + "step": 343200 + }, + { + "epoch": 1.93, + "learning_rate": 3.070934304681923e-05, + "loss": 0.2353, + "step": 343300 + }, + { + "epoch": 1.93, + "learning_rate": 3.0703721816556776e-05, + "loss": 0.2298, + "step": 343400 + }, + { + "epoch": 1.93, + "learning_rate": 3.069810058629432e-05, + "loss": 0.2404, + "step": 343500 + }, + { + "epoch": 1.93, + "learning_rate": 3.069247935603186e-05, + "loss": 0.2355, + "step": 343600 + }, + { + "epoch": 1.93, + "learning_rate": 3.068685812576941e-05, + "loss": 0.2383, + "step": 343700 + }, + { + "epoch": 1.93, + "learning_rate": 3.0681236895506954e-05, + "loss": 0.2383, + "step": 343800 + }, + { + "epoch": 1.93, + "learning_rate": 3.0675615665244494e-05, + "loss": 0.2278, + "step": 343900 + }, + { + "epoch": 1.93, + "learning_rate": 3.066999443498204e-05, + "loss": 0.2367, + "step": 344000 + }, + { + "epoch": 1.93, + "learning_rate": 3.0664373204719586e-05, + "loss": 0.2358, + "step": 344100 + }, + { + "epoch": 1.93, + "learning_rate": 3.065875197445713e-05, + "loss": 0.2329, + "step": 344200 + }, + { + "epoch": 1.94, + "learning_rate": 3.065313074419467e-05, + "loss": 0.2384, + "step": 344300 + }, + { + "epoch": 1.94, + "learning_rate": 3.064750951393222e-05, + "loss": 0.2368, + "step": 344400 + }, + { + "epoch": 1.94, + "learning_rate": 3.0641888283669765e-05, + "loss": 0.2377, + "step": 344500 + }, + { + "epoch": 1.94, + "learning_rate": 3.063626705340731e-05, + "loss": 0.2347, + "step": 344600 + }, + { + "epoch": 1.94, + "learning_rate": 3.063064582314486e-05, + "loss": 0.2403, + "step": 344700 + }, + { + "epoch": 1.94, + "learning_rate": 3.0625024592882403e-05, + "loss": 0.2415, + "step": 344800 + }, + { + "epoch": 1.94, + "learning_rate": 3.061940336261995e-05, + "loss": 0.2379, + "step": 344900 + }, + { + "epoch": 1.94, + "learning_rate": 3.061378213235749e-05, + "loss": 0.2364, + "step": 345000 + }, + { + "epoch": 1.94, + "learning_rate": 3.0608160902095036e-05, + "loss": 0.2333, + "step": 345100 + }, + { + "epoch": 1.94, + "learning_rate": 3.060253967183258e-05, + "loss": 0.2294, + "step": 345200 + }, + { + "epoch": 1.94, + "learning_rate": 3.059691844157012e-05, + "loss": 0.2311, + "step": 345300 + }, + { + "epoch": 1.94, + "learning_rate": 3.059129721130767e-05, + "loss": 0.2312, + "step": 345400 + }, + { + "epoch": 1.94, + "learning_rate": 3.0585675981045214e-05, + "loss": 0.2378, + "step": 345500 + }, + { + "epoch": 1.94, + "learning_rate": 3.058005475078275e-05, + "loss": 0.2381, + "step": 345600 + }, + { + "epoch": 1.94, + "learning_rate": 3.05744335205203e-05, + "loss": 0.234, + "step": 345700 + }, + { + "epoch": 1.94, + "learning_rate": 3.0568812290257846e-05, + "loss": 0.2381, + "step": 345800 + }, + { + "epoch": 1.94, + "learning_rate": 3.056319105999539e-05, + "loss": 0.2375, + "step": 345900 + }, + { + "epoch": 1.94, + "learning_rate": 3.055756982973293e-05, + "loss": 0.2411, + "step": 346000 + }, + { + "epoch": 1.95, + "learning_rate": 3.055194859947048e-05, + "loss": 0.2344, + "step": 346100 + }, + { + "epoch": 1.95, + "learning_rate": 3.0546327369208024e-05, + "loss": 0.2342, + "step": 346200 + }, + { + "epoch": 1.95, + "learning_rate": 3.054070613894557e-05, + "loss": 0.2338, + "step": 346300 + }, + { + "epoch": 1.95, + "learning_rate": 3.053514112098574e-05, + "loss": 0.2369, + "step": 346400 + }, + { + "epoch": 1.95, + "learning_rate": 3.0529576103025906e-05, + "loss": 0.2417, + "step": 346500 + }, + { + "epoch": 1.95, + "learning_rate": 3.052395487276345e-05, + "loss": 0.2324, + "step": 346600 + }, + { + "epoch": 1.95, + "learning_rate": 3.0518333642501e-05, + "loss": 0.2361, + "step": 346700 + }, + { + "epoch": 1.95, + "learning_rate": 3.051271241223854e-05, + "loss": 0.2384, + "step": 346800 + }, + { + "epoch": 1.95, + "learning_rate": 3.0507091181976088e-05, + "loss": 0.2304, + "step": 346900 + }, + { + "epoch": 1.95, + "learning_rate": 3.0501469951713634e-05, + "loss": 0.2275, + "step": 347000 + }, + { + "epoch": 1.95, + "learning_rate": 3.0495848721451177e-05, + "loss": 0.2382, + "step": 347100 + }, + { + "epoch": 1.95, + "learning_rate": 3.0490227491188723e-05, + "loss": 0.2307, + "step": 347200 + }, + { + "epoch": 1.95, + "learning_rate": 3.048460626092627e-05, + "loss": 0.232, + "step": 347300 + }, + { + "epoch": 1.95, + "learning_rate": 3.0478985030663816e-05, + "loss": 0.2325, + "step": 347400 + }, + { + "epoch": 1.95, + "learning_rate": 3.0473363800401355e-05, + "loss": 0.2315, + "step": 347500 + }, + { + "epoch": 1.95, + "learning_rate": 3.046785499474415e-05, + "loss": 0.2325, + "step": 347600 + }, + { + "epoch": 1.95, + "learning_rate": 3.0462233764481697e-05, + "loss": 0.2354, + "step": 347700 + }, + { + "epoch": 1.96, + "learning_rate": 3.0456612534219243e-05, + "loss": 0.2372, + "step": 347800 + }, + { + "epoch": 1.96, + "learning_rate": 3.0450991303956783e-05, + "loss": 0.2395, + "step": 347900 + }, + { + "epoch": 1.96, + "learning_rate": 3.044537007369433e-05, + "loss": 0.2413, + "step": 348000 + }, + { + "epoch": 1.96, + "learning_rate": 3.0439748843431875e-05, + "loss": 0.2409, + "step": 348100 + }, + { + "epoch": 1.96, + "learning_rate": 3.043412761316942e-05, + "loss": 0.2408, + "step": 348200 + }, + { + "epoch": 1.96, + "learning_rate": 3.042850638290696e-05, + "loss": 0.2399, + "step": 348300 + }, + { + "epoch": 1.96, + "learning_rate": 3.0422885152644507e-05, + "loss": 0.2339, + "step": 348400 + }, + { + "epoch": 1.96, + "learning_rate": 3.0417263922382057e-05, + "loss": 0.2383, + "step": 348500 + }, + { + "epoch": 1.96, + "learning_rate": 3.0411642692119597e-05, + "loss": 0.2363, + "step": 348600 + }, + { + "epoch": 1.96, + "learning_rate": 3.0406021461857143e-05, + "loss": 0.2344, + "step": 348700 + }, + { + "epoch": 1.96, + "learning_rate": 3.040040023159469e-05, + "loss": 0.2374, + "step": 348800 + }, + { + "epoch": 1.96, + "learning_rate": 3.0394779001332235e-05, + "loss": 0.2339, + "step": 348900 + }, + { + "epoch": 1.96, + "learning_rate": 3.0389157771069775e-05, + "loss": 0.2383, + "step": 349000 + }, + { + "epoch": 1.96, + "learning_rate": 3.038353654080732e-05, + "loss": 0.2335, + "step": 349100 + }, + { + "epoch": 1.96, + "learning_rate": 3.0377915310544867e-05, + "loss": 0.2407, + "step": 349200 + }, + { + "epoch": 1.96, + "learning_rate": 3.037229408028241e-05, + "loss": 0.2391, + "step": 349300 + }, + { + "epoch": 1.96, + "learning_rate": 3.0366672850019957e-05, + "loss": 0.2392, + "step": 349400 + }, + { + "epoch": 1.96, + "learning_rate": 3.0361051619757503e-05, + "loss": 0.2327, + "step": 349500 + }, + { + "epoch": 1.97, + "learning_rate": 3.035543038949505e-05, + "loss": 0.2351, + "step": 349600 + }, + { + "epoch": 1.97, + "learning_rate": 3.034980915923259e-05, + "loss": 0.2346, + "step": 349700 + }, + { + "epoch": 1.97, + "learning_rate": 3.0344187928970135e-05, + "loss": 0.2326, + "step": 349800 + }, + { + "epoch": 1.97, + "learning_rate": 3.033856669870768e-05, + "loss": 0.2372, + "step": 349900 + }, + { + "epoch": 1.97, + "learning_rate": 3.0332945468445224e-05, + "loss": 0.2356, + "step": 350000 + }, + { + "epoch": 1.97, + "learning_rate": 3.032732423818277e-05, + "loss": 0.2371, + "step": 350100 + }, + { + "epoch": 1.97, + "learning_rate": 3.0321703007920317e-05, + "loss": 0.2324, + "step": 350200 + }, + { + "epoch": 1.97, + "learning_rate": 3.0316081777657863e-05, + "loss": 0.2349, + "step": 350300 + }, + { + "epoch": 1.97, + "learning_rate": 3.0310460547395402e-05, + "loss": 0.2348, + "step": 350400 + }, + { + "epoch": 1.97, + "learning_rate": 3.030483931713295e-05, + "loss": 0.238, + "step": 350500 + }, + { + "epoch": 1.97, + "learning_rate": 3.0299218086870495e-05, + "loss": 0.2378, + "step": 350600 + }, + { + "epoch": 1.97, + "learning_rate": 3.0293596856608038e-05, + "loss": 0.2329, + "step": 350700 + }, + { + "epoch": 1.97, + "learning_rate": 3.0287975626345584e-05, + "loss": 0.231, + "step": 350800 + }, + { + "epoch": 1.97, + "learning_rate": 3.028235439608313e-05, + "loss": 0.2358, + "step": 350900 + }, + { + "epoch": 1.97, + "learning_rate": 3.0276733165820677e-05, + "loss": 0.2369, + "step": 351000 + }, + { + "epoch": 1.97, + "learning_rate": 3.0271111935558216e-05, + "loss": 0.2351, + "step": 351100 + }, + { + "epoch": 1.97, + "learning_rate": 3.0265490705295763e-05, + "loss": 0.2346, + "step": 351200 + }, + { + "epoch": 1.97, + "learning_rate": 3.025986947503331e-05, + "loss": 0.2354, + "step": 351300 + }, + { + "epoch": 1.98, + "learning_rate": 3.025424824477085e-05, + "loss": 0.2331, + "step": 351400 + }, + { + "epoch": 1.98, + "learning_rate": 3.0248627014508395e-05, + "loss": 0.2269, + "step": 351500 + }, + { + "epoch": 1.98, + "learning_rate": 3.024300578424594e-05, + "loss": 0.2363, + "step": 351600 + }, + { + "epoch": 1.98, + "learning_rate": 3.0237384553983487e-05, + "loss": 0.2449, + "step": 351700 + }, + { + "epoch": 1.98, + "learning_rate": 3.023176332372103e-05, + "loss": 0.2368, + "step": 351800 + }, + { + "epoch": 1.98, + "learning_rate": 3.0226142093458576e-05, + "loss": 0.233, + "step": 351900 + }, + { + "epoch": 1.98, + "learning_rate": 3.0220520863196123e-05, + "loss": 0.2363, + "step": 352000 + }, + { + "epoch": 1.98, + "learning_rate": 3.0214899632933662e-05, + "loss": 0.2342, + "step": 352100 + }, + { + "epoch": 1.98, + "learning_rate": 3.020927840267121e-05, + "loss": 0.234, + "step": 352200 + }, + { + "epoch": 1.98, + "learning_rate": 3.0203657172408755e-05, + "loss": 0.2313, + "step": 352300 + }, + { + "epoch": 1.98, + "learning_rate": 3.01980359421463e-05, + "loss": 0.2315, + "step": 352400 + }, + { + "epoch": 1.98, + "learning_rate": 3.0192414711883844e-05, + "loss": 0.2363, + "step": 352500 + }, + { + "epoch": 1.98, + "learning_rate": 3.018679348162139e-05, + "loss": 0.2344, + "step": 352600 + }, + { + "epoch": 1.98, + "learning_rate": 3.0181172251358936e-05, + "loss": 0.2396, + "step": 352700 + }, + { + "epoch": 1.98, + "learning_rate": 3.0175551021096476e-05, + "loss": 0.2384, + "step": 352800 + }, + { + "epoch": 1.98, + "learning_rate": 3.0169929790834022e-05, + "loss": 0.2323, + "step": 352900 + }, + { + "epoch": 1.98, + "learning_rate": 3.016430856057157e-05, + "loss": 0.2308, + "step": 353000 + }, + { + "epoch": 1.98, + "learning_rate": 3.0158687330309115e-05, + "loss": 0.2364, + "step": 353100 + }, + { + "epoch": 1.99, + "learning_rate": 3.0153066100046658e-05, + "loss": 0.2373, + "step": 353200 + }, + { + "epoch": 1.99, + "learning_rate": 3.0147444869784204e-05, + "loss": 0.232, + "step": 353300 + }, + { + "epoch": 1.99, + "learning_rate": 3.014182363952175e-05, + "loss": 0.2353, + "step": 353400 + }, + { + "epoch": 1.99, + "learning_rate": 3.013620240925929e-05, + "loss": 0.238, + "step": 353500 + }, + { + "epoch": 1.99, + "learning_rate": 3.013063739129946e-05, + "loss": 0.2407, + "step": 353600 + }, + { + "epoch": 1.99, + "learning_rate": 3.0125016161037007e-05, + "loss": 0.235, + "step": 353700 + }, + { + "epoch": 1.99, + "learning_rate": 3.0119394930774553e-05, + "loss": 0.236, + "step": 353800 + }, + { + "epoch": 1.99, + "learning_rate": 3.0113773700512093e-05, + "loss": 0.2359, + "step": 353900 + }, + { + "epoch": 1.99, + "learning_rate": 3.010815247024964e-05, + "loss": 0.2353, + "step": 354000 + }, + { + "epoch": 1.99, + "learning_rate": 3.0102531239987185e-05, + "loss": 0.2367, + "step": 354100 + }, + { + "epoch": 1.99, + "learning_rate": 3.009691000972473e-05, + "loss": 0.2382, + "step": 354200 + }, + { + "epoch": 1.99, + "learning_rate": 3.0091288779462274e-05, + "loss": 0.2398, + "step": 354300 + }, + { + "epoch": 1.99, + "learning_rate": 3.008566754919982e-05, + "loss": 0.2357, + "step": 354400 + }, + { + "epoch": 1.99, + "learning_rate": 3.0080046318937367e-05, + "loss": 0.2413, + "step": 354500 + }, + { + "epoch": 1.99, + "learning_rate": 3.0074425088674906e-05, + "loss": 0.2326, + "step": 354600 + }, + { + "epoch": 1.99, + "learning_rate": 3.0068803858412453e-05, + "loss": 0.2369, + "step": 354700 + }, + { + "epoch": 1.99, + "learning_rate": 3.006318262815e-05, + "loss": 0.2367, + "step": 354800 + }, + { + "epoch": 1.99, + "learning_rate": 3.0057561397887545e-05, + "loss": 0.2359, + "step": 354900 + }, + { + "epoch": 2.0, + "learning_rate": 3.0051940167625088e-05, + "loss": 0.2348, + "step": 355000 + }, + { + "epoch": 2.0, + "learning_rate": 3.0046318937362634e-05, + "loss": 0.2296, + "step": 355100 + }, + { + "epoch": 2.0, + "learning_rate": 3.004069770710018e-05, + "loss": 0.2334, + "step": 355200 + }, + { + "epoch": 2.0, + "learning_rate": 3.003507647683772e-05, + "loss": 0.2412, + "step": 355300 + }, + { + "epoch": 2.0, + "learning_rate": 3.0029455246575267e-05, + "loss": 0.2393, + "step": 355400 + }, + { + "epoch": 2.0, + "learning_rate": 3.0023834016312813e-05, + "loss": 0.235, + "step": 355500 + }, + { + "epoch": 2.0, + "learning_rate": 3.0018212786050352e-05, + "loss": 0.2321, + "step": 355600 + }, + { + "epoch": 2.0, + "learning_rate": 3.00125915557879e-05, + "loss": 0.233, + "step": 355700 + }, + { + "epoch": 2.0, + "eval_bleu": 75.8819, + "eval_cer": 2.4613, + "eval_chrF": 95.00769603599446, + "eval_gen_len": 16.760384, + "eval_loss": 0.5156659483909607, + "eval_runtime": 7263.3248, + "eval_samples_per_second": 34.419, + "eval_steps_per_second": 0.538, + "eval_wer": 13.4362, + "step": 355794 + }, + { + "epoch": 2.0, + "learning_rate": 3.000702653782807e-05, + "loss": 0.2436, + "step": 355800 + }, + { + "epoch": 2.0, + "learning_rate": 3.0001405307565616e-05, + "loss": 0.2181, + "step": 355900 + }, + { + "epoch": 2.0, + "learning_rate": 2.999578407730316e-05, + "loss": 0.2178, + "step": 356000 + }, + { + "epoch": 2.0, + "learning_rate": 2.9990162847040705e-05, + "loss": 0.2193, + "step": 356100 + }, + { + "epoch": 2.0, + "learning_rate": 2.998454161677825e-05, + "loss": 0.2147, + "step": 356200 + }, + { + "epoch": 2.0, + "learning_rate": 2.9978920386515798e-05, + "loss": 0.2212, + "step": 356300 + }, + { + "epoch": 2.0, + "learning_rate": 2.9973299156253337e-05, + "loss": 0.2285, + "step": 356400 + }, + { + "epoch": 2.0, + "learning_rate": 2.9967677925990883e-05, + "loss": 0.2124, + "step": 356500 + }, + { + "epoch": 2.0, + "learning_rate": 2.996205669572843e-05, + "loss": 0.2141, + "step": 356600 + }, + { + "epoch": 2.01, + "learning_rate": 2.995643546546597e-05, + "loss": 0.2214, + "step": 356700 + }, + { + "epoch": 2.01, + "learning_rate": 2.9950814235203515e-05, + "loss": 0.2103, + "step": 356800 + }, + { + "epoch": 2.01, + "learning_rate": 2.9945193004941062e-05, + "loss": 0.2223, + "step": 356900 + }, + { + "epoch": 2.01, + "learning_rate": 2.9939627986981233e-05, + "loss": 0.2166, + "step": 357000 + }, + { + "epoch": 2.01, + "learning_rate": 2.9934006756718776e-05, + "loss": 0.2146, + "step": 357100 + }, + { + "epoch": 2.01, + "learning_rate": 2.9928385526456322e-05, + "loss": 0.2239, + "step": 357200 + }, + { + "epoch": 2.01, + "learning_rate": 2.9922764296193868e-05, + "loss": 0.2098, + "step": 357300 + }, + { + "epoch": 2.01, + "learning_rate": 2.9917143065931414e-05, + "loss": 0.2149, + "step": 357400 + }, + { + "epoch": 2.01, + "learning_rate": 2.991157804797158e-05, + "loss": 0.2201, + "step": 357500 + }, + { + "epoch": 2.01, + "learning_rate": 2.9905956817709125e-05, + "loss": 0.2205, + "step": 357600 + }, + { + "epoch": 2.01, + "learning_rate": 2.990033558744667e-05, + "loss": 0.2084, + "step": 357700 + }, + { + "epoch": 2.01, + "learning_rate": 2.9894714357184218e-05, + "loss": 0.2166, + "step": 357800 + }, + { + "epoch": 2.01, + "learning_rate": 2.9889093126921757e-05, + "loss": 0.2145, + "step": 357900 + }, + { + "epoch": 2.01, + "learning_rate": 2.9883471896659303e-05, + "loss": 0.2223, + "step": 358000 + }, + { + "epoch": 2.01, + "learning_rate": 2.987785066639685e-05, + "loss": 0.2151, + "step": 358100 + }, + { + "epoch": 2.01, + "learning_rate": 2.9872229436134392e-05, + "loss": 0.2164, + "step": 358200 + }, + { + "epoch": 2.01, + "learning_rate": 2.986660820587194e-05, + "loss": 0.2185, + "step": 358300 + }, + { + "epoch": 2.01, + "learning_rate": 2.9860986975609485e-05, + "loss": 0.2262, + "step": 358400 + }, + { + "epoch": 2.02, + "learning_rate": 2.985536574534703e-05, + "loss": 0.2157, + "step": 358500 + }, + { + "epoch": 2.02, + "learning_rate": 2.984974451508457e-05, + "loss": 0.2182, + "step": 358600 + }, + { + "epoch": 2.02, + "learning_rate": 2.9844123284822117e-05, + "loss": 0.2131, + "step": 358700 + }, + { + "epoch": 2.02, + "learning_rate": 2.9838502054559663e-05, + "loss": 0.2243, + "step": 358800 + }, + { + "epoch": 2.02, + "learning_rate": 2.9832880824297206e-05, + "loss": 0.2167, + "step": 358900 + }, + { + "epoch": 2.02, + "learning_rate": 2.9827259594034753e-05, + "loss": 0.2195, + "step": 359000 + }, + { + "epoch": 2.02, + "learning_rate": 2.98216383637723e-05, + "loss": 0.2196, + "step": 359100 + }, + { + "epoch": 2.02, + "learning_rate": 2.9816017133509845e-05, + "loss": 0.2209, + "step": 359200 + }, + { + "epoch": 2.02, + "learning_rate": 2.9810395903247385e-05, + "loss": 0.2255, + "step": 359300 + }, + { + "epoch": 2.02, + "learning_rate": 2.980477467298493e-05, + "loss": 0.2166, + "step": 359400 + }, + { + "epoch": 2.02, + "learning_rate": 2.9799153442722477e-05, + "loss": 0.215, + "step": 359500 + }, + { + "epoch": 2.02, + "learning_rate": 2.9793532212460017e-05, + "loss": 0.2123, + "step": 359600 + }, + { + "epoch": 2.02, + "learning_rate": 2.9787910982197563e-05, + "loss": 0.2188, + "step": 359700 + }, + { + "epoch": 2.02, + "learning_rate": 2.978228975193511e-05, + "loss": 0.2118, + "step": 359800 + }, + { + "epoch": 2.02, + "learning_rate": 2.9776668521672655e-05, + "loss": 0.2188, + "step": 359900 + }, + { + "epoch": 2.02, + "learning_rate": 2.97710472914102e-05, + "loss": 0.2173, + "step": 360000 + }, + { + "epoch": 2.02, + "learning_rate": 2.9765426061147745e-05, + "loss": 0.2238, + "step": 360100 + }, + { + "epoch": 2.02, + "learning_rate": 2.975980483088529e-05, + "loss": 0.2238, + "step": 360200 + }, + { + "epoch": 2.03, + "learning_rate": 2.975418360062283e-05, + "loss": 0.2116, + "step": 360300 + }, + { + "epoch": 2.03, + "learning_rate": 2.9748562370360377e-05, + "loss": 0.2181, + "step": 360400 + }, + { + "epoch": 2.03, + "learning_rate": 2.9742941140097923e-05, + "loss": 0.214, + "step": 360500 + }, + { + "epoch": 2.03, + "learning_rate": 2.973731990983547e-05, + "loss": 0.2229, + "step": 360600 + }, + { + "epoch": 2.03, + "learning_rate": 2.9731698679573012e-05, + "loss": 0.2103, + "step": 360700 + }, + { + "epoch": 2.03, + "learning_rate": 2.972607744931056e-05, + "loss": 0.2167, + "step": 360800 + }, + { + "epoch": 2.03, + "learning_rate": 2.9720456219048105e-05, + "loss": 0.2221, + "step": 360900 + }, + { + "epoch": 2.03, + "learning_rate": 2.9714834988785644e-05, + "loss": 0.2279, + "step": 361000 + }, + { + "epoch": 2.03, + "learning_rate": 2.970921375852319e-05, + "loss": 0.2226, + "step": 361100 + }, + { + "epoch": 2.03, + "learning_rate": 2.9703592528260737e-05, + "loss": 0.2198, + "step": 361200 + }, + { + "epoch": 2.03, + "learning_rate": 2.9697971297998283e-05, + "loss": 0.2139, + "step": 361300 + }, + { + "epoch": 2.03, + "learning_rate": 2.9692350067735826e-05, + "loss": 0.2148, + "step": 361400 + }, + { + "epoch": 2.03, + "learning_rate": 2.9686728837473372e-05, + "loss": 0.2192, + "step": 361500 + }, + { + "epoch": 2.03, + "learning_rate": 2.968110760721092e-05, + "loss": 0.2172, + "step": 361600 + }, + { + "epoch": 2.03, + "learning_rate": 2.9675486376948458e-05, + "loss": 0.2141, + "step": 361700 + }, + { + "epoch": 2.03, + "learning_rate": 2.9669865146686004e-05, + "loss": 0.2147, + "step": 361800 + }, + { + "epoch": 2.03, + "learning_rate": 2.966424391642355e-05, + "loss": 0.2136, + "step": 361900 + }, + { + "epoch": 2.03, + "learning_rate": 2.9658622686161097e-05, + "loss": 0.2186, + "step": 362000 + }, + { + "epoch": 2.04, + "learning_rate": 2.9653001455898636e-05, + "loss": 0.2196, + "step": 362100 + }, + { + "epoch": 2.04, + "learning_rate": 2.9647380225636186e-05, + "loss": 0.2231, + "step": 362200 + }, + { + "epoch": 2.04, + "learning_rate": 2.9641758995373732e-05, + "loss": 0.217, + "step": 362300 + }, + { + "epoch": 2.04, + "learning_rate": 2.9636137765111272e-05, + "loss": 0.2191, + "step": 362400 + }, + { + "epoch": 2.04, + "learning_rate": 2.9630516534848818e-05, + "loss": 0.2173, + "step": 362500 + }, + { + "epoch": 2.04, + "learning_rate": 2.9624895304586364e-05, + "loss": 0.221, + "step": 362600 + }, + { + "epoch": 2.04, + "learning_rate": 2.961927407432391e-05, + "loss": 0.2139, + "step": 362700 + }, + { + "epoch": 2.04, + "learning_rate": 2.961365284406145e-05, + "loss": 0.2209, + "step": 362800 + }, + { + "epoch": 2.04, + "learning_rate": 2.9608031613798996e-05, + "loss": 0.2124, + "step": 362900 + }, + { + "epoch": 2.04, + "learning_rate": 2.9602410383536543e-05, + "loss": 0.2184, + "step": 363000 + }, + { + "epoch": 2.04, + "learning_rate": 2.9596789153274086e-05, + "loss": 0.2162, + "step": 363100 + }, + { + "epoch": 2.04, + "learning_rate": 2.9591167923011632e-05, + "loss": 0.2219, + "step": 363200 + }, + { + "epoch": 2.04, + "learning_rate": 2.9585546692749178e-05, + "loss": 0.2223, + "step": 363300 + }, + { + "epoch": 2.04, + "learning_rate": 2.9579925462486724e-05, + "loss": 0.2161, + "step": 363400 + }, + { + "epoch": 2.04, + "learning_rate": 2.9574304232224264e-05, + "loss": 0.2126, + "step": 363500 + }, + { + "epoch": 2.04, + "learning_rate": 2.956868300196181e-05, + "loss": 0.2232, + "step": 363600 + }, + { + "epoch": 2.04, + "learning_rate": 2.9563061771699356e-05, + "loss": 0.2226, + "step": 363700 + }, + { + "epoch": 2.05, + "learning_rate": 2.95574405414369e-05, + "loss": 0.2184, + "step": 363800 + }, + { + "epoch": 2.05, + "learning_rate": 2.9551819311174446e-05, + "loss": 0.214, + "step": 363900 + }, + { + "epoch": 2.05, + "learning_rate": 2.9546198080911992e-05, + "loss": 0.2195, + "step": 364000 + }, + { + "epoch": 2.05, + "learning_rate": 2.9540576850649538e-05, + "loss": 0.2162, + "step": 364100 + }, + { + "epoch": 2.05, + "learning_rate": 2.9534955620387078e-05, + "loss": 0.2235, + "step": 364200 + }, + { + "epoch": 2.05, + "learning_rate": 2.9529334390124624e-05, + "loss": 0.2216, + "step": 364300 + }, + { + "epoch": 2.05, + "learning_rate": 2.952371315986217e-05, + "loss": 0.2198, + "step": 364400 + }, + { + "epoch": 2.05, + "learning_rate": 2.9518091929599713e-05, + "loss": 0.218, + "step": 364500 + }, + { + "epoch": 2.05, + "learning_rate": 2.951247069933726e-05, + "loss": 0.216, + "step": 364600 + }, + { + "epoch": 2.05, + "learning_rate": 2.9506849469074806e-05, + "loss": 0.2175, + "step": 364700 + }, + { + "epoch": 2.05, + "learning_rate": 2.9501228238812352e-05, + "loss": 0.2211, + "step": 364800 + }, + { + "epoch": 2.05, + "learning_rate": 2.949560700854989e-05, + "loss": 0.2117, + "step": 364900 + }, + { + "epoch": 2.05, + "learning_rate": 2.9489985778287438e-05, + "loss": 0.2167, + "step": 365000 + }, + { + "epoch": 2.05, + "learning_rate": 2.9484364548024984e-05, + "loss": 0.2224, + "step": 365100 + }, + { + "epoch": 2.05, + "learning_rate": 2.9478743317762523e-05, + "loss": 0.2141, + "step": 365200 + }, + { + "epoch": 2.05, + "learning_rate": 2.947312208750007e-05, + "loss": 0.2191, + "step": 365300 + }, + { + "epoch": 2.05, + "learning_rate": 2.9467500857237616e-05, + "loss": 0.2132, + "step": 365400 + }, + { + "epoch": 2.05, + "learning_rate": 2.946187962697516e-05, + "loss": 0.2224, + "step": 365500 + }, + { + "epoch": 2.06, + "learning_rate": 2.9456258396712705e-05, + "loss": 0.217, + "step": 365600 + }, + { + "epoch": 2.06, + "learning_rate": 2.945063716645025e-05, + "loss": 0.225, + "step": 365700 + }, + { + "epoch": 2.06, + "learning_rate": 2.9445015936187798e-05, + "loss": 0.2209, + "step": 365800 + }, + { + "epoch": 2.06, + "learning_rate": 2.9439394705925337e-05, + "loss": 0.214, + "step": 365900 + }, + { + "epoch": 2.06, + "learning_rate": 2.9433773475662884e-05, + "loss": 0.2169, + "step": 366000 + }, + { + "epoch": 2.06, + "learning_rate": 2.9428208457703055e-05, + "loss": 0.2187, + "step": 366100 + }, + { + "epoch": 2.06, + "learning_rate": 2.94225872274406e-05, + "loss": 0.218, + "step": 366200 + }, + { + "epoch": 2.06, + "learning_rate": 2.9417022209480765e-05, + "loss": 0.2112, + "step": 366300 + }, + { + "epoch": 2.06, + "learning_rate": 2.941140097921831e-05, + "loss": 0.2188, + "step": 366400 + }, + { + "epoch": 2.06, + "learning_rate": 2.9405779748955858e-05, + "loss": 0.222, + "step": 366500 + }, + { + "epoch": 2.06, + "learning_rate": 2.9400158518693404e-05, + "loss": 0.2174, + "step": 366600 + }, + { + "epoch": 2.06, + "learning_rate": 2.9394537288430947e-05, + "loss": 0.2153, + "step": 366700 + }, + { + "epoch": 2.06, + "learning_rate": 2.9388916058168493e-05, + "loss": 0.2215, + "step": 366800 + }, + { + "epoch": 2.06, + "learning_rate": 2.938329482790604e-05, + "loss": 0.2205, + "step": 366900 + }, + { + "epoch": 2.06, + "learning_rate": 2.937767359764358e-05, + "loss": 0.2224, + "step": 367000 + }, + { + "epoch": 2.06, + "learning_rate": 2.9372052367381125e-05, + "loss": 0.2211, + "step": 367100 + }, + { + "epoch": 2.06, + "learning_rate": 2.936643113711867e-05, + "loss": 0.2165, + "step": 367200 + }, + { + "epoch": 2.06, + "learning_rate": 2.9360809906856218e-05, + "loss": 0.217, + "step": 367300 + }, + { + "epoch": 2.07, + "learning_rate": 2.935518867659376e-05, + "loss": 0.2164, + "step": 367400 + }, + { + "epoch": 2.07, + "learning_rate": 2.9349567446331307e-05, + "loss": 0.2196, + "step": 367500 + }, + { + "epoch": 2.07, + "learning_rate": 2.9343946216068853e-05, + "loss": 0.2224, + "step": 367600 + }, + { + "epoch": 2.07, + "learning_rate": 2.9338324985806393e-05, + "loss": 0.2124, + "step": 367700 + }, + { + "epoch": 2.07, + "learning_rate": 2.933270375554394e-05, + "loss": 0.2222, + "step": 367800 + }, + { + "epoch": 2.07, + "learning_rate": 2.9327082525281485e-05, + "loss": 0.2176, + "step": 367900 + }, + { + "epoch": 2.07, + "learning_rate": 2.932146129501903e-05, + "loss": 0.2197, + "step": 368000 + }, + { + "epoch": 2.07, + "learning_rate": 2.931584006475657e-05, + "loss": 0.2268, + "step": 368100 + }, + { + "epoch": 2.07, + "learning_rate": 2.9310218834494117e-05, + "loss": 0.2092, + "step": 368200 + }, + { + "epoch": 2.07, + "learning_rate": 2.9304597604231663e-05, + "loss": 0.2198, + "step": 368300 + }, + { + "epoch": 2.07, + "learning_rate": 2.9298976373969206e-05, + "loss": 0.2228, + "step": 368400 + }, + { + "epoch": 2.07, + "learning_rate": 2.9293355143706753e-05, + "loss": 0.2116, + "step": 368500 + }, + { + "epoch": 2.07, + "learning_rate": 2.92877339134443e-05, + "loss": 0.216, + "step": 368600 + }, + { + "epoch": 2.07, + "learning_rate": 2.9282112683181845e-05, + "loss": 0.2183, + "step": 368700 + }, + { + "epoch": 2.07, + "learning_rate": 2.9276491452919385e-05, + "loss": 0.2208, + "step": 368800 + }, + { + "epoch": 2.07, + "learning_rate": 2.927087022265693e-05, + "loss": 0.2214, + "step": 368900 + }, + { + "epoch": 2.07, + "learning_rate": 2.9265248992394477e-05, + "loss": 0.2206, + "step": 369000 + }, + { + "epoch": 2.07, + "learning_rate": 2.925962776213202e-05, + "loss": 0.2173, + "step": 369100 + }, + { + "epoch": 2.08, + "learning_rate": 2.9254006531869566e-05, + "loss": 0.2214, + "step": 369200 + }, + { + "epoch": 2.08, + "learning_rate": 2.9248385301607113e-05, + "loss": 0.2198, + "step": 369300 + }, + { + "epoch": 2.08, + "learning_rate": 2.924276407134466e-05, + "loss": 0.2225, + "step": 369400 + }, + { + "epoch": 2.08, + "learning_rate": 2.92371428410822e-05, + "loss": 0.2149, + "step": 369500 + }, + { + "epoch": 2.08, + "learning_rate": 2.9231521610819745e-05, + "loss": 0.2143, + "step": 369600 + }, + { + "epoch": 2.08, + "learning_rate": 2.922590038055729e-05, + "loss": 0.2221, + "step": 369700 + }, + { + "epoch": 2.08, + "learning_rate": 2.9220279150294834e-05, + "loss": 0.2244, + "step": 369800 + }, + { + "epoch": 2.08, + "learning_rate": 2.921465792003238e-05, + "loss": 0.2182, + "step": 369900 + }, + { + "epoch": 2.08, + "learning_rate": 2.9209149114375173e-05, + "loss": 0.211, + "step": 370000 + }, + { + "epoch": 2.08, + "learning_rate": 2.920352788411272e-05, + "loss": 0.2209, + "step": 370100 + }, + { + "epoch": 2.08, + "learning_rate": 2.9197906653850265e-05, + "loss": 0.2224, + "step": 370200 + }, + { + "epoch": 2.08, + "learning_rate": 2.9192285423587805e-05, + "loss": 0.2151, + "step": 370300 + }, + { + "epoch": 2.08, + "learning_rate": 2.9186664193325354e-05, + "loss": 0.2151, + "step": 370400 + }, + { + "epoch": 2.08, + "learning_rate": 2.91810429630629e-05, + "loss": 0.2185, + "step": 370500 + }, + { + "epoch": 2.08, + "learning_rate": 2.917542173280044e-05, + "loss": 0.2233, + "step": 370600 + }, + { + "epoch": 2.08, + "learning_rate": 2.9169800502537986e-05, + "loss": 0.2206, + "step": 370700 + }, + { + "epoch": 2.08, + "learning_rate": 2.9164179272275533e-05, + "loss": 0.2181, + "step": 370800 + }, + { + "epoch": 2.08, + "learning_rate": 2.915855804201308e-05, + "loss": 0.2146, + "step": 370900 + }, + { + "epoch": 2.09, + "learning_rate": 2.915293681175062e-05, + "loss": 0.2212, + "step": 371000 + }, + { + "epoch": 2.09, + "learning_rate": 2.9147315581488165e-05, + "loss": 0.2166, + "step": 371100 + }, + { + "epoch": 2.09, + "learning_rate": 2.914169435122571e-05, + "loss": 0.2151, + "step": 371200 + }, + { + "epoch": 2.09, + "learning_rate": 2.9136073120963254e-05, + "loss": 0.2113, + "step": 371300 + }, + { + "epoch": 2.09, + "learning_rate": 2.91304518907008e-05, + "loss": 0.2162, + "step": 371400 + }, + { + "epoch": 2.09, + "learning_rate": 2.9124830660438346e-05, + "loss": 0.2225, + "step": 371500 + }, + { + "epoch": 2.09, + "learning_rate": 2.9119209430175893e-05, + "loss": 0.2138, + "step": 371600 + }, + { + "epoch": 2.09, + "learning_rate": 2.9113588199913432e-05, + "loss": 0.2229, + "step": 371700 + }, + { + "epoch": 2.09, + "learning_rate": 2.910796696965098e-05, + "loss": 0.2242, + "step": 371800 + }, + { + "epoch": 2.09, + "learning_rate": 2.9102345739388525e-05, + "loss": 0.2165, + "step": 371900 + }, + { + "epoch": 2.09, + "learning_rate": 2.9096724509126068e-05, + "loss": 0.2092, + "step": 372000 + }, + { + "epoch": 2.09, + "learning_rate": 2.9091103278863614e-05, + "loss": 0.2175, + "step": 372100 + }, + { + "epoch": 2.09, + "learning_rate": 2.908548204860116e-05, + "loss": 0.2235, + "step": 372200 + }, + { + "epoch": 2.09, + "learning_rate": 2.9079860818338706e-05, + "loss": 0.2122, + "step": 372300 + }, + { + "epoch": 2.09, + "learning_rate": 2.9074239588076246e-05, + "loss": 0.2229, + "step": 372400 + }, + { + "epoch": 2.09, + "learning_rate": 2.9068618357813792e-05, + "loss": 0.2175, + "step": 372500 + }, + { + "epoch": 2.09, + "learning_rate": 2.906299712755134e-05, + "loss": 0.2257, + "step": 372600 + }, + { + "epoch": 2.1, + "learning_rate": 2.905737589728888e-05, + "loss": 0.2153, + "step": 372700 + }, + { + "epoch": 2.1, + "learning_rate": 2.9051754667026428e-05, + "loss": 0.2219, + "step": 372800 + }, + { + "epoch": 2.1, + "learning_rate": 2.9046133436763974e-05, + "loss": 0.2168, + "step": 372900 + }, + { + "epoch": 2.1, + "learning_rate": 2.904051220650152e-05, + "loss": 0.2173, + "step": 373000 + }, + { + "epoch": 2.1, + "learning_rate": 2.903489097623906e-05, + "loss": 0.217, + "step": 373100 + }, + { + "epoch": 2.1, + "learning_rate": 2.9029269745976606e-05, + "loss": 0.221, + "step": 373200 + }, + { + "epoch": 2.1, + "learning_rate": 2.9023648515714152e-05, + "loss": 0.2098, + "step": 373300 + }, + { + "epoch": 2.1, + "learning_rate": 2.9018027285451692e-05, + "loss": 0.2169, + "step": 373400 + }, + { + "epoch": 2.1, + "learning_rate": 2.9012406055189238e-05, + "loss": 0.2177, + "step": 373500 + }, + { + "epoch": 2.1, + "learning_rate": 2.9006784824926784e-05, + "loss": 0.2218, + "step": 373600 + }, + { + "epoch": 2.1, + "learning_rate": 2.9001163594664334e-05, + "loss": 0.2132, + "step": 373700 + }, + { + "epoch": 2.1, + "learning_rate": 2.8995542364401873e-05, + "loss": 0.2187, + "step": 373800 + }, + { + "epoch": 2.1, + "learning_rate": 2.898992113413942e-05, + "loss": 0.219, + "step": 373900 + }, + { + "epoch": 2.1, + "learning_rate": 2.8984299903876966e-05, + "loss": 0.215, + "step": 374000 + }, + { + "epoch": 2.1, + "learning_rate": 2.8978678673614506e-05, + "loss": 0.2203, + "step": 374100 + }, + { + "epoch": 2.1, + "learning_rate": 2.8973057443352052e-05, + "loss": 0.2138, + "step": 374200 + }, + { + "epoch": 2.1, + "learning_rate": 2.8967436213089598e-05, + "loss": 0.2136, + "step": 374300 + }, + { + "epoch": 2.1, + "learning_rate": 2.8961814982827144e-05, + "loss": 0.2228, + "step": 374400 + }, + { + "epoch": 2.11, + "learning_rate": 2.8956193752564687e-05, + "loss": 0.2182, + "step": 374500 + }, + { + "epoch": 2.11, + "learning_rate": 2.8950572522302234e-05, + "loss": 0.2223, + "step": 374600 + }, + { + "epoch": 2.11, + "learning_rate": 2.894495129203978e-05, + "loss": 0.2196, + "step": 374700 + }, + { + "epoch": 2.11, + "learning_rate": 2.893933006177732e-05, + "loss": 0.2279, + "step": 374800 + }, + { + "epoch": 2.11, + "learning_rate": 2.8933708831514866e-05, + "loss": 0.2204, + "step": 374900 + }, + { + "epoch": 2.11, + "learning_rate": 2.8928087601252412e-05, + "loss": 0.2177, + "step": 375000 + }, + { + "epoch": 2.11, + "learning_rate": 2.8922466370989955e-05, + "loss": 0.2188, + "step": 375100 + }, + { + "epoch": 2.11, + "learning_rate": 2.89168451407275e-05, + "loss": 0.2192, + "step": 375200 + }, + { + "epoch": 2.11, + "learning_rate": 2.8911223910465047e-05, + "loss": 0.2201, + "step": 375300 + }, + { + "epoch": 2.11, + "learning_rate": 2.8905602680202594e-05, + "loss": 0.212, + "step": 375400 + }, + { + "epoch": 2.11, + "learning_rate": 2.8899981449940133e-05, + "loss": 0.2185, + "step": 375500 + }, + { + "epoch": 2.11, + "learning_rate": 2.889436021967768e-05, + "loss": 0.2228, + "step": 375600 + }, + { + "epoch": 2.11, + "learning_rate": 2.8888738989415226e-05, + "loss": 0.2205, + "step": 375700 + }, + { + "epoch": 2.11, + "learning_rate": 2.888311775915277e-05, + "loss": 0.2168, + "step": 375800 + }, + { + "epoch": 2.11, + "learning_rate": 2.8877496528890315e-05, + "loss": 0.2162, + "step": 375900 + }, + { + "epoch": 2.11, + "learning_rate": 2.887187529862786e-05, + "loss": 0.2161, + "step": 376000 + }, + { + "epoch": 2.11, + "learning_rate": 2.8866254068365407e-05, + "loss": 0.2165, + "step": 376100 + }, + { + "epoch": 2.11, + "learning_rate": 2.8860632838102947e-05, + "loss": 0.2254, + "step": 376200 + }, + { + "epoch": 2.12, + "learning_rate": 2.8855011607840493e-05, + "loss": 0.2199, + "step": 376300 + }, + { + "epoch": 2.12, + "learning_rate": 2.884939037757804e-05, + "loss": 0.2211, + "step": 376400 + }, + { + "epoch": 2.12, + "learning_rate": 2.884382535961821e-05, + "loss": 0.2202, + "step": 376500 + }, + { + "epoch": 2.12, + "learning_rate": 2.883820412935575e-05, + "loss": 0.219, + "step": 376600 + }, + { + "epoch": 2.12, + "learning_rate": 2.8832582899093296e-05, + "loss": 0.2123, + "step": 376700 + }, + { + "epoch": 2.12, + "learning_rate": 2.8826961668830843e-05, + "loss": 0.2198, + "step": 376800 + }, + { + "epoch": 2.12, + "learning_rate": 2.8821340438568385e-05, + "loss": 0.22, + "step": 376900 + }, + { + "epoch": 2.12, + "learning_rate": 2.881571920830593e-05, + "loss": 0.2171, + "step": 377000 + }, + { + "epoch": 2.12, + "learning_rate": 2.88101541903461e-05, + "loss": 0.2236, + "step": 377100 + }, + { + "epoch": 2.12, + "learning_rate": 2.8804532960083646e-05, + "loss": 0.2204, + "step": 377200 + }, + { + "epoch": 2.12, + "learning_rate": 2.879891172982119e-05, + "loss": 0.2155, + "step": 377300 + }, + { + "epoch": 2.12, + "learning_rate": 2.8793290499558735e-05, + "loss": 0.2219, + "step": 377400 + }, + { + "epoch": 2.12, + "learning_rate": 2.878766926929628e-05, + "loss": 0.211, + "step": 377500 + }, + { + "epoch": 2.12, + "learning_rate": 2.8782048039033827e-05, + "loss": 0.2136, + "step": 377600 + }, + { + "epoch": 2.12, + "learning_rate": 2.8776426808771367e-05, + "loss": 0.2169, + "step": 377700 + }, + { + "epoch": 2.12, + "learning_rate": 2.8770805578508913e-05, + "loss": 0.2116, + "step": 377800 + }, + { + "epoch": 2.12, + "learning_rate": 2.876518434824646e-05, + "loss": 0.22, + "step": 377900 + }, + { + "epoch": 2.12, + "learning_rate": 2.8759563117984002e-05, + "loss": 0.218, + "step": 378000 + }, + { + "epoch": 2.13, + "learning_rate": 2.875394188772155e-05, + "loss": 0.2139, + "step": 378100 + }, + { + "epoch": 2.13, + "learning_rate": 2.8748320657459095e-05, + "loss": 0.2088, + "step": 378200 + }, + { + "epoch": 2.13, + "learning_rate": 2.874269942719664e-05, + "loss": 0.2196, + "step": 378300 + }, + { + "epoch": 2.13, + "learning_rate": 2.873707819693418e-05, + "loss": 0.2175, + "step": 378400 + }, + { + "epoch": 2.13, + "learning_rate": 2.8731456966671727e-05, + "loss": 0.2214, + "step": 378500 + }, + { + "epoch": 2.13, + "learning_rate": 2.8725835736409273e-05, + "loss": 0.2189, + "step": 378600 + }, + { + "epoch": 2.13, + "learning_rate": 2.8720214506146813e-05, + "loss": 0.2203, + "step": 378700 + }, + { + "epoch": 2.13, + "learning_rate": 2.871459327588436e-05, + "loss": 0.2195, + "step": 378800 + }, + { + "epoch": 2.13, + "learning_rate": 2.870897204562191e-05, + "loss": 0.2184, + "step": 378900 + }, + { + "epoch": 2.13, + "learning_rate": 2.8703350815359455e-05, + "loss": 0.2179, + "step": 379000 + }, + { + "epoch": 2.13, + "learning_rate": 2.8697729585096994e-05, + "loss": 0.2144, + "step": 379100 + }, + { + "epoch": 2.13, + "learning_rate": 2.869210835483454e-05, + "loss": 0.2096, + "step": 379200 + }, + { + "epoch": 2.13, + "learning_rate": 2.8686487124572087e-05, + "loss": 0.2212, + "step": 379300 + }, + { + "epoch": 2.13, + "learning_rate": 2.8680865894309626e-05, + "loss": 0.2197, + "step": 379400 + }, + { + "epoch": 2.13, + "learning_rate": 2.8675244664047173e-05, + "loss": 0.2081, + "step": 379500 + }, + { + "epoch": 2.13, + "learning_rate": 2.866962343378472e-05, + "loss": 0.2174, + "step": 379600 + }, + { + "epoch": 2.13, + "learning_rate": 2.8664002203522265e-05, + "loss": 0.2256, + "step": 379700 + }, + { + "epoch": 2.13, + "learning_rate": 2.8658380973259808e-05, + "loss": 0.2201, + "step": 379800 + }, + { + "epoch": 2.14, + "learning_rate": 2.8652759742997354e-05, + "loss": 0.2256, + "step": 379900 + }, + { + "epoch": 2.14, + "learning_rate": 2.86471385127349e-05, + "loss": 0.2161, + "step": 380000 + }, + { + "epoch": 2.14, + "learning_rate": 2.864151728247244e-05, + "loss": 0.2228, + "step": 380100 + }, + { + "epoch": 2.14, + "learning_rate": 2.863595226451261e-05, + "loss": 0.2249, + "step": 380200 + }, + { + "epoch": 2.14, + "learning_rate": 2.8630331034250157e-05, + "loss": 0.2208, + "step": 380300 + }, + { + "epoch": 2.14, + "learning_rate": 2.8624709803987704e-05, + "loss": 0.2148, + "step": 380400 + }, + { + "epoch": 2.14, + "learning_rate": 2.8619088573725243e-05, + "loss": 0.2201, + "step": 380500 + }, + { + "epoch": 2.14, + "learning_rate": 2.861346734346279e-05, + "loss": 0.2172, + "step": 380600 + }, + { + "epoch": 2.14, + "learning_rate": 2.8607846113200336e-05, + "loss": 0.2181, + "step": 380700 + }, + { + "epoch": 2.14, + "learning_rate": 2.8602224882937882e-05, + "loss": 0.2186, + "step": 380800 + }, + { + "epoch": 2.14, + "learning_rate": 2.8596603652675425e-05, + "loss": 0.2207, + "step": 380900 + }, + { + "epoch": 2.14, + "learning_rate": 2.859098242241297e-05, + "loss": 0.2147, + "step": 381000 + }, + { + "epoch": 2.14, + "learning_rate": 2.8585361192150518e-05, + "loss": 0.2221, + "step": 381100 + }, + { + "epoch": 2.14, + "learning_rate": 2.8579739961888057e-05, + "loss": 0.2219, + "step": 381200 + }, + { + "epoch": 2.14, + "learning_rate": 2.8574118731625603e-05, + "loss": 0.2177, + "step": 381300 + }, + { + "epoch": 2.14, + "learning_rate": 2.856849750136315e-05, + "loss": 0.2163, + "step": 381400 + }, + { + "epoch": 2.14, + "learning_rate": 2.8562876271100696e-05, + "loss": 0.2186, + "step": 381500 + }, + { + "epoch": 2.15, + "learning_rate": 2.855725504083824e-05, + "loss": 0.2145, + "step": 381600 + }, + { + "epoch": 2.15, + "learning_rate": 2.8551633810575785e-05, + "loss": 0.2175, + "step": 381700 + }, + { + "epoch": 2.15, + "learning_rate": 2.854601258031333e-05, + "loss": 0.2185, + "step": 381800 + }, + { + "epoch": 2.15, + "learning_rate": 2.854039135005087e-05, + "loss": 0.2206, + "step": 381900 + }, + { + "epoch": 2.15, + "learning_rate": 2.8534770119788417e-05, + "loss": 0.2137, + "step": 382000 + }, + { + "epoch": 2.15, + "learning_rate": 2.8529148889525963e-05, + "loss": 0.2112, + "step": 382100 + }, + { + "epoch": 2.15, + "learning_rate": 2.852352765926351e-05, + "loss": 0.2193, + "step": 382200 + }, + { + "epoch": 2.15, + "learning_rate": 2.8517906429001053e-05, + "loss": 0.2193, + "step": 382300 + }, + { + "epoch": 2.15, + "learning_rate": 2.85122851987386e-05, + "loss": 0.2103, + "step": 382400 + }, + { + "epoch": 2.15, + "learning_rate": 2.8506663968476145e-05, + "loss": 0.2256, + "step": 382500 + }, + { + "epoch": 2.15, + "learning_rate": 2.8501042738213685e-05, + "loss": 0.2183, + "step": 382600 + }, + { + "epoch": 2.15, + "learning_rate": 2.849542150795123e-05, + "loss": 0.2167, + "step": 382700 + }, + { + "epoch": 2.15, + "learning_rate": 2.8489800277688777e-05, + "loss": 0.2156, + "step": 382800 + }, + { + "epoch": 2.15, + "learning_rate": 2.8484179047426323e-05, + "loss": 0.2186, + "step": 382900 + }, + { + "epoch": 2.15, + "learning_rate": 2.8478557817163863e-05, + "loss": 0.2131, + "step": 383000 + }, + { + "epoch": 2.15, + "learning_rate": 2.847293658690141e-05, + "loss": 0.2155, + "step": 383100 + }, + { + "epoch": 2.15, + "learning_rate": 2.846731535663896e-05, + "loss": 0.2139, + "step": 383200 + }, + { + "epoch": 2.15, + "learning_rate": 2.84616941263765e-05, + "loss": 0.2234, + "step": 383300 + }, + { + "epoch": 2.16, + "learning_rate": 2.8456072896114045e-05, + "loss": 0.2118, + "step": 383400 + }, + { + "epoch": 2.16, + "learning_rate": 2.845045166585159e-05, + "loss": 0.2177, + "step": 383500 + }, + { + "epoch": 2.16, + "learning_rate": 2.8444830435589137e-05, + "loss": 0.2167, + "step": 383600 + }, + { + "epoch": 2.16, + "learning_rate": 2.8439209205326677e-05, + "loss": 0.2144, + "step": 383700 + }, + { + "epoch": 2.16, + "learning_rate": 2.8433587975064223e-05, + "loss": 0.2172, + "step": 383800 + }, + { + "epoch": 2.16, + "learning_rate": 2.842796674480177e-05, + "loss": 0.2112, + "step": 383900 + }, + { + "epoch": 2.16, + "learning_rate": 2.8422345514539312e-05, + "loss": 0.2157, + "step": 384000 + }, + { + "epoch": 2.16, + "learning_rate": 2.841672428427686e-05, + "loss": 0.2182, + "step": 384100 + }, + { + "epoch": 2.16, + "learning_rate": 2.8411103054014405e-05, + "loss": 0.2192, + "step": 384200 + }, + { + "epoch": 2.16, + "learning_rate": 2.840548182375195e-05, + "loss": 0.2143, + "step": 384300 + }, + { + "epoch": 2.16, + "learning_rate": 2.839986059348949e-05, + "loss": 0.2133, + "step": 384400 + }, + { + "epoch": 2.16, + "learning_rate": 2.8394239363227037e-05, + "loss": 0.2184, + "step": 384500 + }, + { + "epoch": 2.16, + "learning_rate": 2.8388618132964583e-05, + "loss": 0.2165, + "step": 384600 + }, + { + "epoch": 2.16, + "learning_rate": 2.8382996902702126e-05, + "loss": 0.2187, + "step": 384700 + }, + { + "epoch": 2.16, + "learning_rate": 2.8377431884742294e-05, + "loss": 0.2194, + "step": 384800 + }, + { + "epoch": 2.16, + "learning_rate": 2.837181065447984e-05, + "loss": 0.2212, + "step": 384900 + }, + { + "epoch": 2.16, + "learning_rate": 2.8366189424217386e-05, + "loss": 0.2145, + "step": 385000 + }, + { + "epoch": 2.16, + "learning_rate": 2.836056819395493e-05, + "loss": 0.2153, + "step": 385100 + }, + { + "epoch": 2.17, + "learning_rate": 2.8354946963692475e-05, + "loss": 0.2162, + "step": 385200 + }, + { + "epoch": 2.17, + "learning_rate": 2.834932573343002e-05, + "loss": 0.2179, + "step": 385300 + }, + { + "epoch": 2.17, + "learning_rate": 2.834370450316756e-05, + "loss": 0.2192, + "step": 385400 + }, + { + "epoch": 2.17, + "learning_rate": 2.8338083272905107e-05, + "loss": 0.222, + "step": 385500 + }, + { + "epoch": 2.17, + "learning_rate": 2.8332462042642654e-05, + "loss": 0.2168, + "step": 385600 + }, + { + "epoch": 2.17, + "learning_rate": 2.83268408123802e-05, + "loss": 0.2214, + "step": 385700 + }, + { + "epoch": 2.17, + "learning_rate": 2.8321219582117743e-05, + "loss": 0.2191, + "step": 385800 + }, + { + "epoch": 2.17, + "learning_rate": 2.831559835185529e-05, + "loss": 0.2143, + "step": 385900 + }, + { + "epoch": 2.17, + "learning_rate": 2.8309977121592835e-05, + "loss": 0.2205, + "step": 386000 + }, + { + "epoch": 2.17, + "learning_rate": 2.8304355891330375e-05, + "loss": 0.2195, + "step": 386100 + }, + { + "epoch": 2.17, + "learning_rate": 2.829873466106792e-05, + "loss": 0.2209, + "step": 386200 + }, + { + "epoch": 2.17, + "learning_rate": 2.8293113430805467e-05, + "loss": 0.2169, + "step": 386300 + }, + { + "epoch": 2.17, + "learning_rate": 2.8287492200543014e-05, + "loss": 0.2172, + "step": 386400 + }, + { + "epoch": 2.17, + "learning_rate": 2.8281870970280557e-05, + "loss": 0.2171, + "step": 386500 + }, + { + "epoch": 2.17, + "learning_rate": 2.8276249740018103e-05, + "loss": 0.2154, + "step": 386600 + }, + { + "epoch": 2.17, + "learning_rate": 2.827062850975565e-05, + "loss": 0.2238, + "step": 386700 + }, + { + "epoch": 2.17, + "learning_rate": 2.826500727949319e-05, + "loss": 0.2199, + "step": 386800 + }, + { + "epoch": 2.17, + "learning_rate": 2.8259386049230735e-05, + "loss": 0.218, + "step": 386900 + }, + { + "epoch": 2.18, + "learning_rate": 2.825376481896828e-05, + "loss": 0.2174, + "step": 387000 + }, + { + "epoch": 2.18, + "learning_rate": 2.8248143588705827e-05, + "loss": 0.2195, + "step": 387100 + }, + { + "epoch": 2.18, + "learning_rate": 2.8242522358443367e-05, + "loss": 0.2109, + "step": 387200 + }, + { + "epoch": 2.18, + "learning_rate": 2.8236901128180913e-05, + "loss": 0.2203, + "step": 387300 + }, + { + "epoch": 2.18, + "learning_rate": 2.8231336110221084e-05, + "loss": 0.2192, + "step": 387400 + }, + { + "epoch": 2.18, + "learning_rate": 2.822571487995863e-05, + "loss": 0.2169, + "step": 387500 + }, + { + "epoch": 2.18, + "learning_rate": 2.8220149861998795e-05, + "loss": 0.2192, + "step": 387600 + }, + { + "epoch": 2.18, + "learning_rate": 2.821452863173634e-05, + "loss": 0.216, + "step": 387700 + }, + { + "epoch": 2.18, + "learning_rate": 2.8208907401473887e-05, + "loss": 0.2163, + "step": 387800 + }, + { + "epoch": 2.18, + "learning_rate": 2.8203286171211434e-05, + "loss": 0.2182, + "step": 387900 + }, + { + "epoch": 2.18, + "learning_rate": 2.8197664940948976e-05, + "loss": 0.2223, + "step": 388000 + }, + { + "epoch": 2.18, + "learning_rate": 2.8192043710686523e-05, + "loss": 0.2195, + "step": 388100 + }, + { + "epoch": 2.18, + "learning_rate": 2.818642248042407e-05, + "loss": 0.2179, + "step": 388200 + }, + { + "epoch": 2.18, + "learning_rate": 2.818080125016161e-05, + "loss": 0.2158, + "step": 388300 + }, + { + "epoch": 2.18, + "learning_rate": 2.8175180019899155e-05, + "loss": 0.2166, + "step": 388400 + }, + { + "epoch": 2.18, + "learning_rate": 2.81695587896367e-05, + "loss": 0.2175, + "step": 388500 + }, + { + "epoch": 2.18, + "learning_rate": 2.8163937559374247e-05, + "loss": 0.2142, + "step": 388600 + }, + { + "epoch": 2.18, + "learning_rate": 2.815831632911179e-05, + "loss": 0.2194, + "step": 388700 + }, + { + "epoch": 2.19, + "learning_rate": 2.8152695098849336e-05, + "loss": 0.2192, + "step": 388800 + }, + { + "epoch": 2.19, + "learning_rate": 2.8147073868586883e-05, + "loss": 0.2182, + "step": 388900 + }, + { + "epoch": 2.19, + "learning_rate": 2.814150885062705e-05, + "loss": 0.223, + "step": 389000 + }, + { + "epoch": 2.19, + "learning_rate": 2.8135887620364593e-05, + "loss": 0.221, + "step": 389100 + }, + { + "epoch": 2.19, + "learning_rate": 2.813026639010214e-05, + "loss": 0.2131, + "step": 389200 + }, + { + "epoch": 2.19, + "learning_rate": 2.8124645159839686e-05, + "loss": 0.2137, + "step": 389300 + }, + { + "epoch": 2.19, + "learning_rate": 2.8119023929577225e-05, + "loss": 0.2136, + "step": 389400 + }, + { + "epoch": 2.19, + "learning_rate": 2.811340269931477e-05, + "loss": 0.218, + "step": 389500 + }, + { + "epoch": 2.19, + "learning_rate": 2.8107781469052318e-05, + "loss": 0.2171, + "step": 389600 + }, + { + "epoch": 2.19, + "learning_rate": 2.8102160238789864e-05, + "loss": 0.2241, + "step": 389700 + }, + { + "epoch": 2.19, + "learning_rate": 2.8096539008527407e-05, + "loss": 0.2198, + "step": 389800 + }, + { + "epoch": 2.19, + "learning_rate": 2.8090917778264953e-05, + "loss": 0.2219, + "step": 389900 + }, + { + "epoch": 2.19, + "learning_rate": 2.80852965480025e-05, + "loss": 0.2183, + "step": 390000 + }, + { + "epoch": 2.19, + "learning_rate": 2.807967531774004e-05, + "loss": 0.2156, + "step": 390100 + }, + { + "epoch": 2.19, + "learning_rate": 2.8074054087477585e-05, + "loss": 0.22, + "step": 390200 + }, + { + "epoch": 2.19, + "learning_rate": 2.806843285721513e-05, + "loss": 0.2163, + "step": 390300 + }, + { + "epoch": 2.19, + "learning_rate": 2.8062811626952678e-05, + "loss": 0.2207, + "step": 390400 + }, + { + "epoch": 2.2, + "learning_rate": 2.805719039669022e-05, + "loss": 0.2159, + "step": 390500 + }, + { + "epoch": 2.2, + "learning_rate": 2.8051569166427767e-05, + "loss": 0.2176, + "step": 390600 + }, + { + "epoch": 2.2, + "learning_rate": 2.8045947936165313e-05, + "loss": 0.2162, + "step": 390700 + }, + { + "epoch": 2.2, + "learning_rate": 2.8040326705902853e-05, + "loss": 0.2229, + "step": 390800 + }, + { + "epoch": 2.2, + "learning_rate": 2.80347054756404e-05, + "loss": 0.2145, + "step": 390900 + }, + { + "epoch": 2.2, + "learning_rate": 2.8029084245377945e-05, + "loss": 0.2276, + "step": 391000 + }, + { + "epoch": 2.2, + "learning_rate": 2.8023463015115492e-05, + "loss": 0.2143, + "step": 391100 + }, + { + "epoch": 2.2, + "learning_rate": 2.801784178485303e-05, + "loss": 0.2143, + "step": 391200 + }, + { + "epoch": 2.2, + "learning_rate": 2.8012220554590578e-05, + "loss": 0.2184, + "step": 391300 + }, + { + "epoch": 2.2, + "learning_rate": 2.8006599324328127e-05, + "loss": 0.2198, + "step": 391400 + }, + { + "epoch": 2.2, + "learning_rate": 2.8000978094065667e-05, + "loss": 0.2182, + "step": 391500 + }, + { + "epoch": 2.2, + "learning_rate": 2.7995356863803213e-05, + "loss": 0.2173, + "step": 391600 + }, + { + "epoch": 2.2, + "learning_rate": 2.798973563354076e-05, + "loss": 0.217, + "step": 391700 + }, + { + "epoch": 2.2, + "learning_rate": 2.7984114403278306e-05, + "loss": 0.2164, + "step": 391800 + }, + { + "epoch": 2.2, + "learning_rate": 2.7978493173015845e-05, + "loss": 0.2139, + "step": 391900 + }, + { + "epoch": 2.2, + "learning_rate": 2.797287194275339e-05, + "loss": 0.2095, + "step": 392000 + }, + { + "epoch": 2.2, + "learning_rate": 2.7967250712490938e-05, + "loss": 0.2134, + "step": 392100 + }, + { + "epoch": 2.2, + "learning_rate": 2.796162948222848e-05, + "loss": 0.2177, + "step": 392200 + }, + { + "epoch": 2.21, + "learning_rate": 2.7956008251966027e-05, + "loss": 0.2169, + "step": 392300 + }, + { + "epoch": 2.21, + "learning_rate": 2.7950387021703573e-05, + "loss": 0.2189, + "step": 392400 + }, + { + "epoch": 2.21, + "learning_rate": 2.794476579144112e-05, + "loss": 0.2164, + "step": 392500 + }, + { + "epoch": 2.21, + "learning_rate": 2.793914456117866e-05, + "loss": 0.2221, + "step": 392600 + }, + { + "epoch": 2.21, + "learning_rate": 2.7933523330916205e-05, + "loss": 0.2136, + "step": 392700 + }, + { + "epoch": 2.21, + "learning_rate": 2.792790210065375e-05, + "loss": 0.2231, + "step": 392800 + }, + { + "epoch": 2.21, + "learning_rate": 2.7922280870391294e-05, + "loss": 0.2211, + "step": 392900 + }, + { + "epoch": 2.21, + "learning_rate": 2.791665964012884e-05, + "loss": 0.2161, + "step": 393000 + }, + { + "epoch": 2.21, + "learning_rate": 2.7911038409866387e-05, + "loss": 0.2143, + "step": 393100 + }, + { + "epoch": 2.21, + "learning_rate": 2.7905417179603933e-05, + "loss": 0.2206, + "step": 393200 + }, + { + "epoch": 2.21, + "learning_rate": 2.7899795949341473e-05, + "loss": 0.2148, + "step": 393300 + }, + { + "epoch": 2.21, + "learning_rate": 2.789417471907902e-05, + "loss": 0.2205, + "step": 393400 + }, + { + "epoch": 2.21, + "learning_rate": 2.7888553488816565e-05, + "loss": 0.2116, + "step": 393500 + }, + { + "epoch": 2.21, + "learning_rate": 2.7882932258554108e-05, + "loss": 0.2164, + "step": 393600 + }, + { + "epoch": 2.21, + "learning_rate": 2.7877311028291654e-05, + "loss": 0.2198, + "step": 393700 + }, + { + "epoch": 2.21, + "learning_rate": 2.78716897980292e-05, + "loss": 0.2119, + "step": 393800 + }, + { + "epoch": 2.21, + "learning_rate": 2.7866068567766747e-05, + "loss": 0.2131, + "step": 393900 + }, + { + "epoch": 2.21, + "learning_rate": 2.7860447337504286e-05, + "loss": 0.2127, + "step": 394000 + }, + { + "epoch": 2.22, + "learning_rate": 2.7854826107241833e-05, + "loss": 0.2114, + "step": 394100 + }, + { + "epoch": 2.22, + "learning_rate": 2.784920487697938e-05, + "loss": 0.2167, + "step": 394200 + }, + { + "epoch": 2.22, + "learning_rate": 2.784358364671692e-05, + "loss": 0.2178, + "step": 394300 + }, + { + "epoch": 2.22, + "learning_rate": 2.783801862875709e-05, + "loss": 0.2139, + "step": 394400 + }, + { + "epoch": 2.22, + "learning_rate": 2.7832397398494636e-05, + "loss": 0.213, + "step": 394500 + }, + { + "epoch": 2.22, + "learning_rate": 2.7826776168232182e-05, + "loss": 0.2177, + "step": 394600 + }, + { + "epoch": 2.22, + "learning_rate": 2.7821154937969725e-05, + "loss": 0.2138, + "step": 394700 + }, + { + "epoch": 2.22, + "learning_rate": 2.781553370770727e-05, + "loss": 0.2219, + "step": 394800 + }, + { + "epoch": 2.22, + "learning_rate": 2.7809912477444817e-05, + "loss": 0.2124, + "step": 394900 + }, + { + "epoch": 2.22, + "learning_rate": 2.7804347459484985e-05, + "loss": 0.2182, + "step": 395000 + }, + { + "epoch": 2.22, + "learning_rate": 2.7798726229222528e-05, + "loss": 0.2147, + "step": 395100 + }, + { + "epoch": 2.22, + "learning_rate": 2.7793104998960074e-05, + "loss": 0.2091, + "step": 395200 + }, + { + "epoch": 2.22, + "learning_rate": 2.778748376869762e-05, + "loss": 0.2172, + "step": 395300 + }, + { + "epoch": 2.22, + "learning_rate": 2.778186253843516e-05, + "loss": 0.2171, + "step": 395400 + }, + { + "epoch": 2.22, + "learning_rate": 2.7776241308172706e-05, + "loss": 0.2222, + "step": 395500 + }, + { + "epoch": 2.22, + "learning_rate": 2.7770620077910253e-05, + "loss": 0.2045, + "step": 395600 + }, + { + "epoch": 2.22, + "learning_rate": 2.77649988476478e-05, + "loss": 0.2163, + "step": 395700 + }, + { + "epoch": 2.22, + "learning_rate": 2.7759377617385342e-05, + "loss": 0.2148, + "step": 395800 + }, + { + "epoch": 2.23, + "learning_rate": 2.7753756387122888e-05, + "loss": 0.2125, + "step": 395900 + }, + { + "epoch": 2.23, + "learning_rate": 2.7748135156860434e-05, + "loss": 0.2147, + "step": 396000 + }, + { + "epoch": 2.23, + "learning_rate": 2.7742513926597974e-05, + "loss": 0.2179, + "step": 396100 + }, + { + "epoch": 2.23, + "learning_rate": 2.773689269633552e-05, + "loss": 0.2183, + "step": 396200 + }, + { + "epoch": 2.23, + "learning_rate": 2.7731271466073066e-05, + "loss": 0.2145, + "step": 396300 + }, + { + "epoch": 2.23, + "learning_rate": 2.7725650235810613e-05, + "loss": 0.2193, + "step": 396400 + }, + { + "epoch": 2.23, + "learning_rate": 2.7720029005548152e-05, + "loss": 0.2129, + "step": 396500 + }, + { + "epoch": 2.23, + "learning_rate": 2.7714407775285702e-05, + "loss": 0.2186, + "step": 396600 + }, + { + "epoch": 2.23, + "learning_rate": 2.7708786545023248e-05, + "loss": 0.2136, + "step": 396700 + }, + { + "epoch": 2.23, + "learning_rate": 2.7703165314760788e-05, + "loss": 0.2157, + "step": 396800 + }, + { + "epoch": 2.23, + "learning_rate": 2.7697544084498334e-05, + "loss": 0.2139, + "step": 396900 + }, + { + "epoch": 2.23, + "learning_rate": 2.769192285423588e-05, + "loss": 0.2202, + "step": 397000 + }, + { + "epoch": 2.23, + "learning_rate": 2.7686301623973426e-05, + "loss": 0.2271, + "step": 397100 + }, + { + "epoch": 2.23, + "learning_rate": 2.768073660601359e-05, + "loss": 0.2239, + "step": 397200 + }, + { + "epoch": 2.23, + "learning_rate": 2.7675115375751137e-05, + "loss": 0.214, + "step": 397300 + }, + { + "epoch": 2.23, + "learning_rate": 2.7669494145488683e-05, + "loss": 0.2171, + "step": 397400 + }, + { + "epoch": 2.23, + "learning_rate": 2.766387291522623e-05, + "loss": 0.2144, + "step": 397500 + }, + { + "epoch": 2.24, + "learning_rate": 2.7658251684963772e-05, + "loss": 0.2206, + "step": 397600 + }, + { + "epoch": 2.24, + "learning_rate": 2.765263045470132e-05, + "loss": 0.214, + "step": 397700 + }, + { + "epoch": 2.24, + "learning_rate": 2.7647009224438865e-05, + "loss": 0.2165, + "step": 397800 + }, + { + "epoch": 2.24, + "learning_rate": 2.7641387994176404e-05, + "loss": 0.2219, + "step": 397900 + }, + { + "epoch": 2.24, + "learning_rate": 2.763576676391395e-05, + "loss": 0.213, + "step": 398000 + }, + { + "epoch": 2.24, + "learning_rate": 2.7630145533651497e-05, + "loss": 0.2144, + "step": 398100 + }, + { + "epoch": 2.24, + "learning_rate": 2.7624524303389043e-05, + "loss": 0.2195, + "step": 398200 + }, + { + "epoch": 2.24, + "learning_rate": 2.7618903073126583e-05, + "loss": 0.2209, + "step": 398300 + }, + { + "epoch": 2.24, + "learning_rate": 2.761328184286413e-05, + "loss": 0.2136, + "step": 398400 + }, + { + "epoch": 2.24, + "learning_rate": 2.7607660612601675e-05, + "loss": 0.22, + "step": 398500 + }, + { + "epoch": 2.24, + "learning_rate": 2.7602039382339218e-05, + "loss": 0.2159, + "step": 398600 + }, + { + "epoch": 2.24, + "learning_rate": 2.7596418152076764e-05, + "loss": 0.2178, + "step": 398700 + }, + { + "epoch": 2.24, + "learning_rate": 2.759079692181431e-05, + "loss": 0.2169, + "step": 398800 + }, + { + "epoch": 2.24, + "learning_rate": 2.7585175691551857e-05, + "loss": 0.2145, + "step": 398900 + }, + { + "epoch": 2.24, + "learning_rate": 2.7579554461289396e-05, + "loss": 0.2223, + "step": 399000 + }, + { + "epoch": 2.24, + "learning_rate": 2.7573933231026943e-05, + "loss": 0.2098, + "step": 399100 + }, + { + "epoch": 2.24, + "learning_rate": 2.756831200076449e-05, + "loss": 0.2109, + "step": 399200 + }, + { + "epoch": 2.24, + "learning_rate": 2.7562690770502032e-05, + "loss": 0.2123, + "step": 399300 + }, + { + "epoch": 2.25, + "learning_rate": 2.7557069540239578e-05, + "loss": 0.2179, + "step": 399400 + }, + { + "epoch": 2.25, + "learning_rate": 2.7551448309977124e-05, + "loss": 0.2202, + "step": 399500 + }, + { + "epoch": 2.25, + "learning_rate": 2.754582707971467e-05, + "loss": 0.2142, + "step": 399600 + }, + { + "epoch": 2.25, + "learning_rate": 2.754020584945221e-05, + "loss": 0.214, + "step": 399700 + }, + { + "epoch": 2.25, + "learning_rate": 2.7534584619189757e-05, + "loss": 0.2164, + "step": 399800 + }, + { + "epoch": 2.25, + "learning_rate": 2.7528963388927303e-05, + "loss": 0.2136, + "step": 399900 + }, + { + "epoch": 2.25, + "learning_rate": 2.7523342158664846e-05, + "loss": 0.2108, + "step": 400000 + }, + { + "epoch": 2.25, + "learning_rate": 2.7517720928402392e-05, + "loss": 0.2106, + "step": 400100 + }, + { + "epoch": 2.25, + "learning_rate": 2.7512099698139938e-05, + "loss": 0.2162, + "step": 400200 + }, + { + "epoch": 2.25, + "learning_rate": 2.7506478467877485e-05, + "loss": 0.2134, + "step": 400300 + }, + { + "epoch": 2.25, + "learning_rate": 2.7500857237615024e-05, + "loss": 0.2162, + "step": 400400 + }, + { + "epoch": 2.25, + "learning_rate": 2.749523600735257e-05, + "loss": 0.2187, + "step": 400500 + }, + { + "epoch": 2.25, + "learning_rate": 2.7489614777090117e-05, + "loss": 0.2185, + "step": 400600 + }, + { + "epoch": 2.25, + "learning_rate": 2.7483993546827656e-05, + "loss": 0.2102, + "step": 400700 + }, + { + "epoch": 2.25, + "learning_rate": 2.7478372316565202e-05, + "loss": 0.2233, + "step": 400800 + }, + { + "epoch": 2.25, + "learning_rate": 2.7472751086302752e-05, + "loss": 0.2158, + "step": 400900 + }, + { + "epoch": 2.25, + "learning_rate": 2.74671298560403e-05, + "loss": 0.2168, + "step": 401000 + }, + { + "epoch": 2.25, + "learning_rate": 2.7461508625777838e-05, + "loss": 0.2145, + "step": 401100 + }, + { + "epoch": 2.26, + "learning_rate": 2.7455887395515384e-05, + "loss": 0.2108, + "step": 401200 + }, + { + "epoch": 2.26, + "learning_rate": 2.745026616525293e-05, + "loss": 0.2183, + "step": 401300 + }, + { + "epoch": 2.26, + "learning_rate": 2.744464493499047e-05, + "loss": 0.2087, + "step": 401400 + }, + { + "epoch": 2.26, + "learning_rate": 2.7439023704728016e-05, + "loss": 0.2129, + "step": 401500 + }, + { + "epoch": 2.26, + "learning_rate": 2.7433402474465562e-05, + "loss": 0.2192, + "step": 401600 + }, + { + "epoch": 2.26, + "learning_rate": 2.742778124420311e-05, + "loss": 0.2154, + "step": 401700 + }, + { + "epoch": 2.26, + "learning_rate": 2.742216001394065e-05, + "loss": 0.2147, + "step": 401800 + }, + { + "epoch": 2.26, + "learning_rate": 2.7416538783678198e-05, + "loss": 0.2161, + "step": 401900 + }, + { + "epoch": 2.26, + "learning_rate": 2.7410917553415744e-05, + "loss": 0.2092, + "step": 402000 + }, + { + "epoch": 2.26, + "learning_rate": 2.7405296323153284e-05, + "loss": 0.2081, + "step": 402100 + }, + { + "epoch": 2.26, + "learning_rate": 2.739967509289083e-05, + "loss": 0.2171, + "step": 402200 + }, + { + "epoch": 2.26, + "learning_rate": 2.7394053862628376e-05, + "loss": 0.2156, + "step": 402300 + }, + { + "epoch": 2.26, + "learning_rate": 2.7388432632365922e-05, + "loss": 0.2188, + "step": 402400 + }, + { + "epoch": 2.26, + "learning_rate": 2.7382811402103465e-05, + "loss": 0.2215, + "step": 402500 + }, + { + "epoch": 2.26, + "learning_rate": 2.737719017184101e-05, + "loss": 0.2148, + "step": 402600 + }, + { + "epoch": 2.26, + "learning_rate": 2.7371568941578558e-05, + "loss": 0.211, + "step": 402700 + }, + { + "epoch": 2.26, + "learning_rate": 2.7365947711316097e-05, + "loss": 0.2123, + "step": 402800 + }, + { + "epoch": 2.26, + "learning_rate": 2.7360326481053644e-05, + "loss": 0.2193, + "step": 402900 + }, + { + "epoch": 2.27, + "learning_rate": 2.735470525079119e-05, + "loss": 0.2197, + "step": 403000 + }, + { + "epoch": 2.27, + "learning_rate": 2.7349084020528736e-05, + "loss": 0.2088, + "step": 403100 + }, + { + "epoch": 2.27, + "learning_rate": 2.73435190025689e-05, + "loss": 0.2172, + "step": 403200 + }, + { + "epoch": 2.27, + "learning_rate": 2.7337897772306447e-05, + "loss": 0.2166, + "step": 403300 + }, + { + "epoch": 2.27, + "learning_rate": 2.7332276542043993e-05, + "loss": 0.2077, + "step": 403400 + }, + { + "epoch": 2.27, + "learning_rate": 2.732665531178154e-05, + "loss": 0.2126, + "step": 403500 + }, + { + "epoch": 2.27, + "learning_rate": 2.7321034081519082e-05, + "loss": 0.2122, + "step": 403600 + }, + { + "epoch": 2.27, + "learning_rate": 2.731541285125663e-05, + "loss": 0.2212, + "step": 403700 + }, + { + "epoch": 2.27, + "learning_rate": 2.7309847833296796e-05, + "loss": 0.223, + "step": 403800 + }, + { + "epoch": 2.27, + "learning_rate": 2.7304226603034346e-05, + "loss": 0.2121, + "step": 403900 + }, + { + "epoch": 2.27, + "learning_rate": 2.7298605372771885e-05, + "loss": 0.2237, + "step": 404000 + }, + { + "epoch": 2.27, + "learning_rate": 2.729298414250943e-05, + "loss": 0.2167, + "step": 404100 + }, + { + "epoch": 2.27, + "learning_rate": 2.7287362912246978e-05, + "loss": 0.2149, + "step": 404200 + }, + { + "epoch": 2.27, + "learning_rate": 2.7281741681984517e-05, + "loss": 0.2197, + "step": 404300 + }, + { + "epoch": 2.27, + "learning_rate": 2.7276120451722064e-05, + "loss": 0.2098, + "step": 404400 + }, + { + "epoch": 2.27, + "learning_rate": 2.727049922145961e-05, + "loss": 0.2191, + "step": 404500 + }, + { + "epoch": 2.27, + "learning_rate": 2.7264877991197156e-05, + "loss": 0.2135, + "step": 404600 + }, + { + "epoch": 2.27, + "learning_rate": 2.72592567609347e-05, + "loss": 0.2198, + "step": 404700 + }, + { + "epoch": 2.28, + "learning_rate": 2.7253635530672245e-05, + "loss": 0.2134, + "step": 404800 + }, + { + "epoch": 2.28, + "learning_rate": 2.724801430040979e-05, + "loss": 0.211, + "step": 404900 + }, + { + "epoch": 2.28, + "learning_rate": 2.724239307014733e-05, + "loss": 0.2183, + "step": 405000 + }, + { + "epoch": 2.28, + "learning_rate": 2.7236771839884877e-05, + "loss": 0.2152, + "step": 405100 + }, + { + "epoch": 2.28, + "learning_rate": 2.7231150609622424e-05, + "loss": 0.2151, + "step": 405200 + }, + { + "epoch": 2.28, + "learning_rate": 2.7225529379359967e-05, + "loss": 0.2171, + "step": 405300 + }, + { + "epoch": 2.28, + "learning_rate": 2.7219908149097513e-05, + "loss": 0.2152, + "step": 405400 + }, + { + "epoch": 2.28, + "learning_rate": 2.721428691883506e-05, + "loss": 0.2222, + "step": 405500 + }, + { + "epoch": 2.28, + "learning_rate": 2.7208721900875227e-05, + "loss": 0.214, + "step": 405600 + }, + { + "epoch": 2.28, + "learning_rate": 2.720310067061277e-05, + "loss": 0.2203, + "step": 405700 + }, + { + "epoch": 2.28, + "learning_rate": 2.7197479440350316e-05, + "loss": 0.2082, + "step": 405800 + }, + { + "epoch": 2.28, + "learning_rate": 2.7191858210087862e-05, + "loss": 0.2126, + "step": 405900 + }, + { + "epoch": 2.28, + "learning_rate": 2.718623697982541e-05, + "loss": 0.2206, + "step": 406000 + }, + { + "epoch": 2.28, + "learning_rate": 2.7180615749562948e-05, + "loss": 0.2165, + "step": 406100 + }, + { + "epoch": 2.28, + "learning_rate": 2.7174994519300494e-05, + "loss": 0.2124, + "step": 406200 + }, + { + "epoch": 2.28, + "learning_rate": 2.716937328903804e-05, + "loss": 0.2226, + "step": 406300 + }, + { + "epoch": 2.28, + "learning_rate": 2.7163752058775583e-05, + "loss": 0.213, + "step": 406400 + }, + { + "epoch": 2.29, + "learning_rate": 2.715813082851313e-05, + "loss": 0.2135, + "step": 406500 + }, + { + "epoch": 2.29, + "learning_rate": 2.7152509598250676e-05, + "loss": 0.2127, + "step": 406600 + }, + { + "epoch": 2.29, + "learning_rate": 2.7146888367988222e-05, + "loss": 0.215, + "step": 406700 + }, + { + "epoch": 2.29, + "learning_rate": 2.7141267137725762e-05, + "loss": 0.2173, + "step": 406800 + }, + { + "epoch": 2.29, + "learning_rate": 2.7135645907463308e-05, + "loss": 0.2081, + "step": 406900 + }, + { + "epoch": 2.29, + "learning_rate": 2.7130024677200854e-05, + "loss": 0.2178, + "step": 407000 + }, + { + "epoch": 2.29, + "learning_rate": 2.7124403446938397e-05, + "loss": 0.2131, + "step": 407100 + }, + { + "epoch": 2.29, + "learning_rate": 2.7118782216675943e-05, + "loss": 0.2179, + "step": 407200 + }, + { + "epoch": 2.29, + "learning_rate": 2.711316098641349e-05, + "loss": 0.2188, + "step": 407300 + }, + { + "epoch": 2.29, + "learning_rate": 2.7107539756151036e-05, + "loss": 0.2201, + "step": 407400 + }, + { + "epoch": 2.29, + "learning_rate": 2.7101918525888576e-05, + "loss": 0.2104, + "step": 407500 + }, + { + "epoch": 2.29, + "learning_rate": 2.7096297295626122e-05, + "loss": 0.214, + "step": 407600 + }, + { + "epoch": 2.29, + "learning_rate": 2.7090676065363668e-05, + "loss": 0.2125, + "step": 407700 + }, + { + "epoch": 2.29, + "learning_rate": 2.7085054835101208e-05, + "loss": 0.216, + "step": 407800 + }, + { + "epoch": 2.29, + "learning_rate": 2.7079433604838754e-05, + "loss": 0.2122, + "step": 407900 + }, + { + "epoch": 2.29, + "learning_rate": 2.70738123745763e-05, + "loss": 0.217, + "step": 408000 + }, + { + "epoch": 2.29, + "learning_rate": 2.7068191144313846e-05, + "loss": 0.2086, + "step": 408100 + }, + { + "epoch": 2.29, + "learning_rate": 2.706256991405139e-05, + "loss": 0.2128, + "step": 408200 + }, + { + "epoch": 2.3, + "learning_rate": 2.7056948683788936e-05, + "loss": 0.2125, + "step": 408300 + }, + { + "epoch": 2.3, + "learning_rate": 2.7051327453526482e-05, + "loss": 0.2125, + "step": 408400 + }, + { + "epoch": 2.3, + "learning_rate": 2.704570622326402e-05, + "loss": 0.2064, + "step": 408500 + }, + { + "epoch": 2.3, + "learning_rate": 2.7040084993001568e-05, + "loss": 0.2195, + "step": 408600 + }, + { + "epoch": 2.3, + "learning_rate": 2.7034463762739114e-05, + "loss": 0.2096, + "step": 408700 + }, + { + "epoch": 2.3, + "learning_rate": 2.702884253247666e-05, + "loss": 0.2177, + "step": 408800 + }, + { + "epoch": 2.3, + "learning_rate": 2.7023221302214203e-05, + "loss": 0.2163, + "step": 408900 + }, + { + "epoch": 2.3, + "learning_rate": 2.701760007195175e-05, + "loss": 0.2121, + "step": 409000 + }, + { + "epoch": 2.3, + "learning_rate": 2.7011978841689296e-05, + "loss": 0.2144, + "step": 409100 + }, + { + "epoch": 2.3, + "learning_rate": 2.7006357611426835e-05, + "loss": 0.2207, + "step": 409200 + }, + { + "epoch": 2.3, + "learning_rate": 2.700073638116438e-05, + "loss": 0.2121, + "step": 409300 + }, + { + "epoch": 2.3, + "learning_rate": 2.6995115150901928e-05, + "loss": 0.2225, + "step": 409400 + }, + { + "epoch": 2.3, + "learning_rate": 2.6989493920639474e-05, + "loss": 0.2197, + "step": 409500 + }, + { + "epoch": 2.3, + "learning_rate": 2.6983872690377017e-05, + "loss": 0.2183, + "step": 409600 + }, + { + "epoch": 2.3, + "learning_rate": 2.6978251460114563e-05, + "loss": 0.2154, + "step": 409700 + }, + { + "epoch": 2.3, + "learning_rate": 2.697263022985211e-05, + "loss": 0.2059, + "step": 409800 + }, + { + "epoch": 2.3, + "learning_rate": 2.696700899958965e-05, + "loss": 0.2181, + "step": 409900 + }, + { + "epoch": 2.3, + "learning_rate": 2.6961387769327195e-05, + "loss": 0.2244, + "step": 410000 + }, + { + "epoch": 2.31, + "learning_rate": 2.695576653906474e-05, + "loss": 0.2126, + "step": 410100 + }, + { + "epoch": 2.31, + "learning_rate": 2.6950145308802288e-05, + "loss": 0.2108, + "step": 410200 + }, + { + "epoch": 2.31, + "learning_rate": 2.694452407853983e-05, + "loss": 0.2085, + "step": 410300 + }, + { + "epoch": 2.31, + "learning_rate": 2.6938902848277377e-05, + "loss": 0.2142, + "step": 410400 + }, + { + "epoch": 2.31, + "learning_rate": 2.6933281618014923e-05, + "loss": 0.2177, + "step": 410500 + }, + { + "epoch": 2.31, + "learning_rate": 2.6927660387752463e-05, + "loss": 0.2103, + "step": 410600 + }, + { + "epoch": 2.31, + "learning_rate": 2.6922095369792634e-05, + "loss": 0.2139, + "step": 410700 + }, + { + "epoch": 2.31, + "learning_rate": 2.691647413953018e-05, + "loss": 0.2145, + "step": 410800 + }, + { + "epoch": 2.31, + "learning_rate": 2.6910852909267726e-05, + "loss": 0.2152, + "step": 410900 + }, + { + "epoch": 2.31, + "learning_rate": 2.6905231679005266e-05, + "loss": 0.2085, + "step": 411000 + }, + { + "epoch": 2.31, + "learning_rate": 2.6899610448742812e-05, + "loss": 0.2183, + "step": 411100 + }, + { + "epoch": 2.31, + "learning_rate": 2.6893989218480358e-05, + "loss": 0.208, + "step": 411200 + }, + { + "epoch": 2.31, + "learning_rate": 2.6888367988217905e-05, + "loss": 0.2112, + "step": 411300 + }, + { + "epoch": 2.31, + "learning_rate": 2.6882746757955447e-05, + "loss": 0.2145, + "step": 411400 + }, + { + "epoch": 2.31, + "learning_rate": 2.6877125527692994e-05, + "loss": 0.2165, + "step": 411500 + }, + { + "epoch": 2.31, + "learning_rate": 2.687150429743054e-05, + "loss": 0.2143, + "step": 411600 + }, + { + "epoch": 2.31, + "learning_rate": 2.686588306716808e-05, + "loss": 0.2118, + "step": 411700 + }, + { + "epoch": 2.31, + "learning_rate": 2.6860261836905626e-05, + "loss": 0.2121, + "step": 411800 + }, + { + "epoch": 2.32, + "learning_rate": 2.6854640606643172e-05, + "loss": 0.2218, + "step": 411900 + }, + { + "epoch": 2.32, + "learning_rate": 2.684901937638072e-05, + "loss": 0.2135, + "step": 412000 + }, + { + "epoch": 2.32, + "learning_rate": 2.6843398146118258e-05, + "loss": 0.2158, + "step": 412100 + }, + { + "epoch": 2.32, + "learning_rate": 2.6837776915855804e-05, + "loss": 0.2182, + "step": 412200 + }, + { + "epoch": 2.32, + "learning_rate": 2.683215568559335e-05, + "loss": 0.2178, + "step": 412300 + }, + { + "epoch": 2.32, + "learning_rate": 2.6826534455330893e-05, + "loss": 0.2121, + "step": 412400 + }, + { + "epoch": 2.32, + "learning_rate": 2.682091322506844e-05, + "loss": 0.2078, + "step": 412500 + }, + { + "epoch": 2.32, + "learning_rate": 2.6815291994805986e-05, + "loss": 0.2153, + "step": 412600 + }, + { + "epoch": 2.32, + "learning_rate": 2.6809670764543532e-05, + "loss": 0.2121, + "step": 412700 + }, + { + "epoch": 2.32, + "learning_rate": 2.680404953428107e-05, + "loss": 0.2187, + "step": 412800 + }, + { + "epoch": 2.32, + "learning_rate": 2.6798428304018618e-05, + "loss": 0.2111, + "step": 412900 + }, + { + "epoch": 2.32, + "learning_rate": 2.6792807073756164e-05, + "loss": 0.2124, + "step": 413000 + }, + { + "epoch": 2.32, + "learning_rate": 2.6787185843493707e-05, + "loss": 0.2111, + "step": 413100 + }, + { + "epoch": 2.32, + "learning_rate": 2.6781564613231253e-05, + "loss": 0.2111, + "step": 413200 + }, + { + "epoch": 2.32, + "learning_rate": 2.67759433829688e-05, + "loss": 0.2122, + "step": 413300 + }, + { + "epoch": 2.32, + "learning_rate": 2.6770322152706346e-05, + "loss": 0.2128, + "step": 413400 + }, + { + "epoch": 2.32, + "learning_rate": 2.6764700922443885e-05, + "loss": 0.2165, + "step": 413500 + }, + { + "epoch": 2.32, + "learning_rate": 2.675907969218143e-05, + "loss": 0.221, + "step": 413600 + }, + { + "epoch": 2.33, + "learning_rate": 2.6753458461918978e-05, + "loss": 0.2112, + "step": 413700 + }, + { + "epoch": 2.33, + "learning_rate": 2.674783723165652e-05, + "loss": 0.2197, + "step": 413800 + }, + { + "epoch": 2.33, + "learning_rate": 2.6742216001394067e-05, + "loss": 0.2129, + "step": 413900 + }, + { + "epoch": 2.33, + "learning_rate": 2.6736594771131613e-05, + "loss": 0.2117, + "step": 414000 + }, + { + "epoch": 2.33, + "learning_rate": 2.673097354086916e-05, + "loss": 0.2108, + "step": 414100 + }, + { + "epoch": 2.33, + "learning_rate": 2.67253523106067e-05, + "loss": 0.2139, + "step": 414200 + }, + { + "epoch": 2.33, + "learning_rate": 2.6719731080344245e-05, + "loss": 0.2191, + "step": 414300 + }, + { + "epoch": 2.33, + "learning_rate": 2.6714109850081792e-05, + "loss": 0.2188, + "step": 414400 + }, + { + "epoch": 2.33, + "learning_rate": 2.670848861981933e-05, + "loss": 0.2142, + "step": 414500 + }, + { + "epoch": 2.33, + "learning_rate": 2.670286738955688e-05, + "loss": 0.2156, + "step": 414600 + }, + { + "epoch": 2.33, + "learning_rate": 2.6697246159294427e-05, + "loss": 0.2139, + "step": 414700 + }, + { + "epoch": 2.33, + "learning_rate": 2.6691624929031973e-05, + "loss": 0.2127, + "step": 414800 + }, + { + "epoch": 2.33, + "learning_rate": 2.6686003698769513e-05, + "loss": 0.2143, + "step": 414900 + }, + { + "epoch": 2.33, + "learning_rate": 2.668038246850706e-05, + "loss": 0.2165, + "step": 415000 + }, + { + "epoch": 2.33, + "learning_rate": 2.6674761238244606e-05, + "loss": 0.2116, + "step": 415100 + }, + { + "epoch": 2.33, + "learning_rate": 2.6669140007982145e-05, + "loss": 0.2133, + "step": 415200 + }, + { + "epoch": 2.33, + "learning_rate": 2.666351877771969e-05, + "loss": 0.2116, + "step": 415300 + }, + { + "epoch": 2.34, + "learning_rate": 2.6657897547457238e-05, + "loss": 0.2068, + "step": 415400 + }, + { + "epoch": 2.34, + "learning_rate": 2.665227631719478e-05, + "loss": 0.2134, + "step": 415500 + }, + { + "epoch": 2.34, + "learning_rate": 2.6646655086932327e-05, + "loss": 0.2134, + "step": 415600 + }, + { + "epoch": 2.34, + "learning_rate": 2.6641033856669873e-05, + "loss": 0.2127, + "step": 415700 + }, + { + "epoch": 2.34, + "learning_rate": 2.663541262640742e-05, + "loss": 0.2182, + "step": 415800 + }, + { + "epoch": 2.34, + "learning_rate": 2.6629847608447584e-05, + "loss": 0.213, + "step": 415900 + }, + { + "epoch": 2.34, + "learning_rate": 2.662422637818513e-05, + "loss": 0.2113, + "step": 416000 + }, + { + "epoch": 2.34, + "learning_rate": 2.6618605147922676e-05, + "loss": 0.2174, + "step": 416100 + }, + { + "epoch": 2.34, + "learning_rate": 2.6612983917660222e-05, + "loss": 0.2136, + "step": 416200 + }, + { + "epoch": 2.34, + "learning_rate": 2.6607362687397762e-05, + "loss": 0.2086, + "step": 416300 + }, + { + "epoch": 2.34, + "learning_rate": 2.6601741457135308e-05, + "loss": 0.2161, + "step": 416400 + }, + { + "epoch": 2.34, + "learning_rate": 2.6596120226872854e-05, + "loss": 0.2163, + "step": 416500 + }, + { + "epoch": 2.34, + "learning_rate": 2.6590498996610397e-05, + "loss": 0.2183, + "step": 416600 + }, + { + "epoch": 2.34, + "learning_rate": 2.6584877766347944e-05, + "loss": 0.2112, + "step": 416700 + }, + { + "epoch": 2.34, + "learning_rate": 2.657925653608549e-05, + "loss": 0.2117, + "step": 416800 + }, + { + "epoch": 2.34, + "learning_rate": 2.6573635305823036e-05, + "loss": 0.2089, + "step": 416900 + }, + { + "epoch": 2.34, + "learning_rate": 2.6568014075560576e-05, + "loss": 0.2096, + "step": 417000 + }, + { + "epoch": 2.34, + "learning_rate": 2.6562392845298122e-05, + "loss": 0.2131, + "step": 417100 + }, + { + "epoch": 2.35, + "learning_rate": 2.6556771615035668e-05, + "loss": 0.2113, + "step": 417200 + }, + { + "epoch": 2.35, + "learning_rate": 2.655115038477321e-05, + "loss": 0.2094, + "step": 417300 + }, + { + "epoch": 2.35, + "learning_rate": 2.6545529154510757e-05, + "loss": 0.2155, + "step": 417400 + }, + { + "epoch": 2.35, + "learning_rate": 2.6539907924248304e-05, + "loss": 0.2154, + "step": 417500 + }, + { + "epoch": 2.35, + "learning_rate": 2.653428669398585e-05, + "loss": 0.2163, + "step": 417600 + }, + { + "epoch": 2.35, + "learning_rate": 2.652866546372339e-05, + "loss": 0.2082, + "step": 417700 + }, + { + "epoch": 2.35, + "learning_rate": 2.6523044233460936e-05, + "loss": 0.214, + "step": 417800 + }, + { + "epoch": 2.35, + "learning_rate": 2.6517479215501107e-05, + "loss": 0.2092, + "step": 417900 + }, + { + "epoch": 2.35, + "learning_rate": 2.6511857985238653e-05, + "loss": 0.2142, + "step": 418000 + }, + { + "epoch": 2.35, + "learning_rate": 2.6506236754976192e-05, + "loss": 0.2101, + "step": 418100 + }, + { + "epoch": 2.35, + "learning_rate": 2.650061552471374e-05, + "loss": 0.2161, + "step": 418200 + }, + { + "epoch": 2.35, + "learning_rate": 2.649505050675391e-05, + "loss": 0.2127, + "step": 418300 + }, + { + "epoch": 2.35, + "learning_rate": 2.6489429276491456e-05, + "loss": 0.2133, + "step": 418400 + }, + { + "epoch": 2.35, + "learning_rate": 2.6483808046229e-05, + "loss": 0.2177, + "step": 418500 + }, + { + "epoch": 2.35, + "learning_rate": 2.6478243028269167e-05, + "loss": 0.2102, + "step": 418600 + }, + { + "epoch": 2.35, + "learning_rate": 2.6472621798006713e-05, + "loss": 0.2136, + "step": 418700 + }, + { + "epoch": 2.35, + "learning_rate": 2.646700056774426e-05, + "loss": 0.2111, + "step": 418800 + }, + { + "epoch": 2.35, + "learning_rate": 2.6461379337481802e-05, + "loss": 0.2163, + "step": 418900 + }, + { + "epoch": 2.36, + "learning_rate": 2.6455758107219348e-05, + "loss": 0.2114, + "step": 419000 + }, + { + "epoch": 2.36, + "learning_rate": 2.6450136876956895e-05, + "loss": 0.2152, + "step": 419100 + }, + { + "epoch": 2.36, + "learning_rate": 2.6444515646694434e-05, + "loss": 0.2108, + "step": 419200 + }, + { + "epoch": 2.36, + "learning_rate": 2.643889441643198e-05, + "loss": 0.2163, + "step": 419300 + }, + { + "epoch": 2.36, + "learning_rate": 2.6433273186169527e-05, + "loss": 0.2108, + "step": 419400 + }, + { + "epoch": 2.36, + "learning_rate": 2.6427651955907073e-05, + "loss": 0.2172, + "step": 419500 + }, + { + "epoch": 2.36, + "learning_rate": 2.6422030725644616e-05, + "loss": 0.2168, + "step": 419600 + }, + { + "epoch": 2.36, + "learning_rate": 2.6416409495382162e-05, + "loss": 0.2169, + "step": 419700 + }, + { + "epoch": 2.36, + "learning_rate": 2.641078826511971e-05, + "loss": 0.2159, + "step": 419800 + }, + { + "epoch": 2.36, + "learning_rate": 2.6405167034857248e-05, + "loss": 0.2135, + "step": 419900 + }, + { + "epoch": 2.36, + "learning_rate": 2.6399545804594794e-05, + "loss": 0.2092, + "step": 420000 + }, + { + "epoch": 2.36, + "learning_rate": 2.639392457433234e-05, + "loss": 0.2201, + "step": 420100 + }, + { + "epoch": 2.36, + "learning_rate": 2.6388303344069887e-05, + "loss": 0.2175, + "step": 420200 + }, + { + "epoch": 2.36, + "learning_rate": 2.6382682113807426e-05, + "loss": 0.2161, + "step": 420300 + }, + { + "epoch": 2.36, + "learning_rate": 2.6377060883544972e-05, + "loss": 0.2119, + "step": 420400 + }, + { + "epoch": 2.36, + "learning_rate": 2.637143965328252e-05, + "loss": 0.2207, + "step": 420500 + }, + { + "epoch": 2.36, + "learning_rate": 2.636587463532269e-05, + "loss": 0.2097, + "step": 420600 + }, + { + "epoch": 2.36, + "learning_rate": 2.6360253405060233e-05, + "loss": 0.2108, + "step": 420700 + }, + { + "epoch": 2.37, + "learning_rate": 2.635463217479778e-05, + "loss": 0.209, + "step": 420800 + }, + { + "epoch": 2.37, + "learning_rate": 2.6349010944535325e-05, + "loss": 0.2138, + "step": 420900 + }, + { + "epoch": 2.37, + "learning_rate": 2.6343389714272865e-05, + "loss": 0.2094, + "step": 421000 + }, + { + "epoch": 2.37, + "learning_rate": 2.633776848401041e-05, + "loss": 0.2153, + "step": 421100 + }, + { + "epoch": 2.37, + "learning_rate": 2.6332147253747957e-05, + "loss": 0.2149, + "step": 421200 + }, + { + "epoch": 2.37, + "learning_rate": 2.6326526023485504e-05, + "loss": 0.2168, + "step": 421300 + }, + { + "epoch": 2.37, + "learning_rate": 2.6320904793223043e-05, + "loss": 0.2221, + "step": 421400 + }, + { + "epoch": 2.37, + "learning_rate": 2.631528356296059e-05, + "loss": 0.2102, + "step": 421500 + }, + { + "epoch": 2.37, + "learning_rate": 2.630966233269814e-05, + "loss": 0.211, + "step": 421600 + }, + { + "epoch": 2.37, + "learning_rate": 2.630404110243568e-05, + "loss": 0.2143, + "step": 421700 + }, + { + "epoch": 2.37, + "learning_rate": 2.6298419872173225e-05, + "loss": 0.2175, + "step": 421800 + }, + { + "epoch": 2.37, + "learning_rate": 2.629279864191077e-05, + "loss": 0.2092, + "step": 421900 + }, + { + "epoch": 2.37, + "learning_rate": 2.6287177411648317e-05, + "loss": 0.2174, + "step": 422000 + }, + { + "epoch": 2.37, + "learning_rate": 2.6281556181385857e-05, + "loss": 0.2119, + "step": 422100 + }, + { + "epoch": 2.37, + "learning_rate": 2.6275934951123403e-05, + "loss": 0.2127, + "step": 422200 + }, + { + "epoch": 2.37, + "learning_rate": 2.627031372086095e-05, + "loss": 0.2185, + "step": 422300 + }, + { + "epoch": 2.37, + "learning_rate": 2.6264692490598492e-05, + "loss": 0.2148, + "step": 422400 + }, + { + "epoch": 2.37, + "learning_rate": 2.625907126033604e-05, + "loss": 0.2207, + "step": 422500 + }, + { + "epoch": 2.38, + "learning_rate": 2.6253450030073585e-05, + "loss": 0.2181, + "step": 422600 + }, + { + "epoch": 2.38, + "learning_rate": 2.624782879981113e-05, + "loss": 0.2091, + "step": 422700 + }, + { + "epoch": 2.38, + "learning_rate": 2.624220756954867e-05, + "loss": 0.2185, + "step": 422800 + }, + { + "epoch": 2.38, + "learning_rate": 2.6236586339286217e-05, + "loss": 0.2104, + "step": 422900 + }, + { + "epoch": 2.38, + "learning_rate": 2.6230965109023763e-05, + "loss": 0.2179, + "step": 423000 + }, + { + "epoch": 2.38, + "learning_rate": 2.6225343878761306e-05, + "loss": 0.2167, + "step": 423100 + }, + { + "epoch": 2.38, + "learning_rate": 2.6219722648498852e-05, + "loss": 0.2108, + "step": 423200 + }, + { + "epoch": 2.38, + "learning_rate": 2.62141014182364e-05, + "loss": 0.2139, + "step": 423300 + }, + { + "epoch": 2.38, + "learning_rate": 2.6208480187973945e-05, + "loss": 0.2111, + "step": 423400 + }, + { + "epoch": 2.38, + "learning_rate": 2.6202858957711484e-05, + "loss": 0.2115, + "step": 423500 + }, + { + "epoch": 2.38, + "learning_rate": 2.619723772744903e-05, + "loss": 0.214, + "step": 423600 + }, + { + "epoch": 2.38, + "learning_rate": 2.6191616497186577e-05, + "loss": 0.2199, + "step": 423700 + }, + { + "epoch": 2.38, + "learning_rate": 2.618599526692412e-05, + "loss": 0.2161, + "step": 423800 + }, + { + "epoch": 2.38, + "learning_rate": 2.6180374036661666e-05, + "loss": 0.2102, + "step": 423900 + }, + { + "epoch": 2.38, + "learning_rate": 2.6174752806399212e-05, + "loss": 0.2114, + "step": 424000 + }, + { + "epoch": 2.38, + "learning_rate": 2.616913157613676e-05, + "loss": 0.2092, + "step": 424100 + }, + { + "epoch": 2.38, + "learning_rate": 2.6163510345874298e-05, + "loss": 0.2069, + "step": 424200 + }, + { + "epoch": 2.39, + "learning_rate": 2.6157889115611844e-05, + "loss": 0.215, + "step": 424300 + }, + { + "epoch": 2.39, + "learning_rate": 2.615226788534939e-05, + "loss": 0.213, + "step": 424400 + }, + { + "epoch": 2.39, + "learning_rate": 2.614664665508693e-05, + "loss": 0.2124, + "step": 424500 + }, + { + "epoch": 2.39, + "learning_rate": 2.6141025424824476e-05, + "loss": 0.2131, + "step": 424600 + }, + { + "epoch": 2.39, + "learning_rate": 2.6135404194562023e-05, + "loss": 0.2071, + "step": 424700 + }, + { + "epoch": 2.39, + "learning_rate": 2.612978296429957e-05, + "loss": 0.2192, + "step": 424800 + }, + { + "epoch": 2.39, + "learning_rate": 2.6124161734037112e-05, + "loss": 0.2154, + "step": 424900 + }, + { + "epoch": 2.39, + "learning_rate": 2.6118540503774658e-05, + "loss": 0.2207, + "step": 425000 + }, + { + "epoch": 2.39, + "learning_rate": 2.6112919273512204e-05, + "loss": 0.2137, + "step": 425100 + }, + { + "epoch": 2.39, + "learning_rate": 2.6107298043249744e-05, + "loss": 0.2103, + "step": 425200 + }, + { + "epoch": 2.39, + "learning_rate": 2.610167681298729e-05, + "loss": 0.2124, + "step": 425300 + }, + { + "epoch": 2.39, + "learning_rate": 2.6096055582724837e-05, + "loss": 0.2115, + "step": 425400 + }, + { + "epoch": 2.39, + "learning_rate": 2.6090490564765008e-05, + "loss": 0.2124, + "step": 425500 + }, + { + "epoch": 2.39, + "learning_rate": 2.6084869334502547e-05, + "loss": 0.2111, + "step": 425600 + }, + { + "epoch": 2.39, + "learning_rate": 2.6079248104240093e-05, + "loss": 0.2147, + "step": 425700 + }, + { + "epoch": 2.39, + "learning_rate": 2.607362687397764e-05, + "loss": 0.213, + "step": 425800 + }, + { + "epoch": 2.39, + "learning_rate": 2.6068005643715182e-05, + "loss": 0.215, + "step": 425900 + }, + { + "epoch": 2.39, + "learning_rate": 2.606238441345273e-05, + "loss": 0.2121, + "step": 426000 + }, + { + "epoch": 2.4, + "learning_rate": 2.6056763183190275e-05, + "loss": 0.2155, + "step": 426100 + }, + { + "epoch": 2.4, + "learning_rate": 2.605114195292782e-05, + "loss": 0.2136, + "step": 426200 + }, + { + "epoch": 2.4, + "learning_rate": 2.604552072266536e-05, + "loss": 0.213, + "step": 426300 + }, + { + "epoch": 2.4, + "learning_rate": 2.6039899492402907e-05, + "loss": 0.221, + "step": 426400 + }, + { + "epoch": 2.4, + "learning_rate": 2.6034278262140453e-05, + "loss": 0.2201, + "step": 426500 + }, + { + "epoch": 2.4, + "learning_rate": 2.6028657031877996e-05, + "loss": 0.2102, + "step": 426600 + }, + { + "epoch": 2.4, + "learning_rate": 2.6023092013918164e-05, + "loss": 0.2136, + "step": 426700 + }, + { + "epoch": 2.4, + "learning_rate": 2.6017470783655714e-05, + "loss": 0.2098, + "step": 426800 + }, + { + "epoch": 2.4, + "learning_rate": 2.601184955339326e-05, + "loss": 0.2144, + "step": 426900 + }, + { + "epoch": 2.4, + "learning_rate": 2.60062283231308e-05, + "loss": 0.2123, + "step": 427000 + }, + { + "epoch": 2.4, + "learning_rate": 2.6000607092868346e-05, + "loss": 0.2126, + "step": 427100 + }, + { + "epoch": 2.4, + "learning_rate": 2.5994985862605892e-05, + "loss": 0.2108, + "step": 427200 + }, + { + "epoch": 2.4, + "learning_rate": 2.5989364632343438e-05, + "loss": 0.2175, + "step": 427300 + }, + { + "epoch": 2.4, + "learning_rate": 2.5983743402080978e-05, + "loss": 0.2152, + "step": 427400 + }, + { + "epoch": 2.4, + "learning_rate": 2.5978122171818524e-05, + "loss": 0.211, + "step": 427500 + }, + { + "epoch": 2.4, + "learning_rate": 2.597250094155607e-05, + "loss": 0.2168, + "step": 427600 + }, + { + "epoch": 2.4, + "learning_rate": 2.5966879711293613e-05, + "loss": 0.21, + "step": 427700 + }, + { + "epoch": 2.4, + "learning_rate": 2.596125848103116e-05, + "loss": 0.2138, + "step": 427800 + }, + { + "epoch": 2.41, + "learning_rate": 2.5955637250768706e-05, + "loss": 0.2135, + "step": 427900 + }, + { + "epoch": 2.41, + "learning_rate": 2.5950016020506252e-05, + "loss": 0.2063, + "step": 428000 + }, + { + "epoch": 2.41, + "learning_rate": 2.594439479024379e-05, + "loss": 0.216, + "step": 428100 + }, + { + "epoch": 2.41, + "learning_rate": 2.5938773559981338e-05, + "loss": 0.2158, + "step": 428200 + }, + { + "epoch": 2.41, + "learning_rate": 2.5933152329718884e-05, + "loss": 0.2132, + "step": 428300 + }, + { + "epoch": 2.41, + "learning_rate": 2.5927531099456427e-05, + "loss": 0.2159, + "step": 428400 + }, + { + "epoch": 2.41, + "learning_rate": 2.5921909869193973e-05, + "loss": 0.2136, + "step": 428500 + }, + { + "epoch": 2.41, + "learning_rate": 2.591628863893152e-05, + "loss": 0.2123, + "step": 428600 + }, + { + "epoch": 2.41, + "learning_rate": 2.5910667408669066e-05, + "loss": 0.2079, + "step": 428700 + }, + { + "epoch": 2.41, + "learning_rate": 2.5905046178406605e-05, + "loss": 0.2135, + "step": 428800 + }, + { + "epoch": 2.41, + "learning_rate": 2.589942494814415e-05, + "loss": 0.2109, + "step": 428900 + }, + { + "epoch": 2.41, + "learning_rate": 2.5893803717881698e-05, + "loss": 0.2156, + "step": 429000 + }, + { + "epoch": 2.41, + "learning_rate": 2.588823869992187e-05, + "loss": 0.2174, + "step": 429100 + }, + { + "epoch": 2.41, + "learning_rate": 2.5882617469659408e-05, + "loss": 0.2182, + "step": 429200 + }, + { + "epoch": 2.41, + "learning_rate": 2.5876996239396955e-05, + "loss": 0.2178, + "step": 429300 + }, + { + "epoch": 2.41, + "learning_rate": 2.58713750091345e-05, + "loss": 0.2073, + "step": 429400 + }, + { + "epoch": 2.41, + "learning_rate": 2.5865753778872044e-05, + "loss": 0.2097, + "step": 429500 + }, + { + "epoch": 2.41, + "learning_rate": 2.586013254860959e-05, + "loss": 0.2133, + "step": 429600 + }, + { + "epoch": 2.42, + "learning_rate": 2.5854511318347136e-05, + "loss": 0.2083, + "step": 429700 + }, + { + "epoch": 2.42, + "learning_rate": 2.5848890088084683e-05, + "loss": 0.211, + "step": 429800 + }, + { + "epoch": 2.42, + "learning_rate": 2.5843268857822222e-05, + "loss": 0.2082, + "step": 429900 + }, + { + "epoch": 2.42, + "learning_rate": 2.583764762755977e-05, + "loss": 0.2185, + "step": 430000 + }, + { + "epoch": 2.42, + "learning_rate": 2.5832026397297315e-05, + "loss": 0.2151, + "step": 430100 + }, + { + "epoch": 2.42, + "learning_rate": 2.5826405167034857e-05, + "loss": 0.2117, + "step": 430200 + }, + { + "epoch": 2.42, + "learning_rate": 2.5820783936772404e-05, + "loss": 0.2115, + "step": 430300 + }, + { + "epoch": 2.42, + "learning_rate": 2.581516270650995e-05, + "loss": 0.2097, + "step": 430400 + }, + { + "epoch": 2.42, + "learning_rate": 2.5809541476247496e-05, + "loss": 0.2125, + "step": 430500 + }, + { + "epoch": 2.42, + "learning_rate": 2.5803920245985036e-05, + "loss": 0.2158, + "step": 430600 + }, + { + "epoch": 2.42, + "learning_rate": 2.5798299015722582e-05, + "loss": 0.213, + "step": 430700 + }, + { + "epoch": 2.42, + "learning_rate": 2.579267778546013e-05, + "loss": 0.2136, + "step": 430800 + }, + { + "epoch": 2.42, + "learning_rate": 2.5787056555197668e-05, + "loss": 0.2113, + "step": 430900 + }, + { + "epoch": 2.42, + "learning_rate": 2.5781435324935214e-05, + "loss": 0.2058, + "step": 431000 + }, + { + "epoch": 2.42, + "learning_rate": 2.5775814094672764e-05, + "loss": 0.2103, + "step": 431100 + }, + { + "epoch": 2.42, + "learning_rate": 2.577019286441031e-05, + "loss": 0.2151, + "step": 431200 + }, + { + "epoch": 2.42, + "learning_rate": 2.576457163414785e-05, + "loss": 0.2137, + "step": 431300 + }, + { + "epoch": 2.42, + "learning_rate": 2.5758950403885396e-05, + "loss": 0.2074, + "step": 431400 + }, + { + "epoch": 2.43, + "learning_rate": 2.5753329173622942e-05, + "loss": 0.2188, + "step": 431500 + }, + { + "epoch": 2.43, + "learning_rate": 2.574770794336048e-05, + "loss": 0.2134, + "step": 431600 + }, + { + "epoch": 2.43, + "learning_rate": 2.5742086713098028e-05, + "loss": 0.212, + "step": 431700 + }, + { + "epoch": 2.43, + "learning_rate": 2.5736465482835574e-05, + "loss": 0.2079, + "step": 431800 + }, + { + "epoch": 2.43, + "learning_rate": 2.573084425257312e-05, + "loss": 0.2109, + "step": 431900 + }, + { + "epoch": 2.43, + "learning_rate": 2.5725223022310663e-05, + "loss": 0.2143, + "step": 432000 + }, + { + "epoch": 2.43, + "learning_rate": 2.571960179204821e-05, + "loss": 0.2118, + "step": 432100 + }, + { + "epoch": 2.43, + "learning_rate": 2.5713980561785756e-05, + "loss": 0.2069, + "step": 432200 + }, + { + "epoch": 2.43, + "learning_rate": 2.5708359331523295e-05, + "loss": 0.2063, + "step": 432300 + }, + { + "epoch": 2.43, + "learning_rate": 2.5702738101260842e-05, + "loss": 0.2145, + "step": 432400 + }, + { + "epoch": 2.43, + "learning_rate": 2.5697116870998388e-05, + "loss": 0.211, + "step": 432500 + }, + { + "epoch": 2.43, + "learning_rate": 2.5691495640735934e-05, + "loss": 0.21, + "step": 432600 + }, + { + "epoch": 2.43, + "learning_rate": 2.5685874410473477e-05, + "loss": 0.2123, + "step": 432700 + }, + { + "epoch": 2.43, + "learning_rate": 2.5680253180211023e-05, + "loss": 0.2062, + "step": 432800 + }, + { + "epoch": 2.43, + "learning_rate": 2.567463194994857e-05, + "loss": 0.2112, + "step": 432900 + }, + { + "epoch": 2.43, + "learning_rate": 2.566901071968611e-05, + "loss": 0.2086, + "step": 433000 + }, + { + "epoch": 2.43, + "learning_rate": 2.5663389489423655e-05, + "loss": 0.2089, + "step": 433100 + }, + { + "epoch": 2.44, + "learning_rate": 2.5657768259161202e-05, + "loss": 0.2149, + "step": 433200 + }, + { + "epoch": 2.44, + "learning_rate": 2.5652147028898748e-05, + "loss": 0.2136, + "step": 433300 + }, + { + "epoch": 2.44, + "learning_rate": 2.564652579863629e-05, + "loss": 0.2177, + "step": 433400 + }, + { + "epoch": 2.44, + "learning_rate": 2.5640904568373837e-05, + "loss": 0.2167, + "step": 433500 + }, + { + "epoch": 2.44, + "learning_rate": 2.5635283338111383e-05, + "loss": 0.2186, + "step": 433600 + }, + { + "epoch": 2.44, + "learning_rate": 2.5629662107848923e-05, + "loss": 0.2095, + "step": 433700 + }, + { + "epoch": 2.44, + "learning_rate": 2.562404087758647e-05, + "loss": 0.2103, + "step": 433800 + }, + { + "epoch": 2.44, + "learning_rate": 2.5618419647324016e-05, + "loss": 0.2035, + "step": 433900 + }, + { + "epoch": 2.44, + "learning_rate": 2.5612854629364187e-05, + "loss": 0.2128, + "step": 434000 + }, + { + "epoch": 2.44, + "learning_rate": 2.5607233399101726e-05, + "loss": 0.2166, + "step": 434100 + }, + { + "epoch": 2.44, + "learning_rate": 2.5601612168839272e-05, + "loss": 0.2129, + "step": 434200 + }, + { + "epoch": 2.44, + "learning_rate": 2.559599093857682e-05, + "loss": 0.2079, + "step": 434300 + }, + { + "epoch": 2.44, + "learning_rate": 2.5590369708314365e-05, + "loss": 0.2145, + "step": 434400 + }, + { + "epoch": 2.44, + "learning_rate": 2.5584748478051908e-05, + "loss": 0.2121, + "step": 434500 + }, + { + "epoch": 2.44, + "learning_rate": 2.5579127247789454e-05, + "loss": 0.2081, + "step": 434600 + }, + { + "epoch": 2.44, + "learning_rate": 2.5573506017527e-05, + "loss": 0.2219, + "step": 434700 + }, + { + "epoch": 2.44, + "learning_rate": 2.556788478726454e-05, + "loss": 0.2162, + "step": 434800 + }, + { + "epoch": 2.44, + "learning_rate": 2.5562263557002086e-05, + "loss": 0.2135, + "step": 434900 + }, + { + "epoch": 2.45, + "learning_rate": 2.5556642326739632e-05, + "loss": 0.2173, + "step": 435000 + }, + { + "epoch": 2.45, + "learning_rate": 2.5551021096477172e-05, + "loss": 0.2135, + "step": 435100 + }, + { + "epoch": 2.45, + "learning_rate": 2.5545399866214718e-05, + "loss": 0.2091, + "step": 435200 + }, + { + "epoch": 2.45, + "learning_rate": 2.5539778635952268e-05, + "loss": 0.2125, + "step": 435300 + }, + { + "epoch": 2.45, + "learning_rate": 2.5534213617992435e-05, + "loss": 0.2141, + "step": 435400 + }, + { + "epoch": 2.45, + "learning_rate": 2.552859238772998e-05, + "loss": 0.2134, + "step": 435500 + }, + { + "epoch": 2.45, + "learning_rate": 2.5522971157467525e-05, + "loss": 0.2088, + "step": 435600 + }, + { + "epoch": 2.45, + "learning_rate": 2.551734992720507e-05, + "loss": 0.2143, + "step": 435700 + }, + { + "epoch": 2.45, + "learning_rate": 2.5511728696942617e-05, + "loss": 0.2148, + "step": 435800 + }, + { + "epoch": 2.45, + "learning_rate": 2.5506107466680157e-05, + "loss": 0.2131, + "step": 435900 + }, + { + "epoch": 2.45, + "learning_rate": 2.5500486236417703e-05, + "loss": 0.2076, + "step": 436000 + }, + { + "epoch": 2.45, + "learning_rate": 2.549486500615525e-05, + "loss": 0.215, + "step": 436100 + }, + { + "epoch": 2.45, + "learning_rate": 2.5489243775892792e-05, + "loss": 0.2104, + "step": 436200 + }, + { + "epoch": 2.45, + "learning_rate": 2.548362254563034e-05, + "loss": 0.2215, + "step": 436300 + }, + { + "epoch": 2.45, + "learning_rate": 2.5478001315367885e-05, + "loss": 0.2099, + "step": 436400 + }, + { + "epoch": 2.45, + "learning_rate": 2.547238008510543e-05, + "loss": 0.2108, + "step": 436500 + }, + { + "epoch": 2.45, + "learning_rate": 2.546675885484297e-05, + "loss": 0.216, + "step": 436600 + }, + { + "epoch": 2.45, + "learning_rate": 2.5461137624580517e-05, + "loss": 0.2143, + "step": 436700 + }, + { + "epoch": 2.46, + "learning_rate": 2.5455516394318063e-05, + "loss": 0.2187, + "step": 436800 + }, + { + "epoch": 2.46, + "learning_rate": 2.5449895164055603e-05, + "loss": 0.2049, + "step": 436900 + }, + { + "epoch": 2.46, + "learning_rate": 2.5444330146095774e-05, + "loss": 0.2059, + "step": 437000 + }, + { + "epoch": 2.46, + "learning_rate": 2.543870891583332e-05, + "loss": 0.2156, + "step": 437100 + }, + { + "epoch": 2.46, + "learning_rate": 2.5433087685570866e-05, + "loss": 0.2104, + "step": 437200 + }, + { + "epoch": 2.46, + "learning_rate": 2.542746645530841e-05, + "loss": 0.2164, + "step": 437300 + }, + { + "epoch": 2.46, + "learning_rate": 2.5421845225045955e-05, + "loss": 0.2079, + "step": 437400 + }, + { + "epoch": 2.46, + "learning_rate": 2.54162239947835e-05, + "loss": 0.216, + "step": 437500 + }, + { + "epoch": 2.46, + "learning_rate": 2.5410602764521048e-05, + "loss": 0.2103, + "step": 437600 + }, + { + "epoch": 2.46, + "learning_rate": 2.5404981534258587e-05, + "loss": 0.2123, + "step": 437700 + }, + { + "epoch": 2.46, + "learning_rate": 2.5399360303996134e-05, + "loss": 0.2125, + "step": 437800 + }, + { + "epoch": 2.46, + "learning_rate": 2.539373907373368e-05, + "loss": 0.2079, + "step": 437900 + }, + { + "epoch": 2.46, + "learning_rate": 2.538811784347122e-05, + "loss": 0.2063, + "step": 438000 + }, + { + "epoch": 2.46, + "learning_rate": 2.5382496613208766e-05, + "loss": 0.2094, + "step": 438100 + }, + { + "epoch": 2.46, + "learning_rate": 2.5376875382946312e-05, + "loss": 0.2118, + "step": 438200 + }, + { + "epoch": 2.46, + "learning_rate": 2.5371254152683858e-05, + "loss": 0.2137, + "step": 438300 + }, + { + "epoch": 2.46, + "learning_rate": 2.53656329224214e-05, + "loss": 0.2109, + "step": 438400 + }, + { + "epoch": 2.46, + "learning_rate": 2.5360011692158947e-05, + "loss": 0.2044, + "step": 438500 + }, + { + "epoch": 2.47, + "learning_rate": 2.5354390461896494e-05, + "loss": 0.2098, + "step": 438600 + }, + { + "epoch": 2.47, + "learning_rate": 2.5348769231634033e-05, + "loss": 0.2139, + "step": 438700 + }, + { + "epoch": 2.47, + "learning_rate": 2.534314800137158e-05, + "loss": 0.2104, + "step": 438800 + }, + { + "epoch": 2.47, + "learning_rate": 2.5337526771109126e-05, + "loss": 0.212, + "step": 438900 + }, + { + "epoch": 2.47, + "learning_rate": 2.5331905540846672e-05, + "loss": 0.2077, + "step": 439000 + }, + { + "epoch": 2.47, + "learning_rate": 2.5326284310584215e-05, + "loss": 0.2135, + "step": 439100 + }, + { + "epoch": 2.47, + "learning_rate": 2.532066308032176e-05, + "loss": 0.2062, + "step": 439200 + }, + { + "epoch": 2.47, + "learning_rate": 2.5315041850059307e-05, + "loss": 0.2138, + "step": 439300 + }, + { + "epoch": 2.47, + "learning_rate": 2.5309420619796847e-05, + "loss": 0.2164, + "step": 439400 + }, + { + "epoch": 2.47, + "learning_rate": 2.5303799389534393e-05, + "loss": 0.2085, + "step": 439500 + }, + { + "epoch": 2.47, + "learning_rate": 2.529817815927194e-05, + "loss": 0.2197, + "step": 439600 + }, + { + "epoch": 2.47, + "learning_rate": 2.5292556929009486e-05, + "loss": 0.2154, + "step": 439700 + }, + { + "epoch": 2.47, + "learning_rate": 2.528693569874703e-05, + "loss": 0.202, + "step": 439800 + }, + { + "epoch": 2.47, + "learning_rate": 2.5281314468484575e-05, + "loss": 0.2125, + "step": 439900 + }, + { + "epoch": 2.47, + "learning_rate": 2.527569323822212e-05, + "loss": 0.2094, + "step": 440000 + }, + { + "epoch": 2.47, + "learning_rate": 2.527007200795966e-05, + "loss": 0.2141, + "step": 440100 + }, + { + "epoch": 2.47, + "learning_rate": 2.5264450777697207e-05, + "loss": 0.207, + "step": 440200 + }, + { + "epoch": 2.48, + "learning_rate": 2.5258829547434753e-05, + "loss": 0.2114, + "step": 440300 + }, + { + "epoch": 2.48, + "learning_rate": 2.52532083171723e-05, + "loss": 0.2137, + "step": 440400 + }, + { + "epoch": 2.48, + "learning_rate": 2.5247587086909842e-05, + "loss": 0.211, + "step": 440500 + }, + { + "epoch": 2.48, + "learning_rate": 2.524196585664739e-05, + "loss": 0.2122, + "step": 440600 + }, + { + "epoch": 2.48, + "learning_rate": 2.5236344626384935e-05, + "loss": 0.213, + "step": 440700 + }, + { + "epoch": 2.48, + "learning_rate": 2.5230723396122474e-05, + "loss": 0.2143, + "step": 440800 + }, + { + "epoch": 2.48, + "learning_rate": 2.522510216586002e-05, + "loss": 0.2143, + "step": 440900 + }, + { + "epoch": 2.48, + "learning_rate": 2.5219480935597567e-05, + "loss": 0.2163, + "step": 441000 + }, + { + "epoch": 2.48, + "learning_rate": 2.5213859705335113e-05, + "loss": 0.2152, + "step": 441100 + }, + { + "epoch": 2.48, + "learning_rate": 2.5208294687375278e-05, + "loss": 0.2115, + "step": 441200 + }, + { + "epoch": 2.48, + "learning_rate": 2.5202673457112824e-05, + "loss": 0.2132, + "step": 441300 + }, + { + "epoch": 2.48, + "learning_rate": 2.519705222685037e-05, + "loss": 0.2143, + "step": 441400 + }, + { + "epoch": 2.48, + "learning_rate": 2.5191430996587916e-05, + "loss": 0.2146, + "step": 441500 + }, + { + "epoch": 2.48, + "learning_rate": 2.518580976632546e-05, + "loss": 0.2138, + "step": 441600 + }, + { + "epoch": 2.48, + "learning_rate": 2.5180188536063006e-05, + "loss": 0.2067, + "step": 441700 + }, + { + "epoch": 2.48, + "learning_rate": 2.5174567305800552e-05, + "loss": 0.2153, + "step": 441800 + }, + { + "epoch": 2.48, + "learning_rate": 2.516894607553809e-05, + "loss": 0.203, + "step": 441900 + }, + { + "epoch": 2.48, + "learning_rate": 2.5163324845275638e-05, + "loss": 0.2098, + "step": 442000 + }, + { + "epoch": 2.49, + "learning_rate": 2.5157703615013184e-05, + "loss": 0.2042, + "step": 442100 + }, + { + "epoch": 2.49, + "learning_rate": 2.515208238475073e-05, + "loss": 0.2129, + "step": 442200 + }, + { + "epoch": 2.49, + "learning_rate": 2.514646115448827e-05, + "loss": 0.2138, + "step": 442300 + }, + { + "epoch": 2.49, + "learning_rate": 2.5140839924225816e-05, + "loss": 0.2118, + "step": 442400 + }, + { + "epoch": 2.49, + "learning_rate": 2.5135218693963362e-05, + "loss": 0.2146, + "step": 442500 + }, + { + "epoch": 2.49, + "learning_rate": 2.5129597463700905e-05, + "loss": 0.2088, + "step": 442600 + }, + { + "epoch": 2.49, + "learning_rate": 2.512397623343845e-05, + "loss": 0.2085, + "step": 442700 + }, + { + "epoch": 2.49, + "learning_rate": 2.5118355003175998e-05, + "loss": 0.2067, + "step": 442800 + }, + { + "epoch": 2.49, + "learning_rate": 2.5112733772913544e-05, + "loss": 0.2128, + "step": 442900 + }, + { + "epoch": 2.49, + "learning_rate": 2.5107112542651083e-05, + "loss": 0.2124, + "step": 443000 + }, + { + "epoch": 2.49, + "learning_rate": 2.510149131238863e-05, + "loss": 0.2066, + "step": 443100 + }, + { + "epoch": 2.49, + "learning_rate": 2.5095870082126176e-05, + "loss": 0.2039, + "step": 443200 + }, + { + "epoch": 2.49, + "learning_rate": 2.509024885186372e-05, + "loss": 0.2146, + "step": 443300 + }, + { + "epoch": 2.49, + "learning_rate": 2.5084627621601265e-05, + "loss": 0.2067, + "step": 443400 + }, + { + "epoch": 2.49, + "learning_rate": 2.507900639133881e-05, + "loss": 0.2167, + "step": 443500 + }, + { + "epoch": 2.49, + "learning_rate": 2.5073385161076358e-05, + "loss": 0.2106, + "step": 443600 + }, + { + "epoch": 2.49, + "learning_rate": 2.5067763930813897e-05, + "loss": 0.2093, + "step": 443700 + }, + { + "epoch": 2.49, + "learning_rate": 2.5062142700551443e-05, + "loss": 0.2094, + "step": 443800 + }, + { + "epoch": 2.5, + "learning_rate": 2.505652147028899e-05, + "loss": 0.2156, + "step": 443900 + }, + { + "epoch": 2.5, + "learning_rate": 2.5050900240026533e-05, + "loss": 0.2103, + "step": 444000 + }, + { + "epoch": 2.5, + "learning_rate": 2.504527900976408e-05, + "loss": 0.2126, + "step": 444100 + }, + { + "epoch": 2.5, + "learning_rate": 2.5039657779501625e-05, + "loss": 0.2113, + "step": 444200 + }, + { + "epoch": 2.5, + "learning_rate": 2.503403654923917e-05, + "loss": 0.2021, + "step": 444300 + }, + { + "epoch": 2.5, + "learning_rate": 2.502841531897671e-05, + "loss": 0.2096, + "step": 444400 + }, + { + "epoch": 2.5, + "learning_rate": 2.5022794088714257e-05, + "loss": 0.2164, + "step": 444500 + }, + { + "epoch": 2.5, + "learning_rate": 2.5017172858451804e-05, + "loss": 0.2052, + "step": 444600 + }, + { + "epoch": 2.5, + "learning_rate": 2.5011551628189346e-05, + "loss": 0.2068, + "step": 444700 + }, + { + "epoch": 2.5, + "learning_rate": 2.5005930397926893e-05, + "loss": 0.2055, + "step": 444800 + }, + { + "epoch": 2.5, + "learning_rate": 2.500030916766444e-05, + "loss": 0.2072, + "step": 444900 + }, + { + "epoch": 2.5, + "learning_rate": 2.4994687937401982e-05, + "loss": 0.2086, + "step": 445000 + }, + { + "epoch": 2.5, + "learning_rate": 2.4989066707139525e-05, + "loss": 0.2141, + "step": 445100 + }, + { + "epoch": 2.5, + "learning_rate": 2.498344547687707e-05, + "loss": 0.2108, + "step": 445200 + }, + { + "epoch": 2.5, + "learning_rate": 2.4977824246614614e-05, + "loss": 0.2161, + "step": 445300 + }, + { + "epoch": 2.5, + "learning_rate": 2.497220301635216e-05, + "loss": 0.2074, + "step": 445400 + }, + { + "epoch": 2.5, + "learning_rate": 2.4966581786089703e-05, + "loss": 0.2146, + "step": 445500 + }, + { + "epoch": 2.5, + "learning_rate": 2.496096055582725e-05, + "loss": 0.2116, + "step": 445600 + }, + { + "epoch": 2.51, + "learning_rate": 2.4955339325564796e-05, + "loss": 0.2121, + "step": 445700 + }, + { + "epoch": 2.51, + "learning_rate": 2.494971809530234e-05, + "loss": 0.2147, + "step": 445800 + }, + { + "epoch": 2.51, + "learning_rate": 2.4944096865039885e-05, + "loss": 0.2048, + "step": 445900 + }, + { + "epoch": 2.51, + "learning_rate": 2.4938475634777428e-05, + "loss": 0.2137, + "step": 446000 + }, + { + "epoch": 2.51, + "learning_rate": 2.4932854404514974e-05, + "loss": 0.2155, + "step": 446100 + }, + { + "epoch": 2.51, + "learning_rate": 2.4927233174252517e-05, + "loss": 0.2098, + "step": 446200 + }, + { + "epoch": 2.51, + "learning_rate": 2.4921611943990063e-05, + "loss": 0.2086, + "step": 446300 + }, + { + "epoch": 2.51, + "learning_rate": 2.491599071372761e-05, + "loss": 0.209, + "step": 446400 + }, + { + "epoch": 2.51, + "learning_rate": 2.4910369483465152e-05, + "loss": 0.2157, + "step": 446500 + }, + { + "epoch": 2.51, + "learning_rate": 2.49047482532027e-05, + "loss": 0.2123, + "step": 446600 + }, + { + "epoch": 2.51, + "learning_rate": 2.489912702294024e-05, + "loss": 0.2115, + "step": 446700 + }, + { + "epoch": 2.51, + "learning_rate": 2.4893505792677788e-05, + "loss": 0.2061, + "step": 446800 + }, + { + "epoch": 2.51, + "learning_rate": 2.488788456241533e-05, + "loss": 0.2103, + "step": 446900 + }, + { + "epoch": 2.51, + "learning_rate": 2.4882263332152877e-05, + "loss": 0.2096, + "step": 447000 + }, + { + "epoch": 2.51, + "learning_rate": 2.4876698314193045e-05, + "loss": 0.2105, + "step": 447100 + }, + { + "epoch": 2.51, + "learning_rate": 2.487107708393059e-05, + "loss": 0.2198, + "step": 447200 + }, + { + "epoch": 2.51, + "learning_rate": 2.4865455853668134e-05, + "loss": 0.2101, + "step": 447300 + }, + { + "epoch": 2.51, + "learning_rate": 2.485983462340568e-05, + "loss": 0.211, + "step": 447400 + }, + { + "epoch": 2.52, + "learning_rate": 2.4854213393143226e-05, + "loss": 0.2108, + "step": 447500 + }, + { + "epoch": 2.52, + "learning_rate": 2.484859216288077e-05, + "loss": 0.2139, + "step": 447600 + }, + { + "epoch": 2.52, + "learning_rate": 2.4842970932618315e-05, + "loss": 0.2124, + "step": 447700 + }, + { + "epoch": 2.52, + "learning_rate": 2.483734970235586e-05, + "loss": 0.2112, + "step": 447800 + }, + { + "epoch": 2.52, + "learning_rate": 2.4831728472093405e-05, + "loss": 0.2147, + "step": 447900 + }, + { + "epoch": 2.52, + "learning_rate": 2.4826107241830947e-05, + "loss": 0.2132, + "step": 448000 + }, + { + "epoch": 2.52, + "learning_rate": 2.482048601156849e-05, + "loss": 0.2109, + "step": 448100 + }, + { + "epoch": 2.52, + "learning_rate": 2.4814864781306037e-05, + "loss": 0.2087, + "step": 448200 + }, + { + "epoch": 2.52, + "learning_rate": 2.4809243551043583e-05, + "loss": 0.2089, + "step": 448300 + }, + { + "epoch": 2.52, + "learning_rate": 2.480362232078113e-05, + "loss": 0.2133, + "step": 448400 + }, + { + "epoch": 2.52, + "learning_rate": 2.4798001090518672e-05, + "loss": 0.2079, + "step": 448500 + }, + { + "epoch": 2.52, + "learning_rate": 2.479237986025622e-05, + "loss": 0.209, + "step": 448600 + }, + { + "epoch": 2.52, + "learning_rate": 2.478675862999376e-05, + "loss": 0.2077, + "step": 448700 + }, + { + "epoch": 2.52, + "learning_rate": 2.4781137399731304e-05, + "loss": 0.2143, + "step": 448800 + }, + { + "epoch": 2.52, + "learning_rate": 2.477551616946885e-05, + "loss": 0.2138, + "step": 448900 + }, + { + "epoch": 2.52, + "learning_rate": 2.4769894939206397e-05, + "loss": 0.2078, + "step": 449000 + }, + { + "epoch": 2.52, + "learning_rate": 2.4764273708943943e-05, + "loss": 0.2096, + "step": 449100 + }, + { + "epoch": 2.53, + "learning_rate": 2.4758708690984107e-05, + "loss": 0.2106, + "step": 449200 + }, + { + "epoch": 2.53, + "learning_rate": 2.4753087460721653e-05, + "loss": 0.2033, + "step": 449300 + }, + { + "epoch": 2.53, + "learning_rate": 2.47474662304592e-05, + "loss": 0.2115, + "step": 449400 + }, + { + "epoch": 2.53, + "learning_rate": 2.4741845000196746e-05, + "loss": 0.2082, + "step": 449500 + }, + { + "epoch": 2.53, + "learning_rate": 2.473622376993429e-05, + "loss": 0.2139, + "step": 449600 + }, + { + "epoch": 2.53, + "learning_rate": 2.4730602539671835e-05, + "loss": 0.2079, + "step": 449700 + }, + { + "epoch": 2.53, + "learning_rate": 2.4724981309409378e-05, + "loss": 0.2119, + "step": 449800 + }, + { + "epoch": 2.53, + "learning_rate": 2.471936007914692e-05, + "loss": 0.2136, + "step": 449900 + }, + { + "epoch": 2.53, + "learning_rate": 2.4713738848884467e-05, + "loss": 0.219, + "step": 450000 + }, + { + "epoch": 2.53, + "learning_rate": 2.4708117618622014e-05, + "loss": 0.2133, + "step": 450100 + }, + { + "epoch": 2.53, + "learning_rate": 2.470249638835956e-05, + "loss": 0.214, + "step": 450200 + }, + { + "epoch": 2.53, + "learning_rate": 2.4696875158097103e-05, + "loss": 0.2106, + "step": 450300 + }, + { + "epoch": 2.53, + "learning_rate": 2.469125392783465e-05, + "loss": 0.208, + "step": 450400 + }, + { + "epoch": 2.53, + "learning_rate": 2.4685632697572192e-05, + "loss": 0.2086, + "step": 450500 + }, + { + "epoch": 2.53, + "learning_rate": 2.4680011467309735e-05, + "loss": 0.2151, + "step": 450600 + }, + { + "epoch": 2.53, + "learning_rate": 2.467439023704728e-05, + "loss": 0.2173, + "step": 450700 + }, + { + "epoch": 2.53, + "learning_rate": 2.4668769006784824e-05, + "loss": 0.2124, + "step": 450800 + }, + { + "epoch": 2.53, + "learning_rate": 2.466314777652237e-05, + "loss": 0.2064, + "step": 450900 + }, + { + "epoch": 2.54, + "learning_rate": 2.4657526546259916e-05, + "loss": 0.2087, + "step": 451000 + }, + { + "epoch": 2.54, + "learning_rate": 2.4651905315997463e-05, + "loss": 0.21, + "step": 451100 + }, + { + "epoch": 2.54, + "learning_rate": 2.4646284085735006e-05, + "loss": 0.2018, + "step": 451200 + }, + { + "epoch": 2.54, + "learning_rate": 2.4640719067775177e-05, + "loss": 0.2095, + "step": 451300 + }, + { + "epoch": 2.54, + "learning_rate": 2.463509783751272e-05, + "loss": 0.2078, + "step": 451400 + }, + { + "epoch": 2.54, + "learning_rate": 2.4629476607250266e-05, + "loss": 0.208, + "step": 451500 + }, + { + "epoch": 2.54, + "learning_rate": 2.462385537698781e-05, + "loss": 0.2086, + "step": 451600 + }, + { + "epoch": 2.54, + "learning_rate": 2.461823414672535e-05, + "loss": 0.2065, + "step": 451700 + }, + { + "epoch": 2.54, + "learning_rate": 2.4612612916462898e-05, + "loss": 0.2095, + "step": 451800 + }, + { + "epoch": 2.54, + "learning_rate": 2.460699168620044e-05, + "loss": 0.2044, + "step": 451900 + }, + { + "epoch": 2.54, + "learning_rate": 2.4601370455937987e-05, + "loss": 0.21, + "step": 452000 + }, + { + "epoch": 2.54, + "learning_rate": 2.4595749225675533e-05, + "loss": 0.2092, + "step": 452100 + }, + { + "epoch": 2.54, + "learning_rate": 2.459012799541308e-05, + "loss": 0.2085, + "step": 452200 + }, + { + "epoch": 2.54, + "learning_rate": 2.4584506765150622e-05, + "loss": 0.2085, + "step": 452300 + }, + { + "epoch": 2.54, + "learning_rate": 2.4578885534888165e-05, + "loss": 0.2098, + "step": 452400 + }, + { + "epoch": 2.54, + "learning_rate": 2.457326430462571e-05, + "loss": 0.2133, + "step": 452500 + }, + { + "epoch": 2.54, + "learning_rate": 2.4567643074363255e-05, + "loss": 0.208, + "step": 452600 + }, + { + "epoch": 2.54, + "learning_rate": 2.45620218441008e-05, + "loss": 0.2142, + "step": 452700 + }, + { + "epoch": 2.55, + "learning_rate": 2.4556400613838347e-05, + "loss": 0.209, + "step": 452800 + }, + { + "epoch": 2.55, + "learning_rate": 2.455077938357589e-05, + "loss": 0.2126, + "step": 452900 + }, + { + "epoch": 2.55, + "learning_rate": 2.4545158153313436e-05, + "loss": 0.2081, + "step": 453000 + }, + { + "epoch": 2.55, + "learning_rate": 2.453953692305098e-05, + "loss": 0.212, + "step": 453100 + }, + { + "epoch": 2.55, + "learning_rate": 2.4533915692788525e-05, + "loss": 0.1996, + "step": 453200 + }, + { + "epoch": 2.55, + "learning_rate": 2.452829446252607e-05, + "loss": 0.217, + "step": 453300 + }, + { + "epoch": 2.55, + "learning_rate": 2.4522673232263615e-05, + "loss": 0.2084, + "step": 453400 + }, + { + "epoch": 2.55, + "learning_rate": 2.4517052002001157e-05, + "loss": 0.202, + "step": 453500 + }, + { + "epoch": 2.55, + "learning_rate": 2.4511430771738704e-05, + "loss": 0.216, + "step": 453600 + }, + { + "epoch": 2.55, + "learning_rate": 2.450580954147625e-05, + "loss": 0.2084, + "step": 453700 + }, + { + "epoch": 2.55, + "learning_rate": 2.4500188311213793e-05, + "loss": 0.2167, + "step": 453800 + }, + { + "epoch": 2.55, + "learning_rate": 2.449456708095134e-05, + "loss": 0.206, + "step": 453900 + }, + { + "epoch": 2.55, + "learning_rate": 2.4488945850688882e-05, + "loss": 0.2079, + "step": 454000 + }, + { + "epoch": 2.55, + "learning_rate": 2.448332462042643e-05, + "loss": 0.2118, + "step": 454100 + }, + { + "epoch": 2.55, + "learning_rate": 2.447770339016397e-05, + "loss": 0.2106, + "step": 454200 + }, + { + "epoch": 2.55, + "learning_rate": 2.4472082159901518e-05, + "loss": 0.214, + "step": 454300 + }, + { + "epoch": 2.55, + "learning_rate": 2.4466460929639064e-05, + "loss": 0.2065, + "step": 454400 + }, + { + "epoch": 2.55, + "learning_rate": 2.4460839699376607e-05, + "loss": 0.208, + "step": 454500 + }, + { + "epoch": 2.56, + "learning_rate": 2.4455218469114153e-05, + "loss": 0.2131, + "step": 454600 + }, + { + "epoch": 2.56, + "learning_rate": 2.4449597238851696e-05, + "loss": 0.2077, + "step": 454700 + }, + { + "epoch": 2.56, + "learning_rate": 2.4443976008589242e-05, + "loss": 0.2077, + "step": 454800 + }, + { + "epoch": 2.56, + "learning_rate": 2.4438354778326785e-05, + "loss": 0.2091, + "step": 454900 + }, + { + "epoch": 2.56, + "learning_rate": 2.4432733548064328e-05, + "loss": 0.2095, + "step": 455000 + }, + { + "epoch": 2.56, + "learning_rate": 2.4427112317801874e-05, + "loss": 0.2122, + "step": 455100 + }, + { + "epoch": 2.56, + "learning_rate": 2.442149108753942e-05, + "loss": 0.2112, + "step": 455200 + }, + { + "epoch": 2.56, + "learning_rate": 2.4415869857276967e-05, + "loss": 0.2122, + "step": 455300 + }, + { + "epoch": 2.56, + "learning_rate": 2.441024862701451e-05, + "loss": 0.2151, + "step": 455400 + }, + { + "epoch": 2.56, + "learning_rate": 2.4404627396752056e-05, + "loss": 0.2146, + "step": 455500 + }, + { + "epoch": 2.56, + "learning_rate": 2.43990061664896e-05, + "loss": 0.2064, + "step": 455600 + }, + { + "epoch": 2.56, + "learning_rate": 2.4393384936227142e-05, + "loss": 0.212, + "step": 455700 + }, + { + "epoch": 2.56, + "learning_rate": 2.4387763705964688e-05, + "loss": 0.214, + "step": 455800 + }, + { + "epoch": 2.56, + "learning_rate": 2.4382142475702234e-05, + "loss": 0.2106, + "step": 455900 + }, + { + "epoch": 2.56, + "learning_rate": 2.437652124543978e-05, + "loss": 0.2097, + "step": 456000 + }, + { + "epoch": 2.56, + "learning_rate": 2.4370900015177323e-05, + "loss": 0.2108, + "step": 456100 + }, + { + "epoch": 2.56, + "learning_rate": 2.436533499721749e-05, + "loss": 0.2085, + "step": 456200 + }, + { + "epoch": 2.56, + "learning_rate": 2.4359713766955037e-05, + "loss": 0.2076, + "step": 456300 + }, + { + "epoch": 2.57, + "learning_rate": 2.4354092536692584e-05, + "loss": 0.2045, + "step": 456400 + }, + { + "epoch": 2.57, + "learning_rate": 2.4348471306430127e-05, + "loss": 0.2146, + "step": 456500 + }, + { + "epoch": 2.57, + "learning_rate": 2.4342850076167673e-05, + "loss": 0.2064, + "step": 456600 + }, + { + "epoch": 2.57, + "learning_rate": 2.4337228845905216e-05, + "loss": 0.2153, + "step": 456700 + }, + { + "epoch": 2.57, + "learning_rate": 2.433160761564276e-05, + "loss": 0.2073, + "step": 456800 + }, + { + "epoch": 2.57, + "learning_rate": 2.4325986385380305e-05, + "loss": 0.2103, + "step": 456900 + }, + { + "epoch": 2.57, + "learning_rate": 2.432036515511785e-05, + "loss": 0.2068, + "step": 457000 + }, + { + "epoch": 2.57, + "learning_rate": 2.4314743924855397e-05, + "loss": 0.2142, + "step": 457100 + }, + { + "epoch": 2.57, + "learning_rate": 2.430917890689556e-05, + "loss": 0.2011, + "step": 457200 + }, + { + "epoch": 2.57, + "learning_rate": 2.430355767663311e-05, + "loss": 0.2088, + "step": 457300 + }, + { + "epoch": 2.57, + "learning_rate": 2.4297936446370654e-05, + "loss": 0.2102, + "step": 457400 + }, + { + "epoch": 2.57, + "learning_rate": 2.42923152161082e-05, + "loss": 0.2162, + "step": 457500 + }, + { + "epoch": 2.57, + "learning_rate": 2.4286693985845743e-05, + "loss": 0.2077, + "step": 457600 + }, + { + "epoch": 2.57, + "learning_rate": 2.4281072755583286e-05, + "loss": 0.2105, + "step": 457700 + }, + { + "epoch": 2.57, + "learning_rate": 2.4275451525320833e-05, + "loss": 0.2139, + "step": 457800 + }, + { + "epoch": 2.57, + "learning_rate": 2.4269830295058375e-05, + "loss": 0.2114, + "step": 457900 + }, + { + "epoch": 2.57, + "learning_rate": 2.426420906479592e-05, + "loss": 0.2106, + "step": 458000 + }, + { + "epoch": 2.58, + "learning_rate": 2.4258587834533468e-05, + "loss": 0.2069, + "step": 458100 + }, + { + "epoch": 2.58, + "learning_rate": 2.4252966604271014e-05, + "loss": 0.2105, + "step": 458200 + }, + { + "epoch": 2.58, + "learning_rate": 2.4247345374008557e-05, + "loss": 0.2099, + "step": 458300 + }, + { + "epoch": 2.58, + "learning_rate": 2.42417241437461e-05, + "loss": 0.212, + "step": 458400 + }, + { + "epoch": 2.58, + "learning_rate": 2.4236102913483646e-05, + "loss": 0.2079, + "step": 458500 + }, + { + "epoch": 2.58, + "learning_rate": 2.423048168322119e-05, + "loss": 0.2122, + "step": 458600 + }, + { + "epoch": 2.58, + "learning_rate": 2.4224860452958735e-05, + "loss": 0.2086, + "step": 458700 + }, + { + "epoch": 2.58, + "learning_rate": 2.421923922269628e-05, + "loss": 0.2057, + "step": 458800 + }, + { + "epoch": 2.58, + "learning_rate": 2.4213617992433828e-05, + "loss": 0.2104, + "step": 458900 + }, + { + "epoch": 2.58, + "learning_rate": 2.420799676217137e-05, + "loss": 0.2095, + "step": 459000 + }, + { + "epoch": 2.58, + "learning_rate": 2.420243174421154e-05, + "loss": 0.2146, + "step": 459100 + }, + { + "epoch": 2.58, + "learning_rate": 2.4196810513949085e-05, + "loss": 0.211, + "step": 459200 + }, + { + "epoch": 2.58, + "learning_rate": 2.419118928368663e-05, + "loss": 0.2106, + "step": 459300 + }, + { + "epoch": 2.58, + "learning_rate": 2.4185568053424174e-05, + "loss": 0.2071, + "step": 459400 + }, + { + "epoch": 2.58, + "learning_rate": 2.4179946823161717e-05, + "loss": 0.2081, + "step": 459500 + }, + { + "epoch": 2.58, + "learning_rate": 2.4174325592899263e-05, + "loss": 0.2087, + "step": 459600 + }, + { + "epoch": 2.58, + "learning_rate": 2.4168704362636806e-05, + "loss": 0.2054, + "step": 459700 + }, + { + "epoch": 2.58, + "learning_rate": 2.4163083132374352e-05, + "loss": 0.2108, + "step": 459800 + }, + { + "epoch": 2.59, + "learning_rate": 2.41574619021119e-05, + "loss": 0.2105, + "step": 459900 + }, + { + "epoch": 2.59, + "learning_rate": 2.4151840671849445e-05, + "loss": 0.2045, + "step": 460000 + }, + { + "epoch": 2.59, + "learning_rate": 2.4146219441586988e-05, + "loss": 0.2112, + "step": 460100 + }, + { + "epoch": 2.59, + "learning_rate": 2.414059821132453e-05, + "loss": 0.2121, + "step": 460200 + }, + { + "epoch": 2.59, + "learning_rate": 2.4134976981062077e-05, + "loss": 0.215, + "step": 460300 + }, + { + "epoch": 2.59, + "learning_rate": 2.412935575079962e-05, + "loss": 0.2099, + "step": 460400 + }, + { + "epoch": 2.59, + "learning_rate": 2.4123734520537166e-05, + "loss": 0.2093, + "step": 460500 + }, + { + "epoch": 2.59, + "learning_rate": 2.411811329027471e-05, + "loss": 0.2142, + "step": 460600 + }, + { + "epoch": 2.59, + "learning_rate": 2.4112492060012255e-05, + "loss": 0.2117, + "step": 460700 + }, + { + "epoch": 2.59, + "learning_rate": 2.41068708297498e-05, + "loss": 0.2088, + "step": 460800 + }, + { + "epoch": 2.59, + "learning_rate": 2.4101249599487344e-05, + "loss": 0.2093, + "step": 460900 + }, + { + "epoch": 2.59, + "learning_rate": 2.409562836922489e-05, + "loss": 0.2049, + "step": 461000 + }, + { + "epoch": 2.59, + "learning_rate": 2.4090007138962434e-05, + "loss": 0.2101, + "step": 461100 + }, + { + "epoch": 2.59, + "learning_rate": 2.408438590869998e-05, + "loss": 0.2066, + "step": 461200 + }, + { + "epoch": 2.59, + "learning_rate": 2.4078764678437523e-05, + "loss": 0.2067, + "step": 461300 + }, + { + "epoch": 2.59, + "learning_rate": 2.407314344817507e-05, + "loss": 0.2119, + "step": 461400 + }, + { + "epoch": 2.59, + "learning_rate": 2.4067522217912615e-05, + "loss": 0.2114, + "step": 461500 + }, + { + "epoch": 2.59, + "learning_rate": 2.4061900987650158e-05, + "loss": 0.2054, + "step": 461600 + }, + { + "epoch": 2.6, + "learning_rate": 2.4056279757387704e-05, + "loss": 0.2145, + "step": 461700 + }, + { + "epoch": 2.6, + "learning_rate": 2.4050658527125247e-05, + "loss": 0.214, + "step": 461800 + }, + { + "epoch": 2.6, + "learning_rate": 2.4045037296862794e-05, + "loss": 0.2094, + "step": 461900 + }, + { + "epoch": 2.6, + "learning_rate": 2.4039416066600337e-05, + "loss": 0.2155, + "step": 462000 + }, + { + "epoch": 2.6, + "learning_rate": 2.4033794836337883e-05, + "loss": 0.212, + "step": 462100 + }, + { + "epoch": 2.6, + "learning_rate": 2.4028173606075426e-05, + "loss": 0.2077, + "step": 462200 + }, + { + "epoch": 2.6, + "learning_rate": 2.4022552375812972e-05, + "loss": 0.2088, + "step": 462300 + }, + { + "epoch": 2.6, + "learning_rate": 2.4016931145550518e-05, + "loss": 0.2096, + "step": 462400 + }, + { + "epoch": 2.6, + "learning_rate": 2.4011366127590686e-05, + "loss": 0.2103, + "step": 462500 + }, + { + "epoch": 2.6, + "learning_rate": 2.4005744897328232e-05, + "loss": 0.2081, + "step": 462600 + }, + { + "epoch": 2.6, + "learning_rate": 2.4000123667065775e-05, + "loss": 0.2061, + "step": 462700 + }, + { + "epoch": 2.6, + "learning_rate": 2.399450243680332e-05, + "loss": 0.211, + "step": 462800 + }, + { + "epoch": 2.6, + "learning_rate": 2.3988881206540864e-05, + "loss": 0.2116, + "step": 462900 + }, + { + "epoch": 2.6, + "learning_rate": 2.398325997627841e-05, + "loss": 0.2111, + "step": 463000 + }, + { + "epoch": 2.6, + "learning_rate": 2.3977638746015953e-05, + "loss": 0.2115, + "step": 463100 + }, + { + "epoch": 2.6, + "learning_rate": 2.3972017515753496e-05, + "loss": 0.2145, + "step": 463200 + }, + { + "epoch": 2.6, + "learning_rate": 2.3966396285491043e-05, + "loss": 0.2065, + "step": 463300 + }, + { + "epoch": 2.6, + "learning_rate": 2.396077505522859e-05, + "loss": 0.2081, + "step": 463400 + }, + { + "epoch": 2.61, + "learning_rate": 2.3955153824966135e-05, + "loss": 0.2119, + "step": 463500 + }, + { + "epoch": 2.61, + "learning_rate": 2.3949532594703678e-05, + "loss": 0.214, + "step": 463600 + }, + { + "epoch": 2.61, + "learning_rate": 2.3943911364441224e-05, + "loss": 0.2046, + "step": 463700 + }, + { + "epoch": 2.61, + "learning_rate": 2.3938290134178767e-05, + "loss": 0.2059, + "step": 463800 + }, + { + "epoch": 2.61, + "learning_rate": 2.393266890391631e-05, + "loss": 0.2086, + "step": 463900 + }, + { + "epoch": 2.61, + "learning_rate": 2.3927047673653856e-05, + "loss": 0.1978, + "step": 464000 + }, + { + "epoch": 2.61, + "learning_rate": 2.3921426443391403e-05, + "loss": 0.2105, + "step": 464100 + }, + { + "epoch": 2.61, + "learning_rate": 2.391580521312895e-05, + "loss": 0.2073, + "step": 464200 + }, + { + "epoch": 2.61, + "learning_rate": 2.3910183982866492e-05, + "loss": 0.204, + "step": 464300 + }, + { + "epoch": 2.61, + "learning_rate": 2.3904562752604038e-05, + "loss": 0.2013, + "step": 464400 + }, + { + "epoch": 2.61, + "learning_rate": 2.389894152234158e-05, + "loss": 0.2065, + "step": 464500 + }, + { + "epoch": 2.61, + "learning_rate": 2.3893320292079124e-05, + "loss": 0.2093, + "step": 464600 + }, + { + "epoch": 2.61, + "learning_rate": 2.388769906181667e-05, + "loss": 0.21, + "step": 464700 + }, + { + "epoch": 2.61, + "learning_rate": 2.3882077831554213e-05, + "loss": 0.2124, + "step": 464800 + }, + { + "epoch": 2.61, + "learning_rate": 2.387645660129176e-05, + "loss": 0.214, + "step": 464900 + }, + { + "epoch": 2.61, + "learning_rate": 2.3870835371029306e-05, + "loss": 0.2125, + "step": 465000 + }, + { + "epoch": 2.61, + "learning_rate": 2.3865270353069473e-05, + "loss": 0.2049, + "step": 465100 + }, + { + "epoch": 2.61, + "learning_rate": 2.385964912280702e-05, + "loss": 0.2092, + "step": 465200 + }, + { + "epoch": 2.62, + "learning_rate": 2.3854027892544566e-05, + "loss": 0.2057, + "step": 465300 + }, + { + "epoch": 2.62, + "learning_rate": 2.384840666228211e-05, + "loss": 0.2065, + "step": 465400 + }, + { + "epoch": 2.62, + "learning_rate": 2.3842785432019655e-05, + "loss": 0.2091, + "step": 465500 + }, + { + "epoch": 2.62, + "learning_rate": 2.3837164201757198e-05, + "loss": 0.2111, + "step": 465600 + }, + { + "epoch": 2.62, + "learning_rate": 2.383154297149474e-05, + "loss": 0.2133, + "step": 465700 + }, + { + "epoch": 2.62, + "learning_rate": 2.3825921741232287e-05, + "loss": 0.2118, + "step": 465800 + }, + { + "epoch": 2.62, + "learning_rate": 2.382030051096983e-05, + "loss": 0.2092, + "step": 465900 + }, + { + "epoch": 2.62, + "learning_rate": 2.3814679280707376e-05, + "loss": 0.2118, + "step": 466000 + }, + { + "epoch": 2.62, + "learning_rate": 2.3809058050444922e-05, + "loss": 0.2051, + "step": 466100 + }, + { + "epoch": 2.62, + "learning_rate": 2.380343682018247e-05, + "loss": 0.2075, + "step": 466200 + }, + { + "epoch": 2.62, + "learning_rate": 2.379781558992001e-05, + "loss": 0.2105, + "step": 466300 + }, + { + "epoch": 2.62, + "learning_rate": 2.3792194359657554e-05, + "loss": 0.2049, + "step": 466400 + }, + { + "epoch": 2.62, + "learning_rate": 2.37865731293951e-05, + "loss": 0.2099, + "step": 466500 + }, + { + "epoch": 2.62, + "learning_rate": 2.3780951899132644e-05, + "loss": 0.2059, + "step": 466600 + }, + { + "epoch": 2.62, + "learning_rate": 2.377533066887019e-05, + "loss": 0.2072, + "step": 466700 + }, + { + "epoch": 2.62, + "learning_rate": 2.3769709438607736e-05, + "loss": 0.2097, + "step": 466800 + }, + { + "epoch": 2.62, + "learning_rate": 2.3764088208345282e-05, + "loss": 0.2063, + "step": 466900 + }, + { + "epoch": 2.63, + "learning_rate": 2.3758466978082825e-05, + "loss": 0.2108, + "step": 467000 + }, + { + "epoch": 2.63, + "learning_rate": 2.3752845747820368e-05, + "loss": 0.2089, + "step": 467100 + }, + { + "epoch": 2.63, + "learning_rate": 2.3747224517557914e-05, + "loss": 0.2098, + "step": 467200 + }, + { + "epoch": 2.63, + "learning_rate": 2.3741603287295457e-05, + "loss": 0.2166, + "step": 467300 + }, + { + "epoch": 2.63, + "learning_rate": 2.3735982057033004e-05, + "loss": 0.2, + "step": 467400 + }, + { + "epoch": 2.63, + "learning_rate": 2.3730360826770547e-05, + "loss": 0.2105, + "step": 467500 + }, + { + "epoch": 2.63, + "learning_rate": 2.3724739596508093e-05, + "loss": 0.2079, + "step": 467600 + }, + { + "epoch": 2.63, + "learning_rate": 2.371911836624564e-05, + "loss": 0.2018, + "step": 467700 + }, + { + "epoch": 2.63, + "learning_rate": 2.3713497135983182e-05, + "loss": 0.2137, + "step": 467800 + }, + { + "epoch": 2.63, + "learning_rate": 2.3707875905720728e-05, + "loss": 0.206, + "step": 467900 + }, + { + "epoch": 2.63, + "learning_rate": 2.370225467545827e-05, + "loss": 0.2066, + "step": 468000 + }, + { + "epoch": 2.63, + "learning_rate": 2.3696633445195817e-05, + "loss": 0.2087, + "step": 468100 + }, + { + "epoch": 2.63, + "learning_rate": 2.369101221493336e-05, + "loss": 0.2058, + "step": 468200 + }, + { + "epoch": 2.63, + "learning_rate": 2.3685390984670903e-05, + "loss": 0.205, + "step": 468300 + }, + { + "epoch": 2.63, + "learning_rate": 2.3679769754408453e-05, + "loss": 0.2107, + "step": 468400 + }, + { + "epoch": 2.63, + "learning_rate": 2.3674148524145996e-05, + "loss": 0.2143, + "step": 468500 + }, + { + "epoch": 2.63, + "learning_rate": 2.3668527293883542e-05, + "loss": 0.2104, + "step": 468600 + }, + { + "epoch": 2.63, + "learning_rate": 2.3662906063621085e-05, + "loss": 0.2099, + "step": 468700 + }, + { + "epoch": 2.64, + "learning_rate": 2.365728483335863e-05, + "loss": 0.2058, + "step": 468800 + }, + { + "epoch": 2.64, + "learning_rate": 2.3651663603096174e-05, + "loss": 0.1996, + "step": 468900 + }, + { + "epoch": 2.64, + "learning_rate": 2.3646042372833717e-05, + "loss": 0.2115, + "step": 469000 + }, + { + "epoch": 2.64, + "learning_rate": 2.3640421142571263e-05, + "loss": 0.203, + "step": 469100 + }, + { + "epoch": 2.64, + "learning_rate": 2.363479991230881e-05, + "loss": 0.2076, + "step": 469200 + }, + { + "epoch": 2.64, + "learning_rate": 2.3629178682046356e-05, + "loss": 0.2122, + "step": 469300 + }, + { + "epoch": 2.64, + "learning_rate": 2.36235574517839e-05, + "loss": 0.2063, + "step": 469400 + }, + { + "epoch": 2.64, + "learning_rate": 2.3617936221521445e-05, + "loss": 0.2102, + "step": 469500 + }, + { + "epoch": 2.64, + "learning_rate": 2.3612314991258988e-05, + "loss": 0.2082, + "step": 469600 + }, + { + "epoch": 2.64, + "learning_rate": 2.360669376099653e-05, + "loss": 0.203, + "step": 469700 + }, + { + "epoch": 2.64, + "learning_rate": 2.3601072530734077e-05, + "loss": 0.2038, + "step": 469800 + }, + { + "epoch": 2.64, + "learning_rate": 2.359545130047162e-05, + "loss": 0.2047, + "step": 469900 + }, + { + "epoch": 2.64, + "learning_rate": 2.358983007020917e-05, + "loss": 0.2077, + "step": 470000 + }, + { + "epoch": 2.64, + "learning_rate": 2.3584208839946712e-05, + "loss": 0.2109, + "step": 470100 + }, + { + "epoch": 2.64, + "learning_rate": 2.357858760968426e-05, + "loss": 0.2075, + "step": 470200 + }, + { + "epoch": 2.64, + "learning_rate": 2.35729663794218e-05, + "loss": 0.2037, + "step": 470300 + }, + { + "epoch": 2.64, + "learning_rate": 2.3567345149159345e-05, + "loss": 0.2065, + "step": 470400 + }, + { + "epoch": 2.64, + "learning_rate": 2.356172391889689e-05, + "loss": 0.2033, + "step": 470500 + }, + { + "epoch": 2.65, + "learning_rate": 2.3556102688634434e-05, + "loss": 0.208, + "step": 470600 + }, + { + "epoch": 2.65, + "learning_rate": 2.355048145837198e-05, + "loss": 0.2038, + "step": 470700 + }, + { + "epoch": 2.65, + "learning_rate": 2.3544860228109526e-05, + "loss": 0.2034, + "step": 470800 + }, + { + "epoch": 2.65, + "learning_rate": 2.3539238997847073e-05, + "loss": 0.2154, + "step": 470900 + }, + { + "epoch": 2.65, + "learning_rate": 2.3533617767584615e-05, + "loss": 0.2053, + "step": 471000 + }, + { + "epoch": 2.65, + "learning_rate": 2.352799653732216e-05, + "loss": 0.2028, + "step": 471100 + }, + { + "epoch": 2.65, + "learning_rate": 2.3522375307059705e-05, + "loss": 0.2098, + "step": 471200 + }, + { + "epoch": 2.65, + "learning_rate": 2.3516754076797247e-05, + "loss": 0.2056, + "step": 471300 + }, + { + "epoch": 2.65, + "learning_rate": 2.3511132846534794e-05, + "loss": 0.2062, + "step": 471400 + }, + { + "epoch": 2.65, + "learning_rate": 2.350556782857496e-05, + "loss": 0.2116, + "step": 471500 + }, + { + "epoch": 2.65, + "learning_rate": 2.3499946598312508e-05, + "loss": 0.2112, + "step": 471600 + }, + { + "epoch": 2.65, + "learning_rate": 2.349432536805005e-05, + "loss": 0.2108, + "step": 471700 + }, + { + "epoch": 2.65, + "learning_rate": 2.3488704137787597e-05, + "loss": 0.1985, + "step": 471800 + }, + { + "epoch": 2.65, + "learning_rate": 2.3483082907525143e-05, + "loss": 0.204, + "step": 471900 + }, + { + "epoch": 2.65, + "learning_rate": 2.347746167726269e-05, + "loss": 0.2077, + "step": 472000 + }, + { + "epoch": 2.65, + "learning_rate": 2.3471840447000232e-05, + "loss": 0.2079, + "step": 472100 + }, + { + "epoch": 2.65, + "learning_rate": 2.3466219216737775e-05, + "loss": 0.2107, + "step": 472200 + }, + { + "epoch": 2.65, + "learning_rate": 2.346059798647532e-05, + "loss": 0.2064, + "step": 472300 + }, + { + "epoch": 2.66, + "learning_rate": 2.3454976756212864e-05, + "loss": 0.2068, + "step": 472400 + }, + { + "epoch": 2.66, + "learning_rate": 2.344935552595041e-05, + "loss": 0.2108, + "step": 472500 + }, + { + "epoch": 2.66, + "learning_rate": 2.3443734295687957e-05, + "loss": 0.2083, + "step": 472600 + }, + { + "epoch": 2.66, + "learning_rate": 2.34381130654255e-05, + "loss": 0.2048, + "step": 472700 + }, + { + "epoch": 2.66, + "learning_rate": 2.3432491835163046e-05, + "loss": 0.2018, + "step": 472800 + }, + { + "epoch": 2.66, + "learning_rate": 2.342687060490059e-05, + "loss": 0.206, + "step": 472900 + }, + { + "epoch": 2.66, + "learning_rate": 2.3421249374638135e-05, + "loss": 0.2079, + "step": 473000 + }, + { + "epoch": 2.66, + "learning_rate": 2.3415628144375678e-05, + "loss": 0.2119, + "step": 473100 + }, + { + "epoch": 2.66, + "learning_rate": 2.3410006914113224e-05, + "loss": 0.208, + "step": 473200 + }, + { + "epoch": 2.66, + "learning_rate": 2.3404385683850767e-05, + "loss": 0.2027, + "step": 473300 + }, + { + "epoch": 2.66, + "learning_rate": 2.3398764453588314e-05, + "loss": 0.2097, + "step": 473400 + }, + { + "epoch": 2.66, + "learning_rate": 2.339319943562848e-05, + "loss": 0.2079, + "step": 473500 + }, + { + "epoch": 2.66, + "learning_rate": 2.3387578205366027e-05, + "loss": 0.2034, + "step": 473600 + }, + { + "epoch": 2.66, + "learning_rate": 2.3381956975103574e-05, + "loss": 0.2073, + "step": 473700 + }, + { + "epoch": 2.66, + "learning_rate": 2.3376335744841117e-05, + "loss": 0.2091, + "step": 473800 + }, + { + "epoch": 2.66, + "learning_rate": 2.3370714514578663e-05, + "loss": 0.2054, + "step": 473900 + }, + { + "epoch": 2.66, + "learning_rate": 2.3365093284316206e-05, + "loss": 0.2072, + "step": 474000 + }, + { + "epoch": 2.67, + "learning_rate": 2.3359472054053752e-05, + "loss": 0.2096, + "step": 474100 + }, + { + "epoch": 2.67, + "learning_rate": 2.335390703609392e-05, + "loss": 0.2083, + "step": 474200 + }, + { + "epoch": 2.67, + "learning_rate": 2.3348285805831466e-05, + "loss": 0.2064, + "step": 474300 + }, + { + "epoch": 2.67, + "learning_rate": 2.334266457556901e-05, + "loss": 0.2094, + "step": 474400 + }, + { + "epoch": 2.67, + "learning_rate": 2.3337043345306555e-05, + "loss": 0.2106, + "step": 474500 + }, + { + "epoch": 2.67, + "learning_rate": 2.3331422115044098e-05, + "loss": 0.2069, + "step": 474600 + }, + { + "epoch": 2.67, + "learning_rate": 2.3325800884781644e-05, + "loss": 0.2079, + "step": 474700 + }, + { + "epoch": 2.67, + "learning_rate": 2.332017965451919e-05, + "loss": 0.2024, + "step": 474800 + }, + { + "epoch": 2.67, + "learning_rate": 2.3314558424256733e-05, + "loss": 0.2011, + "step": 474900 + }, + { + "epoch": 2.67, + "learning_rate": 2.330893719399428e-05, + "loss": 0.2074, + "step": 475000 + }, + { + "epoch": 2.67, + "learning_rate": 2.3303315963731823e-05, + "loss": 0.2032, + "step": 475100 + }, + { + "epoch": 2.67, + "learning_rate": 2.329769473346937e-05, + "loss": 0.2085, + "step": 475200 + }, + { + "epoch": 2.67, + "learning_rate": 2.3292073503206912e-05, + "loss": 0.2082, + "step": 475300 + }, + { + "epoch": 2.67, + "learning_rate": 2.3286452272944458e-05, + "loss": 0.2065, + "step": 475400 + }, + { + "epoch": 2.67, + "learning_rate": 2.3280831042682e-05, + "loss": 0.2137, + "step": 475500 + }, + { + "epoch": 2.67, + "learning_rate": 2.3275209812419547e-05, + "loss": 0.2117, + "step": 475600 + }, + { + "epoch": 2.67, + "learning_rate": 2.3269588582157094e-05, + "loss": 0.2059, + "step": 475700 + }, + { + "epoch": 2.67, + "learning_rate": 2.3263967351894636e-05, + "loss": 0.2095, + "step": 475800 + }, + { + "epoch": 2.68, + "learning_rate": 2.3258346121632183e-05, + "loss": 0.2071, + "step": 475900 + }, + { + "epoch": 2.68, + "learning_rate": 2.3252724891369726e-05, + "loss": 0.2064, + "step": 476000 + }, + { + "epoch": 2.68, + "learning_rate": 2.3247103661107272e-05, + "loss": 0.2071, + "step": 476100 + }, + { + "epoch": 2.68, + "learning_rate": 2.3241482430844815e-05, + "loss": 0.2062, + "step": 476200 + }, + { + "epoch": 2.68, + "learning_rate": 2.323586120058236e-05, + "loss": 0.2035, + "step": 476300 + }, + { + "epoch": 2.68, + "learning_rate": 2.3230239970319907e-05, + "loss": 0.2034, + "step": 476400 + }, + { + "epoch": 2.68, + "learning_rate": 2.322461874005745e-05, + "loss": 0.2067, + "step": 476500 + }, + { + "epoch": 2.68, + "learning_rate": 2.3218997509794996e-05, + "loss": 0.2061, + "step": 476600 + }, + { + "epoch": 2.68, + "learning_rate": 2.321337627953254e-05, + "loss": 0.2053, + "step": 476700 + }, + { + "epoch": 2.68, + "learning_rate": 2.3207755049270086e-05, + "loss": 0.2069, + "step": 476800 + }, + { + "epoch": 2.68, + "learning_rate": 2.320213381900763e-05, + "loss": 0.2067, + "step": 476900 + }, + { + "epoch": 2.68, + "learning_rate": 2.319651258874517e-05, + "loss": 0.1967, + "step": 477000 + }, + { + "epoch": 2.68, + "learning_rate": 2.3190891358482718e-05, + "loss": 0.2092, + "step": 477100 + }, + { + "epoch": 2.68, + "learning_rate": 2.3185270128220264e-05, + "loss": 0.2101, + "step": 477200 + }, + { + "epoch": 2.68, + "learning_rate": 2.317964889795781e-05, + "loss": 0.2075, + "step": 477300 + }, + { + "epoch": 2.68, + "learning_rate": 2.3174027667695353e-05, + "loss": 0.2093, + "step": 477400 + }, + { + "epoch": 2.68, + "learning_rate": 2.3168406437432896e-05, + "loss": 0.2051, + "step": 477500 + }, + { + "epoch": 2.68, + "learning_rate": 2.3162785207170442e-05, + "loss": 0.204, + "step": 477600 + }, + { + "epoch": 2.69, + "learning_rate": 2.3157163976907985e-05, + "loss": 0.212, + "step": 477700 + }, + { + "epoch": 2.69, + "learning_rate": 2.315154274664553e-05, + "loss": 0.2035, + "step": 477800 + }, + { + "epoch": 2.69, + "learning_rate": 2.3145921516383078e-05, + "loss": 0.2038, + "step": 477900 + }, + { + "epoch": 2.69, + "learning_rate": 2.3140300286120624e-05, + "loss": 0.2054, + "step": 478000 + }, + { + "epoch": 2.69, + "learning_rate": 2.3134679055858167e-05, + "loss": 0.2036, + "step": 478100 + }, + { + "epoch": 2.69, + "learning_rate": 2.312905782559571e-05, + "loss": 0.2072, + "step": 478200 + }, + { + "epoch": 2.69, + "learning_rate": 2.3123436595333256e-05, + "loss": 0.2052, + "step": 478300 + }, + { + "epoch": 2.69, + "learning_rate": 2.31178153650708e-05, + "loss": 0.2068, + "step": 478400 + }, + { + "epoch": 2.69, + "learning_rate": 2.3112194134808345e-05, + "loss": 0.2067, + "step": 478500 + }, + { + "epoch": 2.69, + "learning_rate": 2.3106572904545888e-05, + "loss": 0.2054, + "step": 478600 + }, + { + "epoch": 2.69, + "learning_rate": 2.310100788658606e-05, + "loss": 0.2028, + "step": 478700 + }, + { + "epoch": 2.69, + "learning_rate": 2.3095386656323602e-05, + "loss": 0.2025, + "step": 478800 + }, + { + "epoch": 2.69, + "learning_rate": 2.3089765426061148e-05, + "loss": 0.2107, + "step": 478900 + }, + { + "epoch": 2.69, + "learning_rate": 2.3084144195798695e-05, + "loss": 0.2016, + "step": 479000 + }, + { + "epoch": 2.69, + "learning_rate": 2.307852296553624e-05, + "loss": 0.2078, + "step": 479100 + }, + { + "epoch": 2.69, + "learning_rate": 2.3072901735273784e-05, + "loss": 0.206, + "step": 479200 + }, + { + "epoch": 2.69, + "learning_rate": 2.3067280505011327e-05, + "loss": 0.2055, + "step": 479300 + }, + { + "epoch": 2.69, + "learning_rate": 2.3061659274748873e-05, + "loss": 0.203, + "step": 479400 + }, + { + "epoch": 2.7, + "learning_rate": 2.3056094256789044e-05, + "loss": 0.202, + "step": 479500 + }, + { + "epoch": 2.7, + "learning_rate": 2.3050473026526587e-05, + "loss": 0.2173, + "step": 479600 + }, + { + "epoch": 2.7, + "learning_rate": 2.304485179626413e-05, + "loss": 0.2029, + "step": 479700 + }, + { + "epoch": 2.7, + "learning_rate": 2.3039230566001676e-05, + "loss": 0.2085, + "step": 479800 + }, + { + "epoch": 2.7, + "learning_rate": 2.303360933573922e-05, + "loss": 0.2117, + "step": 479900 + }, + { + "epoch": 2.7, + "learning_rate": 2.3027988105476765e-05, + "loss": 0.2052, + "step": 480000 + }, + { + "epoch": 2.7, + "learning_rate": 2.302236687521431e-05, + "loss": 0.2033, + "step": 480100 + }, + { + "epoch": 2.7, + "learning_rate": 2.3016745644951858e-05, + "loss": 0.204, + "step": 480200 + }, + { + "epoch": 2.7, + "learning_rate": 2.30111244146894e-05, + "loss": 0.2052, + "step": 480300 + }, + { + "epoch": 2.7, + "learning_rate": 2.3005503184426943e-05, + "loss": 0.2061, + "step": 480400 + }, + { + "epoch": 2.7, + "learning_rate": 2.299988195416449e-05, + "loss": 0.2112, + "step": 480500 + }, + { + "epoch": 2.7, + "learning_rate": 2.2994260723902033e-05, + "loss": 0.2012, + "step": 480600 + }, + { + "epoch": 2.7, + "learning_rate": 2.298863949363958e-05, + "loss": 0.2067, + "step": 480700 + }, + { + "epoch": 2.7, + "learning_rate": 2.2983018263377122e-05, + "loss": 0.205, + "step": 480800 + }, + { + "epoch": 2.7, + "learning_rate": 2.297739703311467e-05, + "loss": 0.2133, + "step": 480900 + }, + { + "epoch": 2.7, + "learning_rate": 2.2971775802852214e-05, + "loss": 0.2094, + "step": 481000 + }, + { + "epoch": 2.7, + "learning_rate": 2.2966154572589757e-05, + "loss": 0.2109, + "step": 481100 + }, + { + "epoch": 2.7, + "learning_rate": 2.2960533342327304e-05, + "loss": 0.2094, + "step": 481200 + }, + { + "epoch": 2.71, + "learning_rate": 2.2954912112064846e-05, + "loss": 0.2169, + "step": 481300 + }, + { + "epoch": 2.71, + "learning_rate": 2.2949290881802393e-05, + "loss": 0.2003, + "step": 481400 + }, + { + "epoch": 2.71, + "learning_rate": 2.2943669651539936e-05, + "loss": 0.2057, + "step": 481500 + }, + { + "epoch": 2.71, + "learning_rate": 2.2938048421277482e-05, + "loss": 0.2028, + "step": 481600 + }, + { + "epoch": 2.71, + "learning_rate": 2.2932427191015028e-05, + "loss": 0.2058, + "step": 481700 + }, + { + "epoch": 2.71, + "learning_rate": 2.292680596075257e-05, + "loss": 0.2095, + "step": 481800 + }, + { + "epoch": 2.71, + "learning_rate": 2.2921184730490117e-05, + "loss": 0.209, + "step": 481900 + }, + { + "epoch": 2.71, + "learning_rate": 2.291556350022766e-05, + "loss": 0.2081, + "step": 482000 + }, + { + "epoch": 2.71, + "learning_rate": 2.2909942269965206e-05, + "loss": 0.2065, + "step": 482100 + }, + { + "epoch": 2.71, + "learning_rate": 2.290432103970275e-05, + "loss": 0.2059, + "step": 482200 + }, + { + "epoch": 2.71, + "learning_rate": 2.2898699809440296e-05, + "loss": 0.2069, + "step": 482300 + }, + { + "epoch": 2.71, + "learning_rate": 2.2893134791480463e-05, + "loss": 0.2135, + "step": 482400 + }, + { + "epoch": 2.71, + "learning_rate": 2.288751356121801e-05, + "loss": 0.2028, + "step": 482500 + }, + { + "epoch": 2.71, + "learning_rate": 2.2881892330955552e-05, + "loss": 0.202, + "step": 482600 + }, + { + "epoch": 2.71, + "learning_rate": 2.28762711006931e-05, + "loss": 0.2042, + "step": 482700 + }, + { + "epoch": 2.71, + "learning_rate": 2.2870649870430645e-05, + "loss": 0.2075, + "step": 482800 + }, + { + "epoch": 2.71, + "learning_rate": 2.2865028640168188e-05, + "loss": 0.2028, + "step": 482900 + }, + { + "epoch": 2.72, + "learning_rate": 2.2859407409905734e-05, + "loss": 0.1945, + "step": 483000 + }, + { + "epoch": 2.72, + "learning_rate": 2.2853786179643277e-05, + "loss": 0.206, + "step": 483100 + }, + { + "epoch": 2.72, + "learning_rate": 2.2848164949380823e-05, + "loss": 0.2019, + "step": 483200 + }, + { + "epoch": 2.72, + "learning_rate": 2.2842543719118366e-05, + "loss": 0.2051, + "step": 483300 + }, + { + "epoch": 2.72, + "learning_rate": 2.283692248885591e-05, + "loss": 0.2051, + "step": 483400 + }, + { + "epoch": 2.72, + "learning_rate": 2.283130125859346e-05, + "loss": 0.2089, + "step": 483500 + }, + { + "epoch": 2.72, + "learning_rate": 2.2825680028331e-05, + "loss": 0.1984, + "step": 483600 + }, + { + "epoch": 2.72, + "learning_rate": 2.2820058798068548e-05, + "loss": 0.2069, + "step": 483700 + }, + { + "epoch": 2.72, + "learning_rate": 2.281443756780609e-05, + "loss": 0.2077, + "step": 483800 + }, + { + "epoch": 2.72, + "learning_rate": 2.2808816337543637e-05, + "loss": 0.205, + "step": 483900 + }, + { + "epoch": 2.72, + "learning_rate": 2.280319510728118e-05, + "loss": 0.2113, + "step": 484000 + }, + { + "epoch": 2.72, + "learning_rate": 2.2797573877018723e-05, + "loss": 0.2084, + "step": 484100 + }, + { + "epoch": 2.72, + "learning_rate": 2.279195264675627e-05, + "loss": 0.2062, + "step": 484200 + }, + { + "epoch": 2.72, + "learning_rate": 2.2786331416493815e-05, + "loss": 0.2013, + "step": 484300 + }, + { + "epoch": 2.72, + "learning_rate": 2.2780710186231362e-05, + "loss": 0.2099, + "step": 484400 + }, + { + "epoch": 2.72, + "learning_rate": 2.2775088955968905e-05, + "loss": 0.2053, + "step": 484500 + }, + { + "epoch": 2.72, + "learning_rate": 2.276946772570645e-05, + "loss": 0.2104, + "step": 484600 + }, + { + "epoch": 2.72, + "learning_rate": 2.2763846495443994e-05, + "loss": 0.2068, + "step": 484700 + }, + { + "epoch": 2.73, + "learning_rate": 2.2758225265181537e-05, + "loss": 0.204, + "step": 484800 + }, + { + "epoch": 2.73, + "learning_rate": 2.2752604034919083e-05, + "loss": 0.2085, + "step": 484900 + }, + { + "epoch": 2.73, + "learning_rate": 2.2746982804656626e-05, + "loss": 0.2095, + "step": 485000 + }, + { + "epoch": 2.73, + "learning_rate": 2.2741361574394175e-05, + "loss": 0.2109, + "step": 485100 + }, + { + "epoch": 2.73, + "learning_rate": 2.273574034413172e-05, + "loss": 0.2012, + "step": 485200 + }, + { + "epoch": 2.73, + "learning_rate": 2.2730119113869265e-05, + "loss": 0.2071, + "step": 485300 + }, + { + "epoch": 2.73, + "learning_rate": 2.2724497883606808e-05, + "loss": 0.2055, + "step": 485400 + }, + { + "epoch": 2.73, + "learning_rate": 2.271887665334435e-05, + "loss": 0.2143, + "step": 485500 + }, + { + "epoch": 2.73, + "learning_rate": 2.2713255423081897e-05, + "loss": 0.2094, + "step": 485600 + }, + { + "epoch": 2.73, + "learning_rate": 2.270763419281944e-05, + "loss": 0.2008, + "step": 485700 + }, + { + "epoch": 2.73, + "learning_rate": 2.2702012962556986e-05, + "loss": 0.2109, + "step": 485800 + }, + { + "epoch": 2.73, + "learning_rate": 2.2696391732294532e-05, + "loss": 0.207, + "step": 485900 + }, + { + "epoch": 2.73, + "learning_rate": 2.269077050203208e-05, + "loss": 0.1964, + "step": 486000 + }, + { + "epoch": 2.73, + "learning_rate": 2.268514927176962e-05, + "loss": 0.2095, + "step": 486100 + }, + { + "epoch": 2.73, + "learning_rate": 2.2679528041507164e-05, + "loss": 0.2025, + "step": 486200 + }, + { + "epoch": 2.73, + "learning_rate": 2.267390681124471e-05, + "loss": 0.2005, + "step": 486300 + }, + { + "epoch": 2.73, + "learning_rate": 2.2668285580982253e-05, + "loss": 0.2051, + "step": 486400 + }, + { + "epoch": 2.73, + "learning_rate": 2.26626643507198e-05, + "loss": 0.2062, + "step": 486500 + }, + { + "epoch": 2.74, + "learning_rate": 2.2657043120457343e-05, + "loss": 0.2167, + "step": 486600 + }, + { + "epoch": 2.74, + "learning_rate": 2.2651478102497514e-05, + "loss": 0.2092, + "step": 486700 + }, + { + "epoch": 2.74, + "learning_rate": 2.2645856872235056e-05, + "loss": 0.204, + "step": 486800 + }, + { + "epoch": 2.74, + "learning_rate": 2.2640235641972603e-05, + "loss": 0.2103, + "step": 486900 + }, + { + "epoch": 2.74, + "learning_rate": 2.263461441171015e-05, + "loss": 0.2073, + "step": 487000 + }, + { + "epoch": 2.74, + "learning_rate": 2.2628993181447695e-05, + "loss": 0.2079, + "step": 487100 + }, + { + "epoch": 2.74, + "learning_rate": 2.2623371951185238e-05, + "loss": 0.2078, + "step": 487200 + }, + { + "epoch": 2.74, + "learning_rate": 2.261775072092278e-05, + "loss": 0.2077, + "step": 487300 + }, + { + "epoch": 2.74, + "learning_rate": 2.2612129490660327e-05, + "loss": 0.2096, + "step": 487400 + }, + { + "epoch": 2.74, + "learning_rate": 2.260650826039787e-05, + "loss": 0.2042, + "step": 487500 + }, + { + "epoch": 2.74, + "learning_rate": 2.2600887030135416e-05, + "loss": 0.2048, + "step": 487600 + }, + { + "epoch": 2.74, + "learning_rate": 2.2595265799872963e-05, + "loss": 0.2049, + "step": 487700 + }, + { + "epoch": 2.74, + "learning_rate": 2.2589644569610506e-05, + "loss": 0.2052, + "step": 487800 + }, + { + "epoch": 2.74, + "learning_rate": 2.2584023339348052e-05, + "loss": 0.2076, + "step": 487900 + }, + { + "epoch": 2.74, + "learning_rate": 2.2578402109085595e-05, + "loss": 0.2045, + "step": 488000 + }, + { + "epoch": 2.74, + "learning_rate": 2.257278087882314e-05, + "loss": 0.1991, + "step": 488100 + }, + { + "epoch": 2.74, + "learning_rate": 2.2567159648560684e-05, + "loss": 0.2075, + "step": 488200 + }, + { + "epoch": 2.74, + "learning_rate": 2.256153841829823e-05, + "loss": 0.2017, + "step": 488300 + }, + { + "epoch": 2.75, + "learning_rate": 2.2555917188035773e-05, + "loss": 0.2054, + "step": 488400 + }, + { + "epoch": 2.75, + "learning_rate": 2.255029595777332e-05, + "loss": 0.2032, + "step": 488500 + }, + { + "epoch": 2.75, + "learning_rate": 2.2544674727510866e-05, + "loss": 0.2093, + "step": 488600 + }, + { + "epoch": 2.75, + "learning_rate": 2.253905349724841e-05, + "loss": 0.2054, + "step": 488700 + }, + { + "epoch": 2.75, + "learning_rate": 2.2533432266985955e-05, + "loss": 0.2123, + "step": 488800 + }, + { + "epoch": 2.75, + "learning_rate": 2.2527867249026123e-05, + "loss": 0.1991, + "step": 488900 + }, + { + "epoch": 2.75, + "learning_rate": 2.252224601876367e-05, + "loss": 0.2079, + "step": 489000 + }, + { + "epoch": 2.75, + "learning_rate": 2.251662478850121e-05, + "loss": 0.2086, + "step": 489100 + }, + { + "epoch": 2.75, + "learning_rate": 2.2511003558238758e-05, + "loss": 0.1983, + "step": 489200 + }, + { + "epoch": 2.75, + "learning_rate": 2.25053823279763e-05, + "loss": 0.2134, + "step": 489300 + }, + { + "epoch": 2.75, + "learning_rate": 2.2499761097713847e-05, + "loss": 0.2066, + "step": 489400 + }, + { + "epoch": 2.75, + "learning_rate": 2.249413986745139e-05, + "loss": 0.2085, + "step": 489500 + }, + { + "epoch": 2.75, + "learning_rate": 2.2488518637188936e-05, + "loss": 0.2091, + "step": 489600 + }, + { + "epoch": 2.75, + "learning_rate": 2.2482897406926483e-05, + "loss": 0.2054, + "step": 489700 + }, + { + "epoch": 2.75, + "learning_rate": 2.247733238896665e-05, + "loss": 0.2028, + "step": 489800 + }, + { + "epoch": 2.75, + "learning_rate": 2.2471711158704196e-05, + "loss": 0.2081, + "step": 489900 + }, + { + "epoch": 2.75, + "learning_rate": 2.246608992844174e-05, + "loss": 0.2101, + "step": 490000 + }, + { + "epoch": 2.75, + "learning_rate": 2.2460468698179286e-05, + "loss": 0.2138, + "step": 490100 + }, + { + "epoch": 2.76, + "learning_rate": 2.245484746791683e-05, + "loss": 0.2037, + "step": 490200 + }, + { + "epoch": 2.76, + "learning_rate": 2.2449226237654375e-05, + "loss": 0.2038, + "step": 490300 + }, + { + "epoch": 2.76, + "learning_rate": 2.2443605007391918e-05, + "loss": 0.2017, + "step": 490400 + }, + { + "epoch": 2.76, + "learning_rate": 2.2437983777129464e-05, + "loss": 0.2006, + "step": 490500 + }, + { + "epoch": 2.76, + "learning_rate": 2.2432362546867007e-05, + "loss": 0.2106, + "step": 490600 + }, + { + "epoch": 2.76, + "learning_rate": 2.2426741316604553e-05, + "loss": 0.2137, + "step": 490700 + }, + { + "epoch": 2.76, + "learning_rate": 2.24211200863421e-05, + "loss": 0.2093, + "step": 490800 + }, + { + "epoch": 2.76, + "learning_rate": 2.2415498856079642e-05, + "loss": 0.1954, + "step": 490900 + }, + { + "epoch": 2.76, + "learning_rate": 2.240987762581719e-05, + "loss": 0.2112, + "step": 491000 + }, + { + "epoch": 2.76, + "learning_rate": 2.240425639555473e-05, + "loss": 0.2072, + "step": 491100 + }, + { + "epoch": 2.76, + "learning_rate": 2.2398635165292278e-05, + "loss": 0.1997, + "step": 491200 + }, + { + "epoch": 2.76, + "learning_rate": 2.239301393502982e-05, + "loss": 0.2118, + "step": 491300 + }, + { + "epoch": 2.76, + "learning_rate": 2.2387392704767367e-05, + "loss": 0.2056, + "step": 491400 + }, + { + "epoch": 2.76, + "learning_rate": 2.2381771474504913e-05, + "loss": 0.2086, + "step": 491500 + }, + { + "epoch": 2.76, + "learning_rate": 2.2376150244242456e-05, + "loss": 0.2024, + "step": 491600 + }, + { + "epoch": 2.76, + "learning_rate": 2.2370529013980002e-05, + "loss": 0.203, + "step": 491700 + }, + { + "epoch": 2.76, + "learning_rate": 2.2364907783717545e-05, + "loss": 0.2028, + "step": 491800 + }, + { + "epoch": 2.77, + "learning_rate": 2.235928655345509e-05, + "loss": 0.2078, + "step": 491900 + }, + { + "epoch": 2.77, + "learning_rate": 2.2353665323192634e-05, + "loss": 0.2141, + "step": 492000 + }, + { + "epoch": 2.77, + "learning_rate": 2.2348044092930177e-05, + "loss": 0.2018, + "step": 492100 + }, + { + "epoch": 2.77, + "learning_rate": 2.2342422862667724e-05, + "loss": 0.2082, + "step": 492200 + }, + { + "epoch": 2.77, + "learning_rate": 2.233680163240527e-05, + "loss": 0.2064, + "step": 492300 + }, + { + "epoch": 2.77, + "learning_rate": 2.2331180402142816e-05, + "loss": 0.2001, + "step": 492400 + }, + { + "epoch": 2.77, + "learning_rate": 2.232555917188036e-05, + "loss": 0.2059, + "step": 492500 + }, + { + "epoch": 2.77, + "learning_rate": 2.2319937941617902e-05, + "loss": 0.1987, + "step": 492600 + }, + { + "epoch": 2.77, + "learning_rate": 2.2314316711355448e-05, + "loss": 0.2099, + "step": 492700 + }, + { + "epoch": 2.77, + "learning_rate": 2.230869548109299e-05, + "loss": 0.2013, + "step": 492800 + }, + { + "epoch": 2.77, + "learning_rate": 2.2303074250830537e-05, + "loss": 0.201, + "step": 492900 + }, + { + "epoch": 2.77, + "learning_rate": 2.2297453020568084e-05, + "loss": 0.2048, + "step": 493000 + }, + { + "epoch": 2.77, + "learning_rate": 2.229183179030563e-05, + "loss": 0.2014, + "step": 493100 + }, + { + "epoch": 2.77, + "learning_rate": 2.2286210560043173e-05, + "loss": 0.2027, + "step": 493200 + }, + { + "epoch": 2.77, + "learning_rate": 2.2280589329780716e-05, + "loss": 0.209, + "step": 493300 + }, + { + "epoch": 2.77, + "learning_rate": 2.2274968099518262e-05, + "loss": 0.2001, + "step": 493400 + }, + { + "epoch": 2.77, + "learning_rate": 2.2269346869255805e-05, + "loss": 0.21, + "step": 493500 + }, + { + "epoch": 2.77, + "learning_rate": 2.226372563899335e-05, + "loss": 0.2032, + "step": 493600 + }, + { + "epoch": 2.78, + "learning_rate": 2.2258104408730894e-05, + "loss": 0.2077, + "step": 493700 + }, + { + "epoch": 2.78, + "learning_rate": 2.2252539390771065e-05, + "loss": 0.2036, + "step": 493800 + }, + { + "epoch": 2.78, + "learning_rate": 2.2246918160508608e-05, + "loss": 0.1981, + "step": 493900 + }, + { + "epoch": 2.78, + "learning_rate": 2.2241296930246154e-05, + "loss": 0.2045, + "step": 494000 + }, + { + "epoch": 2.78, + "learning_rate": 2.22356756999837e-05, + "loss": 0.209, + "step": 494100 + }, + { + "epoch": 2.78, + "learning_rate": 2.2230054469721247e-05, + "loss": 0.2046, + "step": 494200 + }, + { + "epoch": 2.78, + "learning_rate": 2.222443323945879e-05, + "loss": 0.2052, + "step": 494300 + }, + { + "epoch": 2.78, + "learning_rate": 2.2218812009196333e-05, + "loss": 0.2017, + "step": 494400 + }, + { + "epoch": 2.78, + "learning_rate": 2.221319077893388e-05, + "loss": 0.2045, + "step": 494500 + }, + { + "epoch": 2.78, + "learning_rate": 2.220756954867142e-05, + "loss": 0.2056, + "step": 494600 + }, + { + "epoch": 2.78, + "learning_rate": 2.2201948318408968e-05, + "loss": 0.2159, + "step": 494700 + }, + { + "epoch": 2.78, + "learning_rate": 2.219632708814651e-05, + "loss": 0.2042, + "step": 494800 + }, + { + "epoch": 2.78, + "learning_rate": 2.2190705857884057e-05, + "loss": 0.1989, + "step": 494900 + }, + { + "epoch": 2.78, + "learning_rate": 2.2185140839924225e-05, + "loss": 0.2105, + "step": 495000 + }, + { + "epoch": 2.78, + "learning_rate": 2.217951960966177e-05, + "loss": 0.1988, + "step": 495100 + }, + { + "epoch": 2.78, + "learning_rate": 2.2173898379399317e-05, + "loss": 0.2016, + "step": 495200 + }, + { + "epoch": 2.78, + "learning_rate": 2.2168277149136864e-05, + "loss": 0.2049, + "step": 495300 + }, + { + "epoch": 2.78, + "learning_rate": 2.2162655918874406e-05, + "loss": 0.2037, + "step": 495400 + }, + { + "epoch": 2.79, + "learning_rate": 2.215703468861195e-05, + "loss": 0.2057, + "step": 495500 + }, + { + "epoch": 2.79, + "learning_rate": 2.215146967065212e-05, + "loss": 0.2002, + "step": 495600 + }, + { + "epoch": 2.79, + "learning_rate": 2.2145848440389667e-05, + "loss": 0.2119, + "step": 495700 + }, + { + "epoch": 2.79, + "learning_rate": 2.214022721012721e-05, + "loss": 0.2008, + "step": 495800 + }, + { + "epoch": 2.79, + "learning_rate": 2.2134605979864752e-05, + "loss": 0.2026, + "step": 495900 + }, + { + "epoch": 2.79, + "learning_rate": 2.21289847496023e-05, + "loss": 0.2062, + "step": 496000 + }, + { + "epoch": 2.79, + "learning_rate": 2.212336351933984e-05, + "loss": 0.2051, + "step": 496100 + }, + { + "epoch": 2.79, + "learning_rate": 2.2117742289077388e-05, + "loss": 0.2067, + "step": 496200 + }, + { + "epoch": 2.79, + "learning_rate": 2.2112121058814934e-05, + "loss": 0.2001, + "step": 496300 + }, + { + "epoch": 2.79, + "learning_rate": 2.210649982855248e-05, + "loss": 0.2119, + "step": 496400 + }, + { + "epoch": 2.79, + "learning_rate": 2.2100878598290023e-05, + "loss": 0.2066, + "step": 496500 + }, + { + "epoch": 2.79, + "learning_rate": 2.2095257368027566e-05, + "loss": 0.2061, + "step": 496600 + }, + { + "epoch": 2.79, + "learning_rate": 2.2089636137765113e-05, + "loss": 0.2011, + "step": 496700 + }, + { + "epoch": 2.79, + "learning_rate": 2.2084014907502655e-05, + "loss": 0.2004, + "step": 496800 + }, + { + "epoch": 2.79, + "learning_rate": 2.20783936772402e-05, + "loss": 0.208, + "step": 496900 + }, + { + "epoch": 2.79, + "learning_rate": 2.2072772446977748e-05, + "loss": 0.2076, + "step": 497000 + }, + { + "epoch": 2.79, + "learning_rate": 2.2067151216715294e-05, + "loss": 0.2076, + "step": 497100 + }, + { + "epoch": 2.79, + "learning_rate": 2.2061529986452837e-05, + "loss": 0.203, + "step": 497200 + }, + { + "epoch": 2.8, + "learning_rate": 2.205590875619038e-05, + "loss": 0.2031, + "step": 497300 + }, + { + "epoch": 2.8, + "learning_rate": 2.2050287525927926e-05, + "loss": 0.2032, + "step": 497400 + }, + { + "epoch": 2.8, + "learning_rate": 2.204466629566547e-05, + "loss": 0.2017, + "step": 497500 + }, + { + "epoch": 2.8, + "learning_rate": 2.2039045065403015e-05, + "loss": 0.2021, + "step": 497600 + }, + { + "epoch": 2.8, + "learning_rate": 2.203342383514056e-05, + "loss": 0.2015, + "step": 497700 + }, + { + "epoch": 2.8, + "learning_rate": 2.2027802604878105e-05, + "loss": 0.2087, + "step": 497800 + }, + { + "epoch": 2.8, + "learning_rate": 2.202218137461565e-05, + "loss": 0.2091, + "step": 497900 + }, + { + "epoch": 2.8, + "learning_rate": 2.2016560144353194e-05, + "loss": 0.2066, + "step": 498000 + }, + { + "epoch": 2.8, + "learning_rate": 2.201093891409074e-05, + "loss": 0.2054, + "step": 498100 + }, + { + "epoch": 2.8, + "learning_rate": 2.2005317683828283e-05, + "loss": 0.21, + "step": 498200 + }, + { + "epoch": 2.8, + "learning_rate": 2.199969645356583e-05, + "loss": 0.2018, + "step": 498300 + }, + { + "epoch": 2.8, + "learning_rate": 2.1994075223303372e-05, + "loss": 0.2057, + "step": 498400 + }, + { + "epoch": 2.8, + "learning_rate": 2.1988453993040915e-05, + "loss": 0.2027, + "step": 498500 + }, + { + "epoch": 2.8, + "learning_rate": 2.1982832762778465e-05, + "loss": 0.2052, + "step": 498600 + }, + { + "epoch": 2.8, + "learning_rate": 2.1977211532516008e-05, + "loss": 0.2027, + "step": 498700 + }, + { + "epoch": 2.8, + "learning_rate": 2.1971590302253554e-05, + "loss": 0.1977, + "step": 498800 + }, + { + "epoch": 2.8, + "learning_rate": 2.1965969071991097e-05, + "loss": 0.2027, + "step": 498900 + }, + { + "epoch": 2.8, + "learning_rate": 2.1960347841728643e-05, + "loss": 0.2093, + "step": 499000 + }, + { + "epoch": 2.81, + "learning_rate": 2.1954726611466186e-05, + "loss": 0.2074, + "step": 499100 + }, + { + "epoch": 2.81, + "learning_rate": 2.194910538120373e-05, + "loss": 0.2088, + "step": 499200 + }, + { + "epoch": 2.81, + "learning_rate": 2.1943484150941275e-05, + "loss": 0.2064, + "step": 499300 + }, + { + "epoch": 2.81, + "learning_rate": 2.193786292067882e-05, + "loss": 0.2043, + "step": 499400 + }, + { + "epoch": 2.81, + "learning_rate": 2.1932241690416368e-05, + "loss": 0.2033, + "step": 499500 + }, + { + "epoch": 2.81, + "learning_rate": 2.192662046015391e-05, + "loss": 0.207, + "step": 499600 + }, + { + "epoch": 2.81, + "learning_rate": 2.1920999229891457e-05, + "loss": 0.2093, + "step": 499700 + }, + { + "epoch": 2.81, + "learning_rate": 2.1915377999629e-05, + "loss": 0.2036, + "step": 499800 + }, + { + "epoch": 2.81, + "learning_rate": 2.1909756769366543e-05, + "loss": 0.2023, + "step": 499900 + }, + { + "epoch": 2.81, + "learning_rate": 2.190413553910409e-05, + "loss": 0.2078, + "step": 500000 + }, + { + "epoch": 2.81, + "learning_rate": 2.1898514308841632e-05, + "loss": 0.2007, + "step": 500100 + }, + { + "epoch": 2.81, + "learning_rate": 2.189289307857918e-05, + "loss": 0.2031, + "step": 500200 + }, + { + "epoch": 2.81, + "learning_rate": 2.1887271848316724e-05, + "loss": 0.204, + "step": 500300 + }, + { + "epoch": 2.81, + "learning_rate": 2.188165061805427e-05, + "loss": 0.2007, + "step": 500400 + }, + { + "epoch": 2.81, + "learning_rate": 2.1876029387791813e-05, + "loss": 0.2034, + "step": 500500 + }, + { + "epoch": 2.81, + "learning_rate": 2.1870408157529356e-05, + "loss": 0.2098, + "step": 500600 + }, + { + "epoch": 2.81, + "learning_rate": 2.1864786927266903e-05, + "loss": 0.2037, + "step": 500700 + }, + { + "epoch": 2.82, + "learning_rate": 2.1859221909307074e-05, + "loss": 0.2022, + "step": 500800 + }, + { + "epoch": 2.82, + "learning_rate": 2.1853600679044617e-05, + "loss": 0.2039, + "step": 500900 + }, + { + "epoch": 2.82, + "learning_rate": 2.184797944878216e-05, + "loss": 0.2087, + "step": 501000 + }, + { + "epoch": 2.82, + "learning_rate": 2.1842358218519706e-05, + "loss": 0.1974, + "step": 501100 + }, + { + "epoch": 2.82, + "learning_rate": 2.1836736988257252e-05, + "loss": 0.2075, + "step": 501200 + }, + { + "epoch": 2.82, + "learning_rate": 2.1831115757994798e-05, + "loss": 0.1997, + "step": 501300 + }, + { + "epoch": 2.82, + "learning_rate": 2.182549452773234e-05, + "loss": 0.2109, + "step": 501400 + }, + { + "epoch": 2.82, + "learning_rate": 2.1819873297469887e-05, + "loss": 0.2018, + "step": 501500 + }, + { + "epoch": 2.82, + "learning_rate": 2.181425206720743e-05, + "loss": 0.2026, + "step": 501600 + }, + { + "epoch": 2.82, + "learning_rate": 2.1808630836944973e-05, + "loss": 0.2064, + "step": 501700 + }, + { + "epoch": 2.82, + "learning_rate": 2.180300960668252e-05, + "loss": 0.2027, + "step": 501800 + }, + { + "epoch": 2.82, + "learning_rate": 2.1797388376420062e-05, + "loss": 0.2055, + "step": 501900 + }, + { + "epoch": 2.82, + "learning_rate": 2.179176714615761e-05, + "loss": 0.2056, + "step": 502000 + }, + { + "epoch": 2.82, + "learning_rate": 2.1786145915895155e-05, + "loss": 0.2048, + "step": 502100 + }, + { + "epoch": 2.82, + "learning_rate": 2.17805246856327e-05, + "loss": 0.2035, + "step": 502200 + }, + { + "epoch": 2.82, + "learning_rate": 2.1774903455370244e-05, + "loss": 0.2083, + "step": 502300 + }, + { + "epoch": 2.82, + "learning_rate": 2.1769282225107787e-05, + "loss": 0.2018, + "step": 502400 + }, + { + "epoch": 2.82, + "learning_rate": 2.1763660994845333e-05, + "loss": 0.1995, + "step": 502500 + }, + { + "epoch": 2.83, + "learning_rate": 2.1758039764582876e-05, + "loss": 0.21, + "step": 502600 + }, + { + "epoch": 2.83, + "learning_rate": 2.1752418534320422e-05, + "loss": 0.2053, + "step": 502700 + }, + { + "epoch": 2.83, + "learning_rate": 2.174685351636059e-05, + "loss": 0.2108, + "step": 502800 + }, + { + "epoch": 2.83, + "learning_rate": 2.1741232286098136e-05, + "loss": 0.2015, + "step": 502900 + }, + { + "epoch": 2.83, + "learning_rate": 2.173561105583568e-05, + "loss": 0.2063, + "step": 503000 + }, + { + "epoch": 2.83, + "learning_rate": 2.173004603787585e-05, + "loss": 0.1988, + "step": 503100 + }, + { + "epoch": 2.83, + "learning_rate": 2.1724424807613393e-05, + "loss": 0.2042, + "step": 503200 + }, + { + "epoch": 2.83, + "learning_rate": 2.171880357735094e-05, + "loss": 0.2064, + "step": 503300 + }, + { + "epoch": 2.83, + "learning_rate": 2.1713182347088486e-05, + "loss": 0.2094, + "step": 503400 + }, + { + "epoch": 2.83, + "learning_rate": 2.1707561116826032e-05, + "loss": 0.2062, + "step": 503500 + }, + { + "epoch": 2.83, + "learning_rate": 2.1701939886563575e-05, + "loss": 0.2086, + "step": 503600 + }, + { + "epoch": 2.83, + "learning_rate": 2.1696318656301118e-05, + "loss": 0.2084, + "step": 503700 + }, + { + "epoch": 2.83, + "learning_rate": 2.1690697426038664e-05, + "loss": 0.199, + "step": 503800 + }, + { + "epoch": 2.83, + "learning_rate": 2.1685076195776207e-05, + "loss": 0.2011, + "step": 503900 + }, + { + "epoch": 2.83, + "learning_rate": 2.1679454965513753e-05, + "loss": 0.1982, + "step": 504000 + }, + { + "epoch": 2.83, + "learning_rate": 2.1673833735251296e-05, + "loss": 0.2018, + "step": 504100 + }, + { + "epoch": 2.83, + "learning_rate": 2.1668212504988846e-05, + "loss": 0.2116, + "step": 504200 + }, + { + "epoch": 2.83, + "learning_rate": 2.166259127472639e-05, + "loss": 0.2065, + "step": 504300 + }, + { + "epoch": 2.84, + "learning_rate": 2.165697004446393e-05, + "loss": 0.2061, + "step": 504400 + }, + { + "epoch": 2.84, + "learning_rate": 2.1651348814201478e-05, + "loss": 0.2046, + "step": 504500 + }, + { + "epoch": 2.84, + "learning_rate": 2.164572758393902e-05, + "loss": 0.2061, + "step": 504600 + }, + { + "epoch": 2.84, + "learning_rate": 2.1640106353676567e-05, + "loss": 0.2056, + "step": 504700 + }, + { + "epoch": 2.84, + "learning_rate": 2.163448512341411e-05, + "loss": 0.2113, + "step": 504800 + }, + { + "epoch": 2.84, + "learning_rate": 2.1628863893151656e-05, + "loss": 0.2002, + "step": 504900 + }, + { + "epoch": 2.84, + "learning_rate": 2.1623242662889202e-05, + "loss": 0.2009, + "step": 505000 + }, + { + "epoch": 2.84, + "learning_rate": 2.1617621432626745e-05, + "loss": 0.21, + "step": 505100 + }, + { + "epoch": 2.84, + "learning_rate": 2.161200020236429e-05, + "loss": 0.2026, + "step": 505200 + }, + { + "epoch": 2.84, + "learning_rate": 2.1606378972101834e-05, + "loss": 0.2065, + "step": 505300 + }, + { + "epoch": 2.84, + "learning_rate": 2.160075774183938e-05, + "loss": 0.2052, + "step": 505400 + }, + { + "epoch": 2.84, + "learning_rate": 2.1595136511576924e-05, + "loss": 0.1983, + "step": 505500 + }, + { + "epoch": 2.84, + "learning_rate": 2.158951528131447e-05, + "loss": 0.2045, + "step": 505600 + }, + { + "epoch": 2.84, + "learning_rate": 2.1583894051052013e-05, + "loss": 0.2018, + "step": 505700 + }, + { + "epoch": 2.84, + "learning_rate": 2.157827282078956e-05, + "loss": 0.2027, + "step": 505800 + }, + { + "epoch": 2.84, + "learning_rate": 2.1572651590527105e-05, + "loss": 0.2045, + "step": 505900 + }, + { + "epoch": 2.84, + "learning_rate": 2.1567030360264648e-05, + "loss": 0.201, + "step": 506000 + }, + { + "epoch": 2.84, + "learning_rate": 2.1561409130002194e-05, + "loss": 0.2049, + "step": 506100 + }, + { + "epoch": 2.85, + "learning_rate": 2.1555787899739737e-05, + "loss": 0.207, + "step": 506200 + }, + { + "epoch": 2.85, + "learning_rate": 2.1550166669477284e-05, + "loss": 0.1992, + "step": 506300 + }, + { + "epoch": 2.85, + "learning_rate": 2.1544545439214827e-05, + "loss": 0.1996, + "step": 506400 + }, + { + "epoch": 2.85, + "learning_rate": 2.1538924208952373e-05, + "loss": 0.2072, + "step": 506500 + }, + { + "epoch": 2.85, + "learning_rate": 2.153330297868992e-05, + "loss": 0.205, + "step": 506600 + }, + { + "epoch": 2.85, + "learning_rate": 2.1527681748427462e-05, + "loss": 0.2027, + "step": 506700 + }, + { + "epoch": 2.85, + "learning_rate": 2.1522060518165008e-05, + "loss": 0.2044, + "step": 506800 + }, + { + "epoch": 2.85, + "learning_rate": 2.151643928790255e-05, + "loss": 0.2056, + "step": 506900 + }, + { + "epoch": 2.85, + "learning_rate": 2.1510818057640097e-05, + "loss": 0.203, + "step": 507000 + }, + { + "epoch": 2.85, + "learning_rate": 2.150519682737764e-05, + "loss": 0.2025, + "step": 507100 + }, + { + "epoch": 2.85, + "learning_rate": 2.1499575597115183e-05, + "loss": 0.2056, + "step": 507200 + }, + { + "epoch": 2.85, + "learning_rate": 2.149395436685273e-05, + "loss": 0.2034, + "step": 507300 + }, + { + "epoch": 2.85, + "learning_rate": 2.1488333136590276e-05, + "loss": 0.2046, + "step": 507400 + }, + { + "epoch": 2.85, + "learning_rate": 2.1482711906327822e-05, + "loss": 0.2024, + "step": 507500 + }, + { + "epoch": 2.85, + "learning_rate": 2.1477090676065365e-05, + "loss": 0.2096, + "step": 507600 + }, + { + "epoch": 2.85, + "learning_rate": 2.1471469445802908e-05, + "loss": 0.1999, + "step": 507700 + }, + { + "epoch": 2.85, + "learning_rate": 2.1465848215540454e-05, + "loss": 0.2016, + "step": 507800 + }, + { + "epoch": 2.86, + "learning_rate": 2.1460226985277997e-05, + "loss": 0.2057, + "step": 507900 + }, + { + "epoch": 2.86, + "learning_rate": 2.1454605755015543e-05, + "loss": 0.2018, + "step": 508000 + }, + { + "epoch": 2.86, + "learning_rate": 2.144898452475309e-05, + "loss": 0.2074, + "step": 508100 + }, + { + "epoch": 2.86, + "learning_rate": 2.1443363294490636e-05, + "loss": 0.2042, + "step": 508200 + }, + { + "epoch": 2.86, + "learning_rate": 2.143774206422818e-05, + "loss": 0.2031, + "step": 508300 + }, + { + "epoch": 2.86, + "learning_rate": 2.143212083396572e-05, + "loss": 0.203, + "step": 508400 + }, + { + "epoch": 2.86, + "learning_rate": 2.1426499603703268e-05, + "loss": 0.209, + "step": 508500 + }, + { + "epoch": 2.86, + "learning_rate": 2.142087837344081e-05, + "loss": 0.2038, + "step": 508600 + }, + { + "epoch": 2.86, + "learning_rate": 2.1415257143178357e-05, + "loss": 0.2043, + "step": 508700 + }, + { + "epoch": 2.86, + "learning_rate": 2.14096359129159e-05, + "loss": 0.2034, + "step": 508800 + }, + { + "epoch": 2.86, + "learning_rate": 2.1404014682653446e-05, + "loss": 0.2077, + "step": 508900 + }, + { + "epoch": 2.86, + "learning_rate": 2.1398393452390992e-05, + "loss": 0.2079, + "step": 509000 + }, + { + "epoch": 2.86, + "learning_rate": 2.1392772222128535e-05, + "loss": 0.2047, + "step": 509100 + }, + { + "epoch": 2.86, + "learning_rate": 2.138715099186608e-05, + "loss": 0.2027, + "step": 509200 + }, + { + "epoch": 2.86, + "learning_rate": 2.1381529761603625e-05, + "loss": 0.2001, + "step": 509300 + }, + { + "epoch": 2.86, + "learning_rate": 2.137590853134117e-05, + "loss": 0.2037, + "step": 509400 + }, + { + "epoch": 2.86, + "learning_rate": 2.1370287301078714e-05, + "loss": 0.2091, + "step": 509500 + }, + { + "epoch": 2.86, + "learning_rate": 2.1364722283118885e-05, + "loss": 0.198, + "step": 509600 + }, + { + "epoch": 2.87, + "learning_rate": 2.1359101052856428e-05, + "loss": 0.2041, + "step": 509700 + }, + { + "epoch": 2.87, + "learning_rate": 2.1353479822593974e-05, + "loss": 0.2055, + "step": 509800 + }, + { + "epoch": 2.87, + "learning_rate": 2.1347858592331517e-05, + "loss": 0.2018, + "step": 509900 + }, + { + "epoch": 2.87, + "learning_rate": 2.1342237362069063e-05, + "loss": 0.2046, + "step": 510000 + }, + { + "epoch": 2.87, + "learning_rate": 2.133661613180661e-05, + "loss": 0.2022, + "step": 510100 + }, + { + "epoch": 2.87, + "learning_rate": 2.1330994901544152e-05, + "loss": 0.2096, + "step": 510200 + }, + { + "epoch": 2.87, + "learning_rate": 2.13253736712817e-05, + "loss": 0.2055, + "step": 510300 + }, + { + "epoch": 2.87, + "learning_rate": 2.131975244101924e-05, + "loss": 0.2051, + "step": 510400 + }, + { + "epoch": 2.87, + "learning_rate": 2.1314131210756788e-05, + "loss": 0.2047, + "step": 510500 + }, + { + "epoch": 2.87, + "learning_rate": 2.130850998049433e-05, + "loss": 0.2, + "step": 510600 + }, + { + "epoch": 2.87, + "learning_rate": 2.1302888750231877e-05, + "loss": 0.2065, + "step": 510700 + }, + { + "epoch": 2.87, + "learning_rate": 2.1297267519969423e-05, + "loss": 0.2046, + "step": 510800 + }, + { + "epoch": 2.87, + "learning_rate": 2.1291646289706966e-05, + "loss": 0.2073, + "step": 510900 + }, + { + "epoch": 2.87, + "learning_rate": 2.1286025059444512e-05, + "loss": 0.2045, + "step": 511000 + }, + { + "epoch": 2.87, + "learning_rate": 2.1280403829182055e-05, + "loss": 0.206, + "step": 511100 + }, + { + "epoch": 2.87, + "learning_rate": 2.12747825989196e-05, + "loss": 0.2048, + "step": 511200 + }, + { + "epoch": 2.87, + "learning_rate": 2.1269161368657144e-05, + "loss": 0.2073, + "step": 511300 + }, + { + "epoch": 2.87, + "learning_rate": 2.126354013839469e-05, + "loss": 0.2006, + "step": 511400 + }, + { + "epoch": 2.88, + "learning_rate": 2.1257918908132233e-05, + "loss": 0.204, + "step": 511500 + }, + { + "epoch": 2.88, + "learning_rate": 2.125229767786978e-05, + "loss": 0.2063, + "step": 511600 + }, + { + "epoch": 2.88, + "learning_rate": 2.1246676447607326e-05, + "loss": 0.2074, + "step": 511700 + }, + { + "epoch": 2.88, + "learning_rate": 2.124105521734487e-05, + "loss": 0.2064, + "step": 511800 + }, + { + "epoch": 2.88, + "learning_rate": 2.1235433987082415e-05, + "loss": 0.206, + "step": 511900 + }, + { + "epoch": 2.88, + "learning_rate": 2.1229812756819958e-05, + "loss": 0.2006, + "step": 512000 + }, + { + "epoch": 2.88, + "learning_rate": 2.1224191526557504e-05, + "loss": 0.2047, + "step": 512100 + }, + { + "epoch": 2.88, + "learning_rate": 2.1218570296295047e-05, + "loss": 0.1994, + "step": 512200 + }, + { + "epoch": 2.88, + "learning_rate": 2.1212949066032594e-05, + "loss": 0.2019, + "step": 512300 + }, + { + "epoch": 2.88, + "learning_rate": 2.120732783577014e-05, + "loss": 0.21, + "step": 512400 + }, + { + "epoch": 2.88, + "learning_rate": 2.1201706605507683e-05, + "loss": 0.2048, + "step": 512500 + }, + { + "epoch": 2.88, + "learning_rate": 2.119608537524523e-05, + "loss": 0.1953, + "step": 512600 + }, + { + "epoch": 2.88, + "learning_rate": 2.1190464144982772e-05, + "loss": 0.1997, + "step": 512700 + }, + { + "epoch": 2.88, + "learning_rate": 2.1184842914720315e-05, + "loss": 0.2027, + "step": 512800 + }, + { + "epoch": 2.88, + "learning_rate": 2.117922168445786e-05, + "loss": 0.2033, + "step": 512900 + }, + { + "epoch": 2.88, + "learning_rate": 2.1173600454195404e-05, + "loss": 0.2058, + "step": 513000 + }, + { + "epoch": 2.88, + "learning_rate": 2.116797922393295e-05, + "loss": 0.2077, + "step": 513100 + }, + { + "epoch": 2.88, + "learning_rate": 2.1162357993670496e-05, + "loss": 0.2047, + "step": 513200 + }, + { + "epoch": 2.89, + "learning_rate": 2.1156736763408043e-05, + "loss": 0.1971, + "step": 513300 + }, + { + "epoch": 2.89, + "learning_rate": 2.1151115533145586e-05, + "loss": 0.1979, + "step": 513400 + }, + { + "epoch": 2.89, + "learning_rate": 2.114549430288313e-05, + "loss": 0.1977, + "step": 513500 + }, + { + "epoch": 2.89, + "learning_rate": 2.1139873072620675e-05, + "loss": 0.204, + "step": 513600 + }, + { + "epoch": 2.89, + "learning_rate": 2.1134308054660846e-05, + "loss": 0.2016, + "step": 513700 + }, + { + "epoch": 2.89, + "learning_rate": 2.112868682439839e-05, + "loss": 0.2018, + "step": 513800 + }, + { + "epoch": 2.89, + "learning_rate": 2.112306559413593e-05, + "loss": 0.2012, + "step": 513900 + }, + { + "epoch": 2.89, + "learning_rate": 2.1117444363873478e-05, + "loss": 0.2028, + "step": 514000 + }, + { + "epoch": 2.89, + "learning_rate": 2.111182313361102e-05, + "loss": 0.1987, + "step": 514100 + }, + { + "epoch": 2.89, + "learning_rate": 2.1106201903348567e-05, + "loss": 0.1963, + "step": 514200 + }, + { + "epoch": 2.89, + "learning_rate": 2.1100580673086113e-05, + "loss": 0.2026, + "step": 514300 + }, + { + "epoch": 2.89, + "learning_rate": 2.109495944282366e-05, + "loss": 0.1986, + "step": 514400 + }, + { + "epoch": 2.89, + "learning_rate": 2.1089338212561202e-05, + "loss": 0.2053, + "step": 514500 + }, + { + "epoch": 2.89, + "learning_rate": 2.1083716982298745e-05, + "loss": 0.2082, + "step": 514600 + }, + { + "epoch": 2.89, + "learning_rate": 2.107809575203629e-05, + "loss": 0.2029, + "step": 514700 + }, + { + "epoch": 2.89, + "learning_rate": 2.1072474521773835e-05, + "loss": 0.2044, + "step": 514800 + }, + { + "epoch": 2.89, + "learning_rate": 2.106685329151138e-05, + "loss": 0.2005, + "step": 514900 + }, + { + "epoch": 2.89, + "learning_rate": 2.1061232061248927e-05, + "loss": 0.1996, + "step": 515000 + }, + { + "epoch": 2.9, + "learning_rate": 2.1055610830986473e-05, + "loss": 0.2032, + "step": 515100 + }, + { + "epoch": 2.9, + "learning_rate": 2.1049989600724016e-05, + "loss": 0.2044, + "step": 515200 + }, + { + "epoch": 2.9, + "learning_rate": 2.104436837046156e-05, + "loss": 0.2081, + "step": 515300 + }, + { + "epoch": 2.9, + "learning_rate": 2.1038747140199105e-05, + "loss": 0.1985, + "step": 515400 + }, + { + "epoch": 2.9, + "learning_rate": 2.103312590993665e-05, + "loss": 0.2034, + "step": 515500 + }, + { + "epoch": 2.9, + "learning_rate": 2.1027504679674195e-05, + "loss": 0.2034, + "step": 515600 + }, + { + "epoch": 2.9, + "learning_rate": 2.1021883449411737e-05, + "loss": 0.1969, + "step": 515700 + }, + { + "epoch": 2.9, + "learning_rate": 2.1016262219149284e-05, + "loss": 0.2015, + "step": 515800 + }, + { + "epoch": 2.9, + "learning_rate": 2.101069720118945e-05, + "loss": 0.1985, + "step": 515900 + }, + { + "epoch": 2.9, + "learning_rate": 2.1005075970926998e-05, + "loss": 0.2022, + "step": 516000 + }, + { + "epoch": 2.9, + "learning_rate": 2.0999454740664544e-05, + "loss": 0.2062, + "step": 516100 + }, + { + "epoch": 2.9, + "learning_rate": 2.099383351040209e-05, + "loss": 0.1992, + "step": 516200 + }, + { + "epoch": 2.9, + "learning_rate": 2.0988212280139633e-05, + "loss": 0.2095, + "step": 516300 + }, + { + "epoch": 2.9, + "learning_rate": 2.0982591049877176e-05, + "loss": 0.2018, + "step": 516400 + }, + { + "epoch": 2.9, + "learning_rate": 2.0976969819614722e-05, + "loss": 0.2002, + "step": 516500 + }, + { + "epoch": 2.9, + "learning_rate": 2.0971348589352265e-05, + "loss": 0.1942, + "step": 516600 + }, + { + "epoch": 2.9, + "learning_rate": 2.096572735908981e-05, + "loss": 0.1996, + "step": 516700 + }, + { + "epoch": 2.91, + "learning_rate": 2.0960106128827354e-05, + "loss": 0.1993, + "step": 516800 + }, + { + "epoch": 2.91, + "learning_rate": 2.09544848985649e-05, + "loss": 0.2, + "step": 516900 + }, + { + "epoch": 2.91, + "learning_rate": 2.0948863668302447e-05, + "loss": 0.2008, + "step": 517000 + }, + { + "epoch": 2.91, + "learning_rate": 2.0943298650342615e-05, + "loss": 0.2061, + "step": 517100 + }, + { + "epoch": 2.91, + "learning_rate": 2.093767742008016e-05, + "loss": 0.1981, + "step": 517200 + }, + { + "epoch": 2.91, + "learning_rate": 2.0932056189817707e-05, + "loss": 0.2066, + "step": 517300 + }, + { + "epoch": 2.91, + "learning_rate": 2.092643495955525e-05, + "loss": 0.2039, + "step": 517400 + }, + { + "epoch": 2.91, + "learning_rate": 2.0920813729292793e-05, + "loss": 0.2036, + "step": 517500 + }, + { + "epoch": 2.91, + "learning_rate": 2.091519249903034e-05, + "loss": 0.2025, + "step": 517600 + }, + { + "epoch": 2.91, + "learning_rate": 2.0909571268767882e-05, + "loss": 0.2037, + "step": 517700 + }, + { + "epoch": 2.91, + "learning_rate": 2.0903950038505428e-05, + "loss": 0.1987, + "step": 517800 + }, + { + "epoch": 2.91, + "learning_rate": 2.0898328808242975e-05, + "loss": 0.203, + "step": 517900 + }, + { + "epoch": 2.91, + "learning_rate": 2.0892707577980517e-05, + "loss": 0.2049, + "step": 518000 + }, + { + "epoch": 2.91, + "learning_rate": 2.0887086347718064e-05, + "loss": 0.2056, + "step": 518100 + }, + { + "epoch": 2.91, + "learning_rate": 2.0881465117455607e-05, + "loss": 0.2021, + "step": 518200 + }, + { + "epoch": 2.91, + "learning_rate": 2.0875843887193153e-05, + "loss": 0.2019, + "step": 518300 + }, + { + "epoch": 2.91, + "learning_rate": 2.0870222656930696e-05, + "loss": 0.203, + "step": 518400 + }, + { + "epoch": 2.91, + "learning_rate": 2.0864601426668242e-05, + "loss": 0.2053, + "step": 518500 + }, + { + "epoch": 2.92, + "learning_rate": 2.085903640870841e-05, + "loss": 0.1974, + "step": 518600 + }, + { + "epoch": 2.92, + "learning_rate": 2.0853415178445956e-05, + "loss": 0.2042, + "step": 518700 + }, + { + "epoch": 2.92, + "learning_rate": 2.08477939481835e-05, + "loss": 0.1945, + "step": 518800 + }, + { + "epoch": 2.92, + "learning_rate": 2.0842172717921045e-05, + "loss": 0.2026, + "step": 518900 + }, + { + "epoch": 2.92, + "learning_rate": 2.083655148765859e-05, + "loss": 0.2038, + "step": 519000 + }, + { + "epoch": 2.92, + "learning_rate": 2.0830930257396134e-05, + "loss": 0.2042, + "step": 519100 + }, + { + "epoch": 2.92, + "learning_rate": 2.082530902713368e-05, + "loss": 0.2014, + "step": 519200 + }, + { + "epoch": 2.92, + "learning_rate": 2.0819687796871223e-05, + "loss": 0.2057, + "step": 519300 + }, + { + "epoch": 2.92, + "learning_rate": 2.081406656660877e-05, + "loss": 0.2081, + "step": 519400 + }, + { + "epoch": 2.92, + "learning_rate": 2.0808445336346313e-05, + "loss": 0.2031, + "step": 519500 + }, + { + "epoch": 2.92, + "learning_rate": 2.080282410608386e-05, + "loss": 0.2097, + "step": 519600 + }, + { + "epoch": 2.92, + "learning_rate": 2.0797202875821402e-05, + "loss": 0.2021, + "step": 519700 + }, + { + "epoch": 2.92, + "learning_rate": 2.0791581645558948e-05, + "loss": 0.1986, + "step": 519800 + }, + { + "epoch": 2.92, + "learning_rate": 2.0785960415296494e-05, + "loss": 0.2025, + "step": 519900 + }, + { + "epoch": 2.92, + "learning_rate": 2.0780339185034037e-05, + "loss": 0.2019, + "step": 520000 + }, + { + "epoch": 2.92, + "learning_rate": 2.0774717954771584e-05, + "loss": 0.2088, + "step": 520100 + }, + { + "epoch": 2.92, + "learning_rate": 2.0769096724509126e-05, + "loss": 0.2065, + "step": 520200 + }, + { + "epoch": 2.92, + "learning_rate": 2.0763475494246673e-05, + "loss": 0.2036, + "step": 520300 + }, + { + "epoch": 2.93, + "learning_rate": 2.0757854263984216e-05, + "loss": 0.2006, + "step": 520400 + }, + { + "epoch": 2.93, + "learning_rate": 2.0752233033721762e-05, + "loss": 0.2001, + "step": 520500 + }, + { + "epoch": 2.93, + "learning_rate": 2.0746611803459308e-05, + "loss": 0.2018, + "step": 520600 + }, + { + "epoch": 2.93, + "learning_rate": 2.074099057319685e-05, + "loss": 0.2026, + "step": 520700 + }, + { + "epoch": 2.93, + "learning_rate": 2.0735369342934397e-05, + "loss": 0.2019, + "step": 520800 + }, + { + "epoch": 2.93, + "learning_rate": 2.072974811267194e-05, + "loss": 0.198, + "step": 520900 + }, + { + "epoch": 2.93, + "learning_rate": 2.0724126882409486e-05, + "loss": 0.2106, + "step": 521000 + }, + { + "epoch": 2.93, + "learning_rate": 2.071850565214703e-05, + "loss": 0.1999, + "step": 521100 + }, + { + "epoch": 2.93, + "learning_rate": 2.0712884421884572e-05, + "loss": 0.2031, + "step": 521200 + }, + { + "epoch": 2.93, + "learning_rate": 2.070726319162212e-05, + "loss": 0.2016, + "step": 521300 + }, + { + "epoch": 2.93, + "learning_rate": 2.0701641961359665e-05, + "loss": 0.2008, + "step": 521400 + }, + { + "epoch": 2.93, + "learning_rate": 2.069602073109721e-05, + "loss": 0.2015, + "step": 521500 + }, + { + "epoch": 2.93, + "learning_rate": 2.0690399500834754e-05, + "loss": 0.2009, + "step": 521600 + }, + { + "epoch": 2.93, + "learning_rate": 2.06847782705723e-05, + "loss": 0.2046, + "step": 521700 + }, + { + "epoch": 2.93, + "learning_rate": 2.0679157040309843e-05, + "loss": 0.1961, + "step": 521800 + }, + { + "epoch": 2.93, + "learning_rate": 2.0673535810047386e-05, + "loss": 0.2065, + "step": 521900 + }, + { + "epoch": 2.93, + "learning_rate": 2.0667914579784932e-05, + "loss": 0.1993, + "step": 522000 + }, + { + "epoch": 2.93, + "learning_rate": 2.0662293349522475e-05, + "loss": 0.1947, + "step": 522100 + }, + { + "epoch": 2.94, + "learning_rate": 2.0656672119260025e-05, + "loss": 0.2015, + "step": 522200 + }, + { + "epoch": 2.94, + "learning_rate": 2.065110710130019e-05, + "loss": 0.2056, + "step": 522300 + }, + { + "epoch": 2.94, + "learning_rate": 2.0645485871037735e-05, + "loss": 0.2038, + "step": 522400 + }, + { + "epoch": 2.94, + "learning_rate": 2.063986464077528e-05, + "loss": 0.2018, + "step": 522500 + }, + { + "epoch": 2.94, + "learning_rate": 2.0634243410512828e-05, + "loss": 0.2024, + "step": 522600 + }, + { + "epoch": 2.94, + "learning_rate": 2.062862218025037e-05, + "loss": 0.2018, + "step": 522700 + }, + { + "epoch": 2.94, + "learning_rate": 2.0623000949987914e-05, + "loss": 0.2045, + "step": 522800 + }, + { + "epoch": 2.94, + "learning_rate": 2.061737971972546e-05, + "loss": 0.2015, + "step": 522900 + }, + { + "epoch": 2.94, + "learning_rate": 2.0611758489463003e-05, + "loss": 0.2002, + "step": 523000 + }, + { + "epoch": 2.94, + "learning_rate": 2.060613725920055e-05, + "loss": 0.2021, + "step": 523100 + }, + { + "epoch": 2.94, + "learning_rate": 2.0600516028938095e-05, + "loss": 0.203, + "step": 523200 + }, + { + "epoch": 2.94, + "learning_rate": 2.059489479867564e-05, + "loss": 0.2013, + "step": 523300 + }, + { + "epoch": 2.94, + "learning_rate": 2.0589273568413185e-05, + "loss": 0.2034, + "step": 523400 + }, + { + "epoch": 2.94, + "learning_rate": 2.0583652338150727e-05, + "loss": 0.2013, + "step": 523500 + }, + { + "epoch": 2.94, + "learning_rate": 2.0578031107888274e-05, + "loss": 0.2018, + "step": 523600 + }, + { + "epoch": 2.94, + "learning_rate": 2.0572409877625817e-05, + "loss": 0.1984, + "step": 523700 + }, + { + "epoch": 2.94, + "learning_rate": 2.0566788647363363e-05, + "loss": 0.2017, + "step": 523800 + }, + { + "epoch": 2.94, + "learning_rate": 2.0561167417100906e-05, + "loss": 0.2014, + "step": 523900 + }, + { + "epoch": 2.95, + "learning_rate": 2.0555546186838452e-05, + "loss": 0.201, + "step": 524000 + }, + { + "epoch": 2.95, + "learning_rate": 2.0549924956576e-05, + "loss": 0.2024, + "step": 524100 + }, + { + "epoch": 2.95, + "learning_rate": 2.054430372631354e-05, + "loss": 0.2091, + "step": 524200 + }, + { + "epoch": 2.95, + "learning_rate": 2.0538682496051088e-05, + "loss": 0.1988, + "step": 524300 + }, + { + "epoch": 2.95, + "learning_rate": 2.053311747809126e-05, + "loss": 0.1992, + "step": 524400 + }, + { + "epoch": 2.95, + "learning_rate": 2.05274962478288e-05, + "loss": 0.2016, + "step": 524500 + }, + { + "epoch": 2.95, + "learning_rate": 2.0521875017566344e-05, + "loss": 0.2046, + "step": 524600 + }, + { + "epoch": 2.95, + "learning_rate": 2.051625378730389e-05, + "loss": 0.1969, + "step": 524700 + }, + { + "epoch": 2.95, + "learning_rate": 2.0510632557041433e-05, + "loss": 0.2045, + "step": 524800 + }, + { + "epoch": 2.95, + "learning_rate": 2.050501132677898e-05, + "loss": 0.2002, + "step": 524900 + }, + { + "epoch": 2.95, + "learning_rate": 2.0499390096516523e-05, + "loss": 0.2046, + "step": 525000 + }, + { + "epoch": 2.95, + "learning_rate": 2.049376886625407e-05, + "loss": 0.1939, + "step": 525100 + }, + { + "epoch": 2.95, + "learning_rate": 2.0488147635991615e-05, + "loss": 0.2033, + "step": 525200 + }, + { + "epoch": 2.95, + "learning_rate": 2.0482526405729158e-05, + "loss": 0.1999, + "step": 525300 + }, + { + "epoch": 2.95, + "learning_rate": 2.0476905175466704e-05, + "loss": 0.2022, + "step": 525400 + }, + { + "epoch": 2.95, + "learning_rate": 2.0471283945204247e-05, + "loss": 0.2013, + "step": 525500 + }, + { + "epoch": 2.95, + "learning_rate": 2.0465662714941794e-05, + "loss": 0.1994, + "step": 525600 + }, + { + "epoch": 2.96, + "learning_rate": 2.0460041484679336e-05, + "loss": 0.2009, + "step": 525700 + }, + { + "epoch": 2.96, + "learning_rate": 2.0454420254416883e-05, + "loss": 0.1992, + "step": 525800 + }, + { + "epoch": 2.96, + "learning_rate": 2.044879902415443e-05, + "loss": 0.2095, + "step": 525900 + }, + { + "epoch": 2.96, + "learning_rate": 2.0443177793891972e-05, + "loss": 0.2009, + "step": 526000 + }, + { + "epoch": 2.96, + "learning_rate": 2.0437556563629518e-05, + "loss": 0.2051, + "step": 526100 + }, + { + "epoch": 2.96, + "learning_rate": 2.043193533336706e-05, + "loss": 0.1965, + "step": 526200 + }, + { + "epoch": 2.96, + "learning_rate": 2.0426314103104607e-05, + "loss": 0.2029, + "step": 526300 + }, + { + "epoch": 2.96, + "learning_rate": 2.042069287284215e-05, + "loss": 0.1935, + "step": 526400 + }, + { + "epoch": 2.96, + "learning_rate": 2.0415071642579696e-05, + "loss": 0.2005, + "step": 526500 + }, + { + "epoch": 2.96, + "learning_rate": 2.040945041231724e-05, + "loss": 0.2018, + "step": 526600 + }, + { + "epoch": 2.96, + "learning_rate": 2.0403829182054786e-05, + "loss": 0.1969, + "step": 526700 + }, + { + "epoch": 2.96, + "learning_rate": 2.0398207951792332e-05, + "loss": 0.2069, + "step": 526800 + }, + { + "epoch": 2.96, + "learning_rate": 2.03926429338325e-05, + "loss": 0.2031, + "step": 526900 + }, + { + "epoch": 2.96, + "learning_rate": 2.0387021703570046e-05, + "loss": 0.2001, + "step": 527000 + }, + { + "epoch": 2.96, + "learning_rate": 2.038140047330759e-05, + "loss": 0.2014, + "step": 527100 + }, + { + "epoch": 2.96, + "learning_rate": 2.0375779243045135e-05, + "loss": 0.2062, + "step": 527200 + }, + { + "epoch": 2.96, + "learning_rate": 2.0370158012782678e-05, + "loss": 0.2047, + "step": 527300 + }, + { + "epoch": 2.96, + "learning_rate": 2.0364536782520224e-05, + "loss": 0.1988, + "step": 527400 + }, + { + "epoch": 2.97, + "learning_rate": 2.0358915552257767e-05, + "loss": 0.2047, + "step": 527500 + }, + { + "epoch": 2.97, + "learning_rate": 2.035329432199531e-05, + "loss": 0.1956, + "step": 527600 + }, + { + "epoch": 2.97, + "learning_rate": 2.0347673091732856e-05, + "loss": 0.204, + "step": 527700 + }, + { + "epoch": 2.97, + "learning_rate": 2.0342051861470402e-05, + "loss": 0.1973, + "step": 527800 + }, + { + "epoch": 2.97, + "learning_rate": 2.033643063120795e-05, + "loss": 0.1975, + "step": 527900 + }, + { + "epoch": 2.97, + "learning_rate": 2.033080940094549e-05, + "loss": 0.1988, + "step": 528000 + }, + { + "epoch": 2.97, + "learning_rate": 2.0325188170683038e-05, + "loss": 0.1986, + "step": 528100 + }, + { + "epoch": 2.97, + "learning_rate": 2.031956694042058e-05, + "loss": 0.2033, + "step": 528200 + }, + { + "epoch": 2.97, + "learning_rate": 2.0313945710158124e-05, + "loss": 0.2007, + "step": 528300 + }, + { + "epoch": 2.97, + "learning_rate": 2.030832447989567e-05, + "loss": 0.2025, + "step": 528400 + }, + { + "epoch": 2.97, + "learning_rate": 2.0302703249633216e-05, + "loss": 0.201, + "step": 528500 + }, + { + "epoch": 2.97, + "learning_rate": 2.0297082019370763e-05, + "loss": 0.1986, + "step": 528600 + }, + { + "epoch": 2.97, + "learning_rate": 2.0291460789108305e-05, + "loss": 0.198, + "step": 528700 + }, + { + "epoch": 2.97, + "learning_rate": 2.0285839558845852e-05, + "loss": 0.1944, + "step": 528800 + }, + { + "epoch": 2.97, + "learning_rate": 2.0280218328583395e-05, + "loss": 0.2019, + "step": 528900 + }, + { + "epoch": 2.97, + "learning_rate": 2.0274597098320937e-05, + "loss": 0.2053, + "step": 529000 + }, + { + "epoch": 2.97, + "learning_rate": 2.0268975868058484e-05, + "loss": 0.2017, + "step": 529100 + }, + { + "epoch": 2.97, + "learning_rate": 2.0263354637796027e-05, + "loss": 0.2001, + "step": 529200 + }, + { + "epoch": 2.98, + "learning_rate": 2.0257733407533573e-05, + "loss": 0.2023, + "step": 529300 + }, + { + "epoch": 2.98, + "learning_rate": 2.025211217727112e-05, + "loss": 0.2102, + "step": 529400 + }, + { + "epoch": 2.98, + "learning_rate": 2.0246490947008665e-05, + "loss": 0.2049, + "step": 529500 + }, + { + "epoch": 2.98, + "learning_rate": 2.024086971674621e-05, + "loss": 0.2079, + "step": 529600 + }, + { + "epoch": 2.98, + "learning_rate": 2.023530469878638e-05, + "loss": 0.2022, + "step": 529700 + }, + { + "epoch": 2.98, + "learning_rate": 2.0229683468523922e-05, + "loss": 0.2034, + "step": 529800 + }, + { + "epoch": 2.98, + "learning_rate": 2.022406223826147e-05, + "loss": 0.1966, + "step": 529900 + }, + { + "epoch": 2.98, + "learning_rate": 2.021844100799901e-05, + "loss": 0.2044, + "step": 530000 + }, + { + "epoch": 2.98, + "learning_rate": 2.0212819777736554e-05, + "loss": 0.2019, + "step": 530100 + }, + { + "epoch": 2.98, + "learning_rate": 2.02071985474741e-05, + "loss": 0.2035, + "step": 530200 + }, + { + "epoch": 2.98, + "learning_rate": 2.0201577317211644e-05, + "loss": 0.1978, + "step": 530300 + }, + { + "epoch": 2.98, + "learning_rate": 2.0195956086949193e-05, + "loss": 0.1966, + "step": 530400 + }, + { + "epoch": 2.98, + "learning_rate": 2.0190334856686736e-05, + "loss": 0.1989, + "step": 530500 + }, + { + "epoch": 2.98, + "learning_rate": 2.0184713626424282e-05, + "loss": 0.2027, + "step": 530600 + }, + { + "epoch": 2.98, + "learning_rate": 2.0179092396161825e-05, + "loss": 0.2092, + "step": 530700 + }, + { + "epoch": 2.98, + "learning_rate": 2.0173471165899368e-05, + "loss": 0.2047, + "step": 530800 + }, + { + "epoch": 2.98, + "learning_rate": 2.0167849935636914e-05, + "loss": 0.198, + "step": 530900 + }, + { + "epoch": 2.98, + "learning_rate": 2.0162228705374457e-05, + "loss": 0.2015, + "step": 531000 + }, + { + "epoch": 2.99, + "learning_rate": 2.0156607475112004e-05, + "loss": 0.1997, + "step": 531100 + }, + { + "epoch": 2.99, + "learning_rate": 2.015098624484955e-05, + "loss": 0.2018, + "step": 531200 + }, + { + "epoch": 2.99, + "learning_rate": 2.0145365014587096e-05, + "loss": 0.2055, + "step": 531300 + }, + { + "epoch": 2.99, + "learning_rate": 2.013974378432464e-05, + "loss": 0.1978, + "step": 531400 + }, + { + "epoch": 2.99, + "learning_rate": 2.0134122554062182e-05, + "loss": 0.2091, + "step": 531500 + }, + { + "epoch": 2.99, + "learning_rate": 2.0128501323799728e-05, + "loss": 0.2076, + "step": 531600 + }, + { + "epoch": 2.99, + "learning_rate": 2.012288009353727e-05, + "loss": 0.197, + "step": 531700 + }, + { + "epoch": 2.99, + "learning_rate": 2.0117258863274817e-05, + "loss": 0.2009, + "step": 531800 + }, + { + "epoch": 2.99, + "learning_rate": 2.011163763301236e-05, + "loss": 0.2034, + "step": 531900 + }, + { + "epoch": 2.99, + "learning_rate": 2.0106016402749906e-05, + "loss": 0.2015, + "step": 532000 + }, + { + "epoch": 2.99, + "learning_rate": 2.0100395172487453e-05, + "loss": 0.1995, + "step": 532100 + }, + { + "epoch": 2.99, + "learning_rate": 2.0094773942224996e-05, + "loss": 0.1945, + "step": 532200 + }, + { + "epoch": 2.99, + "learning_rate": 2.0089152711962542e-05, + "loss": 0.1973, + "step": 532300 + }, + { + "epoch": 2.99, + "learning_rate": 2.0083531481700085e-05, + "loss": 0.2011, + "step": 532400 + }, + { + "epoch": 2.99, + "learning_rate": 2.007791025143763e-05, + "loss": 0.2032, + "step": 532500 + }, + { + "epoch": 2.99, + "learning_rate": 2.0072289021175174e-05, + "loss": 0.1971, + "step": 532600 + }, + { + "epoch": 2.99, + "learning_rate": 2.006666779091272e-05, + "loss": 0.206, + "step": 532700 + }, + { + "epoch": 2.99, + "learning_rate": 2.0061046560650267e-05, + "loss": 0.2048, + "step": 532800 + }, + { + "epoch": 3.0, + "learning_rate": 2.005542533038781e-05, + "loss": 0.203, + "step": 532900 + }, + { + "epoch": 3.0, + "learning_rate": 2.0049804100125356e-05, + "loss": 0.1993, + "step": 533000 + }, + { + "epoch": 3.0, + "learning_rate": 2.00441828698629e-05, + "loss": 0.1955, + "step": 533100 + }, + { + "epoch": 3.0, + "learning_rate": 2.0038561639600445e-05, + "loss": 0.2115, + "step": 533200 + }, + { + "epoch": 3.0, + "learning_rate": 2.0032940409337988e-05, + "loss": 0.2027, + "step": 533300 + }, + { + "epoch": 3.0, + "learning_rate": 2.002731917907553e-05, + "loss": 0.2022, + "step": 533400 + }, + { + "epoch": 3.0, + "learning_rate": 2.0021697948813077e-05, + "loss": 0.2045, + "step": 533500 + }, + { + "epoch": 3.0, + "learning_rate": 2.0016076718550623e-05, + "loss": 0.2043, + "step": 533600 + }, + { + "epoch": 3.0, + "eval_bleu": 77.0962, + "eval_cer": 2.307, + "eval_chrF": 95.38492169626633, + "eval_gen_len": 16.7681, + "eval_loss": 0.491799920797348, + "eval_runtime": 7195.3567, + "eval_samples_per_second": 34.745, + "eval_steps_per_second": 0.543, + "eval_wer": 12.7609, + "step": 533691 + }, + { + "epoch": 3.0, + "learning_rate": 2.001045548828817e-05, + "loss": 0.2026, + "step": 533700 + }, + { + "epoch": 3.0, + "learning_rate": 2.0004834258025712e-05, + "loss": 0.191, + "step": 533800 + }, + { + "epoch": 3.0, + "learning_rate": 1.999921302776326e-05, + "loss": 0.1869, + "step": 533900 + }, + { + "epoch": 3.0, + "learning_rate": 1.99935917975008e-05, + "loss": 0.1822, + "step": 534000 + }, + { + "epoch": 3.0, + "learning_rate": 1.9987970567238344e-05, + "loss": 0.1805, + "step": 534100 + }, + { + "epoch": 3.0, + "learning_rate": 1.998234933697589e-05, + "loss": 0.1901, + "step": 534200 + }, + { + "epoch": 3.0, + "learning_rate": 1.9976728106713437e-05, + "loss": 0.1843, + "step": 534300 + }, + { + "epoch": 3.0, + "learning_rate": 1.9971106876450983e-05, + "loss": 0.1858, + "step": 534400 + }, + { + "epoch": 3.0, + "learning_rate": 1.9965485646188526e-05, + "loss": 0.1856, + "step": 534500 + }, + { + "epoch": 3.01, + "learning_rate": 1.9959864415926072e-05, + "loss": 0.1902, + "step": 534600 + }, + { + "epoch": 3.01, + "learning_rate": 1.9954243185663615e-05, + "loss": 0.189, + "step": 534700 + }, + { + "epoch": 3.01, + "learning_rate": 1.9948621955401158e-05, + "loss": 0.1836, + "step": 534800 + }, + { + "epoch": 3.01, + "learning_rate": 1.9943000725138704e-05, + "loss": 0.1848, + "step": 534900 + }, + { + "epoch": 3.01, + "learning_rate": 1.9937379494876247e-05, + "loss": 0.1872, + "step": 535000 + }, + { + "epoch": 3.01, + "learning_rate": 1.9931758264613794e-05, + "loss": 0.1825, + "step": 535100 + }, + { + "epoch": 3.01, + "learning_rate": 1.992613703435134e-05, + "loss": 0.1763, + "step": 535200 + }, + { + "epoch": 3.01, + "learning_rate": 1.9920515804088886e-05, + "loss": 0.1904, + "step": 535300 + }, + { + "epoch": 3.01, + "learning_rate": 1.991489457382643e-05, + "loss": 0.1833, + "step": 535400 + }, + { + "epoch": 3.01, + "learning_rate": 1.9909273343563972e-05, + "loss": 0.1832, + "step": 535500 + }, + { + "epoch": 3.01, + "learning_rate": 1.9903652113301518e-05, + "loss": 0.1854, + "step": 535600 + }, + { + "epoch": 3.01, + "learning_rate": 1.989803088303906e-05, + "loss": 0.1907, + "step": 535700 + }, + { + "epoch": 3.01, + "learning_rate": 1.9892409652776607e-05, + "loss": 0.1883, + "step": 535800 + }, + { + "epoch": 3.01, + "learning_rate": 1.9886844634816775e-05, + "loss": 0.1766, + "step": 535900 + }, + { + "epoch": 3.01, + "learning_rate": 1.988122340455432e-05, + "loss": 0.1868, + "step": 536000 + }, + { + "epoch": 3.01, + "learning_rate": 1.9875602174291864e-05, + "loss": 0.1894, + "step": 536100 + }, + { + "epoch": 3.01, + "learning_rate": 1.986998094402941e-05, + "loss": 0.1837, + "step": 536200 + }, + { + "epoch": 3.01, + "learning_rate": 1.9864359713766957e-05, + "loss": 0.182, + "step": 536300 + }, + { + "epoch": 3.02, + "learning_rate": 1.9858738483504503e-05, + "loss": 0.1806, + "step": 536400 + }, + { + "epoch": 3.02, + "learning_rate": 1.9853117253242046e-05, + "loss": 0.1889, + "step": 536500 + }, + { + "epoch": 3.02, + "learning_rate": 1.984749602297959e-05, + "loss": 0.1811, + "step": 536600 + }, + { + "epoch": 3.02, + "learning_rate": 1.9841874792717135e-05, + "loss": 0.1867, + "step": 536700 + }, + { + "epoch": 3.02, + "learning_rate": 1.9836253562454678e-05, + "loss": 0.1883, + "step": 536800 + }, + { + "epoch": 3.02, + "learning_rate": 1.9830632332192224e-05, + "loss": 0.1781, + "step": 536900 + }, + { + "epoch": 3.02, + "learning_rate": 1.982501110192977e-05, + "loss": 0.1898, + "step": 537000 + }, + { + "epoch": 3.02, + "learning_rate": 1.9819389871667317e-05, + "loss": 0.1877, + "step": 537100 + }, + { + "epoch": 3.02, + "learning_rate": 1.981376864140486e-05, + "loss": 0.1797, + "step": 537200 + }, + { + "epoch": 3.02, + "learning_rate": 1.9808147411142403e-05, + "loss": 0.1896, + "step": 537300 + }, + { + "epoch": 3.02, + "learning_rate": 1.980252618087995e-05, + "loss": 0.1848, + "step": 537400 + }, + { + "epoch": 3.02, + "learning_rate": 1.9796904950617492e-05, + "loss": 0.1905, + "step": 537500 + }, + { + "epoch": 3.02, + "learning_rate": 1.9791283720355038e-05, + "loss": 0.1819, + "step": 537600 + }, + { + "epoch": 3.02, + "learning_rate": 1.978566249009258e-05, + "loss": 0.1879, + "step": 537700 + }, + { + "epoch": 3.02, + "learning_rate": 1.9780041259830127e-05, + "loss": 0.1805, + "step": 537800 + }, + { + "epoch": 3.02, + "learning_rate": 1.9774420029567673e-05, + "loss": 0.1817, + "step": 537900 + }, + { + "epoch": 3.02, + "learning_rate": 1.9768798799305216e-05, + "loss": 0.1916, + "step": 538000 + }, + { + "epoch": 3.02, + "learning_rate": 1.9763177569042763e-05, + "loss": 0.1881, + "step": 538100 + }, + { + "epoch": 3.03, + "learning_rate": 1.975761255108293e-05, + "loss": 0.1836, + "step": 538200 + }, + { + "epoch": 3.03, + "learning_rate": 1.97520475331231e-05, + "loss": 0.1816, + "step": 538300 + }, + { + "epoch": 3.03, + "learning_rate": 1.9746426302860648e-05, + "loss": 0.1832, + "step": 538400 + }, + { + "epoch": 3.03, + "learning_rate": 1.974080507259819e-05, + "loss": 0.1855, + "step": 538500 + }, + { + "epoch": 3.03, + "learning_rate": 1.9735183842335733e-05, + "loss": 0.1866, + "step": 538600 + }, + { + "epoch": 3.03, + "learning_rate": 1.972956261207328e-05, + "loss": 0.1856, + "step": 538700 + }, + { + "epoch": 3.03, + "learning_rate": 1.9723941381810823e-05, + "loss": 0.1846, + "step": 538800 + }, + { + "epoch": 3.03, + "learning_rate": 1.971832015154837e-05, + "loss": 0.1845, + "step": 538900 + }, + { + "epoch": 3.03, + "learning_rate": 1.9712698921285912e-05, + "loss": 0.1863, + "step": 539000 + }, + { + "epoch": 3.03, + "learning_rate": 1.9707077691023458e-05, + "loss": 0.1856, + "step": 539100 + }, + { + "epoch": 3.03, + "learning_rate": 1.9701456460761004e-05, + "loss": 0.1813, + "step": 539200 + }, + { + "epoch": 3.03, + "learning_rate": 1.9695835230498547e-05, + "loss": 0.1862, + "step": 539300 + }, + { + "epoch": 3.03, + "learning_rate": 1.9690214000236093e-05, + "loss": 0.1869, + "step": 539400 + }, + { + "epoch": 3.03, + "learning_rate": 1.9684592769973636e-05, + "loss": 0.1874, + "step": 539500 + }, + { + "epoch": 3.03, + "learning_rate": 1.9678971539711183e-05, + "loss": 0.1907, + "step": 539600 + }, + { + "epoch": 3.03, + "learning_rate": 1.9673350309448725e-05, + "loss": 0.19, + "step": 539700 + }, + { + "epoch": 3.03, + "learning_rate": 1.9667729079186272e-05, + "loss": 0.1875, + "step": 539800 + }, + { + "epoch": 3.03, + "learning_rate": 1.9662107848923818e-05, + "loss": 0.1842, + "step": 539900 + }, + { + "epoch": 3.04, + "learning_rate": 1.965648661866136e-05, + "loss": 0.1914, + "step": 540000 + }, + { + "epoch": 3.04, + "learning_rate": 1.9650865388398907e-05, + "loss": 0.186, + "step": 540100 + }, + { + "epoch": 3.04, + "learning_rate": 1.964524415813645e-05, + "loss": 0.1886, + "step": 540200 + }, + { + "epoch": 3.04, + "learning_rate": 1.9639622927873996e-05, + "loss": 0.1832, + "step": 540300 + }, + { + "epoch": 3.04, + "learning_rate": 1.963400169761154e-05, + "loss": 0.1872, + "step": 540400 + }, + { + "epoch": 3.04, + "learning_rate": 1.9628380467349086e-05, + "loss": 0.189, + "step": 540500 + }, + { + "epoch": 3.04, + "learning_rate": 1.962275923708663e-05, + "loss": 0.1847, + "step": 540600 + }, + { + "epoch": 3.04, + "learning_rate": 1.9617138006824175e-05, + "loss": 0.1868, + "step": 540700 + }, + { + "epoch": 3.04, + "learning_rate": 1.961151677656172e-05, + "loss": 0.1856, + "step": 540800 + }, + { + "epoch": 3.04, + "learning_rate": 1.9605895546299264e-05, + "loss": 0.1882, + "step": 540900 + }, + { + "epoch": 3.04, + "learning_rate": 1.960027431603681e-05, + "loss": 0.184, + "step": 541000 + }, + { + "epoch": 3.04, + "learning_rate": 1.9594653085774353e-05, + "loss": 0.1916, + "step": 541100 + }, + { + "epoch": 3.04, + "learning_rate": 1.95890318555119e-05, + "loss": 0.1896, + "step": 541200 + }, + { + "epoch": 3.04, + "learning_rate": 1.9583410625249442e-05, + "loss": 0.1859, + "step": 541300 + }, + { + "epoch": 3.04, + "learning_rate": 1.9577789394986985e-05, + "loss": 0.1809, + "step": 541400 + }, + { + "epoch": 3.04, + "learning_rate": 1.9572168164724535e-05, + "loss": 0.1942, + "step": 541500 + }, + { + "epoch": 3.04, + "learning_rate": 1.9566546934462078e-05, + "loss": 0.1889, + "step": 541600 + }, + { + "epoch": 3.05, + "learning_rate": 1.9560925704199624e-05, + "loss": 0.1861, + "step": 541700 + }, + { + "epoch": 3.05, + "learning_rate": 1.9555304473937167e-05, + "loss": 0.1845, + "step": 541800 + }, + { + "epoch": 3.05, + "learning_rate": 1.9549683243674713e-05, + "loss": 0.1849, + "step": 541900 + }, + { + "epoch": 3.05, + "learning_rate": 1.9544062013412256e-05, + "loss": 0.1907, + "step": 542000 + }, + { + "epoch": 3.05, + "learning_rate": 1.95384407831498e-05, + "loss": 0.1959, + "step": 542100 + }, + { + "epoch": 3.05, + "learning_rate": 1.9532819552887345e-05, + "loss": 0.1927, + "step": 542200 + }, + { + "epoch": 3.05, + "learning_rate": 1.952719832262489e-05, + "loss": 0.1861, + "step": 542300 + }, + { + "epoch": 3.05, + "learning_rate": 1.9521577092362438e-05, + "loss": 0.1894, + "step": 542400 + }, + { + "epoch": 3.05, + "learning_rate": 1.951595586209998e-05, + "loss": 0.1909, + "step": 542500 + }, + { + "epoch": 3.05, + "learning_rate": 1.9510334631837523e-05, + "loss": 0.1892, + "step": 542600 + }, + { + "epoch": 3.05, + "learning_rate": 1.950471340157507e-05, + "loss": 0.1847, + "step": 542700 + }, + { + "epoch": 3.05, + "learning_rate": 1.9499092171312613e-05, + "loss": 0.1858, + "step": 542800 + }, + { + "epoch": 3.05, + "learning_rate": 1.9493527153352784e-05, + "loss": 0.1863, + "step": 542900 + }, + { + "epoch": 3.05, + "learning_rate": 1.9487905923090327e-05, + "loss": 0.185, + "step": 543000 + }, + { + "epoch": 3.05, + "learning_rate": 1.9482284692827873e-05, + "loss": 0.1815, + "step": 543100 + }, + { + "epoch": 3.05, + "learning_rate": 1.9476663462565416e-05, + "loss": 0.1893, + "step": 543200 + }, + { + "epoch": 3.05, + "learning_rate": 1.9471042232302962e-05, + "loss": 0.1868, + "step": 543300 + }, + { + "epoch": 3.05, + "learning_rate": 1.9465421002040508e-05, + "loss": 0.1878, + "step": 543400 + }, + { + "epoch": 3.06, + "learning_rate": 1.9459799771778055e-05, + "loss": 0.1847, + "step": 543500 + }, + { + "epoch": 3.06, + "learning_rate": 1.9454234753818222e-05, + "loss": 0.1851, + "step": 543600 + }, + { + "epoch": 3.06, + "learning_rate": 1.944861352355577e-05, + "loss": 0.1829, + "step": 543700 + }, + { + "epoch": 3.06, + "learning_rate": 1.944299229329331e-05, + "loss": 0.1827, + "step": 543800 + }, + { + "epoch": 3.06, + "learning_rate": 1.9437371063030858e-05, + "loss": 0.1878, + "step": 543900 + }, + { + "epoch": 3.06, + "learning_rate": 1.94317498327684e-05, + "loss": 0.1849, + "step": 544000 + }, + { + "epoch": 3.06, + "learning_rate": 1.9426128602505943e-05, + "loss": 0.1816, + "step": 544100 + }, + { + "epoch": 3.06, + "learning_rate": 1.942050737224349e-05, + "loss": 0.1865, + "step": 544200 + }, + { + "epoch": 3.06, + "learning_rate": 1.9414886141981033e-05, + "loss": 0.1861, + "step": 544300 + }, + { + "epoch": 3.06, + "learning_rate": 1.940926491171858e-05, + "loss": 0.188, + "step": 544400 + }, + { + "epoch": 3.06, + "learning_rate": 1.9403643681456125e-05, + "loss": 0.1891, + "step": 544500 + }, + { + "epoch": 3.06, + "learning_rate": 1.9398078663496293e-05, + "loss": 0.1936, + "step": 544600 + }, + { + "epoch": 3.06, + "learning_rate": 1.939245743323384e-05, + "loss": 0.1854, + "step": 544700 + }, + { + "epoch": 3.06, + "learning_rate": 1.9386836202971385e-05, + "loss": 0.1909, + "step": 544800 + }, + { + "epoch": 3.06, + "learning_rate": 1.9381214972708928e-05, + "loss": 0.1874, + "step": 544900 + }, + { + "epoch": 3.06, + "learning_rate": 1.9375593742446474e-05, + "loss": 0.1829, + "step": 545000 + }, + { + "epoch": 3.06, + "learning_rate": 1.9369972512184017e-05, + "loss": 0.1822, + "step": 545100 + }, + { + "epoch": 3.06, + "learning_rate": 1.936435128192156e-05, + "loss": 0.1898, + "step": 545200 + }, + { + "epoch": 3.07, + "learning_rate": 1.9358730051659107e-05, + "loss": 0.1849, + "step": 545300 + }, + { + "epoch": 3.07, + "learning_rate": 1.935310882139665e-05, + "loss": 0.1917, + "step": 545400 + }, + { + "epoch": 3.07, + "learning_rate": 1.93474875911342e-05, + "loss": 0.1865, + "step": 545500 + }, + { + "epoch": 3.07, + "learning_rate": 1.9341866360871742e-05, + "loss": 0.1868, + "step": 545600 + }, + { + "epoch": 3.07, + "learning_rate": 1.9336245130609288e-05, + "loss": 0.1881, + "step": 545700 + }, + { + "epoch": 3.07, + "learning_rate": 1.933062390034683e-05, + "loss": 0.1929, + "step": 545800 + }, + { + "epoch": 3.07, + "learning_rate": 1.9325002670084374e-05, + "loss": 0.1837, + "step": 545900 + }, + { + "epoch": 3.07, + "learning_rate": 1.931938143982192e-05, + "loss": 0.189, + "step": 546000 + }, + { + "epoch": 3.07, + "learning_rate": 1.9313760209559463e-05, + "loss": 0.1876, + "step": 546100 + }, + { + "epoch": 3.07, + "learning_rate": 1.930813897929701e-05, + "loss": 0.1873, + "step": 546200 + }, + { + "epoch": 3.07, + "learning_rate": 1.9302517749034556e-05, + "loss": 0.1865, + "step": 546300 + }, + { + "epoch": 3.07, + "learning_rate": 1.9296896518772102e-05, + "loss": 0.1802, + "step": 546400 + }, + { + "epoch": 3.07, + "learning_rate": 1.9291275288509645e-05, + "loss": 0.1838, + "step": 546500 + }, + { + "epoch": 3.07, + "learning_rate": 1.9285654058247188e-05, + "loss": 0.19, + "step": 546600 + }, + { + "epoch": 3.07, + "learning_rate": 1.9280032827984734e-05, + "loss": 0.1845, + "step": 546700 + }, + { + "epoch": 3.07, + "learning_rate": 1.9274411597722277e-05, + "loss": 0.1844, + "step": 546800 + }, + { + "epoch": 3.07, + "learning_rate": 1.9268790367459823e-05, + "loss": 0.1834, + "step": 546900 + }, + { + "epoch": 3.07, + "learning_rate": 1.9263169137197366e-05, + "loss": 0.184, + "step": 547000 + }, + { + "epoch": 3.08, + "learning_rate": 1.9257547906934912e-05, + "loss": 0.1927, + "step": 547100 + }, + { + "epoch": 3.08, + "learning_rate": 1.925192667667246e-05, + "loss": 0.1861, + "step": 547200 + }, + { + "epoch": 3.08, + "learning_rate": 1.924630544641e-05, + "loss": 0.1944, + "step": 547300 + }, + { + "epoch": 3.08, + "learning_rate": 1.9240740428450173e-05, + "loss": 0.1898, + "step": 547400 + }, + { + "epoch": 3.08, + "learning_rate": 1.9235119198187715e-05, + "loss": 0.1864, + "step": 547500 + }, + { + "epoch": 3.08, + "learning_rate": 1.9229497967925262e-05, + "loss": 0.1839, + "step": 547600 + }, + { + "epoch": 3.08, + "learning_rate": 1.9223876737662805e-05, + "loss": 0.1808, + "step": 547700 + }, + { + "epoch": 3.08, + "learning_rate": 1.921825550740035e-05, + "loss": 0.1869, + "step": 547800 + }, + { + "epoch": 3.08, + "learning_rate": 1.9212634277137894e-05, + "loss": 0.1863, + "step": 547900 + }, + { + "epoch": 3.08, + "learning_rate": 1.920701304687544e-05, + "loss": 0.1809, + "step": 548000 + }, + { + "epoch": 3.08, + "learning_rate": 1.9201391816612986e-05, + "loss": 0.1801, + "step": 548100 + }, + { + "epoch": 3.08, + "learning_rate": 1.919577058635053e-05, + "loss": 0.1921, + "step": 548200 + }, + { + "epoch": 3.08, + "learning_rate": 1.9190149356088076e-05, + "loss": 0.1827, + "step": 548300 + }, + { + "epoch": 3.08, + "learning_rate": 1.918452812582562e-05, + "loss": 0.1813, + "step": 548400 + }, + { + "epoch": 3.08, + "learning_rate": 1.9178906895563165e-05, + "loss": 0.1841, + "step": 548500 + }, + { + "epoch": 3.08, + "learning_rate": 1.9173285665300708e-05, + "loss": 0.1807, + "step": 548600 + }, + { + "epoch": 3.08, + "learning_rate": 1.9167664435038254e-05, + "loss": 0.1832, + "step": 548700 + }, + { + "epoch": 3.08, + "learning_rate": 1.9162043204775797e-05, + "loss": 0.1849, + "step": 548800 + }, + { + "epoch": 3.09, + "learning_rate": 1.9156421974513343e-05, + "loss": 0.1817, + "step": 548900 + }, + { + "epoch": 3.09, + "learning_rate": 1.915080074425089e-05, + "loss": 0.1853, + "step": 549000 + }, + { + "epoch": 3.09, + "learning_rate": 1.9145179513988432e-05, + "loss": 0.1813, + "step": 549100 + }, + { + "epoch": 3.09, + "learning_rate": 1.913955828372598e-05, + "loss": 0.1872, + "step": 549200 + }, + { + "epoch": 3.09, + "learning_rate": 1.913393705346352e-05, + "loss": 0.1866, + "step": 549300 + }, + { + "epoch": 3.09, + "learning_rate": 1.9128315823201068e-05, + "loss": 0.1818, + "step": 549400 + }, + { + "epoch": 3.09, + "learning_rate": 1.912269459293861e-05, + "loss": 0.1818, + "step": 549500 + }, + { + "epoch": 3.09, + "learning_rate": 1.9117073362676153e-05, + "loss": 0.1792, + "step": 549600 + }, + { + "epoch": 3.09, + "learning_rate": 1.91114521324137e-05, + "loss": 0.1861, + "step": 549700 + }, + { + "epoch": 3.09, + "learning_rate": 1.9105830902151246e-05, + "loss": 0.1913, + "step": 549800 + }, + { + "epoch": 3.09, + "learning_rate": 1.9100209671888792e-05, + "loss": 0.1822, + "step": 549900 + }, + { + "epoch": 3.09, + "learning_rate": 1.9094588441626335e-05, + "loss": 0.1883, + "step": 550000 + }, + { + "epoch": 3.09, + "learning_rate": 1.908896721136388e-05, + "loss": 0.1885, + "step": 550100 + }, + { + "epoch": 3.09, + "learning_rate": 1.9083345981101424e-05, + "loss": 0.188, + "step": 550200 + }, + { + "epoch": 3.09, + "learning_rate": 1.9077724750838967e-05, + "loss": 0.1853, + "step": 550300 + }, + { + "epoch": 3.09, + "learning_rate": 1.9072103520576513e-05, + "loss": 0.1826, + "step": 550400 + }, + { + "epoch": 3.09, + "learning_rate": 1.906648229031406e-05, + "loss": 0.1834, + "step": 550500 + }, + { + "epoch": 3.1, + "learning_rate": 1.9060861060051606e-05, + "loss": 0.1847, + "step": 550600 + }, + { + "epoch": 3.1, + "learning_rate": 1.905523982978915e-05, + "loss": 0.1946, + "step": 550700 + }, + { + "epoch": 3.1, + "learning_rate": 1.9049618599526695e-05, + "loss": 0.183, + "step": 550800 + }, + { + "epoch": 3.1, + "learning_rate": 1.9043997369264238e-05, + "loss": 0.1896, + "step": 550900 + }, + { + "epoch": 3.1, + "learning_rate": 1.903837613900178e-05, + "loss": 0.1839, + "step": 551000 + }, + { + "epoch": 3.1, + "learning_rate": 1.9032754908739327e-05, + "loss": 0.1866, + "step": 551100 + }, + { + "epoch": 3.1, + "learning_rate": 1.902713367847687e-05, + "loss": 0.1885, + "step": 551200 + }, + { + "epoch": 3.1, + "learning_rate": 1.9021512448214416e-05, + "loss": 0.1809, + "step": 551300 + }, + { + "epoch": 3.1, + "learning_rate": 1.9015891217951963e-05, + "loss": 0.1771, + "step": 551400 + }, + { + "epoch": 3.1, + "learning_rate": 1.901026998768951e-05, + "loss": 0.1889, + "step": 551500 + }, + { + "epoch": 3.1, + "learning_rate": 1.9004704969729677e-05, + "loss": 0.1805, + "step": 551600 + }, + { + "epoch": 3.1, + "learning_rate": 1.8999083739467223e-05, + "loss": 0.1851, + "step": 551700 + }, + { + "epoch": 3.1, + "learning_rate": 1.8993462509204766e-05, + "loss": 0.1872, + "step": 551800 + }, + { + "epoch": 3.1, + "learning_rate": 1.8987841278942312e-05, + "loss": 0.18, + "step": 551900 + }, + { + "epoch": 3.1, + "learning_rate": 1.8982220048679855e-05, + "loss": 0.1888, + "step": 552000 + }, + { + "epoch": 3.1, + "learning_rate": 1.8976598818417398e-05, + "loss": 0.1844, + "step": 552100 + }, + { + "epoch": 3.1, + "learning_rate": 1.8970977588154944e-05, + "loss": 0.1894, + "step": 552200 + }, + { + "epoch": 3.1, + "learning_rate": 1.8965356357892487e-05, + "loss": 0.1913, + "step": 552300 + }, + { + "epoch": 3.11, + "learning_rate": 1.8959735127630037e-05, + "loss": 0.1904, + "step": 552400 + }, + { + "epoch": 3.11, + "learning_rate": 1.895411389736758e-05, + "loss": 0.1913, + "step": 552500 + }, + { + "epoch": 3.11, + "learning_rate": 1.8948492667105122e-05, + "loss": 0.1891, + "step": 552600 + }, + { + "epoch": 3.11, + "learning_rate": 1.894287143684267e-05, + "loss": 0.1914, + "step": 552700 + }, + { + "epoch": 3.11, + "learning_rate": 1.893725020658021e-05, + "loss": 0.1859, + "step": 552800 + }, + { + "epoch": 3.11, + "learning_rate": 1.8931628976317758e-05, + "loss": 0.1894, + "step": 552900 + }, + { + "epoch": 3.11, + "learning_rate": 1.89260077460553e-05, + "loss": 0.1877, + "step": 553000 + }, + { + "epoch": 3.11, + "learning_rate": 1.8920386515792847e-05, + "loss": 0.1873, + "step": 553100 + }, + { + "epoch": 3.11, + "learning_rate": 1.8914765285530393e-05, + "loss": 0.1894, + "step": 553200 + }, + { + "epoch": 3.11, + "learning_rate": 1.8909144055267936e-05, + "loss": 0.1841, + "step": 553300 + }, + { + "epoch": 3.11, + "learning_rate": 1.8903522825005482e-05, + "loss": 0.1908, + "step": 553400 + }, + { + "epoch": 3.11, + "learning_rate": 1.8897901594743025e-05, + "loss": 0.181, + "step": 553500 + }, + { + "epoch": 3.11, + "learning_rate": 1.889228036448057e-05, + "loss": 0.1883, + "step": 553600 + }, + { + "epoch": 3.11, + "learning_rate": 1.8886659134218115e-05, + "loss": 0.189, + "step": 553700 + }, + { + "epoch": 3.11, + "learning_rate": 1.888103790395566e-05, + "loss": 0.1856, + "step": 553800 + }, + { + "epoch": 3.11, + "learning_rate": 1.8875416673693204e-05, + "loss": 0.1885, + "step": 553900 + }, + { + "epoch": 3.11, + "learning_rate": 1.886979544343075e-05, + "loss": 0.1887, + "step": 554000 + }, + { + "epoch": 3.11, + "learning_rate": 1.8864174213168296e-05, + "loss": 0.179, + "step": 554100 + }, + { + "epoch": 3.12, + "learning_rate": 1.885855298290584e-05, + "loss": 0.1834, + "step": 554200 + }, + { + "epoch": 3.12, + "learning_rate": 1.8852931752643385e-05, + "loss": 0.1831, + "step": 554300 + }, + { + "epoch": 3.12, + "learning_rate": 1.8847310522380928e-05, + "loss": 0.1878, + "step": 554400 + }, + { + "epoch": 3.12, + "learning_rate": 1.8841689292118475e-05, + "loss": 0.1841, + "step": 554500 + }, + { + "epoch": 3.12, + "learning_rate": 1.8836068061856017e-05, + "loss": 0.1816, + "step": 554600 + }, + { + "epoch": 3.12, + "learning_rate": 1.8830446831593564e-05, + "loss": 0.1901, + "step": 554700 + }, + { + "epoch": 3.12, + "learning_rate": 1.882482560133111e-05, + "loss": 0.1777, + "step": 554800 + }, + { + "epoch": 3.12, + "learning_rate": 1.8819204371068653e-05, + "loss": 0.1867, + "step": 554900 + }, + { + "epoch": 3.12, + "learning_rate": 1.88135831408062e-05, + "loss": 0.191, + "step": 555000 + }, + { + "epoch": 3.12, + "learning_rate": 1.8807961910543742e-05, + "loss": 0.1818, + "step": 555100 + }, + { + "epoch": 3.12, + "learning_rate": 1.880234068028129e-05, + "loss": 0.19, + "step": 555200 + }, + { + "epoch": 3.12, + "learning_rate": 1.879671945001883e-05, + "loss": 0.1826, + "step": 555300 + }, + { + "epoch": 3.12, + "learning_rate": 1.8791098219756374e-05, + "loss": 0.1856, + "step": 555400 + }, + { + "epoch": 3.12, + "learning_rate": 1.878547698949392e-05, + "loss": 0.1838, + "step": 555500 + }, + { + "epoch": 3.12, + "learning_rate": 1.8779855759231467e-05, + "loss": 0.187, + "step": 555600 + }, + { + "epoch": 3.12, + "learning_rate": 1.8774234528969013e-05, + "loss": 0.1824, + "step": 555700 + }, + { + "epoch": 3.12, + "learning_rate": 1.8768613298706556e-05, + "loss": 0.1828, + "step": 555800 + }, + { + "epoch": 3.12, + "learning_rate": 1.8762992068444102e-05, + "loss": 0.1801, + "step": 555900 + }, + { + "epoch": 3.13, + "learning_rate": 1.8757370838181645e-05, + "loss": 0.1857, + "step": 556000 + }, + { + "epoch": 3.13, + "learning_rate": 1.8751749607919188e-05, + "loss": 0.1838, + "step": 556100 + }, + { + "epoch": 3.13, + "learning_rate": 1.8746128377656734e-05, + "loss": 0.1876, + "step": 556200 + }, + { + "epoch": 3.13, + "learning_rate": 1.8740563359696905e-05, + "loss": 0.1854, + "step": 556300 + }, + { + "epoch": 3.13, + "learning_rate": 1.8734942129434448e-05, + "loss": 0.189, + "step": 556400 + }, + { + "epoch": 3.13, + "learning_rate": 1.872932089917199e-05, + "loss": 0.1778, + "step": 556500 + }, + { + "epoch": 3.13, + "learning_rate": 1.872369966890954e-05, + "loss": 0.1873, + "step": 556600 + }, + { + "epoch": 3.13, + "learning_rate": 1.8718078438647084e-05, + "loss": 0.184, + "step": 556700 + }, + { + "epoch": 3.13, + "learning_rate": 1.871245720838463e-05, + "loss": 0.1862, + "step": 556800 + }, + { + "epoch": 3.13, + "learning_rate": 1.8706835978122173e-05, + "loss": 0.1823, + "step": 556900 + }, + { + "epoch": 3.13, + "learning_rate": 1.870121474785972e-05, + "loss": 0.1842, + "step": 557000 + }, + { + "epoch": 3.13, + "learning_rate": 1.8695593517597262e-05, + "loss": 0.1885, + "step": 557100 + }, + { + "epoch": 3.13, + "learning_rate": 1.8689972287334805e-05, + "loss": 0.1888, + "step": 557200 + }, + { + "epoch": 3.13, + "learning_rate": 1.868435105707235e-05, + "loss": 0.1857, + "step": 557300 + }, + { + "epoch": 3.13, + "learning_rate": 1.867878603911252e-05, + "loss": 0.1812, + "step": 557400 + }, + { + "epoch": 3.13, + "learning_rate": 1.8673164808850065e-05, + "loss": 0.1827, + "step": 557500 + }, + { + "epoch": 3.13, + "learning_rate": 1.866754357858761e-05, + "loss": 0.1841, + "step": 557600 + }, + { + "epoch": 3.13, + "learning_rate": 1.8661922348325157e-05, + "loss": 0.1842, + "step": 557700 + }, + { + "epoch": 3.14, + "learning_rate": 1.86563011180627e-05, + "loss": 0.1855, + "step": 557800 + }, + { + "epoch": 3.14, + "learning_rate": 1.8650679887800247e-05, + "loss": 0.1862, + "step": 557900 + }, + { + "epoch": 3.14, + "learning_rate": 1.864505865753779e-05, + "loss": 0.184, + "step": 558000 + }, + { + "epoch": 3.14, + "learning_rate": 1.8639437427275332e-05, + "loss": 0.1846, + "step": 558100 + }, + { + "epoch": 3.14, + "learning_rate": 1.863381619701288e-05, + "loss": 0.191, + "step": 558200 + }, + { + "epoch": 3.14, + "learning_rate": 1.862819496675042e-05, + "loss": 0.1857, + "step": 558300 + }, + { + "epoch": 3.14, + "learning_rate": 1.8622573736487968e-05, + "loss": 0.1888, + "step": 558400 + }, + { + "epoch": 3.14, + "learning_rate": 1.8616952506225514e-05, + "loss": 0.1888, + "step": 558500 + }, + { + "epoch": 3.14, + "learning_rate": 1.861133127596306e-05, + "loss": 0.1851, + "step": 558600 + }, + { + "epoch": 3.14, + "learning_rate": 1.8605710045700603e-05, + "loss": 0.1798, + "step": 558700 + }, + { + "epoch": 3.14, + "learning_rate": 1.8600088815438146e-05, + "loss": 0.1868, + "step": 558800 + }, + { + "epoch": 3.14, + "learning_rate": 1.8594467585175692e-05, + "loss": 0.1951, + "step": 558900 + }, + { + "epoch": 3.14, + "learning_rate": 1.8588846354913235e-05, + "loss": 0.1875, + "step": 559000 + }, + { + "epoch": 3.14, + "learning_rate": 1.858322512465078e-05, + "loss": 0.1876, + "step": 559100 + }, + { + "epoch": 3.14, + "learning_rate": 1.8577603894388328e-05, + "loss": 0.1887, + "step": 559200 + }, + { + "epoch": 3.14, + "learning_rate": 1.8571982664125874e-05, + "loss": 0.1892, + "step": 559300 + }, + { + "epoch": 3.14, + "learning_rate": 1.8566361433863417e-05, + "loss": 0.1844, + "step": 559400 + }, + { + "epoch": 3.15, + "learning_rate": 1.856074020360096e-05, + "loss": 0.1874, + "step": 559500 + }, + { + "epoch": 3.15, + "learning_rate": 1.8555118973338506e-05, + "loss": 0.1869, + "step": 559600 + }, + { + "epoch": 3.15, + "learning_rate": 1.8549553955378677e-05, + "loss": 0.1907, + "step": 559700 + }, + { + "epoch": 3.15, + "learning_rate": 1.854393272511622e-05, + "loss": 0.1884, + "step": 559800 + }, + { + "epoch": 3.15, + "learning_rate": 1.8538311494853763e-05, + "loss": 0.1882, + "step": 559900 + }, + { + "epoch": 3.15, + "learning_rate": 1.853269026459131e-05, + "loss": 0.1873, + "step": 560000 + }, + { + "epoch": 3.15, + "learning_rate": 1.8527069034328852e-05, + "loss": 0.1883, + "step": 560100 + }, + { + "epoch": 3.15, + "learning_rate": 1.85214478040664e-05, + "loss": 0.1866, + "step": 560200 + }, + { + "epoch": 3.15, + "learning_rate": 1.8515826573803945e-05, + "loss": 0.1812, + "step": 560300 + }, + { + "epoch": 3.15, + "learning_rate": 1.851020534354149e-05, + "loss": 0.1855, + "step": 560400 + }, + { + "epoch": 3.15, + "learning_rate": 1.8504584113279034e-05, + "loss": 0.186, + "step": 560500 + }, + { + "epoch": 3.15, + "learning_rate": 1.8498962883016577e-05, + "loss": 0.1951, + "step": 560600 + }, + { + "epoch": 3.15, + "learning_rate": 1.8493341652754123e-05, + "loss": 0.1901, + "step": 560700 + }, + { + "epoch": 3.15, + "learning_rate": 1.8487720422491666e-05, + "loss": 0.1827, + "step": 560800 + }, + { + "epoch": 3.15, + "learning_rate": 1.8482099192229212e-05, + "loss": 0.1828, + "step": 560900 + }, + { + "epoch": 3.15, + "learning_rate": 1.8476477961966755e-05, + "loss": 0.191, + "step": 561000 + }, + { + "epoch": 3.15, + "learning_rate": 1.84708567317043e-05, + "loss": 0.1869, + "step": 561100 + }, + { + "epoch": 3.15, + "learning_rate": 1.8465235501441848e-05, + "loss": 0.1936, + "step": 561200 + }, + { + "epoch": 3.16, + "learning_rate": 1.845961427117939e-05, + "loss": 0.1874, + "step": 561300 + }, + { + "epoch": 3.16, + "learning_rate": 1.8453993040916937e-05, + "loss": 0.1871, + "step": 561400 + }, + { + "epoch": 3.16, + "learning_rate": 1.844837181065448e-05, + "loss": 0.189, + "step": 561500 + }, + { + "epoch": 3.16, + "learning_rate": 1.8442750580392026e-05, + "loss": 0.1855, + "step": 561600 + }, + { + "epoch": 3.16, + "learning_rate": 1.843712935012957e-05, + "loss": 0.1874, + "step": 561700 + }, + { + "epoch": 3.16, + "learning_rate": 1.8431508119867115e-05, + "loss": 0.1856, + "step": 561800 + }, + { + "epoch": 3.16, + "learning_rate": 1.842588688960466e-05, + "loss": 0.1818, + "step": 561900 + }, + { + "epoch": 3.16, + "learning_rate": 1.8420265659342204e-05, + "loss": 0.1898, + "step": 562000 + }, + { + "epoch": 3.16, + "learning_rate": 1.841464442907975e-05, + "loss": 0.1852, + "step": 562100 + }, + { + "epoch": 3.16, + "learning_rate": 1.8409023198817294e-05, + "loss": 0.1925, + "step": 562200 + }, + { + "epoch": 3.16, + "learning_rate": 1.840340196855484e-05, + "loss": 0.1857, + "step": 562300 + }, + { + "epoch": 3.16, + "learning_rate": 1.8397780738292383e-05, + "loss": 0.1808, + "step": 562400 + }, + { + "epoch": 3.16, + "learning_rate": 1.8392159508029926e-05, + "loss": 0.1898, + "step": 562500 + }, + { + "epoch": 3.16, + "learning_rate": 1.8386538277767472e-05, + "loss": 0.1803, + "step": 562600 + }, + { + "epoch": 3.16, + "learning_rate": 1.8380917047505018e-05, + "loss": 0.1897, + "step": 562700 + }, + { + "epoch": 3.16, + "learning_rate": 1.8375295817242564e-05, + "loss": 0.189, + "step": 562800 + }, + { + "epoch": 3.16, + "learning_rate": 1.8369674586980107e-05, + "loss": 0.1895, + "step": 562900 + }, + { + "epoch": 3.16, + "learning_rate": 1.8364053356717654e-05, + "loss": 0.1861, + "step": 563000 + }, + { + "epoch": 3.17, + "learning_rate": 1.8358432126455196e-05, + "loss": 0.1863, + "step": 563100 + }, + { + "epoch": 3.17, + "learning_rate": 1.835281089619274e-05, + "loss": 0.1845, + "step": 563200 + }, + { + "epoch": 3.17, + "learning_rate": 1.8347189665930286e-05, + "loss": 0.1836, + "step": 563300 + }, + { + "epoch": 3.17, + "learning_rate": 1.834156843566783e-05, + "loss": 0.1845, + "step": 563400 + }, + { + "epoch": 3.17, + "learning_rate": 1.8335947205405378e-05, + "loss": 0.1849, + "step": 563500 + }, + { + "epoch": 3.17, + "learning_rate": 1.833032597514292e-05, + "loss": 0.1854, + "step": 563600 + }, + { + "epoch": 3.17, + "learning_rate": 1.8324704744880467e-05, + "loss": 0.1935, + "step": 563700 + }, + { + "epoch": 3.17, + "learning_rate": 1.831908351461801e-05, + "loss": 0.1837, + "step": 563800 + }, + { + "epoch": 3.17, + "learning_rate": 1.8313462284355553e-05, + "loss": 0.1811, + "step": 563900 + }, + { + "epoch": 3.17, + "learning_rate": 1.8307897266395724e-05, + "loss": 0.1835, + "step": 564000 + }, + { + "epoch": 3.17, + "learning_rate": 1.830227603613327e-05, + "loss": 0.1902, + "step": 564100 + }, + { + "epoch": 3.17, + "learning_rate": 1.8296654805870813e-05, + "loss": 0.1832, + "step": 564200 + }, + { + "epoch": 3.17, + "learning_rate": 1.8291033575608356e-05, + "loss": 0.1872, + "step": 564300 + }, + { + "epoch": 3.17, + "learning_rate": 1.8285412345345903e-05, + "loss": 0.185, + "step": 564400 + }, + { + "epoch": 3.17, + "learning_rate": 1.827979111508345e-05, + "loss": 0.1895, + "step": 564500 + }, + { + "epoch": 3.17, + "learning_rate": 1.8274169884820995e-05, + "loss": 0.185, + "step": 564600 + }, + { + "epoch": 3.17, + "learning_rate": 1.8268548654558538e-05, + "loss": 0.1858, + "step": 564700 + }, + { + "epoch": 3.17, + "learning_rate": 1.8262927424296084e-05, + "loss": 0.1845, + "step": 564800 + }, + { + "epoch": 3.18, + "learning_rate": 1.8257306194033627e-05, + "loss": 0.182, + "step": 564900 + }, + { + "epoch": 3.18, + "learning_rate": 1.825168496377117e-05, + "loss": 0.1785, + "step": 565000 + }, + { + "epoch": 3.18, + "learning_rate": 1.824611994581134e-05, + "loss": 0.1846, + "step": 565100 + }, + { + "epoch": 3.18, + "learning_rate": 1.8240498715548887e-05, + "loss": 0.1866, + "step": 565200 + }, + { + "epoch": 3.18, + "learning_rate": 1.823487748528643e-05, + "loss": 0.1822, + "step": 565300 + }, + { + "epoch": 3.18, + "learning_rate": 1.8229256255023973e-05, + "loss": 0.1819, + "step": 565400 + }, + { + "epoch": 3.18, + "learning_rate": 1.822363502476152e-05, + "loss": 0.1863, + "step": 565500 + }, + { + "epoch": 3.18, + "learning_rate": 1.8218013794499066e-05, + "loss": 0.1721, + "step": 565600 + }, + { + "epoch": 3.18, + "learning_rate": 1.8212392564236612e-05, + "loss": 0.1864, + "step": 565700 + }, + { + "epoch": 3.18, + "learning_rate": 1.8206771333974155e-05, + "loss": 0.1862, + "step": 565800 + }, + { + "epoch": 3.18, + "learning_rate": 1.82011501037117e-05, + "loss": 0.1813, + "step": 565900 + }, + { + "epoch": 3.18, + "learning_rate": 1.8195528873449244e-05, + "loss": 0.1883, + "step": 566000 + }, + { + "epoch": 3.18, + "learning_rate": 1.8189907643186787e-05, + "loss": 0.1889, + "step": 566100 + }, + { + "epoch": 3.18, + "learning_rate": 1.8184286412924333e-05, + "loss": 0.1842, + "step": 566200 + }, + { + "epoch": 3.18, + "learning_rate": 1.8178665182661876e-05, + "loss": 0.1923, + "step": 566300 + }, + { + "epoch": 3.18, + "learning_rate": 1.8173043952399422e-05, + "loss": 0.1823, + "step": 566400 + }, + { + "epoch": 3.18, + "learning_rate": 1.816742272213697e-05, + "loss": 0.179, + "step": 566500 + }, + { + "epoch": 3.18, + "learning_rate": 1.8161801491874515e-05, + "loss": 0.191, + "step": 566600 + }, + { + "epoch": 3.19, + "learning_rate": 1.8156180261612058e-05, + "loss": 0.1819, + "step": 566700 + }, + { + "epoch": 3.19, + "learning_rate": 1.81505590313496e-05, + "loss": 0.185, + "step": 566800 + }, + { + "epoch": 3.19, + "learning_rate": 1.8144937801087147e-05, + "loss": 0.1892, + "step": 566900 + }, + { + "epoch": 3.19, + "learning_rate": 1.813931657082469e-05, + "loss": 0.1822, + "step": 567000 + }, + { + "epoch": 3.19, + "learning_rate": 1.8133695340562236e-05, + "loss": 0.1801, + "step": 567100 + }, + { + "epoch": 3.19, + "learning_rate": 1.8128074110299782e-05, + "loss": 0.1833, + "step": 567200 + }, + { + "epoch": 3.19, + "learning_rate": 1.812245288003733e-05, + "loss": 0.186, + "step": 567300 + }, + { + "epoch": 3.19, + "learning_rate": 1.811683164977487e-05, + "loss": 0.189, + "step": 567400 + }, + { + "epoch": 3.19, + "learning_rate": 1.8111210419512414e-05, + "loss": 0.189, + "step": 567500 + }, + { + "epoch": 3.19, + "learning_rate": 1.810558918924996e-05, + "loss": 0.1831, + "step": 567600 + }, + { + "epoch": 3.19, + "learning_rate": 1.8099967958987504e-05, + "loss": 0.1823, + "step": 567700 + }, + { + "epoch": 3.19, + "learning_rate": 1.809434672872505e-05, + "loss": 0.1868, + "step": 567800 + }, + { + "epoch": 3.19, + "learning_rate": 1.8088725498462593e-05, + "loss": 0.1845, + "step": 567900 + }, + { + "epoch": 3.19, + "learning_rate": 1.808310426820014e-05, + "loss": 0.1885, + "step": 568000 + }, + { + "epoch": 3.19, + "learning_rate": 1.8077483037937685e-05, + "loss": 0.1838, + "step": 568100 + }, + { + "epoch": 3.19, + "learning_rate": 1.8071861807675228e-05, + "loss": 0.1885, + "step": 568200 + }, + { + "epoch": 3.19, + "learning_rate": 1.8066240577412774e-05, + "loss": 0.1846, + "step": 568300 + }, + { + "epoch": 3.2, + "learning_rate": 1.8060619347150317e-05, + "loss": 0.1869, + "step": 568400 + }, + { + "epoch": 3.2, + "learning_rate": 1.8054998116887864e-05, + "loss": 0.183, + "step": 568500 + }, + { + "epoch": 3.2, + "learning_rate": 1.8049376886625407e-05, + "loss": 0.1908, + "step": 568600 + }, + { + "epoch": 3.2, + "learning_rate": 1.8043755656362953e-05, + "loss": 0.1882, + "step": 568700 + }, + { + "epoch": 3.2, + "learning_rate": 1.80381344261005e-05, + "loss": 0.1798, + "step": 568800 + }, + { + "epoch": 3.2, + "learning_rate": 1.8032513195838042e-05, + "loss": 0.1863, + "step": 568900 + }, + { + "epoch": 3.2, + "learning_rate": 1.8026891965575588e-05, + "loss": 0.1928, + "step": 569000 + }, + { + "epoch": 3.2, + "learning_rate": 1.802127073531313e-05, + "loss": 0.1854, + "step": 569100 + }, + { + "epoch": 3.2, + "learning_rate": 1.8015649505050677e-05, + "loss": 0.1841, + "step": 569200 + }, + { + "epoch": 3.2, + "learning_rate": 1.801002827478822e-05, + "loss": 0.1837, + "step": 569300 + }, + { + "epoch": 3.2, + "learning_rate": 1.8004407044525763e-05, + "loss": 0.1879, + "step": 569400 + }, + { + "epoch": 3.2, + "learning_rate": 1.799878581426331e-05, + "loss": 0.1841, + "step": 569500 + }, + { + "epoch": 3.2, + "learning_rate": 1.7993164584000856e-05, + "loss": 0.1849, + "step": 569600 + }, + { + "epoch": 3.2, + "learning_rate": 1.7987599566041023e-05, + "loss": 0.1864, + "step": 569700 + }, + { + "epoch": 3.2, + "learning_rate": 1.798197833577857e-05, + "loss": 0.1851, + "step": 569800 + }, + { + "epoch": 3.2, + "learning_rate": 1.7976357105516116e-05, + "loss": 0.1899, + "step": 569900 + }, + { + "epoch": 3.2, + "learning_rate": 1.797073587525366e-05, + "loss": 0.1902, + "step": 570000 + }, + { + "epoch": 3.2, + "learning_rate": 1.7965114644991205e-05, + "loss": 0.1866, + "step": 570100 + }, + { + "epoch": 3.21, + "learning_rate": 1.7959493414728748e-05, + "loss": 0.1837, + "step": 570200 + }, + { + "epoch": 3.21, + "learning_rate": 1.7953872184466294e-05, + "loss": 0.1828, + "step": 570300 + }, + { + "epoch": 3.21, + "learning_rate": 1.7948250954203837e-05, + "loss": 0.1865, + "step": 570400 + }, + { + "epoch": 3.21, + "learning_rate": 1.794262972394138e-05, + "loss": 0.1832, + "step": 570500 + }, + { + "epoch": 3.21, + "learning_rate": 1.7937008493678926e-05, + "loss": 0.1815, + "step": 570600 + }, + { + "epoch": 3.21, + "learning_rate": 1.7931387263416473e-05, + "loss": 0.188, + "step": 570700 + }, + { + "epoch": 3.21, + "learning_rate": 1.792576603315402e-05, + "loss": 0.1896, + "step": 570800 + }, + { + "epoch": 3.21, + "learning_rate": 1.7920144802891562e-05, + "loss": 0.19, + "step": 570900 + }, + { + "epoch": 3.21, + "learning_rate": 1.7914523572629108e-05, + "loss": 0.1875, + "step": 571000 + }, + { + "epoch": 3.21, + "learning_rate": 1.790890234236665e-05, + "loss": 0.1815, + "step": 571100 + }, + { + "epoch": 3.21, + "learning_rate": 1.7903281112104194e-05, + "loss": 0.1842, + "step": 571200 + }, + { + "epoch": 3.21, + "learning_rate": 1.789765988184174e-05, + "loss": 0.1813, + "step": 571300 + }, + { + "epoch": 3.21, + "learning_rate": 1.7892038651579286e-05, + "loss": 0.1832, + "step": 571400 + }, + { + "epoch": 3.21, + "learning_rate": 1.7886417421316833e-05, + "loss": 0.1816, + "step": 571500 + }, + { + "epoch": 3.21, + "learning_rate": 1.7880796191054376e-05, + "loss": 0.1824, + "step": 571600 + }, + { + "epoch": 3.21, + "learning_rate": 1.7875174960791922e-05, + "loss": 0.1873, + "step": 571700 + }, + { + "epoch": 3.21, + "learning_rate": 1.7869553730529465e-05, + "loss": 0.1827, + "step": 571800 + }, + { + "epoch": 3.21, + "learning_rate": 1.7863932500267008e-05, + "loss": 0.191, + "step": 571900 + }, + { + "epoch": 3.22, + "learning_rate": 1.785836748230718e-05, + "loss": 0.1809, + "step": 572000 + }, + { + "epoch": 3.22, + "learning_rate": 1.7852746252044725e-05, + "loss": 0.1869, + "step": 572100 + }, + { + "epoch": 3.22, + "learning_rate": 1.7847125021782268e-05, + "loss": 0.1843, + "step": 572200 + }, + { + "epoch": 3.22, + "learning_rate": 1.784150379151981e-05, + "loss": 0.1854, + "step": 572300 + }, + { + "epoch": 3.22, + "learning_rate": 1.7835882561257357e-05, + "loss": 0.1869, + "step": 572400 + }, + { + "epoch": 3.22, + "learning_rate": 1.7830317543297525e-05, + "loss": 0.1915, + "step": 572500 + }, + { + "epoch": 3.22, + "learning_rate": 1.782469631303507e-05, + "loss": 0.1819, + "step": 572600 + }, + { + "epoch": 3.22, + "learning_rate": 1.7819075082772617e-05, + "loss": 0.183, + "step": 572700 + }, + { + "epoch": 3.22, + "learning_rate": 1.7813453852510163e-05, + "loss": 0.1824, + "step": 572800 + }, + { + "epoch": 3.22, + "learning_rate": 1.7807832622247706e-05, + "loss": 0.1851, + "step": 572900 + }, + { + "epoch": 3.22, + "learning_rate": 1.7802211391985253e-05, + "loss": 0.191, + "step": 573000 + }, + { + "epoch": 3.22, + "learning_rate": 1.7796590161722795e-05, + "loss": 0.1837, + "step": 573100 + }, + { + "epoch": 3.22, + "learning_rate": 1.779096893146034e-05, + "loss": 0.1856, + "step": 573200 + }, + { + "epoch": 3.22, + "learning_rate": 1.778540391350051e-05, + "loss": 0.1952, + "step": 573300 + }, + { + "epoch": 3.22, + "learning_rate": 1.7779782683238056e-05, + "loss": 0.1872, + "step": 573400 + }, + { + "epoch": 3.22, + "learning_rate": 1.77741614529756e-05, + "loss": 0.186, + "step": 573500 + }, + { + "epoch": 3.22, + "learning_rate": 1.776854022271314e-05, + "loss": 0.1851, + "step": 573600 + }, + { + "epoch": 3.22, + "learning_rate": 1.7762918992450688e-05, + "loss": 0.1858, + "step": 573700 + }, + { + "epoch": 3.23, + "learning_rate": 1.7757297762188234e-05, + "loss": 0.1913, + "step": 573800 + }, + { + "epoch": 3.23, + "learning_rate": 1.775167653192578e-05, + "loss": 0.1833, + "step": 573900 + }, + { + "epoch": 3.23, + "learning_rate": 1.7746055301663323e-05, + "loss": 0.1829, + "step": 574000 + }, + { + "epoch": 3.23, + "learning_rate": 1.774043407140087e-05, + "loss": 0.1861, + "step": 574100 + }, + { + "epoch": 3.23, + "learning_rate": 1.7734812841138412e-05, + "loss": 0.1853, + "step": 574200 + }, + { + "epoch": 3.23, + "learning_rate": 1.7729191610875955e-05, + "loss": 0.1894, + "step": 574300 + }, + { + "epoch": 3.23, + "learning_rate": 1.77235703806135e-05, + "loss": 0.1856, + "step": 574400 + }, + { + "epoch": 3.23, + "learning_rate": 1.7717949150351044e-05, + "loss": 0.1874, + "step": 574500 + }, + { + "epoch": 3.23, + "learning_rate": 1.771232792008859e-05, + "loss": 0.1856, + "step": 574600 + }, + { + "epoch": 3.23, + "learning_rate": 1.7706706689826137e-05, + "loss": 0.1799, + "step": 574700 + }, + { + "epoch": 3.23, + "learning_rate": 1.7701085459563683e-05, + "loss": 0.1799, + "step": 574800 + }, + { + "epoch": 3.23, + "learning_rate": 1.7695464229301226e-05, + "loss": 0.1826, + "step": 574900 + }, + { + "epoch": 3.23, + "learning_rate": 1.768984299903877e-05, + "loss": 0.1855, + "step": 575000 + }, + { + "epoch": 3.23, + "learning_rate": 1.7684221768776315e-05, + "loss": 0.1794, + "step": 575100 + }, + { + "epoch": 3.23, + "learning_rate": 1.7678600538513858e-05, + "loss": 0.1865, + "step": 575200 + }, + { + "epoch": 3.23, + "learning_rate": 1.7672979308251404e-05, + "loss": 0.1821, + "step": 575300 + }, + { + "epoch": 3.23, + "learning_rate": 1.766735807798895e-05, + "loss": 0.1884, + "step": 575400 + }, + { + "epoch": 3.24, + "learning_rate": 1.7661736847726497e-05, + "loss": 0.1849, + "step": 575500 + }, + { + "epoch": 3.24, + "learning_rate": 1.765611561746404e-05, + "loss": 0.1823, + "step": 575600 + }, + { + "epoch": 3.24, + "learning_rate": 1.7650494387201583e-05, + "loss": 0.1807, + "step": 575700 + }, + { + "epoch": 3.24, + "learning_rate": 1.764487315693913e-05, + "loss": 0.1809, + "step": 575800 + }, + { + "epoch": 3.24, + "learning_rate": 1.7639251926676672e-05, + "loss": 0.1919, + "step": 575900 + }, + { + "epoch": 3.24, + "learning_rate": 1.7633630696414218e-05, + "loss": 0.1846, + "step": 576000 + }, + { + "epoch": 3.24, + "learning_rate": 1.762800946615176e-05, + "loss": 0.1862, + "step": 576100 + }, + { + "epoch": 3.24, + "learning_rate": 1.7622388235889307e-05, + "loss": 0.1813, + "step": 576200 + }, + { + "epoch": 3.24, + "learning_rate": 1.7616767005626854e-05, + "loss": 0.1843, + "step": 576300 + }, + { + "epoch": 3.24, + "learning_rate": 1.7611145775364397e-05, + "loss": 0.1896, + "step": 576400 + }, + { + "epoch": 3.24, + "learning_rate": 1.7605524545101943e-05, + "loss": 0.1864, + "step": 576500 + }, + { + "epoch": 3.24, + "learning_rate": 1.7599903314839486e-05, + "loss": 0.1829, + "step": 576600 + }, + { + "epoch": 3.24, + "learning_rate": 1.7594282084577032e-05, + "loss": 0.1814, + "step": 576700 + }, + { + "epoch": 3.24, + "learning_rate": 1.7588660854314575e-05, + "loss": 0.191, + "step": 576800 + }, + { + "epoch": 3.24, + "learning_rate": 1.758303962405212e-05, + "loss": 0.1818, + "step": 576900 + }, + { + "epoch": 3.24, + "learning_rate": 1.7577418393789667e-05, + "loss": 0.1863, + "step": 577000 + }, + { + "epoch": 3.24, + "learning_rate": 1.757179716352721e-05, + "loss": 0.1922, + "step": 577100 + }, + { + "epoch": 3.24, + "learning_rate": 1.7566175933264757e-05, + "loss": 0.1923, + "step": 577200 + }, + { + "epoch": 3.25, + "learning_rate": 1.75605547030023e-05, + "loss": 0.1896, + "step": 577300 + }, + { + "epoch": 3.25, + "learning_rate": 1.7554933472739846e-05, + "loss": 0.1914, + "step": 577400 + }, + { + "epoch": 3.25, + "learning_rate": 1.754931224247739e-05, + "loss": 0.177, + "step": 577500 + }, + { + "epoch": 3.25, + "learning_rate": 1.754369101221493e-05, + "loss": 0.1916, + "step": 577600 + }, + { + "epoch": 3.25, + "learning_rate": 1.7538069781952478e-05, + "loss": 0.1827, + "step": 577700 + }, + { + "epoch": 3.25, + "learning_rate": 1.7532448551690024e-05, + "loss": 0.1908, + "step": 577800 + }, + { + "epoch": 3.25, + "learning_rate": 1.752682732142757e-05, + "loss": 0.1828, + "step": 577900 + }, + { + "epoch": 3.25, + "learning_rate": 1.7521262303467738e-05, + "loss": 0.1879, + "step": 578000 + }, + { + "epoch": 3.25, + "learning_rate": 1.7515641073205284e-05, + "loss": 0.1843, + "step": 578100 + }, + { + "epoch": 3.25, + "learning_rate": 1.7510019842942827e-05, + "loss": 0.1879, + "step": 578200 + }, + { + "epoch": 3.25, + "learning_rate": 1.7504398612680373e-05, + "loss": 0.1855, + "step": 578300 + }, + { + "epoch": 3.25, + "learning_rate": 1.7498777382417916e-05, + "loss": 0.1872, + "step": 578400 + }, + { + "epoch": 3.25, + "learning_rate": 1.7493156152155463e-05, + "loss": 0.188, + "step": 578500 + }, + { + "epoch": 3.25, + "learning_rate": 1.7487534921893005e-05, + "loss": 0.1872, + "step": 578600 + }, + { + "epoch": 3.25, + "learning_rate": 1.748191369163055e-05, + "loss": 0.1877, + "step": 578700 + }, + { + "epoch": 3.25, + "learning_rate": 1.7476292461368095e-05, + "loss": 0.1845, + "step": 578800 + }, + { + "epoch": 3.25, + "learning_rate": 1.747067123110564e-05, + "loss": 0.1863, + "step": 578900 + }, + { + "epoch": 3.25, + "learning_rate": 1.7465050000843187e-05, + "loss": 0.1862, + "step": 579000 + }, + { + "epoch": 3.26, + "learning_rate": 1.745942877058073e-05, + "loss": 0.1884, + "step": 579100 + }, + { + "epoch": 3.26, + "learning_rate": 1.7453807540318276e-05, + "loss": 0.1812, + "step": 579200 + }, + { + "epoch": 3.26, + "learning_rate": 1.744818631005582e-05, + "loss": 0.1835, + "step": 579300 + }, + { + "epoch": 3.26, + "learning_rate": 1.744262129209599e-05, + "loss": 0.1844, + "step": 579400 + }, + { + "epoch": 3.26, + "learning_rate": 1.7437000061833533e-05, + "loss": 0.1845, + "step": 579500 + }, + { + "epoch": 3.26, + "learning_rate": 1.743137883157108e-05, + "loss": 0.186, + "step": 579600 + }, + { + "epoch": 3.26, + "learning_rate": 1.7425757601308622e-05, + "loss": 0.1836, + "step": 579700 + }, + { + "epoch": 3.26, + "learning_rate": 1.7420136371046165e-05, + "loss": 0.1851, + "step": 579800 + }, + { + "epoch": 3.26, + "learning_rate": 1.741451514078371e-05, + "loss": 0.1803, + "step": 579900 + }, + { + "epoch": 3.26, + "learning_rate": 1.7408893910521258e-05, + "loss": 0.185, + "step": 580000 + }, + { + "epoch": 3.26, + "learning_rate": 1.7403272680258804e-05, + "loss": 0.1868, + "step": 580100 + }, + { + "epoch": 3.26, + "learning_rate": 1.7397651449996347e-05, + "loss": 0.1847, + "step": 580200 + }, + { + "epoch": 3.26, + "learning_rate": 1.7392030219733893e-05, + "loss": 0.1805, + "step": 580300 + }, + { + "epoch": 3.26, + "learning_rate": 1.7386408989471436e-05, + "loss": 0.19, + "step": 580400 + }, + { + "epoch": 3.26, + "learning_rate": 1.738078775920898e-05, + "loss": 0.1899, + "step": 580500 + }, + { + "epoch": 3.26, + "learning_rate": 1.7375166528946525e-05, + "loss": 0.1821, + "step": 580600 + }, + { + "epoch": 3.26, + "learning_rate": 1.736954529868407e-05, + "loss": 0.1875, + "step": 580700 + }, + { + "epoch": 3.26, + "learning_rate": 1.7363924068421618e-05, + "loss": 0.1865, + "step": 580800 + }, + { + "epoch": 3.27, + "learning_rate": 1.735830283815916e-05, + "loss": 0.1815, + "step": 580900 + }, + { + "epoch": 3.27, + "learning_rate": 1.7352681607896707e-05, + "loss": 0.181, + "step": 581000 + }, + { + "epoch": 3.27, + "learning_rate": 1.734706037763425e-05, + "loss": 0.1841, + "step": 581100 + }, + { + "epoch": 3.27, + "learning_rate": 1.7341439147371793e-05, + "loss": 0.1798, + "step": 581200 + }, + { + "epoch": 3.27, + "learning_rate": 1.733581791710934e-05, + "loss": 0.1852, + "step": 581300 + }, + { + "epoch": 3.27, + "learning_rate": 1.7330196686846882e-05, + "loss": 0.1893, + "step": 581400 + }, + { + "epoch": 3.27, + "learning_rate": 1.7324575456584428e-05, + "loss": 0.1857, + "step": 581500 + }, + { + "epoch": 3.27, + "learning_rate": 1.7318954226321974e-05, + "loss": 0.1883, + "step": 581600 + }, + { + "epoch": 3.27, + "learning_rate": 1.731333299605952e-05, + "loss": 0.1822, + "step": 581700 + }, + { + "epoch": 3.27, + "learning_rate": 1.7307711765797064e-05, + "loss": 0.1827, + "step": 581800 + }, + { + "epoch": 3.27, + "learning_rate": 1.7302090535534607e-05, + "loss": 0.1852, + "step": 581900 + }, + { + "epoch": 3.27, + "learning_rate": 1.7296469305272153e-05, + "loss": 0.1873, + "step": 582000 + }, + { + "epoch": 3.27, + "learning_rate": 1.7290848075009696e-05, + "loss": 0.1845, + "step": 582100 + }, + { + "epoch": 3.27, + "learning_rate": 1.7285226844747242e-05, + "loss": 0.1859, + "step": 582200 + }, + { + "epoch": 3.27, + "learning_rate": 1.7279605614484788e-05, + "loss": 0.1839, + "step": 582300 + }, + { + "epoch": 3.27, + "learning_rate": 1.727398438422233e-05, + "loss": 0.1829, + "step": 582400 + }, + { + "epoch": 3.27, + "learning_rate": 1.7268363153959877e-05, + "loss": 0.1903, + "step": 582500 + }, + { + "epoch": 3.27, + "learning_rate": 1.726274192369742e-05, + "loss": 0.1823, + "step": 582600 + }, + { + "epoch": 3.28, + "learning_rate": 1.7257120693434967e-05, + "loss": 0.184, + "step": 582700 + }, + { + "epoch": 3.28, + "learning_rate": 1.725149946317251e-05, + "loss": 0.1911, + "step": 582800 + }, + { + "epoch": 3.28, + "learning_rate": 1.7245878232910056e-05, + "loss": 0.1929, + "step": 582900 + }, + { + "epoch": 3.28, + "learning_rate": 1.72402570026476e-05, + "loss": 0.1885, + "step": 583000 + }, + { + "epoch": 3.28, + "learning_rate": 1.7234635772385145e-05, + "loss": 0.1846, + "step": 583100 + }, + { + "epoch": 3.28, + "learning_rate": 1.722901454212269e-05, + "loss": 0.1843, + "step": 583200 + }, + { + "epoch": 3.28, + "learning_rate": 1.7223393311860234e-05, + "loss": 0.1834, + "step": 583300 + }, + { + "epoch": 3.28, + "learning_rate": 1.721777208159778e-05, + "loss": 0.1878, + "step": 583400 + }, + { + "epoch": 3.28, + "learning_rate": 1.7212150851335323e-05, + "loss": 0.1871, + "step": 583500 + }, + { + "epoch": 3.28, + "learning_rate": 1.7206585833375494e-05, + "loss": 0.1866, + "step": 583600 + }, + { + "epoch": 3.28, + "learning_rate": 1.7200964603113037e-05, + "loss": 0.1853, + "step": 583700 + }, + { + "epoch": 3.28, + "learning_rate": 1.7195343372850583e-05, + "loss": 0.183, + "step": 583800 + }, + { + "epoch": 3.28, + "learning_rate": 1.7189722142588126e-05, + "loss": 0.1888, + "step": 583900 + }, + { + "epoch": 3.28, + "learning_rate": 1.7184100912325673e-05, + "loss": 0.1841, + "step": 584000 + }, + { + "epoch": 3.28, + "learning_rate": 1.7178479682063215e-05, + "loss": 0.1847, + "step": 584100 + }, + { + "epoch": 3.28, + "learning_rate": 1.7172858451800762e-05, + "loss": 0.1895, + "step": 584200 + }, + { + "epoch": 3.28, + "learning_rate": 1.7167237221538308e-05, + "loss": 0.1859, + "step": 584300 + }, + { + "epoch": 3.29, + "learning_rate": 1.716161599127585e-05, + "loss": 0.1919, + "step": 584400 + }, + { + "epoch": 3.29, + "learning_rate": 1.7155994761013397e-05, + "loss": 0.1835, + "step": 584500 + }, + { + "epoch": 3.29, + "learning_rate": 1.715037353075094e-05, + "loss": 0.1876, + "step": 584600 + }, + { + "epoch": 3.29, + "learning_rate": 1.7144752300488486e-05, + "loss": 0.1864, + "step": 584700 + }, + { + "epoch": 3.29, + "learning_rate": 1.713913107022603e-05, + "loss": 0.1891, + "step": 584800 + }, + { + "epoch": 3.29, + "learning_rate": 1.7133509839963576e-05, + "loss": 0.1881, + "step": 584900 + }, + { + "epoch": 3.29, + "learning_rate": 1.7127888609701122e-05, + "loss": 0.185, + "step": 585000 + }, + { + "epoch": 3.29, + "learning_rate": 1.7122267379438665e-05, + "loss": 0.1838, + "step": 585100 + }, + { + "epoch": 3.29, + "learning_rate": 1.711664614917621e-05, + "loss": 0.1867, + "step": 585200 + }, + { + "epoch": 3.29, + "learning_rate": 1.7111024918913754e-05, + "loss": 0.1872, + "step": 585300 + }, + { + "epoch": 3.29, + "learning_rate": 1.71054036886513e-05, + "loss": 0.1836, + "step": 585400 + }, + { + "epoch": 3.29, + "learning_rate": 1.7099782458388843e-05, + "loss": 0.1829, + "step": 585500 + }, + { + "epoch": 3.29, + "learning_rate": 1.7094161228126386e-05, + "loss": 0.1804, + "step": 585600 + }, + { + "epoch": 3.29, + "learning_rate": 1.7088539997863932e-05, + "loss": 0.1825, + "step": 585700 + }, + { + "epoch": 3.29, + "learning_rate": 1.708291876760148e-05, + "loss": 0.1847, + "step": 585800 + }, + { + "epoch": 3.29, + "learning_rate": 1.7077297537339025e-05, + "loss": 0.1907, + "step": 585900 + }, + { + "epoch": 3.29, + "learning_rate": 1.7071676307076568e-05, + "loss": 0.1872, + "step": 586000 + }, + { + "epoch": 3.29, + "learning_rate": 1.7066055076814114e-05, + "loss": 0.1875, + "step": 586100 + }, + { + "epoch": 3.3, + "learning_rate": 1.7060433846551657e-05, + "loss": 0.1915, + "step": 586200 + }, + { + "epoch": 3.3, + "learning_rate": 1.70548126162892e-05, + "loss": 0.1869, + "step": 586300 + }, + { + "epoch": 3.3, + "learning_rate": 1.7049191386026746e-05, + "loss": 0.1829, + "step": 586400 + }, + { + "epoch": 3.3, + "learning_rate": 1.7043570155764292e-05, + "loss": 0.1855, + "step": 586500 + }, + { + "epoch": 3.3, + "learning_rate": 1.703794892550184e-05, + "loss": 0.1848, + "step": 586600 + }, + { + "epoch": 3.3, + "learning_rate": 1.703232769523938e-05, + "loss": 0.1835, + "step": 586700 + }, + { + "epoch": 3.3, + "learning_rate": 1.7026706464976928e-05, + "loss": 0.1873, + "step": 586800 + }, + { + "epoch": 3.3, + "learning_rate": 1.702108523471447e-05, + "loss": 0.1832, + "step": 586900 + }, + { + "epoch": 3.3, + "learning_rate": 1.7015464004452013e-05, + "loss": 0.1856, + "step": 587000 + }, + { + "epoch": 3.3, + "learning_rate": 1.700984277418956e-05, + "loss": 0.1832, + "step": 587100 + }, + { + "epoch": 3.3, + "learning_rate": 1.7004221543927103e-05, + "loss": 0.1876, + "step": 587200 + }, + { + "epoch": 3.3, + "learning_rate": 1.699860031366465e-05, + "loss": 0.1867, + "step": 587300 + }, + { + "epoch": 3.3, + "learning_rate": 1.6992979083402195e-05, + "loss": 0.1779, + "step": 587400 + }, + { + "epoch": 3.3, + "learning_rate": 1.6987357853139738e-05, + "loss": 0.1825, + "step": 587500 + }, + { + "epoch": 3.3, + "learning_rate": 1.6981736622877284e-05, + "loss": 0.1883, + "step": 587600 + }, + { + "epoch": 3.3, + "learning_rate": 1.6976115392614827e-05, + "loss": 0.1859, + "step": 587700 + }, + { + "epoch": 3.3, + "learning_rate": 1.6970494162352374e-05, + "loss": 0.1812, + "step": 587800 + }, + { + "epoch": 3.3, + "learning_rate": 1.6964872932089916e-05, + "loss": 0.1864, + "step": 587900 + }, + { + "epoch": 3.31, + "learning_rate": 1.6959307914130087e-05, + "loss": 0.1868, + "step": 588000 + }, + { + "epoch": 3.31, + "learning_rate": 1.695368668386763e-05, + "loss": 0.1848, + "step": 588100 + }, + { + "epoch": 3.31, + "learning_rate": 1.6948065453605177e-05, + "loss": 0.1886, + "step": 588200 + }, + { + "epoch": 3.31, + "learning_rate": 1.694244422334272e-05, + "loss": 0.1884, + "step": 588300 + }, + { + "epoch": 3.31, + "learning_rate": 1.6936822993080266e-05, + "loss": 0.1845, + "step": 588400 + }, + { + "epoch": 3.31, + "learning_rate": 1.6931201762817812e-05, + "loss": 0.1859, + "step": 588500 + }, + { + "epoch": 3.31, + "learning_rate": 1.6925580532555355e-05, + "loss": 0.1851, + "step": 588600 + }, + { + "epoch": 3.31, + "learning_rate": 1.69199593022929e-05, + "loss": 0.1885, + "step": 588700 + }, + { + "epoch": 3.31, + "learning_rate": 1.6914338072030444e-05, + "loss": 0.1812, + "step": 588800 + }, + { + "epoch": 3.31, + "learning_rate": 1.690871684176799e-05, + "loss": 0.1853, + "step": 588900 + }, + { + "epoch": 3.31, + "learning_rate": 1.6903095611505533e-05, + "loss": 0.1865, + "step": 589000 + }, + { + "epoch": 3.31, + "learning_rate": 1.689747438124308e-05, + "loss": 0.1833, + "step": 589100 + }, + { + "epoch": 3.31, + "learning_rate": 1.6891853150980626e-05, + "loss": 0.1897, + "step": 589200 + }, + { + "epoch": 3.31, + "learning_rate": 1.688623192071817e-05, + "loss": 0.1876, + "step": 589300 + }, + { + "epoch": 3.31, + "learning_rate": 1.6880610690455715e-05, + "loss": 0.1849, + "step": 589400 + }, + { + "epoch": 3.31, + "learning_rate": 1.6874989460193258e-05, + "loss": 0.1829, + "step": 589500 + }, + { + "epoch": 3.31, + "learning_rate": 1.6869368229930804e-05, + "loss": 0.183, + "step": 589600 + }, + { + "epoch": 3.31, + "learning_rate": 1.6863746999668347e-05, + "loss": 0.1865, + "step": 589700 + }, + { + "epoch": 3.32, + "learning_rate": 1.6858125769405893e-05, + "loss": 0.1877, + "step": 589800 + }, + { + "epoch": 3.32, + "learning_rate": 1.6852504539143436e-05, + "loss": 0.1864, + "step": 589900 + }, + { + "epoch": 3.32, + "learning_rate": 1.6846883308880982e-05, + "loss": 0.183, + "step": 590000 + }, + { + "epoch": 3.32, + "learning_rate": 1.684126207861853e-05, + "loss": 0.1854, + "step": 590100 + }, + { + "epoch": 3.32, + "learning_rate": 1.6835697060658696e-05, + "loss": 0.1849, + "step": 590200 + }, + { + "epoch": 3.32, + "learning_rate": 1.6830075830396243e-05, + "loss": 0.1882, + "step": 590300 + }, + { + "epoch": 3.32, + "learning_rate": 1.6824454600133786e-05, + "loss": 0.187, + "step": 590400 + }, + { + "epoch": 3.32, + "learning_rate": 1.6818833369871332e-05, + "loss": 0.1801, + "step": 590500 + }, + { + "epoch": 3.32, + "learning_rate": 1.6813212139608875e-05, + "loss": 0.1811, + "step": 590600 + }, + { + "epoch": 3.32, + "learning_rate": 1.680759090934642e-05, + "loss": 0.1883, + "step": 590700 + }, + { + "epoch": 3.32, + "learning_rate": 1.6801969679083964e-05, + "loss": 0.1789, + "step": 590800 + }, + { + "epoch": 3.32, + "learning_rate": 1.679634844882151e-05, + "loss": 0.183, + "step": 590900 + }, + { + "epoch": 3.32, + "learning_rate": 1.6790727218559053e-05, + "loss": 0.1799, + "step": 591000 + }, + { + "epoch": 3.32, + "learning_rate": 1.67851059882966e-05, + "loss": 0.1832, + "step": 591100 + }, + { + "epoch": 3.32, + "learning_rate": 1.6779484758034146e-05, + "loss": 0.1831, + "step": 591200 + }, + { + "epoch": 3.32, + "learning_rate": 1.6773919740074313e-05, + "loss": 0.1855, + "step": 591300 + }, + { + "epoch": 3.32, + "learning_rate": 1.676829850981186e-05, + "loss": 0.1829, + "step": 591400 + }, + { + "epoch": 3.32, + "learning_rate": 1.6762677279549402e-05, + "loss": 0.1886, + "step": 591500 + }, + { + "epoch": 3.33, + "learning_rate": 1.675705604928695e-05, + "loss": 0.1896, + "step": 591600 + }, + { + "epoch": 3.33, + "learning_rate": 1.675143481902449e-05, + "loss": 0.184, + "step": 591700 + }, + { + "epoch": 3.33, + "learning_rate": 1.6745813588762038e-05, + "loss": 0.1841, + "step": 591800 + }, + { + "epoch": 3.33, + "learning_rate": 1.674019235849958e-05, + "loss": 0.1892, + "step": 591900 + }, + { + "epoch": 3.33, + "learning_rate": 1.6734571128237127e-05, + "loss": 0.1876, + "step": 592000 + }, + { + "epoch": 3.33, + "learning_rate": 1.6728949897974673e-05, + "loss": 0.1835, + "step": 592100 + }, + { + "epoch": 3.33, + "learning_rate": 1.6723328667712216e-05, + "loss": 0.1827, + "step": 592200 + }, + { + "epoch": 3.33, + "learning_rate": 1.6717707437449762e-05, + "loss": 0.1805, + "step": 592300 + }, + { + "epoch": 3.33, + "learning_rate": 1.6712086207187305e-05, + "loss": 0.1848, + "step": 592400 + }, + { + "epoch": 3.33, + "learning_rate": 1.670646497692485e-05, + "loss": 0.1851, + "step": 592500 + }, + { + "epoch": 3.33, + "learning_rate": 1.6700843746662395e-05, + "loss": 0.177, + "step": 592600 + }, + { + "epoch": 3.33, + "learning_rate": 1.6695222516399937e-05, + "loss": 0.188, + "step": 592700 + }, + { + "epoch": 3.33, + "learning_rate": 1.6689601286137484e-05, + "loss": 0.179, + "step": 592800 + }, + { + "epoch": 3.33, + "learning_rate": 1.668398005587503e-05, + "loss": 0.1893, + "step": 592900 + }, + { + "epoch": 3.33, + "learning_rate": 1.6678358825612576e-05, + "loss": 0.1811, + "step": 593000 + }, + { + "epoch": 3.33, + "learning_rate": 1.667273759535012e-05, + "loss": 0.1849, + "step": 593100 + }, + { + "epoch": 3.33, + "learning_rate": 1.6667116365087665e-05, + "loss": 0.1881, + "step": 593200 + }, + { + "epoch": 3.34, + "learning_rate": 1.6661495134825208e-05, + "loss": 0.1856, + "step": 593300 + }, + { + "epoch": 3.34, + "learning_rate": 1.665587390456275e-05, + "loss": 0.1855, + "step": 593400 + }, + { + "epoch": 3.34, + "learning_rate": 1.6650252674300297e-05, + "loss": 0.1886, + "step": 593500 + }, + { + "epoch": 3.34, + "learning_rate": 1.664463144403784e-05, + "loss": 0.1918, + "step": 593600 + }, + { + "epoch": 3.34, + "learning_rate": 1.663901021377539e-05, + "loss": 0.1846, + "step": 593700 + }, + { + "epoch": 3.34, + "learning_rate": 1.6633388983512933e-05, + "loss": 0.1864, + "step": 593800 + }, + { + "epoch": 3.34, + "learning_rate": 1.66278239655531e-05, + "loss": 0.1898, + "step": 593900 + }, + { + "epoch": 3.34, + "learning_rate": 1.6622202735290647e-05, + "loss": 0.1875, + "step": 594000 + }, + { + "epoch": 3.34, + "learning_rate": 1.6616581505028193e-05, + "loss": 0.1857, + "step": 594100 + }, + { + "epoch": 3.34, + "learning_rate": 1.6610960274765736e-05, + "loss": 0.1879, + "step": 594200 + }, + { + "epoch": 3.34, + "learning_rate": 1.6605339044503282e-05, + "loss": 0.1829, + "step": 594300 + }, + { + "epoch": 3.34, + "learning_rate": 1.6599717814240825e-05, + "loss": 0.188, + "step": 594400 + }, + { + "epoch": 3.34, + "learning_rate": 1.6594096583978368e-05, + "loss": 0.1814, + "step": 594500 + }, + { + "epoch": 3.34, + "learning_rate": 1.6588475353715914e-05, + "loss": 0.1865, + "step": 594600 + }, + { + "epoch": 3.34, + "learning_rate": 1.658285412345346e-05, + "loss": 0.1843, + "step": 594700 + }, + { + "epoch": 3.34, + "learning_rate": 1.6577232893191007e-05, + "loss": 0.1851, + "step": 594800 + }, + { + "epoch": 3.34, + "learning_rate": 1.657161166292855e-05, + "loss": 0.1832, + "step": 594900 + }, + { + "epoch": 3.34, + "learning_rate": 1.6565990432666096e-05, + "loss": 0.1869, + "step": 595000 + }, + { + "epoch": 3.35, + "learning_rate": 1.656036920240364e-05, + "loss": 0.1823, + "step": 595100 + }, + { + "epoch": 3.35, + "learning_rate": 1.6554747972141182e-05, + "loss": 0.1866, + "step": 595200 + }, + { + "epoch": 3.35, + "learning_rate": 1.6549126741878728e-05, + "loss": 0.1834, + "step": 595300 + }, + { + "epoch": 3.35, + "learning_rate": 1.654350551161627e-05, + "loss": 0.1872, + "step": 595400 + }, + { + "epoch": 3.35, + "learning_rate": 1.6537884281353817e-05, + "loss": 0.184, + "step": 595500 + }, + { + "epoch": 3.35, + "learning_rate": 1.6532263051091364e-05, + "loss": 0.1874, + "step": 595600 + }, + { + "epoch": 3.35, + "learning_rate": 1.652664182082891e-05, + "loss": 0.1874, + "step": 595700 + }, + { + "epoch": 3.35, + "learning_rate": 1.6521020590566453e-05, + "loss": 0.1828, + "step": 595800 + }, + { + "epoch": 3.35, + "learning_rate": 1.6515399360303996e-05, + "loss": 0.1869, + "step": 595900 + }, + { + "epoch": 3.35, + "learning_rate": 1.6509778130041542e-05, + "loss": 0.1827, + "step": 596000 + }, + { + "epoch": 3.35, + "learning_rate": 1.6504156899779085e-05, + "loss": 0.1883, + "step": 596100 + }, + { + "epoch": 3.35, + "learning_rate": 1.649853566951663e-05, + "loss": 0.1838, + "step": 596200 + }, + { + "epoch": 3.35, + "learning_rate": 1.6492914439254177e-05, + "loss": 0.1819, + "step": 596300 + }, + { + "epoch": 3.35, + "learning_rate": 1.6487293208991724e-05, + "loss": 0.1865, + "step": 596400 + }, + { + "epoch": 3.35, + "learning_rate": 1.6481671978729266e-05, + "loss": 0.1845, + "step": 596500 + }, + { + "epoch": 3.35, + "learning_rate": 1.647605074846681e-05, + "loss": 0.1916, + "step": 596600 + }, + { + "epoch": 3.35, + "learning_rate": 1.6470429518204356e-05, + "loss": 0.1808, + "step": 596700 + }, + { + "epoch": 3.35, + "learning_rate": 1.6464864500244527e-05, + "loss": 0.182, + "step": 596800 + }, + { + "epoch": 3.36, + "learning_rate": 1.645924326998207e-05, + "loss": 0.1849, + "step": 596900 + }, + { + "epoch": 3.36, + "learning_rate": 1.6453622039719612e-05, + "loss": 0.1781, + "step": 597000 + }, + { + "epoch": 3.36, + "learning_rate": 1.644800080945716e-05, + "loss": 0.1818, + "step": 597100 + }, + { + "epoch": 3.36, + "learning_rate": 1.64423795791947e-05, + "loss": 0.1861, + "step": 597200 + }, + { + "epoch": 3.36, + "learning_rate": 1.6436758348932248e-05, + "loss": 0.1829, + "step": 597300 + }, + { + "epoch": 3.36, + "learning_rate": 1.6431137118669794e-05, + "loss": 0.1731, + "step": 597400 + }, + { + "epoch": 3.36, + "learning_rate": 1.6425515888407337e-05, + "loss": 0.1863, + "step": 597500 + }, + { + "epoch": 3.36, + "learning_rate": 1.6419894658144883e-05, + "loss": 0.1838, + "step": 597600 + }, + { + "epoch": 3.36, + "learning_rate": 1.6414273427882426e-05, + "loss": 0.1882, + "step": 597700 + }, + { + "epoch": 3.36, + "learning_rate": 1.6408652197619972e-05, + "loss": 0.1825, + "step": 597800 + }, + { + "epoch": 3.36, + "learning_rate": 1.6403030967357515e-05, + "loss": 0.1803, + "step": 597900 + }, + { + "epoch": 3.36, + "learning_rate": 1.639740973709506e-05, + "loss": 0.1821, + "step": 598000 + }, + { + "epoch": 3.36, + "learning_rate": 1.6391788506832605e-05, + "loss": 0.1853, + "step": 598100 + }, + { + "epoch": 3.36, + "learning_rate": 1.638616727657015e-05, + "loss": 0.1885, + "step": 598200 + }, + { + "epoch": 3.36, + "learning_rate": 1.6380546046307697e-05, + "loss": 0.1835, + "step": 598300 + }, + { + "epoch": 3.36, + "learning_rate": 1.637492481604524e-05, + "loss": 0.1892, + "step": 598400 + }, + { + "epoch": 3.36, + "learning_rate": 1.6369303585782786e-05, + "loss": 0.1787, + "step": 598500 + }, + { + "epoch": 3.36, + "learning_rate": 1.636368235552033e-05, + "loss": 0.1821, + "step": 598600 + }, + { + "epoch": 3.37, + "learning_rate": 1.6358061125257875e-05, + "loss": 0.1871, + "step": 598700 + }, + { + "epoch": 3.37, + "learning_rate": 1.6352496107298043e-05, + "loss": 0.1798, + "step": 598800 + }, + { + "epoch": 3.37, + "learning_rate": 1.634687487703559e-05, + "loss": 0.183, + "step": 598900 + }, + { + "epoch": 3.37, + "learning_rate": 1.6341253646773132e-05, + "loss": 0.1868, + "step": 599000 + }, + { + "epoch": 3.37, + "learning_rate": 1.633563241651068e-05, + "loss": 0.1857, + "step": 599100 + }, + { + "epoch": 3.37, + "learning_rate": 1.633001118624822e-05, + "loss": 0.1863, + "step": 599200 + }, + { + "epoch": 3.37, + "learning_rate": 1.6324389955985768e-05, + "loss": 0.1875, + "step": 599300 + }, + { + "epoch": 3.37, + "learning_rate": 1.6318768725723314e-05, + "loss": 0.1878, + "step": 599400 + }, + { + "epoch": 3.37, + "learning_rate": 1.6313147495460857e-05, + "loss": 0.1864, + "step": 599500 + }, + { + "epoch": 3.37, + "learning_rate": 1.6307526265198403e-05, + "loss": 0.1898, + "step": 599600 + }, + { + "epoch": 3.37, + "learning_rate": 1.6301905034935946e-05, + "loss": 0.1793, + "step": 599700 + }, + { + "epoch": 3.37, + "learning_rate": 1.6296283804673492e-05, + "loss": 0.1895, + "step": 599800 + }, + { + "epoch": 3.37, + "learning_rate": 1.6290662574411035e-05, + "loss": 0.19, + "step": 599900 + }, + { + "epoch": 3.37, + "learning_rate": 1.628504134414858e-05, + "loss": 0.1929, + "step": 600000 + }, + { + "epoch": 3.37, + "learning_rate": 1.6279420113886128e-05, + "loss": 0.1971, + "step": 600100 + }, + { + "epoch": 3.37, + "learning_rate": 1.627379888362367e-05, + "loss": 0.1807, + "step": 600200 + }, + { + "epoch": 3.37, + "learning_rate": 1.6268177653361217e-05, + "loss": 0.1766, + "step": 600300 + }, + { + "epoch": 3.37, + "learning_rate": 1.626255642309876e-05, + "loss": 0.1832, + "step": 600400 + }, + { + "epoch": 3.38, + "learning_rate": 1.6256935192836306e-05, + "loss": 0.1895, + "step": 600500 + }, + { + "epoch": 3.38, + "learning_rate": 1.625131396257385e-05, + "loss": 0.1827, + "step": 600600 + }, + { + "epoch": 3.38, + "learning_rate": 1.6245692732311392e-05, + "loss": 0.1838, + "step": 600700 + }, + { + "epoch": 3.38, + "learning_rate": 1.6240071502048938e-05, + "loss": 0.1862, + "step": 600800 + }, + { + "epoch": 3.38, + "learning_rate": 1.6234450271786484e-05, + "loss": 0.1805, + "step": 600900 + }, + { + "epoch": 3.38, + "learning_rate": 1.622882904152403e-05, + "loss": 0.1878, + "step": 601000 + }, + { + "epoch": 3.38, + "learning_rate": 1.6223207811261574e-05, + "loss": 0.1831, + "step": 601100 + }, + { + "epoch": 3.38, + "learning_rate": 1.621758658099912e-05, + "loss": 0.1846, + "step": 601200 + }, + { + "epoch": 3.38, + "learning_rate": 1.6211965350736663e-05, + "loss": 0.1772, + "step": 601300 + }, + { + "epoch": 3.38, + "learning_rate": 1.6206344120474206e-05, + "loss": 0.186, + "step": 601400 + }, + { + "epoch": 3.38, + "learning_rate": 1.6200722890211752e-05, + "loss": 0.1822, + "step": 601500 + }, + { + "epoch": 3.38, + "learning_rate": 1.6195101659949298e-05, + "loss": 0.1911, + "step": 601600 + }, + { + "epoch": 3.38, + "learning_rate": 1.6189480429686844e-05, + "loss": 0.1833, + "step": 601700 + }, + { + "epoch": 3.38, + "learning_rate": 1.6183859199424387e-05, + "loss": 0.1876, + "step": 601800 + }, + { + "epoch": 3.38, + "learning_rate": 1.6178237969161934e-05, + "loss": 0.1839, + "step": 601900 + }, + { + "epoch": 3.38, + "learning_rate": 1.6172616738899476e-05, + "loss": 0.1882, + "step": 602000 + }, + { + "epoch": 3.38, + "learning_rate": 1.616699550863702e-05, + "loss": 0.1862, + "step": 602100 + }, + { + "epoch": 3.39, + "learning_rate": 1.6161374278374566e-05, + "loss": 0.1796, + "step": 602200 + }, + { + "epoch": 3.39, + "learning_rate": 1.615575304811211e-05, + "loss": 0.1806, + "step": 602300 + }, + { + "epoch": 3.39, + "learning_rate": 1.6150131817849655e-05, + "loss": 0.1816, + "step": 602400 + }, + { + "epoch": 3.39, + "learning_rate": 1.61445105875872e-05, + "loss": 0.1852, + "step": 602500 + }, + { + "epoch": 3.39, + "learning_rate": 1.6138889357324744e-05, + "loss": 0.1871, + "step": 602600 + }, + { + "epoch": 3.39, + "learning_rate": 1.613326812706229e-05, + "loss": 0.1787, + "step": 602700 + }, + { + "epoch": 3.39, + "learning_rate": 1.6127646896799833e-05, + "loss": 0.1852, + "step": 602800 + }, + { + "epoch": 3.39, + "learning_rate": 1.6122081878840004e-05, + "loss": 0.1913, + "step": 602900 + }, + { + "epoch": 3.39, + "learning_rate": 1.6116460648577547e-05, + "loss": 0.1904, + "step": 603000 + }, + { + "epoch": 3.39, + "learning_rate": 1.6110839418315093e-05, + "loss": 0.1835, + "step": 603100 + }, + { + "epoch": 3.39, + "learning_rate": 1.6105218188052636e-05, + "loss": 0.1849, + "step": 603200 + }, + { + "epoch": 3.39, + "learning_rate": 1.6099596957790182e-05, + "loss": 0.1911, + "step": 603300 + }, + { + "epoch": 3.39, + "learning_rate": 1.6093975727527725e-05, + "loss": 0.1831, + "step": 603400 + }, + { + "epoch": 3.39, + "learning_rate": 1.608835449726527e-05, + "loss": 0.1849, + "step": 603500 + }, + { + "epoch": 3.39, + "learning_rate": 1.6082733267002818e-05, + "loss": 0.1852, + "step": 603600 + }, + { + "epoch": 3.39, + "learning_rate": 1.607711203674036e-05, + "loss": 0.1834, + "step": 603700 + }, + { + "epoch": 3.39, + "learning_rate": 1.6071490806477907e-05, + "loss": 0.1809, + "step": 603800 + }, + { + "epoch": 3.39, + "learning_rate": 1.606586957621545e-05, + "loss": 0.186, + "step": 603900 + }, + { + "epoch": 3.4, + "learning_rate": 1.6060248345952996e-05, + "loss": 0.1855, + "step": 604000 + }, + { + "epoch": 3.4, + "learning_rate": 1.605462711569054e-05, + "loss": 0.1835, + "step": 604100 + }, + { + "epoch": 3.4, + "learning_rate": 1.6049005885428085e-05, + "loss": 0.1847, + "step": 604200 + }, + { + "epoch": 3.4, + "learning_rate": 1.6043384655165632e-05, + "loss": 0.1861, + "step": 604300 + }, + { + "epoch": 3.4, + "learning_rate": 1.6037763424903175e-05, + "loss": 0.19, + "step": 604400 + }, + { + "epoch": 3.4, + "learning_rate": 1.603214219464072e-05, + "loss": 0.1788, + "step": 604500 + }, + { + "epoch": 3.4, + "learning_rate": 1.6026520964378264e-05, + "loss": 0.1838, + "step": 604600 + }, + { + "epoch": 3.4, + "learning_rate": 1.602089973411581e-05, + "loss": 0.1825, + "step": 604700 + }, + { + "epoch": 3.4, + "learning_rate": 1.6015278503853353e-05, + "loss": 0.1818, + "step": 604800 + }, + { + "epoch": 3.4, + "learning_rate": 1.60096572735909e-05, + "loss": 0.1811, + "step": 604900 + }, + { + "epoch": 3.4, + "learning_rate": 1.6004036043328442e-05, + "loss": 0.1779, + "step": 605000 + }, + { + "epoch": 3.4, + "learning_rate": 1.599841481306599e-05, + "loss": 0.1876, + "step": 605100 + }, + { + "epoch": 3.4, + "learning_rate": 1.5992793582803535e-05, + "loss": 0.1899, + "step": 605200 + }, + { + "epoch": 3.4, + "learning_rate": 1.5987172352541078e-05, + "loss": 0.1841, + "step": 605300 + }, + { + "epoch": 3.4, + "learning_rate": 1.5981551122278624e-05, + "loss": 0.1885, + "step": 605400 + }, + { + "epoch": 3.4, + "learning_rate": 1.5975929892016167e-05, + "loss": 0.1853, + "step": 605500 + }, + { + "epoch": 3.4, + "learning_rate": 1.5970308661753713e-05, + "loss": 0.1849, + "step": 605600 + }, + { + "epoch": 3.4, + "learning_rate": 1.5964687431491256e-05, + "loss": 0.1828, + "step": 605700 + }, + { + "epoch": 3.41, + "learning_rate": 1.5959066201228802e-05, + "loss": 0.1855, + "step": 605800 + }, + { + "epoch": 3.41, + "learning_rate": 1.595350118326897e-05, + "loss": 0.1843, + "step": 605900 + }, + { + "epoch": 3.41, + "learning_rate": 1.5947879953006516e-05, + "loss": 0.1835, + "step": 606000 + }, + { + "epoch": 3.41, + "learning_rate": 1.594225872274406e-05, + "loss": 0.1882, + "step": 606100 + }, + { + "epoch": 3.41, + "learning_rate": 1.5936637492481605e-05, + "loss": 0.1853, + "step": 606200 + }, + { + "epoch": 3.41, + "learning_rate": 1.593101626221915e-05, + "loss": 0.1863, + "step": 606300 + }, + { + "epoch": 3.41, + "learning_rate": 1.592545124425932e-05, + "loss": 0.1919, + "step": 606400 + }, + { + "epoch": 3.41, + "learning_rate": 1.5919830013996865e-05, + "loss": 0.184, + "step": 606500 + }, + { + "epoch": 3.41, + "learning_rate": 1.5914208783734408e-05, + "loss": 0.1852, + "step": 606600 + }, + { + "epoch": 3.41, + "learning_rate": 1.5908587553471955e-05, + "loss": 0.182, + "step": 606700 + }, + { + "epoch": 3.41, + "learning_rate": 1.5902966323209497e-05, + "loss": 0.1844, + "step": 606800 + }, + { + "epoch": 3.41, + "learning_rate": 1.5897345092947044e-05, + "loss": 0.1848, + "step": 606900 + }, + { + "epoch": 3.41, + "learning_rate": 1.5891723862684587e-05, + "loss": 0.1846, + "step": 607000 + }, + { + "epoch": 3.41, + "learning_rate": 1.5886102632422133e-05, + "loss": 0.1787, + "step": 607100 + }, + { + "epoch": 3.41, + "learning_rate": 1.588048140215968e-05, + "loss": 0.1803, + "step": 607200 + }, + { + "epoch": 3.41, + "learning_rate": 1.5874860171897222e-05, + "loss": 0.1805, + "step": 607300 + }, + { + "epoch": 3.41, + "learning_rate": 1.586923894163477e-05, + "loss": 0.1825, + "step": 607400 + }, + { + "epoch": 3.41, + "learning_rate": 1.586361771137231e-05, + "loss": 0.1883, + "step": 607500 + }, + { + "epoch": 3.42, + "learning_rate": 1.5857996481109858e-05, + "loss": 0.184, + "step": 607600 + }, + { + "epoch": 3.42, + "learning_rate": 1.58523752508474e-05, + "loss": 0.1832, + "step": 607700 + }, + { + "epoch": 3.42, + "learning_rate": 1.5846754020584943e-05, + "loss": 0.1808, + "step": 607800 + }, + { + "epoch": 3.42, + "learning_rate": 1.584113279032249e-05, + "loss": 0.1782, + "step": 607900 + }, + { + "epoch": 3.42, + "learning_rate": 1.5835511560060036e-05, + "loss": 0.1874, + "step": 608000 + }, + { + "epoch": 3.42, + "learning_rate": 1.5829890329797582e-05, + "loss": 0.1817, + "step": 608100 + }, + { + "epoch": 3.42, + "learning_rate": 1.5824269099535125e-05, + "loss": 0.1862, + "step": 608200 + }, + { + "epoch": 3.42, + "learning_rate": 1.581864786927267e-05, + "loss": 0.1814, + "step": 608300 + }, + { + "epoch": 3.42, + "learning_rate": 1.5813026639010214e-05, + "loss": 0.1865, + "step": 608400 + }, + { + "epoch": 3.42, + "learning_rate": 1.5807405408747757e-05, + "loss": 0.1898, + "step": 608500 + }, + { + "epoch": 3.42, + "learning_rate": 1.5801784178485303e-05, + "loss": 0.1871, + "step": 608600 + }, + { + "epoch": 3.42, + "learning_rate": 1.579616294822285e-05, + "loss": 0.1866, + "step": 608700 + }, + { + "epoch": 3.42, + "learning_rate": 1.5790541717960396e-05, + "loss": 0.1817, + "step": 608800 + }, + { + "epoch": 3.42, + "learning_rate": 1.578492048769794e-05, + "loss": 0.1885, + "step": 608900 + }, + { + "epoch": 3.42, + "learning_rate": 1.5779299257435485e-05, + "loss": 0.1836, + "step": 609000 + }, + { + "epoch": 3.42, + "learning_rate": 1.5773678027173028e-05, + "loss": 0.1825, + "step": 609100 + }, + { + "epoch": 3.42, + "learning_rate": 1.576805679691057e-05, + "loss": 0.1744, + "step": 609200 + }, + { + "epoch": 3.43, + "learning_rate": 1.5762435566648117e-05, + "loss": 0.1837, + "step": 609300 + }, + { + "epoch": 3.43, + "learning_rate": 1.575681433638566e-05, + "loss": 0.1795, + "step": 609400 + }, + { + "epoch": 3.43, + "learning_rate": 1.5751193106123206e-05, + "loss": 0.1818, + "step": 609500 + }, + { + "epoch": 3.43, + "learning_rate": 1.5745571875860753e-05, + "loss": 0.1817, + "step": 609600 + }, + { + "epoch": 3.43, + "learning_rate": 1.57399506455983e-05, + "loss": 0.1881, + "step": 609700 + }, + { + "epoch": 3.43, + "learning_rate": 1.5734329415335842e-05, + "loss": 0.1816, + "step": 609800 + }, + { + "epoch": 3.43, + "learning_rate": 1.5728708185073385e-05, + "loss": 0.1847, + "step": 609900 + }, + { + "epoch": 3.43, + "learning_rate": 1.572308695481093e-05, + "loss": 0.189, + "step": 610000 + }, + { + "epoch": 3.43, + "learning_rate": 1.5717465724548474e-05, + "loss": 0.1822, + "step": 610100 + }, + { + "epoch": 3.43, + "learning_rate": 1.571184449428602e-05, + "loss": 0.1822, + "step": 610200 + }, + { + "epoch": 3.43, + "learning_rate": 1.5706223264023563e-05, + "loss": 0.184, + "step": 610300 + }, + { + "epoch": 3.43, + "learning_rate": 1.5700602033761113e-05, + "loss": 0.1843, + "step": 610400 + }, + { + "epoch": 3.43, + "learning_rate": 1.5694980803498655e-05, + "loss": 0.1832, + "step": 610500 + }, + { + "epoch": 3.43, + "learning_rate": 1.56893595732362e-05, + "loss": 0.1842, + "step": 610600 + }, + { + "epoch": 3.43, + "learning_rate": 1.5683738342973745e-05, + "loss": 0.1806, + "step": 610700 + }, + { + "epoch": 3.43, + "learning_rate": 1.5678117112711288e-05, + "loss": 0.1809, + "step": 610800 + }, + { + "epoch": 3.43, + "learning_rate": 1.5672495882448834e-05, + "loss": 0.1857, + "step": 610900 + }, + { + "epoch": 3.43, + "learning_rate": 1.5666874652186377e-05, + "loss": 0.1829, + "step": 611000 + }, + { + "epoch": 3.44, + "learning_rate": 1.5661253421923923e-05, + "loss": 0.1793, + "step": 611100 + }, + { + "epoch": 3.44, + "learning_rate": 1.565563219166147e-05, + "loss": 0.1761, + "step": 611200 + }, + { + "epoch": 3.44, + "learning_rate": 1.5650010961399012e-05, + "loss": 0.1826, + "step": 611300 + }, + { + "epoch": 3.44, + "learning_rate": 1.564438973113656e-05, + "loss": 0.1818, + "step": 611400 + }, + { + "epoch": 3.44, + "learning_rate": 1.56387685008741e-05, + "loss": 0.1787, + "step": 611500 + }, + { + "epoch": 3.44, + "learning_rate": 1.5633147270611648e-05, + "loss": 0.1802, + "step": 611600 + }, + { + "epoch": 3.44, + "learning_rate": 1.562752604034919e-05, + "loss": 0.185, + "step": 611700 + }, + { + "epoch": 3.44, + "learning_rate": 1.5621904810086737e-05, + "loss": 0.1871, + "step": 611800 + }, + { + "epoch": 3.44, + "learning_rate": 1.561628357982428e-05, + "loss": 0.1903, + "step": 611900 + }, + { + "epoch": 3.44, + "learning_rate": 1.5610662349561826e-05, + "loss": 0.1841, + "step": 612000 + }, + { + "epoch": 3.44, + "learning_rate": 1.5605041119299372e-05, + "loss": 0.1828, + "step": 612100 + }, + { + "epoch": 3.44, + "learning_rate": 1.5599419889036915e-05, + "loss": 0.1837, + "step": 612200 + }, + { + "epoch": 3.44, + "learning_rate": 1.559379865877446e-05, + "loss": 0.1845, + "step": 612300 + }, + { + "epoch": 3.44, + "learning_rate": 1.5588177428512004e-05, + "loss": 0.1862, + "step": 612400 + }, + { + "epoch": 3.44, + "learning_rate": 1.5582612410552175e-05, + "loss": 0.188, + "step": 612500 + }, + { + "epoch": 3.44, + "learning_rate": 1.5576991180289718e-05, + "loss": 0.183, + "step": 612600 + }, + { + "epoch": 3.44, + "learning_rate": 1.5571369950027264e-05, + "loss": 0.1823, + "step": 612700 + }, + { + "epoch": 3.44, + "learning_rate": 1.5565748719764807e-05, + "loss": 0.1846, + "step": 612800 + }, + { + "epoch": 3.45, + "learning_rate": 1.556012748950235e-05, + "loss": 0.1876, + "step": 612900 + }, + { + "epoch": 3.45, + "learning_rate": 1.55545062592399e-05, + "loss": 0.1884, + "step": 613000 + }, + { + "epoch": 3.45, + "learning_rate": 1.5548885028977443e-05, + "loss": 0.1836, + "step": 613100 + }, + { + "epoch": 3.45, + "learning_rate": 1.554326379871499e-05, + "loss": 0.1874, + "step": 613200 + }, + { + "epoch": 3.45, + "learning_rate": 1.5537642568452532e-05, + "loss": 0.1822, + "step": 613300 + }, + { + "epoch": 3.45, + "learning_rate": 1.5532021338190078e-05, + "loss": 0.1834, + "step": 613400 + }, + { + "epoch": 3.45, + "learning_rate": 1.552640010792762e-05, + "loss": 0.185, + "step": 613500 + }, + { + "epoch": 3.45, + "learning_rate": 1.5520778877665164e-05, + "loss": 0.1816, + "step": 613600 + }, + { + "epoch": 3.45, + "learning_rate": 1.551515764740271e-05, + "loss": 0.185, + "step": 613700 + }, + { + "epoch": 3.45, + "learning_rate": 1.5509536417140257e-05, + "loss": 0.1814, + "step": 613800 + }, + { + "epoch": 3.45, + "learning_rate": 1.5503915186877803e-05, + "loss": 0.1777, + "step": 613900 + }, + { + "epoch": 3.45, + "learning_rate": 1.5498293956615346e-05, + "loss": 0.1836, + "step": 614000 + }, + { + "epoch": 3.45, + "learning_rate": 1.5492672726352892e-05, + "loss": 0.1817, + "step": 614100 + }, + { + "epoch": 3.45, + "learning_rate": 1.5487051496090435e-05, + "loss": 0.1839, + "step": 614200 + }, + { + "epoch": 3.45, + "learning_rate": 1.5481430265827978e-05, + "loss": 0.1837, + "step": 614300 + }, + { + "epoch": 3.45, + "learning_rate": 1.5475809035565524e-05, + "loss": 0.1829, + "step": 614400 + }, + { + "epoch": 3.45, + "learning_rate": 1.5470187805303067e-05, + "loss": 0.1882, + "step": 614500 + }, + { + "epoch": 3.45, + "learning_rate": 1.5464622787343238e-05, + "loss": 0.1835, + "step": 614600 + }, + { + "epoch": 3.46, + "learning_rate": 1.545900155708078e-05, + "loss": 0.1833, + "step": 614700 + }, + { + "epoch": 3.46, + "learning_rate": 1.5453380326818327e-05, + "loss": 0.1827, + "step": 614800 + }, + { + "epoch": 3.46, + "learning_rate": 1.5447759096555873e-05, + "loss": 0.1904, + "step": 614900 + }, + { + "epoch": 3.46, + "learning_rate": 1.544213786629342e-05, + "loss": 0.1866, + "step": 615000 + }, + { + "epoch": 3.46, + "learning_rate": 1.5436516636030963e-05, + "loss": 0.1832, + "step": 615100 + }, + { + "epoch": 3.46, + "learning_rate": 1.5430951618071134e-05, + "loss": 0.183, + "step": 615200 + }, + { + "epoch": 3.46, + "learning_rate": 1.54253866001113e-05, + "loss": 0.1879, + "step": 615300 + }, + { + "epoch": 3.46, + "learning_rate": 1.5419765369848848e-05, + "loss": 0.1861, + "step": 615400 + }, + { + "epoch": 3.46, + "learning_rate": 1.541414413958639e-05, + "loss": 0.1879, + "step": 615500 + }, + { + "epoch": 3.46, + "learning_rate": 1.5408522909323937e-05, + "loss": 0.1807, + "step": 615600 + }, + { + "epoch": 3.46, + "learning_rate": 1.540290167906148e-05, + "loss": 0.1812, + "step": 615700 + }, + { + "epoch": 3.46, + "learning_rate": 1.5397280448799026e-05, + "loss": 0.1811, + "step": 615800 + }, + { + "epoch": 3.46, + "learning_rate": 1.539165921853657e-05, + "loss": 0.1865, + "step": 615900 + }, + { + "epoch": 3.46, + "learning_rate": 1.5386037988274115e-05, + "loss": 0.1851, + "step": 616000 + }, + { + "epoch": 3.46, + "learning_rate": 1.5380416758011658e-05, + "loss": 0.18, + "step": 616100 + }, + { + "epoch": 3.46, + "learning_rate": 1.5374795527749204e-05, + "loss": 0.1825, + "step": 616200 + }, + { + "epoch": 3.46, + "learning_rate": 1.536917429748675e-05, + "loss": 0.1902, + "step": 616300 + }, + { + "epoch": 3.46, + "learning_rate": 1.5363553067224293e-05, + "loss": 0.1869, + "step": 616400 + }, + { + "epoch": 3.47, + "learning_rate": 1.535793183696184e-05, + "loss": 0.1837, + "step": 616500 + }, + { + "epoch": 3.47, + "learning_rate": 1.5352310606699382e-05, + "loss": 0.1854, + "step": 616600 + }, + { + "epoch": 3.47, + "learning_rate": 1.534668937643693e-05, + "loss": 0.1784, + "step": 616700 + }, + { + "epoch": 3.47, + "learning_rate": 1.534106814617447e-05, + "loss": 0.1807, + "step": 616800 + }, + { + "epoch": 3.47, + "learning_rate": 1.5335446915912015e-05, + "loss": 0.179, + "step": 616900 + }, + { + "epoch": 3.47, + "learning_rate": 1.5329825685649564e-05, + "loss": 0.1837, + "step": 617000 + }, + { + "epoch": 3.47, + "learning_rate": 1.5324204455387107e-05, + "loss": 0.1842, + "step": 617100 + }, + { + "epoch": 3.47, + "learning_rate": 1.5318583225124653e-05, + "loss": 0.1824, + "step": 617200 + }, + { + "epoch": 3.47, + "learning_rate": 1.5312961994862196e-05, + "loss": 0.1863, + "step": 617300 + }, + { + "epoch": 3.47, + "learning_rate": 1.530734076459974e-05, + "loss": 0.1886, + "step": 617400 + }, + { + "epoch": 3.47, + "learning_rate": 1.5301719534337285e-05, + "loss": 0.1769, + "step": 617500 + }, + { + "epoch": 3.47, + "learning_rate": 1.529609830407483e-05, + "loss": 0.1851, + "step": 617600 + }, + { + "epoch": 3.47, + "learning_rate": 1.5290477073812375e-05, + "loss": 0.1779, + "step": 617700 + }, + { + "epoch": 3.47, + "learning_rate": 1.528485584354992e-05, + "loss": 0.1788, + "step": 617800 + }, + { + "epoch": 3.47, + "learning_rate": 1.5279234613287467e-05, + "loss": 0.1837, + "step": 617900 + }, + { + "epoch": 3.47, + "learning_rate": 1.527361338302501e-05, + "loss": 0.184, + "step": 618000 + }, + { + "epoch": 3.47, + "learning_rate": 1.5267992152762553e-05, + "loss": 0.1839, + "step": 618100 + }, + { + "epoch": 3.48, + "learning_rate": 1.52623709225001e-05, + "loss": 0.1859, + "step": 618200 + }, + { + "epoch": 3.48, + "learning_rate": 1.5256749692237642e-05, + "loss": 0.187, + "step": 618300 + }, + { + "epoch": 3.48, + "learning_rate": 1.525112846197519e-05, + "loss": 0.1815, + "step": 618400 + }, + { + "epoch": 3.48, + "learning_rate": 1.5245507231712733e-05, + "loss": 0.1803, + "step": 618500 + }, + { + "epoch": 3.48, + "learning_rate": 1.523988600145028e-05, + "loss": 0.1826, + "step": 618600 + }, + { + "epoch": 3.48, + "learning_rate": 1.5234264771187822e-05, + "loss": 0.1828, + "step": 618700 + }, + { + "epoch": 3.48, + "learning_rate": 1.5228643540925367e-05, + "loss": 0.1842, + "step": 618800 + }, + { + "epoch": 3.48, + "learning_rate": 1.5223022310662913e-05, + "loss": 0.185, + "step": 618900 + }, + { + "epoch": 3.48, + "learning_rate": 1.5217401080400456e-05, + "loss": 0.185, + "step": 619000 + }, + { + "epoch": 3.48, + "learning_rate": 1.5211779850138002e-05, + "loss": 0.1842, + "step": 619100 + }, + { + "epoch": 3.48, + "learning_rate": 1.5206158619875547e-05, + "loss": 0.1852, + "step": 619200 + }, + { + "epoch": 3.48, + "learning_rate": 1.5200537389613093e-05, + "loss": 0.1854, + "step": 619300 + }, + { + "epoch": 3.48, + "learning_rate": 1.5194916159350636e-05, + "loss": 0.1844, + "step": 619400 + }, + { + "epoch": 3.48, + "learning_rate": 1.518929492908818e-05, + "loss": 0.1873, + "step": 619500 + }, + { + "epoch": 3.48, + "learning_rate": 1.5183673698825727e-05, + "loss": 0.1838, + "step": 619600 + }, + { + "epoch": 3.48, + "learning_rate": 1.517805246856327e-05, + "loss": 0.1825, + "step": 619700 + }, + { + "epoch": 3.48, + "learning_rate": 1.5172431238300816e-05, + "loss": 0.1823, + "step": 619800 + }, + { + "epoch": 3.48, + "learning_rate": 1.5166810008038359e-05, + "loss": 0.1813, + "step": 619900 + }, + { + "epoch": 3.49, + "learning_rate": 1.5161188777775907e-05, + "loss": 0.1883, + "step": 620000 + }, + { + "epoch": 3.49, + "learning_rate": 1.515556754751345e-05, + "loss": 0.1823, + "step": 620100 + }, + { + "epoch": 3.49, + "learning_rate": 1.5150002529553619e-05, + "loss": 0.1815, + "step": 620200 + }, + { + "epoch": 3.49, + "learning_rate": 1.5144381299291164e-05, + "loss": 0.1825, + "step": 620300 + }, + { + "epoch": 3.49, + "learning_rate": 1.513876006902871e-05, + "loss": 0.1808, + "step": 620400 + }, + { + "epoch": 3.49, + "learning_rate": 1.5133138838766253e-05, + "loss": 0.1802, + "step": 620500 + }, + { + "epoch": 3.49, + "learning_rate": 1.5127517608503797e-05, + "loss": 0.182, + "step": 620600 + }, + { + "epoch": 3.49, + "learning_rate": 1.5121896378241344e-05, + "loss": 0.187, + "step": 620700 + }, + { + "epoch": 3.49, + "learning_rate": 1.5116275147978887e-05, + "loss": 0.1845, + "step": 620800 + }, + { + "epoch": 3.49, + "learning_rate": 1.5110653917716433e-05, + "loss": 0.1812, + "step": 620900 + }, + { + "epoch": 3.49, + "learning_rate": 1.5105032687453977e-05, + "loss": 0.1836, + "step": 621000 + }, + { + "epoch": 3.49, + "learning_rate": 1.5099411457191524e-05, + "loss": 0.1841, + "step": 621100 + }, + { + "epoch": 3.49, + "learning_rate": 1.5093790226929067e-05, + "loss": 0.1876, + "step": 621200 + }, + { + "epoch": 3.49, + "learning_rate": 1.508816899666661e-05, + "loss": 0.1828, + "step": 621300 + }, + { + "epoch": 3.49, + "learning_rate": 1.5082547766404156e-05, + "loss": 0.1822, + "step": 621400 + }, + { + "epoch": 3.49, + "learning_rate": 1.50769265361417e-05, + "loss": 0.1862, + "step": 621500 + }, + { + "epoch": 3.49, + "learning_rate": 1.5071305305879247e-05, + "loss": 0.1759, + "step": 621600 + }, + { + "epoch": 3.49, + "learning_rate": 1.506568407561679e-05, + "loss": 0.1859, + "step": 621700 + }, + { + "epoch": 3.5, + "learning_rate": 1.5060062845354336e-05, + "loss": 0.1804, + "step": 621800 + }, + { + "epoch": 3.5, + "learning_rate": 1.505444161509188e-05, + "loss": 0.1816, + "step": 621900 + }, + { + "epoch": 3.5, + "learning_rate": 1.5048820384829423e-05, + "loss": 0.1866, + "step": 622000 + }, + { + "epoch": 3.5, + "learning_rate": 1.504319915456697e-05, + "loss": 0.1831, + "step": 622100 + }, + { + "epoch": 3.5, + "learning_rate": 1.5037577924304514e-05, + "loss": 0.1872, + "step": 622200 + }, + { + "epoch": 3.5, + "learning_rate": 1.503195669404206e-05, + "loss": 0.1831, + "step": 622300 + }, + { + "epoch": 3.5, + "learning_rate": 1.5026335463779603e-05, + "loss": 0.1741, + "step": 622400 + }, + { + "epoch": 3.5, + "learning_rate": 1.5020714233517146e-05, + "loss": 0.1914, + "step": 622500 + }, + { + "epoch": 3.5, + "learning_rate": 1.5015149215557317e-05, + "loss": 0.1866, + "step": 622600 + }, + { + "epoch": 3.5, + "learning_rate": 1.5009527985294863e-05, + "loss": 0.1844, + "step": 622700 + }, + { + "epoch": 3.5, + "learning_rate": 1.5003906755032406e-05, + "loss": 0.1844, + "step": 622800 + }, + { + "epoch": 3.5, + "learning_rate": 1.4998285524769951e-05, + "loss": 0.1826, + "step": 622900 + }, + { + "epoch": 3.5, + "learning_rate": 1.4992664294507497e-05, + "loss": 0.1858, + "step": 623000 + }, + { + "epoch": 3.5, + "learning_rate": 1.498704306424504e-05, + "loss": 0.1744, + "step": 623100 + }, + { + "epoch": 3.5, + "learning_rate": 1.4981421833982586e-05, + "loss": 0.185, + "step": 623200 + }, + { + "epoch": 3.5, + "learning_rate": 1.4975800603720131e-05, + "loss": 0.1798, + "step": 623300 + }, + { + "epoch": 3.5, + "learning_rate": 1.4970179373457677e-05, + "loss": 0.1848, + "step": 623400 + }, + { + "epoch": 3.5, + "learning_rate": 1.496455814319522e-05, + "loss": 0.1781, + "step": 623500 + }, + { + "epoch": 3.51, + "learning_rate": 1.4958936912932765e-05, + "loss": 0.1798, + "step": 623600 + }, + { + "epoch": 3.51, + "learning_rate": 1.4953315682670311e-05, + "loss": 0.1796, + "step": 623700 + }, + { + "epoch": 3.51, + "learning_rate": 1.4947694452407854e-05, + "loss": 0.187, + "step": 623800 + }, + { + "epoch": 3.51, + "learning_rate": 1.49420732221454e-05, + "loss": 0.182, + "step": 623900 + }, + { + "epoch": 3.51, + "learning_rate": 1.4936451991882943e-05, + "loss": 0.1844, + "step": 624000 + }, + { + "epoch": 3.51, + "learning_rate": 1.493083076162049e-05, + "loss": 0.1872, + "step": 624100 + }, + { + "epoch": 3.51, + "learning_rate": 1.4925209531358034e-05, + "loss": 0.183, + "step": 624200 + }, + { + "epoch": 3.51, + "learning_rate": 1.4919588301095577e-05, + "loss": 0.1815, + "step": 624300 + }, + { + "epoch": 3.51, + "learning_rate": 1.4913967070833123e-05, + "loss": 0.1871, + "step": 624400 + }, + { + "epoch": 3.51, + "learning_rate": 1.4908345840570668e-05, + "loss": 0.1813, + "step": 624500 + }, + { + "epoch": 3.51, + "learning_rate": 1.4902780822610837e-05, + "loss": 0.1862, + "step": 624600 + }, + { + "epoch": 3.51, + "learning_rate": 1.4897159592348382e-05, + "loss": 0.1843, + "step": 624700 + }, + { + "epoch": 3.51, + "learning_rate": 1.4891538362085928e-05, + "loss": 0.1847, + "step": 624800 + }, + { + "epoch": 3.51, + "learning_rate": 1.488591713182347e-05, + "loss": 0.18, + "step": 624900 + }, + { + "epoch": 3.51, + "learning_rate": 1.4880295901561017e-05, + "loss": 0.1874, + "step": 625000 + }, + { + "epoch": 3.51, + "learning_rate": 1.4874674671298562e-05, + "loss": 0.1881, + "step": 625100 + }, + { + "epoch": 3.51, + "learning_rate": 1.4869053441036108e-05, + "loss": 0.1866, + "step": 625200 + }, + { + "epoch": 3.51, + "learning_rate": 1.486343221077365e-05, + "loss": 0.1877, + "step": 625300 + }, + { + "epoch": 3.52, + "learning_rate": 1.4857810980511194e-05, + "loss": 0.1773, + "step": 625400 + }, + { + "epoch": 3.52, + "learning_rate": 1.485218975024874e-05, + "loss": 0.1807, + "step": 625500 + }, + { + "epoch": 3.52, + "learning_rate": 1.4846568519986284e-05, + "loss": 0.1837, + "step": 625600 + }, + { + "epoch": 3.52, + "learning_rate": 1.484094728972383e-05, + "loss": 0.1777, + "step": 625700 + }, + { + "epoch": 3.52, + "learning_rate": 1.4835326059461374e-05, + "loss": 0.1795, + "step": 625800 + }, + { + "epoch": 3.52, + "learning_rate": 1.482970482919892e-05, + "loss": 0.1871, + "step": 625900 + }, + { + "epoch": 3.52, + "learning_rate": 1.4824083598936464e-05, + "loss": 0.1814, + "step": 626000 + }, + { + "epoch": 3.52, + "learning_rate": 1.4818462368674007e-05, + "loss": 0.1839, + "step": 626100 + }, + { + "epoch": 3.52, + "learning_rate": 1.4812841138411554e-05, + "loss": 0.1859, + "step": 626200 + }, + { + "epoch": 3.52, + "learning_rate": 1.4807219908149098e-05, + "loss": 0.1807, + "step": 626300 + }, + { + "epoch": 3.52, + "learning_rate": 1.4801598677886644e-05, + "loss": 0.1798, + "step": 626400 + }, + { + "epoch": 3.52, + "learning_rate": 1.4795977447624187e-05, + "loss": 0.1815, + "step": 626500 + }, + { + "epoch": 3.52, + "learning_rate": 1.4790356217361734e-05, + "loss": 0.1834, + "step": 626600 + }, + { + "epoch": 3.52, + "learning_rate": 1.4784734987099278e-05, + "loss": 0.1748, + "step": 626700 + }, + { + "epoch": 3.52, + "learning_rate": 1.4779113756836821e-05, + "loss": 0.1786, + "step": 626800 + }, + { + "epoch": 3.52, + "learning_rate": 1.4773492526574367e-05, + "loss": 0.1823, + "step": 626900 + }, + { + "epoch": 3.52, + "learning_rate": 1.476787129631191e-05, + "loss": 0.1791, + "step": 627000 + }, + { + "epoch": 3.53, + "learning_rate": 1.4762250066049457e-05, + "loss": 0.1827, + "step": 627100 + }, + { + "epoch": 3.53, + "learning_rate": 1.4756628835787001e-05, + "loss": 0.1881, + "step": 627200 + }, + { + "epoch": 3.53, + "learning_rate": 1.4751007605524547e-05, + "loss": 0.1872, + "step": 627300 + }, + { + "epoch": 3.53, + "learning_rate": 1.474538637526209e-05, + "loss": 0.1798, + "step": 627400 + }, + { + "epoch": 3.53, + "learning_rate": 1.4739765144999635e-05, + "loss": 0.1885, + "step": 627500 + }, + { + "epoch": 3.53, + "learning_rate": 1.4734143914737181e-05, + "loss": 0.185, + "step": 627600 + }, + { + "epoch": 3.53, + "learning_rate": 1.4728522684474724e-05, + "loss": 0.1796, + "step": 627700 + }, + { + "epoch": 3.53, + "learning_rate": 1.472290145421227e-05, + "loss": 0.1809, + "step": 627800 + }, + { + "epoch": 3.53, + "learning_rate": 1.4717280223949815e-05, + "loss": 0.1851, + "step": 627900 + }, + { + "epoch": 3.53, + "learning_rate": 1.4711658993687358e-05, + "loss": 0.1799, + "step": 628000 + }, + { + "epoch": 3.53, + "learning_rate": 1.4706037763424904e-05, + "loss": 0.1786, + "step": 628100 + }, + { + "epoch": 3.53, + "learning_rate": 1.4700416533162447e-05, + "loss": 0.1848, + "step": 628200 + }, + { + "epoch": 3.53, + "learning_rate": 1.4694795302899993e-05, + "loss": 0.1799, + "step": 628300 + }, + { + "epoch": 3.53, + "learning_rate": 1.4689174072637538e-05, + "loss": 0.1882, + "step": 628400 + }, + { + "epoch": 3.53, + "learning_rate": 1.4683609054677707e-05, + "loss": 0.1858, + "step": 628500 + }, + { + "epoch": 3.53, + "learning_rate": 1.4677987824415252e-05, + "loss": 0.1819, + "step": 628600 + }, + { + "epoch": 3.53, + "learning_rate": 1.4672366594152798e-05, + "loss": 0.1865, + "step": 628700 + }, + { + "epoch": 3.53, + "learning_rate": 1.4666745363890341e-05, + "loss": 0.1832, + "step": 628800 + }, + { + "epoch": 3.54, + "learning_rate": 1.4661124133627887e-05, + "loss": 0.1851, + "step": 628900 + }, + { + "epoch": 3.54, + "learning_rate": 1.4655502903365432e-05, + "loss": 0.1833, + "step": 629000 + }, + { + "epoch": 3.54, + "learning_rate": 1.4649881673102975e-05, + "loss": 0.1786, + "step": 629100 + }, + { + "epoch": 3.54, + "learning_rate": 1.4644260442840521e-05, + "loss": 0.1814, + "step": 629200 + }, + { + "epoch": 3.54, + "learning_rate": 1.4638639212578066e-05, + "loss": 0.1841, + "step": 629300 + }, + { + "epoch": 3.54, + "learning_rate": 1.4633017982315612e-05, + "loss": 0.1809, + "step": 629400 + }, + { + "epoch": 3.54, + "learning_rate": 1.4627396752053155e-05, + "loss": 0.1843, + "step": 629500 + }, + { + "epoch": 3.54, + "learning_rate": 1.4621775521790701e-05, + "loss": 0.1857, + "step": 629600 + }, + { + "epoch": 3.54, + "learning_rate": 1.4616154291528244e-05, + "loss": 0.1815, + "step": 629700 + }, + { + "epoch": 3.54, + "learning_rate": 1.4610533061265788e-05, + "loss": 0.1833, + "step": 629800 + }, + { + "epoch": 3.54, + "learning_rate": 1.4604911831003335e-05, + "loss": 0.1837, + "step": 629900 + }, + { + "epoch": 3.54, + "learning_rate": 1.4599290600740878e-05, + "loss": 0.1823, + "step": 630000 + }, + { + "epoch": 3.54, + "learning_rate": 1.4593669370478424e-05, + "loss": 0.1851, + "step": 630100 + }, + { + "epoch": 3.54, + "learning_rate": 1.4588048140215968e-05, + "loss": 0.1809, + "step": 630200 + }, + { + "epoch": 3.54, + "learning_rate": 1.4582426909953515e-05, + "loss": 0.18, + "step": 630300 + }, + { + "epoch": 3.54, + "learning_rate": 1.4576805679691058e-05, + "loss": 0.1843, + "step": 630400 + }, + { + "epoch": 3.54, + "learning_rate": 1.4571184449428602e-05, + "loss": 0.184, + "step": 630500 + }, + { + "epoch": 3.54, + "learning_rate": 1.4565563219166148e-05, + "loss": 0.1907, + "step": 630600 + }, + { + "epoch": 3.55, + "learning_rate": 1.4559941988903691e-05, + "loss": 0.1882, + "step": 630700 + }, + { + "epoch": 3.55, + "learning_rate": 1.4554320758641238e-05, + "loss": 0.1868, + "step": 630800 + }, + { + "epoch": 3.55, + "learning_rate": 1.454869952837878e-05, + "loss": 0.1809, + "step": 630900 + }, + { + "epoch": 3.55, + "learning_rate": 1.4543078298116329e-05, + "loss": 0.183, + "step": 631000 + }, + { + "epoch": 3.55, + "learning_rate": 1.4537457067853871e-05, + "loss": 0.1801, + "step": 631100 + }, + { + "epoch": 3.55, + "learning_rate": 1.4531835837591414e-05, + "loss": 0.1832, + "step": 631200 + }, + { + "epoch": 3.55, + "learning_rate": 1.452621460732896e-05, + "loss": 0.1779, + "step": 631300 + }, + { + "epoch": 3.55, + "learning_rate": 1.4520593377066505e-05, + "loss": 0.1837, + "step": 631400 + }, + { + "epoch": 3.55, + "learning_rate": 1.4514972146804051e-05, + "loss": 0.1826, + "step": 631500 + }, + { + "epoch": 3.55, + "learning_rate": 1.4509350916541594e-05, + "loss": 0.1872, + "step": 631600 + }, + { + "epoch": 3.55, + "learning_rate": 1.450372968627914e-05, + "loss": 0.1816, + "step": 631700 + }, + { + "epoch": 3.55, + "learning_rate": 1.4498108456016685e-05, + "loss": 0.1831, + "step": 631800 + }, + { + "epoch": 3.55, + "learning_rate": 1.4492487225754228e-05, + "loss": 0.1833, + "step": 631900 + }, + { + "epoch": 3.55, + "learning_rate": 1.4486865995491774e-05, + "loss": 0.1891, + "step": 632000 + }, + { + "epoch": 3.55, + "learning_rate": 1.4481244765229319e-05, + "loss": 0.1818, + "step": 632100 + }, + { + "epoch": 3.55, + "learning_rate": 1.4475623534966865e-05, + "loss": 0.1818, + "step": 632200 + }, + { + "epoch": 3.55, + "learning_rate": 1.4470002304704408e-05, + "loss": 0.1859, + "step": 632300 + }, + { + "epoch": 3.55, + "learning_rate": 1.4464381074441951e-05, + "loss": 0.179, + "step": 632400 + }, + { + "epoch": 3.56, + "learning_rate": 1.4458816056482122e-05, + "loss": 0.1887, + "step": 632500 + }, + { + "epoch": 3.56, + "learning_rate": 1.4453194826219668e-05, + "loss": 0.1822, + "step": 632600 + }, + { + "epoch": 3.56, + "learning_rate": 1.4447573595957211e-05, + "loss": 0.1795, + "step": 632700 + }, + { + "epoch": 3.56, + "learning_rate": 1.4441952365694756e-05, + "loss": 0.1843, + "step": 632800 + }, + { + "epoch": 3.56, + "learning_rate": 1.4436331135432302e-05, + "loss": 0.1818, + "step": 632900 + }, + { + "epoch": 3.56, + "learning_rate": 1.4430709905169845e-05, + "loss": 0.1832, + "step": 633000 + }, + { + "epoch": 3.56, + "learning_rate": 1.4425088674907391e-05, + "loss": 0.1816, + "step": 633100 + }, + { + "epoch": 3.56, + "learning_rate": 1.4419467444644936e-05, + "loss": 0.1773, + "step": 633200 + }, + { + "epoch": 3.56, + "learning_rate": 1.4413846214382482e-05, + "loss": 0.1893, + "step": 633300 + }, + { + "epoch": 3.56, + "learning_rate": 1.4408224984120025e-05, + "loss": 0.1796, + "step": 633400 + }, + { + "epoch": 3.56, + "learning_rate": 1.4402603753857568e-05, + "loss": 0.1842, + "step": 633500 + }, + { + "epoch": 3.56, + "learning_rate": 1.4396982523595116e-05, + "loss": 0.1865, + "step": 633600 + }, + { + "epoch": 3.56, + "learning_rate": 1.4391361293332659e-05, + "loss": 0.1796, + "step": 633700 + }, + { + "epoch": 3.56, + "learning_rate": 1.4385740063070205e-05, + "loss": 0.1811, + "step": 633800 + }, + { + "epoch": 3.56, + "learning_rate": 1.4380118832807748e-05, + "loss": 0.1796, + "step": 633900 + }, + { + "epoch": 3.56, + "learning_rate": 1.4374497602545294e-05, + "loss": 0.1801, + "step": 634000 + }, + { + "epoch": 3.56, + "learning_rate": 1.4368876372282839e-05, + "loss": 0.1806, + "step": 634100 + }, + { + "epoch": 3.56, + "learning_rate": 1.4363255142020382e-05, + "loss": 0.1834, + "step": 634200 + }, + { + "epoch": 3.57, + "learning_rate": 1.4357633911757928e-05, + "loss": 0.1784, + "step": 634300 + }, + { + "epoch": 3.57, + "learning_rate": 1.4352012681495472e-05, + "loss": 0.181, + "step": 634400 + }, + { + "epoch": 3.57, + "learning_rate": 1.4346391451233019e-05, + "loss": 0.1813, + "step": 634500 + }, + { + "epoch": 3.57, + "learning_rate": 1.4340826433273186e-05, + "loss": 0.1865, + "step": 634600 + }, + { + "epoch": 3.57, + "learning_rate": 1.4335205203010733e-05, + "loss": 0.1901, + "step": 634700 + }, + { + "epoch": 3.57, + "learning_rate": 1.4329583972748276e-05, + "loss": 0.1864, + "step": 634800 + }, + { + "epoch": 3.57, + "learning_rate": 1.4323962742485822e-05, + "loss": 0.1791, + "step": 634900 + }, + { + "epoch": 3.57, + "learning_rate": 1.4318341512223365e-05, + "loss": 0.1893, + "step": 635000 + }, + { + "epoch": 3.57, + "learning_rate": 1.4312720281960913e-05, + "loss": 0.1804, + "step": 635100 + }, + { + "epoch": 3.57, + "learning_rate": 1.4307099051698456e-05, + "loss": 0.1789, + "step": 635200 + }, + { + "epoch": 3.57, + "learning_rate": 1.4301477821435998e-05, + "loss": 0.185, + "step": 635300 + }, + { + "epoch": 3.57, + "learning_rate": 1.4295856591173545e-05, + "loss": 0.1842, + "step": 635400 + }, + { + "epoch": 3.57, + "learning_rate": 1.429023536091109e-05, + "loss": 0.1873, + "step": 635500 + }, + { + "epoch": 3.57, + "learning_rate": 1.4284614130648636e-05, + "loss": 0.1915, + "step": 635600 + }, + { + "epoch": 3.57, + "learning_rate": 1.4278992900386178e-05, + "loss": 0.1824, + "step": 635700 + }, + { + "epoch": 3.57, + "learning_rate": 1.4273371670123725e-05, + "loss": 0.184, + "step": 635800 + }, + { + "epoch": 3.57, + "learning_rate": 1.426775043986127e-05, + "loss": 0.1823, + "step": 635900 + }, + { + "epoch": 3.58, + "learning_rate": 1.4262129209598812e-05, + "loss": 0.174, + "step": 636000 + }, + { + "epoch": 3.58, + "learning_rate": 1.4256507979336359e-05, + "loss": 0.1851, + "step": 636100 + }, + { + "epoch": 3.58, + "learning_rate": 1.4250886749073903e-05, + "loss": 0.1818, + "step": 636200 + }, + { + "epoch": 3.58, + "learning_rate": 1.4245321731114072e-05, + "loss": 0.1854, + "step": 636300 + }, + { + "epoch": 3.58, + "learning_rate": 1.4239700500851615e-05, + "loss": 0.1814, + "step": 636400 + }, + { + "epoch": 3.58, + "learning_rate": 1.4234079270589162e-05, + "loss": 0.1854, + "step": 636500 + }, + { + "epoch": 3.58, + "learning_rate": 1.4228458040326706e-05, + "loss": 0.1845, + "step": 636600 + }, + { + "epoch": 3.58, + "learning_rate": 1.4222836810064252e-05, + "loss": 0.187, + "step": 636700 + }, + { + "epoch": 3.58, + "learning_rate": 1.4217215579801795e-05, + "loss": 0.1769, + "step": 636800 + }, + { + "epoch": 3.58, + "learning_rate": 1.4211594349539342e-05, + "loss": 0.1816, + "step": 636900 + }, + { + "epoch": 3.58, + "learning_rate": 1.4205973119276886e-05, + "loss": 0.174, + "step": 637000 + }, + { + "epoch": 3.58, + "learning_rate": 1.4200351889014429e-05, + "loss": 0.1819, + "step": 637100 + }, + { + "epoch": 3.58, + "learning_rate": 1.4194730658751975e-05, + "loss": 0.1887, + "step": 637200 + }, + { + "epoch": 3.58, + "learning_rate": 1.418910942848952e-05, + "loss": 0.1796, + "step": 637300 + }, + { + "epoch": 3.58, + "learning_rate": 1.4183488198227066e-05, + "loss": 0.1794, + "step": 637400 + }, + { + "epoch": 3.58, + "learning_rate": 1.4177866967964609e-05, + "loss": 0.1829, + "step": 637500 + }, + { + "epoch": 3.58, + "learning_rate": 1.4172245737702152e-05, + "loss": 0.1844, + "step": 637600 + }, + { + "epoch": 3.58, + "learning_rate": 1.41666245074397e-05, + "loss": 0.182, + "step": 637700 + }, + { + "epoch": 3.59, + "learning_rate": 1.4161003277177243e-05, + "loss": 0.1788, + "step": 637800 + }, + { + "epoch": 3.59, + "learning_rate": 1.4155382046914789e-05, + "loss": 0.1873, + "step": 637900 + }, + { + "epoch": 3.59, + "learning_rate": 1.4149760816652332e-05, + "loss": 0.1783, + "step": 638000 + }, + { + "epoch": 3.59, + "learning_rate": 1.4144139586389878e-05, + "loss": 0.1785, + "step": 638100 + }, + { + "epoch": 3.59, + "learning_rate": 1.4138518356127423e-05, + "loss": 0.1875, + "step": 638200 + }, + { + "epoch": 3.59, + "learning_rate": 1.4132897125864966e-05, + "loss": 0.1868, + "step": 638300 + }, + { + "epoch": 3.59, + "learning_rate": 1.4127275895602512e-05, + "loss": 0.182, + "step": 638400 + }, + { + "epoch": 3.59, + "learning_rate": 1.4121654665340057e-05, + "loss": 0.1842, + "step": 638500 + }, + { + "epoch": 3.59, + "learning_rate": 1.4116033435077603e-05, + "loss": 0.1816, + "step": 638600 + }, + { + "epoch": 3.59, + "learning_rate": 1.4110412204815146e-05, + "loss": 0.1828, + "step": 638700 + }, + { + "epoch": 3.59, + "learning_rate": 1.4104790974552692e-05, + "loss": 0.1811, + "step": 638800 + }, + { + "epoch": 3.59, + "learning_rate": 1.4099169744290237e-05, + "loss": 0.1847, + "step": 638900 + }, + { + "epoch": 3.59, + "learning_rate": 1.409354851402778e-05, + "loss": 0.1796, + "step": 639000 + }, + { + "epoch": 3.59, + "learning_rate": 1.4087927283765326e-05, + "loss": 0.1804, + "step": 639100 + }, + { + "epoch": 3.59, + "learning_rate": 1.4082306053502869e-05, + "loss": 0.1814, + "step": 639200 + }, + { + "epoch": 3.59, + "learning_rate": 1.4076684823240415e-05, + "loss": 0.1807, + "step": 639300 + }, + { + "epoch": 3.59, + "learning_rate": 1.407106359297796e-05, + "loss": 0.1842, + "step": 639400 + }, + { + "epoch": 3.59, + "learning_rate": 1.4065442362715506e-05, + "loss": 0.1859, + "step": 639500 + }, + { + "epoch": 3.6, + "learning_rate": 1.4059821132453049e-05, + "loss": 0.1844, + "step": 639600 + }, + { + "epoch": 3.6, + "learning_rate": 1.4054199902190593e-05, + "loss": 0.1789, + "step": 639700 + }, + { + "epoch": 3.6, + "learning_rate": 1.404857867192814e-05, + "loss": 0.1839, + "step": 639800 + }, + { + "epoch": 3.6, + "learning_rate": 1.4042957441665682e-05, + "loss": 0.1868, + "step": 639900 + }, + { + "epoch": 3.6, + "learning_rate": 1.4037336211403229e-05, + "loss": 0.184, + "step": 640000 + }, + { + "epoch": 3.6, + "learning_rate": 1.4031714981140773e-05, + "loss": 0.1771, + "step": 640100 + }, + { + "epoch": 3.6, + "learning_rate": 1.402609375087832e-05, + "loss": 0.1815, + "step": 640200 + }, + { + "epoch": 3.6, + "learning_rate": 1.4020472520615863e-05, + "loss": 0.1888, + "step": 640300 + }, + { + "epoch": 3.6, + "learning_rate": 1.4014851290353407e-05, + "loss": 0.1734, + "step": 640400 + }, + { + "epoch": 3.6, + "learning_rate": 1.4009286272393576e-05, + "loss": 0.1838, + "step": 640500 + }, + { + "epoch": 3.6, + "learning_rate": 1.4003665042131123e-05, + "loss": 0.1869, + "step": 640600 + }, + { + "epoch": 3.6, + "learning_rate": 1.3998043811868666e-05, + "loss": 0.1755, + "step": 640700 + }, + { + "epoch": 3.6, + "learning_rate": 1.399242258160621e-05, + "loss": 0.189, + "step": 640800 + }, + { + "epoch": 3.6, + "learning_rate": 1.3986801351343756e-05, + "loss": 0.1827, + "step": 640900 + }, + { + "epoch": 3.6, + "learning_rate": 1.39811801210813e-05, + "loss": 0.184, + "step": 641000 + }, + { + "epoch": 3.6, + "learning_rate": 1.3975558890818846e-05, + "loss": 0.1845, + "step": 641100 + }, + { + "epoch": 3.6, + "learning_rate": 1.396993766055639e-05, + "loss": 0.1828, + "step": 641200 + }, + { + "epoch": 3.6, + "learning_rate": 1.3964316430293936e-05, + "loss": 0.1801, + "step": 641300 + }, + { + "epoch": 3.61, + "learning_rate": 1.395869520003148e-05, + "loss": 0.1726, + "step": 641400 + }, + { + "epoch": 3.61, + "learning_rate": 1.3953073969769024e-05, + "loss": 0.1811, + "step": 641500 + }, + { + "epoch": 3.61, + "learning_rate": 1.394745273950657e-05, + "loss": 0.1779, + "step": 641600 + }, + { + "epoch": 3.61, + "learning_rate": 1.3941831509244113e-05, + "loss": 0.1857, + "step": 641700 + }, + { + "epoch": 3.61, + "learning_rate": 1.393621027898166e-05, + "loss": 0.1831, + "step": 641800 + }, + { + "epoch": 3.61, + "learning_rate": 1.3930589048719202e-05, + "loss": 0.1807, + "step": 641900 + }, + { + "epoch": 3.61, + "learning_rate": 1.392496781845675e-05, + "loss": 0.1844, + "step": 642000 + }, + { + "epoch": 3.61, + "learning_rate": 1.3919346588194293e-05, + "loss": 0.1832, + "step": 642100 + }, + { + "epoch": 3.61, + "learning_rate": 1.3913725357931836e-05, + "loss": 0.1825, + "step": 642200 + }, + { + "epoch": 3.61, + "learning_rate": 1.3908104127669382e-05, + "loss": 0.1842, + "step": 642300 + }, + { + "epoch": 3.61, + "learning_rate": 1.3902482897406927e-05, + "loss": 0.1793, + "step": 642400 + }, + { + "epoch": 3.61, + "learning_rate": 1.3896861667144473e-05, + "loss": 0.1827, + "step": 642500 + }, + { + "epoch": 3.61, + "learning_rate": 1.3891240436882016e-05, + "loss": 0.1788, + "step": 642600 + }, + { + "epoch": 3.61, + "learning_rate": 1.388561920661956e-05, + "loss": 0.1744, + "step": 642700 + }, + { + "epoch": 3.61, + "learning_rate": 1.3879997976357107e-05, + "loss": 0.1812, + "step": 642800 + }, + { + "epoch": 3.61, + "learning_rate": 1.3874432958397276e-05, + "loss": 0.181, + "step": 642900 + }, + { + "epoch": 3.61, + "learning_rate": 1.3868867940437444e-05, + "loss": 0.1823, + "step": 643000 + }, + { + "epoch": 3.62, + "learning_rate": 1.386324671017499e-05, + "loss": 0.1743, + "step": 643100 + }, + { + "epoch": 3.62, + "learning_rate": 1.3857625479912533e-05, + "loss": 0.183, + "step": 643200 + }, + { + "epoch": 3.62, + "learning_rate": 1.385200424965008e-05, + "loss": 0.1856, + "step": 643300 + }, + { + "epoch": 3.62, + "learning_rate": 1.3846383019387624e-05, + "loss": 0.1801, + "step": 643400 + }, + { + "epoch": 3.62, + "learning_rate": 1.3840761789125167e-05, + "loss": 0.1866, + "step": 643500 + }, + { + "epoch": 3.62, + "learning_rate": 1.3835140558862713e-05, + "loss": 0.1832, + "step": 643600 + }, + { + "epoch": 3.62, + "learning_rate": 1.3829519328600258e-05, + "loss": 0.1781, + "step": 643700 + }, + { + "epoch": 3.62, + "learning_rate": 1.3823898098337804e-05, + "loss": 0.1848, + "step": 643800 + }, + { + "epoch": 3.62, + "learning_rate": 1.3818276868075347e-05, + "loss": 0.1805, + "step": 643900 + }, + { + "epoch": 3.62, + "learning_rate": 1.3812655637812893e-05, + "loss": 0.1765, + "step": 644000 + }, + { + "epoch": 3.62, + "learning_rate": 1.3807034407550438e-05, + "loss": 0.1802, + "step": 644100 + }, + { + "epoch": 3.62, + "learning_rate": 1.380141317728798e-05, + "loss": 0.1778, + "step": 644200 + }, + { + "epoch": 3.62, + "learning_rate": 1.3795791947025527e-05, + "loss": 0.1827, + "step": 644300 + }, + { + "epoch": 3.62, + "learning_rate": 1.3790170716763071e-05, + "loss": 0.1833, + "step": 644400 + }, + { + "epoch": 3.62, + "learning_rate": 1.3784549486500618e-05, + "loss": 0.1824, + "step": 644500 + }, + { + "epoch": 3.62, + "learning_rate": 1.377892825623816e-05, + "loss": 0.1804, + "step": 644600 + }, + { + "epoch": 3.62, + "learning_rate": 1.3773307025975707e-05, + "loss": 0.1812, + "step": 644700 + }, + { + "epoch": 3.62, + "learning_rate": 1.376768579571325e-05, + "loss": 0.1778, + "step": 644800 + }, + { + "epoch": 3.63, + "learning_rate": 1.3762064565450794e-05, + "loss": 0.1801, + "step": 644900 + }, + { + "epoch": 3.63, + "learning_rate": 1.375644333518834e-05, + "loss": 0.1885, + "step": 645000 + }, + { + "epoch": 3.63, + "learning_rate": 1.3750822104925884e-05, + "loss": 0.1806, + "step": 645100 + }, + { + "epoch": 3.63, + "learning_rate": 1.374520087466343e-05, + "loss": 0.1862, + "step": 645200 + }, + { + "epoch": 3.63, + "learning_rate": 1.3739579644400974e-05, + "loss": 0.1781, + "step": 645300 + }, + { + "epoch": 3.63, + "learning_rate": 1.373395841413852e-05, + "loss": 0.1807, + "step": 645400 + }, + { + "epoch": 3.63, + "learning_rate": 1.3728393396178688e-05, + "loss": 0.1782, + "step": 645500 + }, + { + "epoch": 3.63, + "learning_rate": 1.3722772165916235e-05, + "loss": 0.1827, + "step": 645600 + }, + { + "epoch": 3.63, + "learning_rate": 1.3717150935653777e-05, + "loss": 0.1846, + "step": 645700 + }, + { + "epoch": 3.63, + "learning_rate": 1.3711529705391324e-05, + "loss": 0.1819, + "step": 645800 + }, + { + "epoch": 3.63, + "learning_rate": 1.3705908475128867e-05, + "loss": 0.1793, + "step": 645900 + }, + { + "epoch": 3.63, + "learning_rate": 1.3700287244866411e-05, + "loss": 0.1792, + "step": 646000 + }, + { + "epoch": 3.63, + "learning_rate": 1.3694666014603957e-05, + "loss": 0.1888, + "step": 646100 + }, + { + "epoch": 3.63, + "learning_rate": 1.36890447843415e-05, + "loss": 0.1853, + "step": 646200 + }, + { + "epoch": 3.63, + "learning_rate": 1.3683423554079047e-05, + "loss": 0.1881, + "step": 646300 + }, + { + "epoch": 3.63, + "learning_rate": 1.3677802323816591e-05, + "loss": 0.1824, + "step": 646400 + }, + { + "epoch": 3.63, + "learning_rate": 1.3672181093554137e-05, + "loss": 0.1827, + "step": 646500 + }, + { + "epoch": 3.63, + "learning_rate": 1.366655986329168e-05, + "loss": 0.182, + "step": 646600 + }, + { + "epoch": 3.64, + "learning_rate": 1.3660938633029225e-05, + "loss": 0.1817, + "step": 646700 + }, + { + "epoch": 3.64, + "learning_rate": 1.3655317402766771e-05, + "loss": 0.1798, + "step": 646800 + }, + { + "epoch": 3.64, + "learning_rate": 1.3649696172504314e-05, + "loss": 0.178, + "step": 646900 + }, + { + "epoch": 3.64, + "learning_rate": 1.364407494224186e-05, + "loss": 0.1817, + "step": 647000 + }, + { + "epoch": 3.64, + "learning_rate": 1.3638453711979405e-05, + "loss": 0.1806, + "step": 647100 + }, + { + "epoch": 3.64, + "learning_rate": 1.3632832481716951e-05, + "loss": 0.1814, + "step": 647200 + }, + { + "epoch": 3.64, + "learning_rate": 1.3627267463757117e-05, + "loss": 0.185, + "step": 647300 + }, + { + "epoch": 3.64, + "learning_rate": 1.3621646233494663e-05, + "loss": 0.185, + "step": 647400 + }, + { + "epoch": 3.64, + "learning_rate": 1.3616025003232208e-05, + "loss": 0.1815, + "step": 647500 + }, + { + "epoch": 3.64, + "learning_rate": 1.3610403772969751e-05, + "loss": 0.1828, + "step": 647600 + }, + { + "epoch": 3.64, + "learning_rate": 1.3604782542707297e-05, + "loss": 0.1812, + "step": 647700 + }, + { + "epoch": 3.64, + "learning_rate": 1.3599161312444842e-05, + "loss": 0.1822, + "step": 647800 + }, + { + "epoch": 3.64, + "learning_rate": 1.3593540082182388e-05, + "loss": 0.1786, + "step": 647900 + }, + { + "epoch": 3.64, + "learning_rate": 1.3587918851919931e-05, + "loss": 0.1822, + "step": 648000 + }, + { + "epoch": 3.64, + "learning_rate": 1.3582297621657477e-05, + "loss": 0.1823, + "step": 648100 + }, + { + "epoch": 3.64, + "learning_rate": 1.3576676391395022e-05, + "loss": 0.1769, + "step": 648200 + }, + { + "epoch": 3.64, + "learning_rate": 1.3571055161132565e-05, + "loss": 0.1783, + "step": 648300 + }, + { + "epoch": 3.64, + "learning_rate": 1.3565433930870111e-05, + "loss": 0.1875, + "step": 648400 + }, + { + "epoch": 3.65, + "learning_rate": 1.3559812700607654e-05, + "loss": 0.1806, + "step": 648500 + }, + { + "epoch": 3.65, + "learning_rate": 1.3554191470345202e-05, + "loss": 0.1815, + "step": 648600 + }, + { + "epoch": 3.65, + "learning_rate": 1.3548570240082745e-05, + "loss": 0.1836, + "step": 648700 + }, + { + "epoch": 3.65, + "learning_rate": 1.3542949009820291e-05, + "loss": 0.1765, + "step": 648800 + }, + { + "epoch": 3.65, + "learning_rate": 1.3537327779557834e-05, + "loss": 0.1966, + "step": 648900 + }, + { + "epoch": 3.65, + "learning_rate": 1.3531706549295379e-05, + "loss": 0.1764, + "step": 649000 + }, + { + "epoch": 3.65, + "learning_rate": 1.3526085319032925e-05, + "loss": 0.183, + "step": 649100 + }, + { + "epoch": 3.65, + "learning_rate": 1.3520464088770468e-05, + "loss": 0.1787, + "step": 649200 + }, + { + "epoch": 3.65, + "learning_rate": 1.3514842858508014e-05, + "loss": 0.1818, + "step": 649300 + }, + { + "epoch": 3.65, + "learning_rate": 1.3509221628245559e-05, + "loss": 0.1825, + "step": 649400 + }, + { + "epoch": 3.65, + "learning_rate": 1.3503600397983105e-05, + "loss": 0.1792, + "step": 649500 + }, + { + "epoch": 3.65, + "learning_rate": 1.3497979167720648e-05, + "loss": 0.1825, + "step": 649600 + }, + { + "epoch": 3.65, + "learning_rate": 1.3492357937458192e-05, + "loss": 0.1838, + "step": 649700 + }, + { + "epoch": 3.65, + "learning_rate": 1.3486736707195739e-05, + "loss": 0.1802, + "step": 649800 + }, + { + "epoch": 3.65, + "learning_rate": 1.3481115476933281e-05, + "loss": 0.1829, + "step": 649900 + }, + { + "epoch": 3.65, + "learning_rate": 1.3475494246670828e-05, + "loss": 0.1749, + "step": 650000 + }, + { + "epoch": 3.65, + "learning_rate": 1.346987301640837e-05, + "loss": 0.1827, + "step": 650100 + }, + { + "epoch": 3.65, + "learning_rate": 1.3464251786145919e-05, + "loss": 0.1838, + "step": 650200 + }, + { + "epoch": 3.66, + "learning_rate": 1.3458630555883461e-05, + "loss": 0.1783, + "step": 650300 + }, + { + "epoch": 3.66, + "learning_rate": 1.3453009325621004e-05, + "loss": 0.1718, + "step": 650400 + }, + { + "epoch": 3.66, + "learning_rate": 1.344738809535855e-05, + "loss": 0.1862, + "step": 650500 + }, + { + "epoch": 3.66, + "learning_rate": 1.3441766865096095e-05, + "loss": 0.1847, + "step": 650600 + }, + { + "epoch": 3.66, + "learning_rate": 1.3436145634833641e-05, + "loss": 0.1805, + "step": 650700 + }, + { + "epoch": 3.66, + "learning_rate": 1.3430524404571184e-05, + "loss": 0.185, + "step": 650800 + }, + { + "epoch": 3.66, + "learning_rate": 1.342490317430873e-05, + "loss": 0.1802, + "step": 650900 + }, + { + "epoch": 3.66, + "learning_rate": 1.3419281944046275e-05, + "loss": 0.1823, + "step": 651000 + }, + { + "epoch": 3.66, + "learning_rate": 1.3413660713783818e-05, + "loss": 0.179, + "step": 651100 + }, + { + "epoch": 3.66, + "learning_rate": 1.3408039483521364e-05, + "loss": 0.1828, + "step": 651200 + }, + { + "epoch": 3.66, + "learning_rate": 1.3402418253258909e-05, + "loss": 0.189, + "step": 651300 + }, + { + "epoch": 3.66, + "learning_rate": 1.3396797022996455e-05, + "loss": 0.1819, + "step": 651400 + }, + { + "epoch": 3.66, + "learning_rate": 1.3391175792733998e-05, + "loss": 0.1832, + "step": 651500 + }, + { + "epoch": 3.66, + "learning_rate": 1.3385554562471544e-05, + "loss": 0.1776, + "step": 651600 + }, + { + "epoch": 3.66, + "learning_rate": 1.3379933332209087e-05, + "loss": 0.1767, + "step": 651700 + }, + { + "epoch": 3.66, + "learning_rate": 1.3374312101946632e-05, + "loss": 0.1847, + "step": 651800 + }, + { + "epoch": 3.66, + "learning_rate": 1.3368690871684178e-05, + "loss": 0.1815, + "step": 651900 + }, + { + "epoch": 3.67, + "learning_rate": 1.3363069641421721e-05, + "loss": 0.1792, + "step": 652000 + }, + { + "epoch": 3.67, + "learning_rate": 1.3357448411159267e-05, + "loss": 0.1793, + "step": 652100 + }, + { + "epoch": 3.67, + "learning_rate": 1.3351827180896812e-05, + "loss": 0.1835, + "step": 652200 + }, + { + "epoch": 3.67, + "learning_rate": 1.3346205950634358e-05, + "loss": 0.1867, + "step": 652300 + }, + { + "epoch": 3.67, + "learning_rate": 1.3340640932674526e-05, + "loss": 0.1828, + "step": 652400 + }, + { + "epoch": 3.67, + "learning_rate": 1.3335019702412072e-05, + "loss": 0.1874, + "step": 652500 + }, + { + "epoch": 3.67, + "learning_rate": 1.3329398472149615e-05, + "loss": 0.1849, + "step": 652600 + }, + { + "epoch": 3.67, + "learning_rate": 1.3323777241887158e-05, + "loss": 0.1816, + "step": 652700 + }, + { + "epoch": 3.67, + "learning_rate": 1.3318156011624706e-05, + "loss": 0.1826, + "step": 652800 + }, + { + "epoch": 3.67, + "learning_rate": 1.3312534781362249e-05, + "loss": 0.1783, + "step": 652900 + }, + { + "epoch": 3.67, + "learning_rate": 1.3306913551099795e-05, + "loss": 0.1829, + "step": 653000 + }, + { + "epoch": 3.67, + "learning_rate": 1.3301292320837338e-05, + "loss": 0.1773, + "step": 653100 + }, + { + "epoch": 3.67, + "learning_rate": 1.3295671090574884e-05, + "loss": 0.1845, + "step": 653200 + }, + { + "epoch": 3.67, + "learning_rate": 1.3290049860312429e-05, + "loss": 0.1844, + "step": 653300 + }, + { + "epoch": 3.67, + "learning_rate": 1.3284428630049972e-05, + "loss": 0.1818, + "step": 653400 + }, + { + "epoch": 3.67, + "learning_rate": 1.3278807399787518e-05, + "loss": 0.1784, + "step": 653500 + }, + { + "epoch": 3.67, + "learning_rate": 1.3273186169525063e-05, + "loss": 0.1807, + "step": 653600 + }, + { + "epoch": 3.67, + "learning_rate": 1.3267564939262609e-05, + "loss": 0.1807, + "step": 653700 + }, + { + "epoch": 3.68, + "learning_rate": 1.3261943709000152e-05, + "loss": 0.1744, + "step": 653800 + }, + { + "epoch": 3.68, + "learning_rate": 1.3256322478737698e-05, + "loss": 0.1802, + "step": 653900 + }, + { + "epoch": 3.68, + "learning_rate": 1.3250701248475243e-05, + "loss": 0.1808, + "step": 654000 + }, + { + "epoch": 3.68, + "learning_rate": 1.3245080018212785e-05, + "loss": 0.1775, + "step": 654100 + }, + { + "epoch": 3.68, + "learning_rate": 1.3239458787950332e-05, + "loss": 0.1834, + "step": 654200 + }, + { + "epoch": 3.68, + "learning_rate": 1.3233837557687875e-05, + "loss": 0.1755, + "step": 654300 + }, + { + "epoch": 3.68, + "learning_rate": 1.3228216327425421e-05, + "loss": 0.1806, + "step": 654400 + }, + { + "epoch": 3.68, + "learning_rate": 1.3222595097162965e-05, + "loss": 0.1723, + "step": 654500 + }, + { + "epoch": 3.68, + "learning_rate": 1.3216973866900512e-05, + "loss": 0.179, + "step": 654600 + }, + { + "epoch": 3.68, + "learning_rate": 1.3211352636638055e-05, + "loss": 0.1738, + "step": 654700 + }, + { + "epoch": 3.68, + "learning_rate": 1.32057314063756e-05, + "loss": 0.1781, + "step": 654800 + }, + { + "epoch": 3.68, + "learning_rate": 1.3200110176113146e-05, + "loss": 0.1827, + "step": 654900 + }, + { + "epoch": 3.68, + "learning_rate": 1.3194488945850688e-05, + "loss": 0.1786, + "step": 655000 + }, + { + "epoch": 3.68, + "learning_rate": 1.3188867715588235e-05, + "loss": 0.182, + "step": 655100 + }, + { + "epoch": 3.68, + "learning_rate": 1.318324648532578e-05, + "loss": 0.1812, + "step": 655200 + }, + { + "epoch": 3.68, + "learning_rate": 1.3177625255063326e-05, + "loss": 0.1792, + "step": 655300 + }, + { + "epoch": 3.68, + "learning_rate": 1.3172004024800868e-05, + "loss": 0.1817, + "step": 655400 + }, + { + "epoch": 3.68, + "learning_rate": 1.3166382794538413e-05, + "loss": 0.1814, + "step": 655500 + }, + { + "epoch": 3.69, + "learning_rate": 1.316076156427596e-05, + "loss": 0.1789, + "step": 655600 + }, + { + "epoch": 3.69, + "learning_rate": 1.3155140334013502e-05, + "loss": 0.181, + "step": 655700 + }, + { + "epoch": 3.69, + "learning_rate": 1.3149519103751048e-05, + "loss": 0.1793, + "step": 655800 + }, + { + "epoch": 3.69, + "learning_rate": 1.3143897873488591e-05, + "loss": 0.1818, + "step": 655900 + }, + { + "epoch": 3.69, + "learning_rate": 1.3138276643226138e-05, + "loss": 0.1789, + "step": 656000 + }, + { + "epoch": 3.69, + "learning_rate": 1.3132711625266305e-05, + "loss": 0.186, + "step": 656100 + }, + { + "epoch": 3.69, + "learning_rate": 1.3127090395003852e-05, + "loss": 0.1833, + "step": 656200 + }, + { + "epoch": 3.69, + "learning_rate": 1.3121469164741396e-05, + "loss": 0.1791, + "step": 656300 + }, + { + "epoch": 3.69, + "learning_rate": 1.3115847934478942e-05, + "loss": 0.1849, + "step": 656400 + }, + { + "epoch": 3.69, + "learning_rate": 1.3110226704216485e-05, + "loss": 0.1815, + "step": 656500 + }, + { + "epoch": 3.69, + "learning_rate": 1.310460547395403e-05, + "loss": 0.1801, + "step": 656600 + }, + { + "epoch": 3.69, + "learning_rate": 1.3098984243691576e-05, + "loss": 0.1817, + "step": 656700 + }, + { + "epoch": 3.69, + "learning_rate": 1.3093363013429119e-05, + "loss": 0.1838, + "step": 656800 + }, + { + "epoch": 3.69, + "learning_rate": 1.308779799546929e-05, + "loss": 0.1772, + "step": 656900 + }, + { + "epoch": 3.69, + "learning_rate": 1.3082176765206833e-05, + "loss": 0.1744, + "step": 657000 + }, + { + "epoch": 3.69, + "learning_rate": 1.307655553494438e-05, + "loss": 0.1771, + "step": 657100 + }, + { + "epoch": 3.69, + "learning_rate": 1.3070934304681922e-05, + "loss": 0.1843, + "step": 657200 + }, + { + "epoch": 3.69, + "learning_rate": 1.3065313074419468e-05, + "loss": 0.183, + "step": 657300 + }, + { + "epoch": 3.7, + "learning_rate": 1.3059691844157013e-05, + "loss": 0.1894, + "step": 657400 + }, + { + "epoch": 3.7, + "learning_rate": 1.3054070613894556e-05, + "loss": 0.1774, + "step": 657500 + }, + { + "epoch": 3.7, + "learning_rate": 1.3048449383632102e-05, + "loss": 0.1835, + "step": 657600 + }, + { + "epoch": 3.7, + "learning_rate": 1.3042828153369647e-05, + "loss": 0.1789, + "step": 657700 + }, + { + "epoch": 3.7, + "learning_rate": 1.3037206923107193e-05, + "loss": 0.1796, + "step": 657800 + }, + { + "epoch": 3.7, + "learning_rate": 1.3031585692844736e-05, + "loss": 0.1784, + "step": 657900 + }, + { + "epoch": 3.7, + "learning_rate": 1.3025964462582282e-05, + "loss": 0.1818, + "step": 658000 + }, + { + "epoch": 3.7, + "learning_rate": 1.3020343232319827e-05, + "loss": 0.1785, + "step": 658100 + }, + { + "epoch": 3.7, + "learning_rate": 1.301472200205737e-05, + "loss": 0.1788, + "step": 658200 + }, + { + "epoch": 3.7, + "learning_rate": 1.3009100771794916e-05, + "loss": 0.1826, + "step": 658300 + }, + { + "epoch": 3.7, + "learning_rate": 1.3003479541532459e-05, + "loss": 0.1837, + "step": 658400 + }, + { + "epoch": 3.7, + "learning_rate": 1.2997858311270005e-05, + "loss": 0.1799, + "step": 658500 + }, + { + "epoch": 3.7, + "learning_rate": 1.299223708100755e-05, + "loss": 0.1859, + "step": 658600 + }, + { + "epoch": 3.7, + "learning_rate": 1.2986615850745096e-05, + "loss": 0.1763, + "step": 658700 + }, + { + "epoch": 3.7, + "learning_rate": 1.2980994620482639e-05, + "loss": 0.1781, + "step": 658800 + }, + { + "epoch": 3.7, + "learning_rate": 1.2975373390220183e-05, + "loss": 0.18, + "step": 658900 + }, + { + "epoch": 3.7, + "learning_rate": 1.296975215995773e-05, + "loss": 0.1836, + "step": 659000 + }, + { + "epoch": 3.7, + "learning_rate": 1.2964130929695273e-05, + "loss": 0.1853, + "step": 659100 + }, + { + "epoch": 3.71, + "learning_rate": 1.2958509699432819e-05, + "loss": 0.1809, + "step": 659200 + }, + { + "epoch": 3.71, + "learning_rate": 1.2952888469170363e-05, + "loss": 0.1798, + "step": 659300 + }, + { + "epoch": 3.71, + "learning_rate": 1.294726723890791e-05, + "loss": 0.1821, + "step": 659400 + }, + { + "epoch": 3.71, + "learning_rate": 1.2941646008645453e-05, + "loss": 0.1824, + "step": 659500 + }, + { + "epoch": 3.71, + "learning_rate": 1.2936024778382995e-05, + "loss": 0.1813, + "step": 659600 + }, + { + "epoch": 3.71, + "learning_rate": 1.2930403548120543e-05, + "loss": 0.1844, + "step": 659700 + }, + { + "epoch": 3.71, + "learning_rate": 1.2924782317858086e-05, + "loss": 0.1847, + "step": 659800 + }, + { + "epoch": 3.71, + "learning_rate": 1.2919161087595633e-05, + "loss": 0.176, + "step": 659900 + }, + { + "epoch": 3.71, + "learning_rate": 1.2913539857333175e-05, + "loss": 0.1914, + "step": 660000 + }, + { + "epoch": 3.71, + "learning_rate": 1.2907918627070722e-05, + "loss": 0.1845, + "step": 660100 + }, + { + "epoch": 3.71, + "learning_rate": 1.2902297396808266e-05, + "loss": 0.181, + "step": 660200 + }, + { + "epoch": 3.71, + "learning_rate": 1.289667616654581e-05, + "loss": 0.1708, + "step": 660300 + }, + { + "epoch": 3.71, + "learning_rate": 1.2891054936283356e-05, + "loss": 0.1815, + "step": 660400 + }, + { + "epoch": 3.71, + "learning_rate": 1.28854337060209e-05, + "loss": 0.1831, + "step": 660500 + }, + { + "epoch": 3.71, + "learning_rate": 1.287986868806107e-05, + "loss": 0.1838, + "step": 660600 + }, + { + "epoch": 3.71, + "learning_rate": 1.2874247457798614e-05, + "loss": 0.1806, + "step": 660700 + }, + { + "epoch": 3.71, + "learning_rate": 1.286862622753616e-05, + "loss": 0.1758, + "step": 660800 + }, + { + "epoch": 3.72, + "learning_rate": 1.2863004997273703e-05, + "loss": 0.1816, + "step": 660900 + }, + { + "epoch": 3.72, + "learning_rate": 1.285738376701125e-05, + "loss": 0.1893, + "step": 661000 + }, + { + "epoch": 3.72, + "learning_rate": 1.2851762536748792e-05, + "loss": 0.1777, + "step": 661100 + }, + { + "epoch": 3.72, + "learning_rate": 1.284614130648634e-05, + "loss": 0.1832, + "step": 661200 + }, + { + "epoch": 3.72, + "learning_rate": 1.2840520076223883e-05, + "loss": 0.1813, + "step": 661300 + }, + { + "epoch": 3.72, + "learning_rate": 1.2834898845961426e-05, + "loss": 0.1821, + "step": 661400 + }, + { + "epoch": 3.72, + "learning_rate": 1.2829277615698972e-05, + "loss": 0.1714, + "step": 661500 + }, + { + "epoch": 3.72, + "learning_rate": 1.2823656385436517e-05, + "loss": 0.1811, + "step": 661600 + }, + { + "epoch": 3.72, + "learning_rate": 1.2818035155174063e-05, + "loss": 0.1826, + "step": 661700 + }, + { + "epoch": 3.72, + "learning_rate": 1.2812413924911606e-05, + "loss": 0.1797, + "step": 661800 + }, + { + "epoch": 3.72, + "learning_rate": 1.2806792694649152e-05, + "loss": 0.1832, + "step": 661900 + }, + { + "epoch": 3.72, + "learning_rate": 1.2801171464386697e-05, + "loss": 0.1792, + "step": 662000 + }, + { + "epoch": 3.72, + "learning_rate": 1.279555023412424e-05, + "loss": 0.1794, + "step": 662100 + }, + { + "epoch": 3.72, + "learning_rate": 1.2789929003861786e-05, + "loss": 0.1778, + "step": 662200 + }, + { + "epoch": 3.72, + "learning_rate": 1.278430777359933e-05, + "loss": 0.18, + "step": 662300 + }, + { + "epoch": 3.72, + "learning_rate": 1.2778686543336877e-05, + "loss": 0.18, + "step": 662400 + }, + { + "epoch": 3.72, + "learning_rate": 1.277306531307442e-05, + "loss": 0.1842, + "step": 662500 + }, + { + "epoch": 3.72, + "learning_rate": 1.2767444082811963e-05, + "loss": 0.1818, + "step": 662600 + }, + { + "epoch": 3.73, + "learning_rate": 1.2761822852549509e-05, + "loss": 0.1801, + "step": 662700 + }, + { + "epoch": 3.73, + "learning_rate": 1.2756201622287054e-05, + "loss": 0.188, + "step": 662800 + }, + { + "epoch": 3.73, + "learning_rate": 1.27505803920246e-05, + "loss": 0.1815, + "step": 662900 + }, + { + "epoch": 3.73, + "learning_rate": 1.2744959161762143e-05, + "loss": 0.1838, + "step": 663000 + }, + { + "epoch": 3.73, + "learning_rate": 1.2739337931499689e-05, + "loss": 0.1837, + "step": 663100 + }, + { + "epoch": 3.73, + "learning_rate": 1.2733716701237234e-05, + "loss": 0.1743, + "step": 663200 + }, + { + "epoch": 3.73, + "learning_rate": 1.2728095470974777e-05, + "loss": 0.1785, + "step": 663300 + }, + { + "epoch": 3.73, + "learning_rate": 1.2722474240712323e-05, + "loss": 0.1806, + "step": 663400 + }, + { + "epoch": 3.73, + "learning_rate": 1.2716909222752494e-05, + "loss": 0.1773, + "step": 663500 + }, + { + "epoch": 3.73, + "learning_rate": 1.2711287992490037e-05, + "loss": 0.1846, + "step": 663600 + }, + { + "epoch": 3.73, + "learning_rate": 1.270566676222758e-05, + "loss": 0.1826, + "step": 663700 + }, + { + "epoch": 3.73, + "learning_rate": 1.2700045531965128e-05, + "loss": 0.1921, + "step": 663800 + }, + { + "epoch": 3.73, + "learning_rate": 1.269442430170267e-05, + "loss": 0.1849, + "step": 663900 + }, + { + "epoch": 3.73, + "learning_rate": 1.2688803071440217e-05, + "loss": 0.1826, + "step": 664000 + }, + { + "epoch": 3.73, + "learning_rate": 1.268318184117776e-05, + "loss": 0.1797, + "step": 664100 + }, + { + "epoch": 3.73, + "learning_rate": 1.2677560610915306e-05, + "loss": 0.1811, + "step": 664200 + }, + { + "epoch": 3.73, + "learning_rate": 1.267193938065285e-05, + "loss": 0.1759, + "step": 664300 + }, + { + "epoch": 3.73, + "learning_rate": 1.2666318150390393e-05, + "loss": 0.1867, + "step": 664400 + }, + { + "epoch": 3.74, + "learning_rate": 1.266069692012794e-05, + "loss": 0.1803, + "step": 664500 + }, + { + "epoch": 3.74, + "learning_rate": 1.2655075689865484e-05, + "loss": 0.1799, + "step": 664600 + }, + { + "epoch": 3.74, + "learning_rate": 1.264945445960303e-05, + "loss": 0.174, + "step": 664700 + }, + { + "epoch": 3.74, + "learning_rate": 1.2643833229340573e-05, + "loss": 0.1809, + "step": 664800 + }, + { + "epoch": 3.74, + "learning_rate": 1.263821199907812e-05, + "loss": 0.1752, + "step": 664900 + }, + { + "epoch": 3.74, + "learning_rate": 1.2632590768815664e-05, + "loss": 0.1841, + "step": 665000 + }, + { + "epoch": 3.74, + "learning_rate": 1.2626969538553207e-05, + "loss": 0.1766, + "step": 665100 + }, + { + "epoch": 3.74, + "learning_rate": 1.2621348308290753e-05, + "loss": 0.1844, + "step": 665200 + }, + { + "epoch": 3.74, + "learning_rate": 1.2615727078028296e-05, + "loss": 0.1847, + "step": 665300 + }, + { + "epoch": 3.74, + "learning_rate": 1.2610105847765843e-05, + "loss": 0.1816, + "step": 665400 + }, + { + "epoch": 3.74, + "learning_rate": 1.2604484617503387e-05, + "loss": 0.1794, + "step": 665500 + }, + { + "epoch": 3.74, + "learning_rate": 1.2598863387240933e-05, + "loss": 0.18, + "step": 665600 + }, + { + "epoch": 3.74, + "learning_rate": 1.2593242156978476e-05, + "loss": 0.1828, + "step": 665700 + }, + { + "epoch": 3.74, + "learning_rate": 1.2587620926716021e-05, + "loss": 0.1786, + "step": 665800 + }, + { + "epoch": 3.74, + "learning_rate": 1.2581999696453567e-05, + "loss": 0.1775, + "step": 665900 + }, + { + "epoch": 3.74, + "learning_rate": 1.257637846619111e-05, + "loss": 0.1829, + "step": 666000 + }, + { + "epoch": 3.74, + "learning_rate": 1.2570813448231281e-05, + "loss": 0.1769, + "step": 666100 + }, + { + "epoch": 3.74, + "learning_rate": 1.2565192217968824e-05, + "loss": 0.1783, + "step": 666200 + }, + { + "epoch": 3.75, + "learning_rate": 1.255957098770637e-05, + "loss": 0.1839, + "step": 666300 + }, + { + "epoch": 3.75, + "learning_rate": 1.2553949757443915e-05, + "loss": 0.1805, + "step": 666400 + }, + { + "epoch": 3.75, + "learning_rate": 1.2548328527181461e-05, + "loss": 0.1805, + "step": 666500 + }, + { + "epoch": 3.75, + "learning_rate": 1.2542707296919004e-05, + "loss": 0.1796, + "step": 666600 + }, + { + "epoch": 3.75, + "learning_rate": 1.253708606665655e-05, + "loss": 0.1813, + "step": 666700 + }, + { + "epoch": 3.75, + "learning_rate": 1.2531464836394093e-05, + "loss": 0.1818, + "step": 666800 + }, + { + "epoch": 3.75, + "learning_rate": 1.2525843606131638e-05, + "loss": 0.178, + "step": 666900 + }, + { + "epoch": 3.75, + "learning_rate": 1.2520222375869184e-05, + "loss": 0.1862, + "step": 667000 + }, + { + "epoch": 3.75, + "learning_rate": 1.2514601145606727e-05, + "loss": 0.1829, + "step": 667100 + }, + { + "epoch": 3.75, + "learning_rate": 1.2508979915344273e-05, + "loss": 0.1813, + "step": 667200 + }, + { + "epoch": 3.75, + "learning_rate": 1.2503358685081818e-05, + "loss": 0.1842, + "step": 667300 + }, + { + "epoch": 3.75, + "learning_rate": 1.2497737454819362e-05, + "loss": 0.1852, + "step": 667400 + }, + { + "epoch": 3.75, + "learning_rate": 1.2492116224556907e-05, + "loss": 0.1835, + "step": 667500 + }, + { + "epoch": 3.75, + "learning_rate": 1.2486494994294452e-05, + "loss": 0.1859, + "step": 667600 + }, + { + "epoch": 3.75, + "learning_rate": 1.2480873764031998e-05, + "loss": 0.1781, + "step": 667700 + }, + { + "epoch": 3.75, + "learning_rate": 1.247525253376954e-05, + "loss": 0.1814, + "step": 667800 + }, + { + "epoch": 3.75, + "learning_rate": 1.2469631303507085e-05, + "loss": 0.1854, + "step": 667900 + }, + { + "epoch": 3.75, + "learning_rate": 1.2464010073244632e-05, + "loss": 0.1767, + "step": 668000 + }, + { + "epoch": 3.76, + "learning_rate": 1.2458388842982176e-05, + "loss": 0.1767, + "step": 668100 + }, + { + "epoch": 3.76, + "learning_rate": 1.245276761271972e-05, + "loss": 0.1759, + "step": 668200 + }, + { + "epoch": 3.76, + "learning_rate": 1.2447146382457265e-05, + "loss": 0.1723, + "step": 668300 + }, + { + "epoch": 3.76, + "learning_rate": 1.244152515219481e-05, + "loss": 0.1847, + "step": 668400 + }, + { + "epoch": 3.76, + "learning_rate": 1.2435903921932355e-05, + "loss": 0.1832, + "step": 668500 + }, + { + "epoch": 3.76, + "learning_rate": 1.2430282691669899e-05, + "loss": 0.1781, + "step": 668600 + }, + { + "epoch": 3.76, + "learning_rate": 1.2424661461407444e-05, + "loss": 0.1791, + "step": 668700 + }, + { + "epoch": 3.76, + "learning_rate": 1.2419040231144988e-05, + "loss": 0.1838, + "step": 668800 + }, + { + "epoch": 3.76, + "learning_rate": 1.2413419000882535e-05, + "loss": 0.1792, + "step": 668900 + }, + { + "epoch": 3.76, + "learning_rate": 1.2407797770620079e-05, + "loss": 0.1797, + "step": 669000 + }, + { + "epoch": 3.76, + "learning_rate": 1.2402176540357622e-05, + "loss": 0.185, + "step": 669100 + }, + { + "epoch": 3.76, + "learning_rate": 1.2396555310095168e-05, + "loss": 0.186, + "step": 669200 + }, + { + "epoch": 3.76, + "learning_rate": 1.2390934079832713e-05, + "loss": 0.1848, + "step": 669300 + }, + { + "epoch": 3.76, + "learning_rate": 1.2385312849570257e-05, + "loss": 0.1759, + "step": 669400 + }, + { + "epoch": 3.76, + "learning_rate": 1.2379691619307802e-05, + "loss": 0.1785, + "step": 669500 + }, + { + "epoch": 3.76, + "learning_rate": 1.2374070389045347e-05, + "loss": 0.1811, + "step": 669600 + }, + { + "epoch": 3.76, + "learning_rate": 1.2368449158782893e-05, + "loss": 0.1765, + "step": 669700 + }, + { + "epoch": 3.77, + "learning_rate": 1.2362827928520436e-05, + "loss": 0.181, + "step": 669800 + }, + { + "epoch": 3.77, + "learning_rate": 1.235720669825798e-05, + "loss": 0.1808, + "step": 669900 + }, + { + "epoch": 3.77, + "learning_rate": 1.2351585467995527e-05, + "loss": 0.1762, + "step": 670000 + }, + { + "epoch": 3.77, + "learning_rate": 1.2345964237733071e-05, + "loss": 0.1828, + "step": 670100 + }, + { + "epoch": 3.77, + "learning_rate": 1.2340343007470616e-05, + "loss": 0.1805, + "step": 670200 + }, + { + "epoch": 3.77, + "learning_rate": 1.233472177720816e-05, + "loss": 0.1771, + "step": 670300 + }, + { + "epoch": 3.77, + "learning_rate": 1.2329100546945705e-05, + "loss": 0.1836, + "step": 670400 + }, + { + "epoch": 3.77, + "learning_rate": 1.232347931668325e-05, + "loss": 0.1794, + "step": 670500 + }, + { + "epoch": 3.77, + "learning_rate": 1.2317858086420794e-05, + "loss": 0.1851, + "step": 670600 + }, + { + "epoch": 3.77, + "learning_rate": 1.2312236856158339e-05, + "loss": 0.1836, + "step": 670700 + }, + { + "epoch": 3.77, + "learning_rate": 1.2306615625895885e-05, + "loss": 0.1841, + "step": 670800 + }, + { + "epoch": 3.77, + "learning_rate": 1.230099439563343e-05, + "loss": 0.1821, + "step": 670900 + }, + { + "epoch": 3.77, + "learning_rate": 1.2295373165370974e-05, + "loss": 0.1812, + "step": 671000 + }, + { + "epoch": 3.77, + "learning_rate": 1.2289751935108519e-05, + "loss": 0.1798, + "step": 671100 + }, + { + "epoch": 3.77, + "learning_rate": 1.2284130704846063e-05, + "loss": 0.1766, + "step": 671200 + }, + { + "epoch": 3.77, + "learning_rate": 1.2278509474583608e-05, + "loss": 0.1819, + "step": 671300 + }, + { + "epoch": 3.77, + "learning_rate": 1.2272888244321153e-05, + "loss": 0.1791, + "step": 671400 + }, + { + "epoch": 3.77, + "learning_rate": 1.2267267014058697e-05, + "loss": 0.1842, + "step": 671500 + }, + { + "epoch": 3.78, + "learning_rate": 1.2261645783796243e-05, + "loss": 0.1825, + "step": 671600 + }, + { + "epoch": 3.78, + "learning_rate": 1.2256024553533788e-05, + "loss": 0.1804, + "step": 671700 + }, + { + "epoch": 3.78, + "learning_rate": 1.2250403323271331e-05, + "loss": 0.1809, + "step": 671800 + }, + { + "epoch": 3.78, + "learning_rate": 1.2244782093008875e-05, + "loss": 0.1771, + "step": 671900 + }, + { + "epoch": 3.78, + "learning_rate": 1.2239160862746422e-05, + "loss": 0.1816, + "step": 672000 + }, + { + "epoch": 3.78, + "learning_rate": 1.2233539632483966e-05, + "loss": 0.1752, + "step": 672100 + }, + { + "epoch": 3.78, + "learning_rate": 1.2227918402221511e-05, + "loss": 0.187, + "step": 672200 + }, + { + "epoch": 3.78, + "learning_rate": 1.2222297171959055e-05, + "loss": 0.1813, + "step": 672300 + }, + { + "epoch": 3.78, + "learning_rate": 1.2216675941696602e-05, + "loss": 0.1802, + "step": 672400 + }, + { + "epoch": 3.78, + "learning_rate": 1.2211054711434145e-05, + "loss": 0.1756, + "step": 672500 + }, + { + "epoch": 3.78, + "learning_rate": 1.220543348117169e-05, + "loss": 0.1743, + "step": 672600 + }, + { + "epoch": 3.78, + "learning_rate": 1.2199812250909234e-05, + "loss": 0.1813, + "step": 672700 + }, + { + "epoch": 3.78, + "learning_rate": 1.219419102064678e-05, + "loss": 0.1763, + "step": 672800 + }, + { + "epoch": 3.78, + "learning_rate": 1.2188569790384325e-05, + "loss": 0.1825, + "step": 672900 + }, + { + "epoch": 3.78, + "learning_rate": 1.218294856012187e-05, + "loss": 0.1732, + "step": 673000 + }, + { + "epoch": 3.78, + "learning_rate": 1.2177327329859414e-05, + "loss": 0.1804, + "step": 673100 + }, + { + "epoch": 3.78, + "learning_rate": 1.2171706099596958e-05, + "loss": 0.1859, + "step": 673200 + }, + { + "epoch": 3.78, + "learning_rate": 1.2166084869334503e-05, + "loss": 0.1742, + "step": 673300 + }, + { + "epoch": 3.79, + "learning_rate": 1.2160463639072048e-05, + "loss": 0.1786, + "step": 673400 + }, + { + "epoch": 3.79, + "learning_rate": 1.2154898621112219e-05, + "loss": 0.1852, + "step": 673500 + }, + { + "epoch": 3.79, + "learning_rate": 1.2149277390849761e-05, + "loss": 0.1816, + "step": 673600 + }, + { + "epoch": 3.79, + "learning_rate": 1.2143656160587306e-05, + "loss": 0.1838, + "step": 673700 + }, + { + "epoch": 3.79, + "learning_rate": 1.213803493032485e-05, + "loss": 0.1808, + "step": 673800 + }, + { + "epoch": 3.79, + "learning_rate": 1.2132413700062397e-05, + "loss": 0.1811, + "step": 673900 + }, + { + "epoch": 3.79, + "learning_rate": 1.2126792469799941e-05, + "loss": 0.1763, + "step": 674000 + }, + { + "epoch": 3.79, + "learning_rate": 1.2121171239537486e-05, + "loss": 0.1798, + "step": 674100 + }, + { + "epoch": 3.79, + "learning_rate": 1.211555000927503e-05, + "loss": 0.1753, + "step": 674200 + }, + { + "epoch": 3.79, + "learning_rate": 1.2109928779012575e-05, + "loss": 0.1818, + "step": 674300 + }, + { + "epoch": 3.79, + "learning_rate": 1.210430754875012e-05, + "loss": 0.176, + "step": 674400 + }, + { + "epoch": 3.79, + "learning_rate": 1.2098686318487664e-05, + "loss": 0.1789, + "step": 674500 + }, + { + "epoch": 3.79, + "learning_rate": 1.2093065088225209e-05, + "loss": 0.177, + "step": 674600 + }, + { + "epoch": 3.79, + "learning_rate": 1.2087443857962755e-05, + "loss": 0.1781, + "step": 674700 + }, + { + "epoch": 3.79, + "learning_rate": 1.20818226277003e-05, + "loss": 0.1769, + "step": 674800 + }, + { + "epoch": 3.79, + "learning_rate": 1.2076201397437843e-05, + "loss": 0.1757, + "step": 674900 + }, + { + "epoch": 3.79, + "learning_rate": 1.2070580167175389e-05, + "loss": 0.1786, + "step": 675000 + }, + { + "epoch": 3.79, + "learning_rate": 1.2064958936912934e-05, + "loss": 0.1823, + "step": 675100 + }, + { + "epoch": 3.8, + "learning_rate": 1.2059337706650478e-05, + "loss": 0.1852, + "step": 675200 + }, + { + "epoch": 3.8, + "learning_rate": 1.2053716476388023e-05, + "loss": 0.1768, + "step": 675300 + }, + { + "epoch": 3.8, + "learning_rate": 1.2048095246125567e-05, + "loss": 0.1802, + "step": 675400 + }, + { + "epoch": 3.8, + "learning_rate": 1.2042474015863114e-05, + "loss": 0.179, + "step": 675500 + }, + { + "epoch": 3.8, + "learning_rate": 1.2036852785600657e-05, + "loss": 0.1771, + "step": 675600 + }, + { + "epoch": 3.8, + "learning_rate": 1.2031231555338201e-05, + "loss": 0.1778, + "step": 675700 + }, + { + "epoch": 3.8, + "learning_rate": 1.2025666537378372e-05, + "loss": 0.1774, + "step": 675800 + }, + { + "epoch": 3.8, + "learning_rate": 1.2020045307115917e-05, + "loss": 0.1808, + "step": 675900 + }, + { + "epoch": 3.8, + "learning_rate": 1.201442407685346e-05, + "loss": 0.1832, + "step": 676000 + }, + { + "epoch": 3.8, + "learning_rate": 1.2008802846591006e-05, + "loss": 0.1843, + "step": 676100 + }, + { + "epoch": 3.8, + "learning_rate": 1.200318161632855e-05, + "loss": 0.185, + "step": 676200 + }, + { + "epoch": 3.8, + "learning_rate": 1.1997560386066095e-05, + "loss": 0.1803, + "step": 676300 + }, + { + "epoch": 3.8, + "learning_rate": 1.1991995368106264e-05, + "loss": 0.1821, + "step": 676400 + }, + { + "epoch": 3.8, + "learning_rate": 1.1986374137843809e-05, + "loss": 0.1825, + "step": 676500 + }, + { + "epoch": 3.8, + "learning_rate": 1.1980752907581354e-05, + "loss": 0.1769, + "step": 676600 + }, + { + "epoch": 3.8, + "learning_rate": 1.1975131677318898e-05, + "loss": 0.1832, + "step": 676700 + }, + { + "epoch": 3.8, + "learning_rate": 1.1969510447056444e-05, + "loss": 0.1786, + "step": 676800 + }, + { + "epoch": 3.81, + "learning_rate": 1.1963889216793989e-05, + "loss": 0.1828, + "step": 676900 + }, + { + "epoch": 3.81, + "learning_rate": 1.1958267986531532e-05, + "loss": 0.1871, + "step": 677000 + }, + { + "epoch": 3.81, + "learning_rate": 1.1952646756269076e-05, + "loss": 0.1818, + "step": 677100 + }, + { + "epoch": 3.81, + "learning_rate": 1.1947025526006623e-05, + "loss": 0.1827, + "step": 677200 + }, + { + "epoch": 3.81, + "learning_rate": 1.1941404295744167e-05, + "loss": 0.1831, + "step": 677300 + }, + { + "epoch": 3.81, + "learning_rate": 1.1935783065481712e-05, + "loss": 0.1798, + "step": 677400 + }, + { + "epoch": 3.81, + "learning_rate": 1.1930161835219256e-05, + "loss": 0.1763, + "step": 677500 + }, + { + "epoch": 3.81, + "learning_rate": 1.1924540604956803e-05, + "loss": 0.1863, + "step": 677600 + }, + { + "epoch": 3.81, + "learning_rate": 1.1918919374694346e-05, + "loss": 0.1835, + "step": 677700 + }, + { + "epoch": 3.81, + "learning_rate": 1.191329814443189e-05, + "loss": 0.1767, + "step": 677800 + }, + { + "epoch": 3.81, + "learning_rate": 1.1907676914169435e-05, + "loss": 0.1781, + "step": 677900 + }, + { + "epoch": 3.81, + "learning_rate": 1.1902055683906981e-05, + "loss": 0.1819, + "step": 678000 + }, + { + "epoch": 3.81, + "learning_rate": 1.1896434453644526e-05, + "loss": 0.1842, + "step": 678100 + }, + { + "epoch": 3.81, + "learning_rate": 1.189081322338207e-05, + "loss": 0.1797, + "step": 678200 + }, + { + "epoch": 3.81, + "learning_rate": 1.1885191993119615e-05, + "loss": 0.1755, + "step": 678300 + }, + { + "epoch": 3.81, + "learning_rate": 1.187957076285716e-05, + "loss": 0.1831, + "step": 678400 + }, + { + "epoch": 3.81, + "learning_rate": 1.1873949532594704e-05, + "loss": 0.1816, + "step": 678500 + }, + { + "epoch": 3.81, + "learning_rate": 1.1868328302332249e-05, + "loss": 0.183, + "step": 678600 + }, + { + "epoch": 3.82, + "learning_rate": 1.1862707072069793e-05, + "loss": 0.1742, + "step": 678700 + }, + { + "epoch": 3.82, + "learning_rate": 1.185708584180734e-05, + "loss": 0.1758, + "step": 678800 + }, + { + "epoch": 3.82, + "learning_rate": 1.1851464611544884e-05, + "loss": 0.1795, + "step": 678900 + }, + { + "epoch": 3.82, + "learning_rate": 1.1845843381282427e-05, + "loss": 0.181, + "step": 679000 + }, + { + "epoch": 3.82, + "learning_rate": 1.1840222151019973e-05, + "loss": 0.1774, + "step": 679100 + }, + { + "epoch": 3.82, + "learning_rate": 1.1834600920757518e-05, + "loss": 0.1787, + "step": 679200 + }, + { + "epoch": 3.82, + "learning_rate": 1.1828979690495062e-05, + "loss": 0.1753, + "step": 679300 + }, + { + "epoch": 3.82, + "learning_rate": 1.1823358460232607e-05, + "loss": 0.1791, + "step": 679400 + }, + { + "epoch": 3.82, + "learning_rate": 1.1817737229970152e-05, + "loss": 0.1697, + "step": 679500 + }, + { + "epoch": 3.82, + "learning_rate": 1.1812115999707698e-05, + "loss": 0.1772, + "step": 679600 + }, + { + "epoch": 3.82, + "learning_rate": 1.180649476944524e-05, + "loss": 0.1855, + "step": 679700 + }, + { + "epoch": 3.82, + "learning_rate": 1.1800873539182785e-05, + "loss": 0.178, + "step": 679800 + }, + { + "epoch": 3.82, + "learning_rate": 1.179525230892033e-05, + "loss": 0.1822, + "step": 679900 + }, + { + "epoch": 3.82, + "learning_rate": 1.1789631078657876e-05, + "loss": 0.1809, + "step": 680000 + }, + { + "epoch": 3.82, + "learning_rate": 1.178400984839542e-05, + "loss": 0.1776, + "step": 680100 + }, + { + "epoch": 3.82, + "learning_rate": 1.1778388618132965e-05, + "loss": 0.1816, + "step": 680200 + }, + { + "epoch": 3.82, + "learning_rate": 1.177276738787051e-05, + "loss": 0.1791, + "step": 680300 + }, + { + "epoch": 3.82, + "learning_rate": 1.176720236991068e-05, + "loss": 0.1779, + "step": 680400 + }, + { + "epoch": 3.83, + "learning_rate": 1.1761581139648224e-05, + "loss": 0.1839, + "step": 680500 + }, + { + "epoch": 3.83, + "learning_rate": 1.1755959909385768e-05, + "loss": 0.1776, + "step": 680600 + }, + { + "epoch": 3.83, + "learning_rate": 1.1750338679123315e-05, + "loss": 0.1829, + "step": 680700 + }, + { + "epoch": 3.83, + "learning_rate": 1.1744717448860858e-05, + "loss": 0.1784, + "step": 680800 + }, + { + "epoch": 3.83, + "learning_rate": 1.1739096218598402e-05, + "loss": 0.1817, + "step": 680900 + }, + { + "epoch": 3.83, + "learning_rate": 1.1733474988335948e-05, + "loss": 0.1839, + "step": 681000 + }, + { + "epoch": 3.83, + "learning_rate": 1.1727853758073493e-05, + "loss": 0.1767, + "step": 681100 + }, + { + "epoch": 3.83, + "learning_rate": 1.1722232527811038e-05, + "loss": 0.1864, + "step": 681200 + }, + { + "epoch": 3.83, + "learning_rate": 1.1716611297548582e-05, + "loss": 0.1785, + "step": 681300 + }, + { + "epoch": 3.83, + "learning_rate": 1.1710990067286127e-05, + "loss": 0.1731, + "step": 681400 + }, + { + "epoch": 3.83, + "learning_rate": 1.1705368837023671e-05, + "loss": 0.1842, + "step": 681500 + }, + { + "epoch": 3.83, + "learning_rate": 1.1699747606761216e-05, + "loss": 0.183, + "step": 681600 + }, + { + "epoch": 3.83, + "learning_rate": 1.169412637649876e-05, + "loss": 0.1864, + "step": 681700 + }, + { + "epoch": 3.83, + "learning_rate": 1.1688505146236307e-05, + "loss": 0.1798, + "step": 681800 + }, + { + "epoch": 3.83, + "learning_rate": 1.1682883915973851e-05, + "loss": 0.1789, + "step": 681900 + }, + { + "epoch": 3.83, + "learning_rate": 1.1677262685711396e-05, + "loss": 0.1782, + "step": 682000 + }, + { + "epoch": 3.83, + "learning_rate": 1.1671641455448939e-05, + "loss": 0.1814, + "step": 682100 + }, + { + "epoch": 3.83, + "learning_rate": 1.1666020225186485e-05, + "loss": 0.1767, + "step": 682200 + }, + { + "epoch": 3.84, + "learning_rate": 1.166039899492403e-05, + "loss": 0.1758, + "step": 682300 + }, + { + "epoch": 3.84, + "learning_rate": 1.1654777764661574e-05, + "loss": 0.1774, + "step": 682400 + }, + { + "epoch": 3.84, + "learning_rate": 1.1649212746701744e-05, + "loss": 0.1774, + "step": 682500 + }, + { + "epoch": 3.84, + "learning_rate": 1.1643591516439288e-05, + "loss": 0.1823, + "step": 682600 + }, + { + "epoch": 3.84, + "learning_rate": 1.1637970286176833e-05, + "loss": 0.1871, + "step": 682700 + }, + { + "epoch": 3.84, + "learning_rate": 1.1632349055914377e-05, + "loss": 0.1778, + "step": 682800 + }, + { + "epoch": 3.84, + "learning_rate": 1.1626727825651924e-05, + "loss": 0.176, + "step": 682900 + }, + { + "epoch": 3.84, + "learning_rate": 1.1621106595389468e-05, + "loss": 0.1766, + "step": 683000 + }, + { + "epoch": 3.84, + "learning_rate": 1.1615485365127013e-05, + "loss": 0.1827, + "step": 683100 + }, + { + "epoch": 3.84, + "learning_rate": 1.1609864134864556e-05, + "loss": 0.1815, + "step": 683200 + }, + { + "epoch": 3.84, + "learning_rate": 1.1604242904602102e-05, + "loss": 0.1811, + "step": 683300 + }, + { + "epoch": 3.84, + "learning_rate": 1.1598621674339647e-05, + "loss": 0.182, + "step": 683400 + }, + { + "epoch": 3.84, + "learning_rate": 1.1593000444077191e-05, + "loss": 0.1813, + "step": 683500 + }, + { + "epoch": 3.84, + "learning_rate": 1.1587379213814736e-05, + "loss": 0.1825, + "step": 683600 + }, + { + "epoch": 3.84, + "learning_rate": 1.1581757983552282e-05, + "loss": 0.1809, + "step": 683700 + }, + { + "epoch": 3.84, + "learning_rate": 1.1576136753289825e-05, + "loss": 0.1836, + "step": 683800 + }, + { + "epoch": 3.84, + "learning_rate": 1.157051552302737e-05, + "loss": 0.1777, + "step": 683900 + }, + { + "epoch": 3.84, + "learning_rate": 1.1564894292764914e-05, + "loss": 0.1793, + "step": 684000 + }, + { + "epoch": 3.85, + "learning_rate": 1.155927306250246e-05, + "loss": 0.1776, + "step": 684100 + }, + { + "epoch": 3.85, + "learning_rate": 1.1553651832240005e-05, + "loss": 0.1812, + "step": 684200 + }, + { + "epoch": 3.85, + "learning_rate": 1.154803060197755e-05, + "loss": 0.1843, + "step": 684300 + }, + { + "epoch": 3.85, + "learning_rate": 1.1542409371715094e-05, + "loss": 0.1793, + "step": 684400 + }, + { + "epoch": 3.85, + "learning_rate": 1.1536788141452639e-05, + "loss": 0.1874, + "step": 684500 + }, + { + "epoch": 3.85, + "learning_rate": 1.1531166911190183e-05, + "loss": 0.1836, + "step": 684600 + }, + { + "epoch": 3.85, + "learning_rate": 1.1525545680927728e-05, + "loss": 0.1788, + "step": 684700 + }, + { + "epoch": 3.85, + "learning_rate": 1.1519924450665272e-05, + "loss": 0.1789, + "step": 684800 + }, + { + "epoch": 3.85, + "learning_rate": 1.1514303220402819e-05, + "loss": 0.1794, + "step": 684900 + }, + { + "epoch": 3.85, + "learning_rate": 1.1508738202442986e-05, + "loss": 0.1827, + "step": 685000 + }, + { + "epoch": 3.85, + "learning_rate": 1.1503116972180533e-05, + "loss": 0.1814, + "step": 685100 + }, + { + "epoch": 3.85, + "learning_rate": 1.1497495741918077e-05, + "loss": 0.1851, + "step": 685200 + }, + { + "epoch": 3.85, + "learning_rate": 1.1491874511655622e-05, + "loss": 0.1819, + "step": 685300 + }, + { + "epoch": 3.85, + "learning_rate": 1.1486253281393166e-05, + "loss": 0.1786, + "step": 685400 + }, + { + "epoch": 3.85, + "learning_rate": 1.1480632051130711e-05, + "loss": 0.1822, + "step": 685500 + }, + { + "epoch": 3.85, + "learning_rate": 1.1475010820868255e-05, + "loss": 0.1771, + "step": 685600 + }, + { + "epoch": 3.85, + "learning_rate": 1.14693895906058e-05, + "loss": 0.1802, + "step": 685700 + }, + { + "epoch": 3.86, + "learning_rate": 1.1463768360343345e-05, + "loss": 0.1854, + "step": 685800 + }, + { + "epoch": 3.86, + "learning_rate": 1.1458147130080891e-05, + "loss": 0.1799, + "step": 685900 + }, + { + "epoch": 3.86, + "learning_rate": 1.1452525899818435e-05, + "loss": 0.1839, + "step": 686000 + }, + { + "epoch": 3.86, + "learning_rate": 1.144690466955598e-05, + "loss": 0.1815, + "step": 686100 + }, + { + "epoch": 3.86, + "learning_rate": 1.1441283439293523e-05, + "loss": 0.1765, + "step": 686200 + }, + { + "epoch": 3.86, + "learning_rate": 1.143566220903107e-05, + "loss": 0.1819, + "step": 686300 + }, + { + "epoch": 3.86, + "learning_rate": 1.1430040978768614e-05, + "loss": 0.1784, + "step": 686400 + }, + { + "epoch": 3.86, + "learning_rate": 1.1424419748506158e-05, + "loss": 0.1866, + "step": 686500 + }, + { + "epoch": 3.86, + "learning_rate": 1.1418798518243703e-05, + "loss": 0.1762, + "step": 686600 + }, + { + "epoch": 3.86, + "learning_rate": 1.141317728798125e-05, + "loss": 0.1787, + "step": 686700 + }, + { + "epoch": 3.86, + "learning_rate": 1.1407556057718794e-05, + "loss": 0.1807, + "step": 686800 + }, + { + "epoch": 3.86, + "learning_rate": 1.1401934827456337e-05, + "loss": 0.1782, + "step": 686900 + }, + { + "epoch": 3.86, + "learning_rate": 1.1396313597193881e-05, + "loss": 0.1775, + "step": 687000 + }, + { + "epoch": 3.86, + "learning_rate": 1.1390748579234052e-05, + "loss": 0.1803, + "step": 687100 + }, + { + "epoch": 3.86, + "learning_rate": 1.138518356127422e-05, + "loss": 0.1787, + "step": 687200 + }, + { + "epoch": 3.86, + "learning_rate": 1.1379562331011766e-05, + "loss": 0.1838, + "step": 687300 + }, + { + "epoch": 3.86, + "learning_rate": 1.1373997313051934e-05, + "loss": 0.1788, + "step": 687400 + }, + { + "epoch": 3.86, + "learning_rate": 1.136837608278948e-05, + "loss": 0.183, + "step": 687500 + }, + { + "epoch": 3.87, + "learning_rate": 1.1362754852527025e-05, + "loss": 0.1859, + "step": 687600 + }, + { + "epoch": 3.87, + "learning_rate": 1.135713362226457e-05, + "loss": 0.1789, + "step": 687700 + }, + { + "epoch": 3.87, + "learning_rate": 1.1351512392002114e-05, + "loss": 0.1822, + "step": 687800 + }, + { + "epoch": 3.87, + "learning_rate": 1.1345891161739658e-05, + "loss": 0.177, + "step": 687900 + }, + { + "epoch": 3.87, + "learning_rate": 1.1340269931477205e-05, + "loss": 0.1801, + "step": 688000 + }, + { + "epoch": 3.87, + "learning_rate": 1.1334648701214748e-05, + "loss": 0.1811, + "step": 688100 + }, + { + "epoch": 3.87, + "learning_rate": 1.1329027470952292e-05, + "loss": 0.1744, + "step": 688200 + }, + { + "epoch": 3.87, + "learning_rate": 1.1323406240689839e-05, + "loss": 0.175, + "step": 688300 + }, + { + "epoch": 3.87, + "learning_rate": 1.1317785010427383e-05, + "loss": 0.1799, + "step": 688400 + }, + { + "epoch": 3.87, + "learning_rate": 1.1312163780164928e-05, + "loss": 0.1786, + "step": 688500 + }, + { + "epoch": 3.87, + "learning_rate": 1.1306542549902472e-05, + "loss": 0.1806, + "step": 688600 + }, + { + "epoch": 3.87, + "learning_rate": 1.1300921319640017e-05, + "loss": 0.1725, + "step": 688700 + }, + { + "epoch": 3.87, + "learning_rate": 1.1295300089377561e-05, + "loss": 0.1743, + "step": 688800 + }, + { + "epoch": 3.87, + "learning_rate": 1.1289678859115106e-05, + "loss": 0.1767, + "step": 688900 + }, + { + "epoch": 3.87, + "learning_rate": 1.128405762885265e-05, + "loss": 0.1768, + "step": 689000 + }, + { + "epoch": 3.87, + "learning_rate": 1.1278436398590197e-05, + "loss": 0.18, + "step": 689100 + }, + { + "epoch": 3.87, + "learning_rate": 1.1272815168327741e-05, + "loss": 0.1816, + "step": 689200 + }, + { + "epoch": 3.87, + "learning_rate": 1.1267193938065286e-05, + "loss": 0.1797, + "step": 689300 + }, + { + "epoch": 3.88, + "learning_rate": 1.1261572707802829e-05, + "loss": 0.1765, + "step": 689400 + }, + { + "epoch": 3.88, + "learning_rate": 1.1255951477540375e-05, + "loss": 0.1834, + "step": 689500 + }, + { + "epoch": 3.88, + "learning_rate": 1.125033024727792e-05, + "loss": 0.1758, + "step": 689600 + }, + { + "epoch": 3.88, + "learning_rate": 1.1244709017015464e-05, + "loss": 0.1827, + "step": 689700 + }, + { + "epoch": 3.88, + "learning_rate": 1.1239087786753009e-05, + "loss": 0.1795, + "step": 689800 + }, + { + "epoch": 3.88, + "learning_rate": 1.1233466556490555e-05, + "loss": 0.1806, + "step": 689900 + }, + { + "epoch": 3.88, + "learning_rate": 1.12278453262281e-05, + "loss": 0.1778, + "step": 690000 + }, + { + "epoch": 3.88, + "learning_rate": 1.1222224095965643e-05, + "loss": 0.1812, + "step": 690100 + }, + { + "epoch": 3.88, + "learning_rate": 1.1216602865703187e-05, + "loss": 0.1839, + "step": 690200 + }, + { + "epoch": 3.88, + "learning_rate": 1.1210981635440734e-05, + "loss": 0.1776, + "step": 690300 + }, + { + "epoch": 3.88, + "learning_rate": 1.1205360405178278e-05, + "loss": 0.1749, + "step": 690400 + }, + { + "epoch": 3.88, + "learning_rate": 1.1199739174915823e-05, + "loss": 0.176, + "step": 690500 + }, + { + "epoch": 3.88, + "learning_rate": 1.1194117944653367e-05, + "loss": 0.1751, + "step": 690600 + }, + { + "epoch": 3.88, + "learning_rate": 1.1188496714390914e-05, + "loss": 0.1763, + "step": 690700 + }, + { + "epoch": 3.88, + "learning_rate": 1.1182875484128456e-05, + "loss": 0.1814, + "step": 690800 + }, + { + "epoch": 3.88, + "learning_rate": 1.1177254253866001e-05, + "loss": 0.1818, + "step": 690900 + }, + { + "epoch": 3.88, + "learning_rate": 1.1171633023603546e-05, + "loss": 0.1785, + "step": 691000 + }, + { + "epoch": 3.88, + "learning_rate": 1.1166011793341092e-05, + "loss": 0.1772, + "step": 691100 + }, + { + "epoch": 3.89, + "learning_rate": 1.1160390563078637e-05, + "loss": 0.1834, + "step": 691200 + }, + { + "epoch": 3.89, + "learning_rate": 1.1154769332816181e-05, + "loss": 0.1761, + "step": 691300 + }, + { + "epoch": 3.89, + "learning_rate": 1.1149148102553724e-05, + "loss": 0.1771, + "step": 691400 + }, + { + "epoch": 3.89, + "learning_rate": 1.114352687229127e-05, + "loss": 0.1775, + "step": 691500 + }, + { + "epoch": 3.89, + "learning_rate": 1.1137905642028815e-05, + "loss": 0.1735, + "step": 691600 + }, + { + "epoch": 3.89, + "learning_rate": 1.113228441176636e-05, + "loss": 0.1778, + "step": 691700 + }, + { + "epoch": 3.89, + "learning_rate": 1.1126663181503904e-05, + "loss": 0.1782, + "step": 691800 + }, + { + "epoch": 3.89, + "learning_rate": 1.112104195124145e-05, + "loss": 0.1799, + "step": 691900 + }, + { + "epoch": 3.89, + "learning_rate": 1.1115420720978995e-05, + "loss": 0.1817, + "step": 692000 + }, + { + "epoch": 3.89, + "learning_rate": 1.1109799490716538e-05, + "loss": 0.182, + "step": 692100 + }, + { + "epoch": 3.89, + "learning_rate": 1.1104178260454082e-05, + "loss": 0.1784, + "step": 692200 + }, + { + "epoch": 3.89, + "learning_rate": 1.1098557030191629e-05, + "loss": 0.1728, + "step": 692300 + }, + { + "epoch": 3.89, + "learning_rate": 1.1092935799929173e-05, + "loss": 0.1758, + "step": 692400 + }, + { + "epoch": 3.89, + "learning_rate": 1.1087314569666718e-05, + "loss": 0.1791, + "step": 692500 + }, + { + "epoch": 3.89, + "learning_rate": 1.1081693339404262e-05, + "loss": 0.1839, + "step": 692600 + }, + { + "epoch": 3.89, + "learning_rate": 1.1076072109141809e-05, + "loss": 0.1759, + "step": 692700 + }, + { + "epoch": 3.89, + "learning_rate": 1.1070450878879352e-05, + "loss": 0.1779, + "step": 692800 + }, + { + "epoch": 3.89, + "learning_rate": 1.1064829648616896e-05, + "loss": 0.1824, + "step": 692900 + }, + { + "epoch": 3.9, + "learning_rate": 1.105920841835444e-05, + "loss": 0.1805, + "step": 693000 + }, + { + "epoch": 3.9, + "learning_rate": 1.1053587188091987e-05, + "loss": 0.1742, + "step": 693100 + }, + { + "epoch": 3.9, + "learning_rate": 1.1047965957829532e-05, + "loss": 0.171, + "step": 693200 + }, + { + "epoch": 3.9, + "learning_rate": 1.1042344727567076e-05, + "loss": 0.1777, + "step": 693300 + }, + { + "epoch": 3.9, + "learning_rate": 1.103672349730462e-05, + "loss": 0.1762, + "step": 693400 + }, + { + "epoch": 3.9, + "learning_rate": 1.1031102267042165e-05, + "loss": 0.1792, + "step": 693500 + }, + { + "epoch": 3.9, + "learning_rate": 1.102548103677971e-05, + "loss": 0.177, + "step": 693600 + }, + { + "epoch": 3.9, + "learning_rate": 1.1019859806517254e-05, + "loss": 0.1802, + "step": 693700 + }, + { + "epoch": 3.9, + "learning_rate": 1.1014238576254799e-05, + "loss": 0.1756, + "step": 693800 + }, + { + "epoch": 3.9, + "learning_rate": 1.1008617345992345e-05, + "loss": 0.1814, + "step": 693900 + }, + { + "epoch": 3.9, + "learning_rate": 1.100299611572989e-05, + "loss": 0.1837, + "step": 694000 + }, + { + "epoch": 3.9, + "learning_rate": 1.099743109777006e-05, + "loss": 0.18, + "step": 694100 + }, + { + "epoch": 3.9, + "learning_rate": 1.0991809867507604e-05, + "loss": 0.1791, + "step": 694200 + }, + { + "epoch": 3.9, + "learning_rate": 1.0986188637245148e-05, + "loss": 0.1867, + "step": 694300 + }, + { + "epoch": 3.9, + "learning_rate": 1.0980567406982693e-05, + "loss": 0.1866, + "step": 694400 + }, + { + "epoch": 3.9, + "learning_rate": 1.0974946176720238e-05, + "loss": 0.1823, + "step": 694500 + }, + { + "epoch": 3.9, + "learning_rate": 1.0969324946457782e-05, + "loss": 0.1818, + "step": 694600 + }, + { + "epoch": 3.91, + "learning_rate": 1.0963703716195327e-05, + "loss": 0.178, + "step": 694700 + }, + { + "epoch": 3.91, + "learning_rate": 1.0958082485932871e-05, + "loss": 0.1782, + "step": 694800 + }, + { + "epoch": 3.91, + "learning_rate": 1.0952461255670418e-05, + "loss": 0.1829, + "step": 694900 + }, + { + "epoch": 3.91, + "learning_rate": 1.0946840025407962e-05, + "loss": 0.1818, + "step": 695000 + }, + { + "epoch": 3.91, + "learning_rate": 1.0941218795145507e-05, + "loss": 0.1755, + "step": 695100 + }, + { + "epoch": 3.91, + "learning_rate": 1.093559756488305e-05, + "loss": 0.1768, + "step": 695200 + }, + { + "epoch": 3.91, + "learning_rate": 1.0929976334620596e-05, + "loss": 0.179, + "step": 695300 + }, + { + "epoch": 3.91, + "learning_rate": 1.092435510435814e-05, + "loss": 0.1733, + "step": 695400 + }, + { + "epoch": 3.91, + "learning_rate": 1.0918733874095685e-05, + "loss": 0.1864, + "step": 695500 + }, + { + "epoch": 3.91, + "learning_rate": 1.091311264383323e-05, + "loss": 0.1772, + "step": 695600 + }, + { + "epoch": 3.91, + "learning_rate": 1.0907491413570774e-05, + "loss": 0.1733, + "step": 695700 + }, + { + "epoch": 3.91, + "learning_rate": 1.090187018330832e-05, + "loss": 0.1748, + "step": 695800 + }, + { + "epoch": 3.91, + "learning_rate": 1.0896248953045863e-05, + "loss": 0.1803, + "step": 695900 + }, + { + "epoch": 3.91, + "learning_rate": 1.0890627722783408e-05, + "loss": 0.1778, + "step": 696000 + }, + { + "epoch": 3.91, + "learning_rate": 1.0885006492520954e-05, + "loss": 0.1823, + "step": 696100 + }, + { + "epoch": 3.91, + "learning_rate": 1.0879385262258499e-05, + "loss": 0.1774, + "step": 696200 + }, + { + "epoch": 3.91, + "learning_rate": 1.0873764031996043e-05, + "loss": 0.1763, + "step": 696300 + }, + { + "epoch": 3.91, + "learning_rate": 1.0868142801733588e-05, + "loss": 0.1742, + "step": 696400 + }, + { + "epoch": 3.92, + "learning_rate": 1.0862521571471133e-05, + "loss": 0.1773, + "step": 696500 + }, + { + "epoch": 3.92, + "learning_rate": 1.0856900341208677e-05, + "loss": 0.1788, + "step": 696600 + }, + { + "epoch": 3.92, + "learning_rate": 1.0851279110946222e-05, + "loss": 0.18, + "step": 696700 + }, + { + "epoch": 3.92, + "learning_rate": 1.0845657880683766e-05, + "loss": 0.1772, + "step": 696800 + }, + { + "epoch": 3.92, + "learning_rate": 1.0840092862723936e-05, + "loss": 0.1823, + "step": 696900 + }, + { + "epoch": 3.92, + "learning_rate": 1.083447163246148e-05, + "loss": 0.1755, + "step": 697000 + }, + { + "epoch": 3.92, + "learning_rate": 1.0828850402199025e-05, + "loss": 0.1783, + "step": 697100 + }, + { + "epoch": 3.92, + "learning_rate": 1.0823229171936571e-05, + "loss": 0.1811, + "step": 697200 + }, + { + "epoch": 3.92, + "learning_rate": 1.0817607941674116e-05, + "loss": 0.172, + "step": 697300 + }, + { + "epoch": 3.92, + "learning_rate": 1.081198671141166e-05, + "loss": 0.1848, + "step": 697400 + }, + { + "epoch": 3.92, + "learning_rate": 1.0806365481149205e-05, + "loss": 0.1725, + "step": 697500 + }, + { + "epoch": 3.92, + "learning_rate": 1.080074425088675e-05, + "loss": 0.1773, + "step": 697600 + }, + { + "epoch": 3.92, + "learning_rate": 1.0795123020624294e-05, + "loss": 0.1772, + "step": 697700 + }, + { + "epoch": 3.92, + "learning_rate": 1.0789501790361839e-05, + "loss": 0.1785, + "step": 697800 + }, + { + "epoch": 3.92, + "learning_rate": 1.0783880560099383e-05, + "loss": 0.1795, + "step": 697900 + }, + { + "epoch": 3.92, + "learning_rate": 1.077825932983693e-05, + "loss": 0.1819, + "step": 698000 + }, + { + "epoch": 3.92, + "learning_rate": 1.0772638099574474e-05, + "loss": 0.1827, + "step": 698100 + }, + { + "epoch": 3.92, + "learning_rate": 1.0767016869312019e-05, + "loss": 0.1769, + "step": 698200 + }, + { + "epoch": 3.93, + "learning_rate": 1.0761395639049562e-05, + "loss": 0.1826, + "step": 698300 + }, + { + "epoch": 3.93, + "learning_rate": 1.0755774408787108e-05, + "loss": 0.1821, + "step": 698400 + }, + { + "epoch": 3.93, + "learning_rate": 1.0750153178524652e-05, + "loss": 0.1787, + "step": 698500 + }, + { + "epoch": 3.93, + "learning_rate": 1.0744531948262197e-05, + "loss": 0.1804, + "step": 698600 + }, + { + "epoch": 3.93, + "learning_rate": 1.0738910717999742e-05, + "loss": 0.1746, + "step": 698700 + }, + { + "epoch": 3.93, + "learning_rate": 1.0733289487737288e-05, + "loss": 0.1771, + "step": 698800 + }, + { + "epoch": 3.93, + "learning_rate": 1.072766825747483e-05, + "loss": 0.1784, + "step": 698900 + }, + { + "epoch": 3.93, + "learning_rate": 1.0722047027212375e-05, + "loss": 0.1783, + "step": 699000 + }, + { + "epoch": 3.93, + "learning_rate": 1.071642579694992e-05, + "loss": 0.1828, + "step": 699100 + }, + { + "epoch": 3.93, + "learning_rate": 1.0710804566687466e-05, + "loss": 0.1805, + "step": 699200 + }, + { + "epoch": 3.93, + "learning_rate": 1.070518333642501e-05, + "loss": 0.1802, + "step": 699300 + }, + { + "epoch": 3.93, + "learning_rate": 1.0699562106162555e-05, + "loss": 0.1755, + "step": 699400 + }, + { + "epoch": 3.93, + "learning_rate": 1.06939408759001e-05, + "loss": 0.1784, + "step": 699500 + }, + { + "epoch": 3.93, + "learning_rate": 1.0688319645637645e-05, + "loss": 0.1828, + "step": 699600 + }, + { + "epoch": 3.93, + "learning_rate": 1.0682698415375189e-05, + "loss": 0.1802, + "step": 699700 + }, + { + "epoch": 3.93, + "learning_rate": 1.0677133397415358e-05, + "loss": 0.1787, + "step": 699800 + }, + { + "epoch": 3.93, + "learning_rate": 1.0671512167152905e-05, + "loss": 0.174, + "step": 699900 + }, + { + "epoch": 3.93, + "learning_rate": 1.0665890936890448e-05, + "loss": 0.1778, + "step": 700000 + }, + { + "epoch": 3.94, + "learning_rate": 1.0660269706627992e-05, + "loss": 0.1806, + "step": 700100 + }, + { + "epoch": 3.94, + "learning_rate": 1.0654648476365538e-05, + "loss": 0.1794, + "step": 700200 + }, + { + "epoch": 3.94, + "learning_rate": 1.0649027246103083e-05, + "loss": 0.175, + "step": 700300 + }, + { + "epoch": 3.94, + "learning_rate": 1.0643406015840628e-05, + "loss": 0.1808, + "step": 700400 + }, + { + "epoch": 3.94, + "learning_rate": 1.0637784785578172e-05, + "loss": 0.1742, + "step": 700500 + }, + { + "epoch": 3.94, + "learning_rate": 1.0632163555315717e-05, + "loss": 0.1866, + "step": 700600 + }, + { + "epoch": 3.94, + "learning_rate": 1.0626542325053261e-05, + "loss": 0.1799, + "step": 700700 + }, + { + "epoch": 3.94, + "learning_rate": 1.0620921094790806e-05, + "loss": 0.1794, + "step": 700800 + }, + { + "epoch": 3.94, + "learning_rate": 1.061529986452835e-05, + "loss": 0.1798, + "step": 700900 + }, + { + "epoch": 3.94, + "learning_rate": 1.0609678634265897e-05, + "loss": 0.1798, + "step": 701000 + }, + { + "epoch": 3.94, + "learning_rate": 1.0604057404003441e-05, + "loss": 0.178, + "step": 701100 + }, + { + "epoch": 3.94, + "learning_rate": 1.0598436173740986e-05, + "loss": 0.181, + "step": 701200 + }, + { + "epoch": 3.94, + "learning_rate": 1.0592814943478529e-05, + "loss": 0.1803, + "step": 701300 + }, + { + "epoch": 3.94, + "learning_rate": 1.0587193713216075e-05, + "loss": 0.1745, + "step": 701400 + }, + { + "epoch": 3.94, + "learning_rate": 1.058157248295362e-05, + "loss": 0.1807, + "step": 701500 + }, + { + "epoch": 3.94, + "learning_rate": 1.0575951252691164e-05, + "loss": 0.1806, + "step": 701600 + }, + { + "epoch": 3.94, + "learning_rate": 1.0570330022428709e-05, + "loss": 0.1797, + "step": 701700 + }, + { + "epoch": 3.94, + "learning_rate": 1.0564765004468878e-05, + "loss": 0.1842, + "step": 701800 + }, + { + "epoch": 3.95, + "learning_rate": 1.0559143774206423e-05, + "loss": 0.1726, + "step": 701900 + }, + { + "epoch": 3.95, + "learning_rate": 1.0553522543943967e-05, + "loss": 0.1806, + "step": 702000 + }, + { + "epoch": 3.95, + "learning_rate": 1.0547901313681514e-05, + "loss": 0.1782, + "step": 702100 + }, + { + "epoch": 3.95, + "learning_rate": 1.0542280083419058e-05, + "loss": 0.1805, + "step": 702200 + }, + { + "epoch": 3.95, + "learning_rate": 1.0536658853156603e-05, + "loss": 0.1783, + "step": 702300 + }, + { + "epoch": 3.95, + "learning_rate": 1.0531037622894146e-05, + "loss": 0.1821, + "step": 702400 + }, + { + "epoch": 3.95, + "learning_rate": 1.0525416392631692e-05, + "loss": 0.172, + "step": 702500 + }, + { + "epoch": 3.95, + "learning_rate": 1.0519795162369237e-05, + "loss": 0.1769, + "step": 702600 + }, + { + "epoch": 3.95, + "learning_rate": 1.0514173932106781e-05, + "loss": 0.1739, + "step": 702700 + }, + { + "epoch": 3.95, + "learning_rate": 1.0508552701844326e-05, + "loss": 0.1787, + "step": 702800 + }, + { + "epoch": 3.95, + "learning_rate": 1.0502931471581872e-05, + "loss": 0.1738, + "step": 702900 + }, + { + "epoch": 3.95, + "learning_rate": 1.0497310241319417e-05, + "loss": 0.1793, + "step": 703000 + }, + { + "epoch": 3.95, + "learning_rate": 1.049168901105696e-05, + "loss": 0.1826, + "step": 703100 + }, + { + "epoch": 3.95, + "learning_rate": 1.0486067780794504e-05, + "loss": 0.1787, + "step": 703200 + }, + { + "epoch": 3.95, + "learning_rate": 1.048044655053205e-05, + "loss": 0.172, + "step": 703300 + }, + { + "epoch": 3.95, + "learning_rate": 1.0474825320269595e-05, + "loss": 0.1763, + "step": 703400 + }, + { + "epoch": 3.95, + "learning_rate": 1.046920409000714e-05, + "loss": 0.1804, + "step": 703500 + }, + { + "epoch": 3.96, + "learning_rate": 1.0463582859744684e-05, + "loss": 0.1749, + "step": 703600 + }, + { + "epoch": 3.96, + "learning_rate": 1.0457961629482229e-05, + "loss": 0.1816, + "step": 703700 + }, + { + "epoch": 3.96, + "learning_rate": 1.0452340399219773e-05, + "loss": 0.1683, + "step": 703800 + }, + { + "epoch": 3.96, + "learning_rate": 1.0446719168957318e-05, + "loss": 0.1819, + "step": 703900 + }, + { + "epoch": 3.96, + "learning_rate": 1.0441097938694862e-05, + "loss": 0.1731, + "step": 704000 + }, + { + "epoch": 3.96, + "learning_rate": 1.0435476708432409e-05, + "loss": 0.1762, + "step": 704100 + }, + { + "epoch": 3.96, + "learning_rate": 1.0429855478169953e-05, + "loss": 0.1764, + "step": 704200 + }, + { + "epoch": 3.96, + "learning_rate": 1.0424234247907498e-05, + "loss": 0.1777, + "step": 704300 + }, + { + "epoch": 3.96, + "learning_rate": 1.0418613017645042e-05, + "loss": 0.1758, + "step": 704400 + }, + { + "epoch": 3.96, + "learning_rate": 1.0412991787382587e-05, + "loss": 0.1811, + "step": 704500 + }, + { + "epoch": 3.96, + "learning_rate": 1.0407370557120132e-05, + "loss": 0.1757, + "step": 704600 + }, + { + "epoch": 3.96, + "learning_rate": 1.0401749326857676e-05, + "loss": 0.1722, + "step": 704700 + }, + { + "epoch": 3.96, + "learning_rate": 1.0396184308897846e-05, + "loss": 0.1725, + "step": 704800 + }, + { + "epoch": 3.96, + "learning_rate": 1.039056307863539e-05, + "loss": 0.1688, + "step": 704900 + }, + { + "epoch": 3.96, + "learning_rate": 1.0384941848372935e-05, + "loss": 0.1846, + "step": 705000 + }, + { + "epoch": 3.96, + "learning_rate": 1.0379320618110481e-05, + "loss": 0.1856, + "step": 705100 + }, + { + "epoch": 3.96, + "learning_rate": 1.0373699387848026e-05, + "loss": 0.1795, + "step": 705200 + }, + { + "epoch": 3.96, + "learning_rate": 1.036807815758557e-05, + "loss": 0.1799, + "step": 705300 + }, + { + "epoch": 3.97, + "learning_rate": 1.0362456927323115e-05, + "loss": 0.1803, + "step": 705400 + }, + { + "epoch": 3.97, + "learning_rate": 1.035683569706066e-05, + "loss": 0.1731, + "step": 705500 + }, + { + "epoch": 3.97, + "learning_rate": 1.0351214466798204e-05, + "loss": 0.1735, + "step": 705600 + }, + { + "epoch": 3.97, + "learning_rate": 1.0345593236535748e-05, + "loss": 0.1699, + "step": 705700 + }, + { + "epoch": 3.97, + "learning_rate": 1.0339972006273293e-05, + "loss": 0.1749, + "step": 705800 + }, + { + "epoch": 3.97, + "learning_rate": 1.033435077601084e-05, + "loss": 0.1791, + "step": 705900 + }, + { + "epoch": 3.97, + "learning_rate": 1.0328729545748384e-05, + "loss": 0.1835, + "step": 706000 + }, + { + "epoch": 3.97, + "learning_rate": 1.0323108315485928e-05, + "loss": 0.1793, + "step": 706100 + }, + { + "epoch": 3.97, + "learning_rate": 1.0317487085223471e-05, + "loss": 0.1828, + "step": 706200 + }, + { + "epoch": 3.97, + "learning_rate": 1.0311865854961018e-05, + "loss": 0.1738, + "step": 706300 + }, + { + "epoch": 3.97, + "learning_rate": 1.0306244624698562e-05, + "loss": 0.1733, + "step": 706400 + }, + { + "epoch": 3.97, + "learning_rate": 1.0300623394436107e-05, + "loss": 0.1815, + "step": 706500 + }, + { + "epoch": 3.97, + "learning_rate": 1.0295002164173651e-05, + "loss": 0.1797, + "step": 706600 + }, + { + "epoch": 3.97, + "learning_rate": 1.0289380933911198e-05, + "loss": 0.1778, + "step": 706700 + }, + { + "epoch": 3.97, + "learning_rate": 1.0283815915951365e-05, + "loss": 0.1801, + "step": 706800 + }, + { + "epoch": 3.97, + "learning_rate": 1.027819468568891e-05, + "loss": 0.1801, + "step": 706900 + }, + { + "epoch": 3.97, + "learning_rate": 1.0272573455426456e-05, + "loss": 0.1809, + "step": 707000 + }, + { + "epoch": 3.97, + "learning_rate": 1.0266952225164e-05, + "loss": 0.1712, + "step": 707100 + }, + { + "epoch": 3.98, + "learning_rate": 1.0261330994901544e-05, + "loss": 0.1767, + "step": 707200 + }, + { + "epoch": 3.98, + "learning_rate": 1.0255709764639088e-05, + "loss": 0.1822, + "step": 707300 + }, + { + "epoch": 3.98, + "learning_rate": 1.0250088534376635e-05, + "loss": 0.1804, + "step": 707400 + }, + { + "epoch": 3.98, + "learning_rate": 1.0244467304114179e-05, + "loss": 0.1755, + "step": 707500 + }, + { + "epoch": 3.98, + "learning_rate": 1.0238846073851724e-05, + "loss": 0.1846, + "step": 707600 + }, + { + "epoch": 3.98, + "learning_rate": 1.0233224843589268e-05, + "loss": 0.1714, + "step": 707700 + }, + { + "epoch": 3.98, + "learning_rate": 1.0227603613326815e-05, + "loss": 0.1839, + "step": 707800 + }, + { + "epoch": 3.98, + "learning_rate": 1.0221982383064357e-05, + "loss": 0.1742, + "step": 707900 + }, + { + "epoch": 3.98, + "learning_rate": 1.0216361152801902e-05, + "loss": 0.1775, + "step": 708000 + }, + { + "epoch": 3.98, + "learning_rate": 1.0210739922539447e-05, + "loss": 0.1795, + "step": 708100 + }, + { + "epoch": 3.98, + "learning_rate": 1.0205118692276993e-05, + "loss": 0.1772, + "step": 708200 + }, + { + "epoch": 3.98, + "learning_rate": 1.0199497462014537e-05, + "loss": 0.1768, + "step": 708300 + }, + { + "epoch": 3.98, + "learning_rate": 1.0193876231752082e-05, + "loss": 0.1728, + "step": 708400 + }, + { + "epoch": 3.98, + "learning_rate": 1.0188255001489627e-05, + "loss": 0.1808, + "step": 708500 + }, + { + "epoch": 3.98, + "learning_rate": 1.0182633771227171e-05, + "loss": 0.1781, + "step": 708600 + }, + { + "epoch": 3.98, + "learning_rate": 1.0177012540964716e-05, + "loss": 0.1791, + "step": 708700 + }, + { + "epoch": 3.98, + "learning_rate": 1.017139131070226e-05, + "loss": 0.1775, + "step": 708800 + }, + { + "epoch": 3.98, + "learning_rate": 1.0165770080439805e-05, + "loss": 0.1731, + "step": 708900 + }, + { + "epoch": 3.99, + "learning_rate": 1.0160148850177351e-05, + "loss": 0.1777, + "step": 709000 + }, + { + "epoch": 3.99, + "learning_rate": 1.0154583832217519e-05, + "loss": 0.1798, + "step": 709100 + }, + { + "epoch": 3.99, + "learning_rate": 1.0148962601955065e-05, + "loss": 0.1808, + "step": 709200 + }, + { + "epoch": 3.99, + "learning_rate": 1.014334137169261e-05, + "loss": 0.1773, + "step": 709300 + }, + { + "epoch": 3.99, + "learning_rate": 1.0137720141430154e-05, + "loss": 0.1758, + "step": 709400 + }, + { + "epoch": 3.99, + "learning_rate": 1.0132098911167699e-05, + "loss": 0.1788, + "step": 709500 + }, + { + "epoch": 3.99, + "learning_rate": 1.0126477680905243e-05, + "loss": 0.1761, + "step": 709600 + }, + { + "epoch": 3.99, + "learning_rate": 1.0120856450642788e-05, + "loss": 0.1781, + "step": 709700 + }, + { + "epoch": 3.99, + "learning_rate": 1.0115235220380333e-05, + "loss": 0.1767, + "step": 709800 + }, + { + "epoch": 3.99, + "learning_rate": 1.0109613990117877e-05, + "loss": 0.1795, + "step": 709900 + }, + { + "epoch": 3.99, + "learning_rate": 1.0103992759855423e-05, + "loss": 0.1751, + "step": 710000 + }, + { + "epoch": 3.99, + "learning_rate": 1.0098371529592968e-05, + "loss": 0.1771, + "step": 710100 + }, + { + "epoch": 3.99, + "learning_rate": 1.0092750299330513e-05, + "loss": 0.1807, + "step": 710200 + }, + { + "epoch": 3.99, + "learning_rate": 1.0087129069068056e-05, + "loss": 0.1744, + "step": 710300 + }, + { + "epoch": 3.99, + "learning_rate": 1.0081507838805602e-05, + "loss": 0.1836, + "step": 710400 + }, + { + "epoch": 3.99, + "learning_rate": 1.0075886608543146e-05, + "loss": 0.1787, + "step": 710500 + }, + { + "epoch": 3.99, + "learning_rate": 1.0070265378280691e-05, + "loss": 0.1766, + "step": 710600 + }, + { + "epoch": 4.0, + "learning_rate": 1.0064644148018236e-05, + "loss": 0.1799, + "step": 710700 + }, + { + "epoch": 4.0, + "learning_rate": 1.005902291775578e-05, + "loss": 0.1761, + "step": 710800 + }, + { + "epoch": 4.0, + "learning_rate": 1.0053401687493326e-05, + "loss": 0.1719, + "step": 710900 + }, + { + "epoch": 4.0, + "learning_rate": 1.004778045723087e-05, + "loss": 0.1796, + "step": 711000 + }, + { + "epoch": 4.0, + "learning_rate": 1.0042159226968414e-05, + "loss": 0.1852, + "step": 711100 + }, + { + "epoch": 4.0, + "learning_rate": 1.003653799670596e-05, + "loss": 0.1778, + "step": 711200 + }, + { + "epoch": 4.0, + "learning_rate": 1.0030916766443505e-05, + "loss": 0.1793, + "step": 711300 + }, + { + "epoch": 4.0, + "learning_rate": 1.002529553618105e-05, + "loss": 0.175, + "step": 711400 + }, + { + "epoch": 4.0, + "learning_rate": 1.0019674305918594e-05, + "loss": 0.1753, + "step": 711500 + }, + { + "epoch": 4.0, + "eval_bleu": 77.928, + "eval_cer": 2.2105, + "eval_chrF": 95.62967894411643, + "eval_gen_len": 16.776484, + "eval_loss": 0.4870626628398895, + "eval_runtime": 7213.9111, + "eval_samples_per_second": 34.655, + "eval_steps_per_second": 0.542, + "eval_wer": 12.3386, + "step": 711588 + }, + { + "epoch": 4.0, + "learning_rate": 1.0014053075656139e-05, + "loss": 0.1735, + "step": 711600 + }, + { + "epoch": 4.0, + "learning_rate": 1.0008431845393683e-05, + "loss": 0.1625, + "step": 711700 + }, + { + "epoch": 4.0, + "learning_rate": 1.0002810615131228e-05, + "loss": 0.168, + "step": 711800 + }, + { + "epoch": 4.0, + "learning_rate": 9.997189384868772e-06, + "loss": 0.1698, + "step": 711900 + }, + { + "epoch": 4.0, + "learning_rate": 9.991568154606319e-06, + "loss": 0.1631, + "step": 712000 + }, + { + "epoch": 4.0, + "learning_rate": 9.986003136646486e-06, + "loss": 0.1596, + "step": 712100 + }, + { + "epoch": 4.0, + "learning_rate": 9.98038190638403e-06, + "loss": 0.1676, + "step": 712200 + }, + { + "epoch": 4.0, + "learning_rate": 9.974760676121577e-06, + "loss": 0.1691, + "step": 712300 + }, + { + "epoch": 4.0, + "learning_rate": 9.969139445859122e-06, + "loss": 0.1642, + "step": 712400 + }, + { + "epoch": 4.01, + "learning_rate": 9.963518215596666e-06, + "loss": 0.1688, + "step": 712500 + }, + { + "epoch": 4.01, + "learning_rate": 9.95789698533421e-06, + "loss": 0.1615, + "step": 712600 + }, + { + "epoch": 4.01, + "learning_rate": 9.95233196737438e-06, + "loss": 0.1666, + "step": 712700 + }, + { + "epoch": 4.01, + "learning_rate": 9.946710737111925e-06, + "loss": 0.1659, + "step": 712800 + }, + { + "epoch": 4.01, + "learning_rate": 9.94108950684947e-06, + "loss": 0.1666, + "step": 712900 + }, + { + "epoch": 4.01, + "learning_rate": 9.935468276587016e-06, + "loss": 0.1639, + "step": 713000 + }, + { + "epoch": 4.01, + "learning_rate": 9.929847046324558e-06, + "loss": 0.1629, + "step": 713100 + }, + { + "epoch": 4.01, + "learning_rate": 9.924225816062103e-06, + "loss": 0.1649, + "step": 713200 + }, + { + "epoch": 4.01, + "learning_rate": 9.91860458579965e-06, + "loss": 0.169, + "step": 713300 + }, + { + "epoch": 4.01, + "learning_rate": 9.912983355537194e-06, + "loss": 0.1647, + "step": 713400 + }, + { + "epoch": 4.01, + "learning_rate": 9.907362125274738e-06, + "loss": 0.1611, + "step": 713500 + }, + { + "epoch": 4.01, + "learning_rate": 9.901740895012283e-06, + "loss": 0.1658, + "step": 713600 + }, + { + "epoch": 4.01, + "learning_rate": 9.896119664749828e-06, + "loss": 0.1648, + "step": 713700 + }, + { + "epoch": 4.01, + "learning_rate": 9.890498434487372e-06, + "loss": 0.166, + "step": 713800 + }, + { + "epoch": 4.01, + "learning_rate": 9.884877204224917e-06, + "loss": 0.163, + "step": 713900 + }, + { + "epoch": 4.01, + "learning_rate": 9.879255973962461e-06, + "loss": 0.1651, + "step": 714000 + }, + { + "epoch": 4.01, + "learning_rate": 9.873634743700006e-06, + "loss": 0.1687, + "step": 714100 + }, + { + "epoch": 4.01, + "learning_rate": 9.868013513437552e-06, + "loss": 0.1703, + "step": 714200 + }, + { + "epoch": 4.02, + "learning_rate": 9.862392283175097e-06, + "loss": 0.1609, + "step": 714300 + }, + { + "epoch": 4.02, + "learning_rate": 9.85677105291264e-06, + "loss": 0.1659, + "step": 714400 + }, + { + "epoch": 4.02, + "learning_rate": 9.851149822650186e-06, + "loss": 0.1691, + "step": 714500 + }, + { + "epoch": 4.02, + "learning_rate": 9.84552859238773e-06, + "loss": 0.1606, + "step": 714600 + }, + { + "epoch": 4.02, + "learning_rate": 9.839907362125275e-06, + "loss": 0.1634, + "step": 714700 + }, + { + "epoch": 4.02, + "learning_rate": 9.83428613186282e-06, + "loss": 0.1637, + "step": 714800 + }, + { + "epoch": 4.02, + "learning_rate": 9.828664901600364e-06, + "loss": 0.1639, + "step": 714900 + }, + { + "epoch": 4.02, + "learning_rate": 9.82304367133791e-06, + "loss": 0.1661, + "step": 715000 + }, + { + "epoch": 4.02, + "learning_rate": 9.817422441075453e-06, + "loss": 0.17, + "step": 715100 + }, + { + "epoch": 4.02, + "learning_rate": 9.811801210812998e-06, + "loss": 0.1704, + "step": 715200 + }, + { + "epoch": 4.02, + "learning_rate": 9.806179980550544e-06, + "loss": 0.164, + "step": 715300 + }, + { + "epoch": 4.02, + "learning_rate": 9.800558750288089e-06, + "loss": 0.1579, + "step": 715400 + }, + { + "epoch": 4.02, + "learning_rate": 9.794937520025634e-06, + "loss": 0.1713, + "step": 715500 + }, + { + "epoch": 4.02, + "learning_rate": 9.789316289763178e-06, + "loss": 0.1653, + "step": 715600 + }, + { + "epoch": 4.02, + "learning_rate": 9.783695059500723e-06, + "loss": 0.1675, + "step": 715700 + }, + { + "epoch": 4.02, + "learning_rate": 9.778073829238267e-06, + "loss": 0.1704, + "step": 715800 + }, + { + "epoch": 4.02, + "learning_rate": 9.772452598975812e-06, + "loss": 0.1669, + "step": 715900 + }, + { + "epoch": 4.02, + "learning_rate": 9.766831368713356e-06, + "loss": 0.1623, + "step": 716000 + }, + { + "epoch": 4.03, + "learning_rate": 9.761210138450903e-06, + "loss": 0.1669, + "step": 716100 + }, + { + "epoch": 4.03, + "learning_rate": 9.755588908188447e-06, + "loss": 0.1667, + "step": 716200 + }, + { + "epoch": 4.03, + "learning_rate": 9.749967677925992e-06, + "loss": 0.1597, + "step": 716300 + }, + { + "epoch": 4.03, + "learning_rate": 9.744346447663535e-06, + "loss": 0.1642, + "step": 716400 + }, + { + "epoch": 4.03, + "learning_rate": 9.738725217401081e-06, + "loss": 0.1666, + "step": 716500 + }, + { + "epoch": 4.03, + "learning_rate": 9.733103987138626e-06, + "loss": 0.1682, + "step": 716600 + }, + { + "epoch": 4.03, + "learning_rate": 9.727538969178795e-06, + "loss": 0.1664, + "step": 716700 + }, + { + "epoch": 4.03, + "learning_rate": 9.72191773891634e-06, + "loss": 0.1618, + "step": 716800 + }, + { + "epoch": 4.03, + "learning_rate": 9.716296508653884e-06, + "loss": 0.1668, + "step": 716900 + }, + { + "epoch": 4.03, + "learning_rate": 9.710675278391429e-06, + "loss": 0.1688, + "step": 717000 + }, + { + "epoch": 4.03, + "learning_rate": 9.705054048128973e-06, + "loss": 0.1648, + "step": 717100 + }, + { + "epoch": 4.03, + "learning_rate": 9.69943281786652e-06, + "loss": 0.1643, + "step": 717200 + }, + { + "epoch": 4.03, + "learning_rate": 9.693811587604064e-06, + "loss": 0.1691, + "step": 717300 + }, + { + "epoch": 4.03, + "learning_rate": 9.688190357341609e-06, + "loss": 0.1627, + "step": 717400 + }, + { + "epoch": 4.03, + "learning_rate": 9.682569127079152e-06, + "loss": 0.1597, + "step": 717500 + }, + { + "epoch": 4.03, + "learning_rate": 9.676947896816698e-06, + "loss": 0.1608, + "step": 717600 + }, + { + "epoch": 4.03, + "learning_rate": 9.671326666554242e-06, + "loss": 0.1626, + "step": 717700 + }, + { + "epoch": 4.03, + "learning_rate": 9.665705436291787e-06, + "loss": 0.1718, + "step": 717800 + }, + { + "epoch": 4.04, + "learning_rate": 9.660084206029332e-06, + "loss": 0.1598, + "step": 717900 + }, + { + "epoch": 4.04, + "learning_rate": 9.654462975766878e-06, + "loss": 0.1651, + "step": 718000 + }, + { + "epoch": 4.04, + "learning_rate": 9.648841745504422e-06, + "loss": 0.1629, + "step": 718100 + }, + { + "epoch": 4.04, + "learning_rate": 9.643220515241965e-06, + "loss": 0.1669, + "step": 718200 + }, + { + "epoch": 4.04, + "learning_rate": 9.63759928497951e-06, + "loss": 0.1638, + "step": 718300 + }, + { + "epoch": 4.04, + "learning_rate": 9.631978054717056e-06, + "loss": 0.1707, + "step": 718400 + }, + { + "epoch": 4.04, + "learning_rate": 9.6263568244546e-06, + "loss": 0.1637, + "step": 718500 + }, + { + "epoch": 4.04, + "learning_rate": 9.620735594192145e-06, + "loss": 0.1626, + "step": 718600 + }, + { + "epoch": 4.04, + "learning_rate": 9.61511436392969e-06, + "loss": 0.1628, + "step": 718700 + }, + { + "epoch": 4.04, + "learning_rate": 9.609493133667235e-06, + "loss": 0.1616, + "step": 718800 + }, + { + "epoch": 4.04, + "learning_rate": 9.60387190340478e-06, + "loss": 0.1713, + "step": 718900 + }, + { + "epoch": 4.04, + "learning_rate": 9.598250673142324e-06, + "loss": 0.1642, + "step": 719000 + }, + { + "epoch": 4.04, + "learning_rate": 9.592685655182495e-06, + "loss": 0.1652, + "step": 719100 + }, + { + "epoch": 4.04, + "learning_rate": 9.587064424920038e-06, + "loss": 0.1651, + "step": 719200 + }, + { + "epoch": 4.04, + "learning_rate": 9.581443194657582e-06, + "loss": 0.165, + "step": 719300 + }, + { + "epoch": 4.04, + "learning_rate": 9.575821964395129e-06, + "loss": 0.163, + "step": 719400 + }, + { + "epoch": 4.04, + "learning_rate": 9.570200734132673e-06, + "loss": 0.1638, + "step": 719500 + }, + { + "epoch": 4.05, + "learning_rate": 9.564579503870218e-06, + "loss": 0.1652, + "step": 719600 + }, + { + "epoch": 4.05, + "learning_rate": 9.558958273607762e-06, + "loss": 0.1597, + "step": 719700 + }, + { + "epoch": 4.05, + "learning_rate": 9.553337043345307e-06, + "loss": 0.166, + "step": 719800 + }, + { + "epoch": 4.05, + "learning_rate": 9.547715813082851e-06, + "loss": 0.1654, + "step": 719900 + }, + { + "epoch": 4.05, + "learning_rate": 9.542094582820396e-06, + "loss": 0.1678, + "step": 720000 + }, + { + "epoch": 4.05, + "learning_rate": 9.53647335255794e-06, + "loss": 0.1658, + "step": 720100 + }, + { + "epoch": 4.05, + "learning_rate": 9.530852122295487e-06, + "loss": 0.1642, + "step": 720200 + }, + { + "epoch": 4.05, + "learning_rate": 9.525230892033031e-06, + "loss": 0.1699, + "step": 720300 + }, + { + "epoch": 4.05, + "learning_rate": 9.519609661770576e-06, + "loss": 0.1604, + "step": 720400 + }, + { + "epoch": 4.05, + "learning_rate": 9.51398843150812e-06, + "loss": 0.1614, + "step": 720500 + }, + { + "epoch": 4.05, + "learning_rate": 9.508367201245665e-06, + "loss": 0.166, + "step": 720600 + }, + { + "epoch": 4.05, + "learning_rate": 9.50274597098321e-06, + "loss": 0.1673, + "step": 720700 + }, + { + "epoch": 4.05, + "learning_rate": 9.497124740720754e-06, + "loss": 0.1651, + "step": 720800 + }, + { + "epoch": 4.05, + "learning_rate": 9.491559722760925e-06, + "loss": 0.1638, + "step": 720900 + }, + { + "epoch": 4.05, + "learning_rate": 9.485938492498468e-06, + "loss": 0.1696, + "step": 721000 + }, + { + "epoch": 4.05, + "learning_rate": 9.480317262236013e-06, + "loss": 0.1672, + "step": 721100 + }, + { + "epoch": 4.05, + "learning_rate": 9.474696031973557e-06, + "loss": 0.166, + "step": 721200 + }, + { + "epoch": 4.05, + "learning_rate": 9.469074801711104e-06, + "loss": 0.1672, + "step": 721300 + }, + { + "epoch": 4.06, + "learning_rate": 9.463453571448648e-06, + "loss": 0.161, + "step": 721400 + }, + { + "epoch": 4.06, + "learning_rate": 9.457832341186193e-06, + "loss": 0.1661, + "step": 721500 + }, + { + "epoch": 4.06, + "learning_rate": 9.452211110923736e-06, + "loss": 0.1692, + "step": 721600 + }, + { + "epoch": 4.06, + "learning_rate": 9.446589880661282e-06, + "loss": 0.1661, + "step": 721700 + }, + { + "epoch": 4.06, + "learning_rate": 9.440968650398827e-06, + "loss": 0.1633, + "step": 721800 + }, + { + "epoch": 4.06, + "learning_rate": 9.435347420136371e-06, + "loss": 0.1611, + "step": 721900 + }, + { + "epoch": 4.06, + "learning_rate": 9.429726189873916e-06, + "loss": 0.1684, + "step": 722000 + }, + { + "epoch": 4.06, + "learning_rate": 9.424161171914085e-06, + "loss": 0.161, + "step": 722100 + }, + { + "epoch": 4.06, + "learning_rate": 9.41853994165163e-06, + "loss": 0.1688, + "step": 722200 + }, + { + "epoch": 4.06, + "learning_rate": 9.4129749236918e-06, + "loss": 0.1684, + "step": 722300 + }, + { + "epoch": 4.06, + "learning_rate": 9.407353693429344e-06, + "loss": 0.164, + "step": 722400 + }, + { + "epoch": 4.06, + "learning_rate": 9.401732463166888e-06, + "loss": 0.1635, + "step": 722500 + }, + { + "epoch": 4.06, + "learning_rate": 9.396111232904434e-06, + "loss": 0.1625, + "step": 722600 + }, + { + "epoch": 4.06, + "learning_rate": 9.390490002641979e-06, + "loss": 0.1628, + "step": 722700 + }, + { + "epoch": 4.06, + "learning_rate": 9.384868772379524e-06, + "loss": 0.1659, + "step": 722800 + }, + { + "epoch": 4.06, + "learning_rate": 9.379247542117068e-06, + "loss": 0.1671, + "step": 722900 + }, + { + "epoch": 4.06, + "learning_rate": 9.373626311854613e-06, + "loss": 0.1627, + "step": 723000 + }, + { + "epoch": 4.06, + "learning_rate": 9.368005081592157e-06, + "loss": 0.1653, + "step": 723100 + }, + { + "epoch": 4.07, + "learning_rate": 9.362383851329702e-06, + "loss": 0.1625, + "step": 723200 + }, + { + "epoch": 4.07, + "learning_rate": 9.356762621067247e-06, + "loss": 0.158, + "step": 723300 + }, + { + "epoch": 4.07, + "learning_rate": 9.351141390804793e-06, + "loss": 0.166, + "step": 723400 + }, + { + "epoch": 4.07, + "learning_rate": 9.345520160542337e-06, + "loss": 0.1643, + "step": 723500 + }, + { + "epoch": 4.07, + "learning_rate": 9.339898930279882e-06, + "loss": 0.1663, + "step": 723600 + }, + { + "epoch": 4.07, + "learning_rate": 9.334277700017425e-06, + "loss": 0.1676, + "step": 723700 + }, + { + "epoch": 4.07, + "learning_rate": 9.328656469754971e-06, + "loss": 0.1643, + "step": 723800 + }, + { + "epoch": 4.07, + "learning_rate": 9.323035239492516e-06, + "loss": 0.1639, + "step": 723900 + }, + { + "epoch": 4.07, + "learning_rate": 9.31741400923006e-06, + "loss": 0.1651, + "step": 724000 + }, + { + "epoch": 4.07, + "learning_rate": 9.311792778967605e-06, + "loss": 0.1682, + "step": 724100 + }, + { + "epoch": 4.07, + "learning_rate": 9.306171548705151e-06, + "loss": 0.1593, + "step": 724200 + }, + { + "epoch": 4.07, + "learning_rate": 9.300550318442696e-06, + "loss": 0.1701, + "step": 724300 + }, + { + "epoch": 4.07, + "learning_rate": 9.294929088180239e-06, + "loss": 0.1667, + "step": 724400 + }, + { + "epoch": 4.07, + "learning_rate": 9.289307857917783e-06, + "loss": 0.1645, + "step": 724500 + }, + { + "epoch": 4.07, + "learning_rate": 9.28368662765533e-06, + "loss": 0.1664, + "step": 724600 + }, + { + "epoch": 4.07, + "learning_rate": 9.278065397392874e-06, + "loss": 0.1679, + "step": 724700 + }, + { + "epoch": 4.07, + "learning_rate": 9.272444167130419e-06, + "loss": 0.167, + "step": 724800 + }, + { + "epoch": 4.07, + "learning_rate": 9.266822936867963e-06, + "loss": 0.1644, + "step": 724900 + }, + { + "epoch": 4.08, + "learning_rate": 9.26120170660551e-06, + "loss": 0.1659, + "step": 725000 + }, + { + "epoch": 4.08, + "learning_rate": 9.255580476343052e-06, + "loss": 0.1619, + "step": 725100 + }, + { + "epoch": 4.08, + "learning_rate": 9.249959246080597e-06, + "loss": 0.1639, + "step": 725200 + }, + { + "epoch": 4.08, + "learning_rate": 9.244338015818142e-06, + "loss": 0.1645, + "step": 725300 + }, + { + "epoch": 4.08, + "learning_rate": 9.238716785555688e-06, + "loss": 0.174, + "step": 725400 + }, + { + "epoch": 4.08, + "learning_rate": 9.233095555293232e-06, + "loss": 0.1677, + "step": 725500 + }, + { + "epoch": 4.08, + "learning_rate": 9.227474325030777e-06, + "loss": 0.1653, + "step": 725600 + }, + { + "epoch": 4.08, + "learning_rate": 9.221853094768322e-06, + "loss": 0.1643, + "step": 725700 + }, + { + "epoch": 4.08, + "learning_rate": 9.216231864505866e-06, + "loss": 0.1669, + "step": 725800 + }, + { + "epoch": 4.08, + "learning_rate": 9.21061063424341e-06, + "loss": 0.1629, + "step": 725900 + }, + { + "epoch": 4.08, + "learning_rate": 9.204989403980955e-06, + "loss": 0.1626, + "step": 726000 + }, + { + "epoch": 4.08, + "learning_rate": 9.1993681737185e-06, + "loss": 0.1652, + "step": 726100 + }, + { + "epoch": 4.08, + "learning_rate": 9.193746943456046e-06, + "loss": 0.1628, + "step": 726200 + }, + { + "epoch": 4.08, + "learning_rate": 9.18812571319359e-06, + "loss": 0.1673, + "step": 726300 + }, + { + "epoch": 4.08, + "learning_rate": 9.182504482931134e-06, + "loss": 0.1612, + "step": 726400 + }, + { + "epoch": 4.08, + "learning_rate": 9.176883252668678e-06, + "loss": 0.1706, + "step": 726500 + }, + { + "epoch": 4.08, + "learning_rate": 9.171262022406225e-06, + "loss": 0.1701, + "step": 726600 + }, + { + "epoch": 4.08, + "learning_rate": 9.165640792143769e-06, + "loss": 0.1678, + "step": 726700 + }, + { + "epoch": 4.09, + "learning_rate": 9.160019561881314e-06, + "loss": 0.1643, + "step": 726800 + }, + { + "epoch": 4.09, + "learning_rate": 9.154398331618858e-06, + "loss": 0.1623, + "step": 726900 + }, + { + "epoch": 4.09, + "learning_rate": 9.148777101356405e-06, + "loss": 0.1665, + "step": 727000 + }, + { + "epoch": 4.09, + "learning_rate": 9.143155871093947e-06, + "loss": 0.1602, + "step": 727100 + }, + { + "epoch": 4.09, + "learning_rate": 9.137534640831492e-06, + "loss": 0.1647, + "step": 727200 + }, + { + "epoch": 4.09, + "learning_rate": 9.131913410569037e-06, + "loss": 0.1681, + "step": 727300 + }, + { + "epoch": 4.09, + "learning_rate": 9.126292180306583e-06, + "loss": 0.1651, + "step": 727400 + }, + { + "epoch": 4.09, + "learning_rate": 9.120670950044128e-06, + "loss": 0.1651, + "step": 727500 + }, + { + "epoch": 4.09, + "learning_rate": 9.115049719781672e-06, + "loss": 0.1653, + "step": 727600 + }, + { + "epoch": 4.09, + "learning_rate": 9.109428489519217e-06, + "loss": 0.1644, + "step": 727700 + }, + { + "epoch": 4.09, + "learning_rate": 9.103807259256761e-06, + "loss": 0.1649, + "step": 727800 + }, + { + "epoch": 4.09, + "learning_rate": 9.098186028994306e-06, + "loss": 0.1666, + "step": 727900 + }, + { + "epoch": 4.09, + "learning_rate": 9.09256479873185e-06, + "loss": 0.1678, + "step": 728000 + }, + { + "epoch": 4.09, + "learning_rate": 9.086943568469395e-06, + "loss": 0.1622, + "step": 728100 + }, + { + "epoch": 4.09, + "learning_rate": 9.081322338206941e-06, + "loss": 0.1643, + "step": 728200 + }, + { + "epoch": 4.09, + "learning_rate": 9.075701107944486e-06, + "loss": 0.1699, + "step": 728300 + }, + { + "epoch": 4.09, + "learning_rate": 9.07007987768203e-06, + "loss": 0.1691, + "step": 728400 + }, + { + "epoch": 4.1, + "learning_rate": 9.064458647419573e-06, + "loss": 0.1625, + "step": 728500 + }, + { + "epoch": 4.1, + "learning_rate": 9.05883741715712e-06, + "loss": 0.166, + "step": 728600 + }, + { + "epoch": 4.1, + "learning_rate": 9.053216186894664e-06, + "loss": 0.1658, + "step": 728700 + }, + { + "epoch": 4.1, + "learning_rate": 9.047594956632209e-06, + "loss": 0.1652, + "step": 728800 + }, + { + "epoch": 4.1, + "learning_rate": 9.041973726369753e-06, + "loss": 0.1645, + "step": 728900 + }, + { + "epoch": 4.1, + "learning_rate": 9.0363524961073e-06, + "loss": 0.1636, + "step": 729000 + }, + { + "epoch": 4.1, + "learning_rate": 9.030731265844843e-06, + "loss": 0.1635, + "step": 729100 + }, + { + "epoch": 4.1, + "learning_rate": 9.025110035582387e-06, + "loss": 0.1623, + "step": 729200 + }, + { + "epoch": 4.1, + "learning_rate": 9.019488805319932e-06, + "loss": 0.1662, + "step": 729300 + }, + { + "epoch": 4.1, + "learning_rate": 9.013867575057478e-06, + "loss": 0.1657, + "step": 729400 + }, + { + "epoch": 4.1, + "learning_rate": 9.008246344795023e-06, + "loss": 0.1618, + "step": 729500 + }, + { + "epoch": 4.1, + "learning_rate": 9.002681326835192e-06, + "loss": 0.164, + "step": 729600 + }, + { + "epoch": 4.1, + "learning_rate": 8.997116308875361e-06, + "loss": 0.1646, + "step": 729700 + }, + { + "epoch": 4.1, + "learning_rate": 8.991495078612906e-06, + "loss": 0.1628, + "step": 729800 + }, + { + "epoch": 4.1, + "learning_rate": 8.98587384835045e-06, + "loss": 0.1653, + "step": 729900 + }, + { + "epoch": 4.1, + "learning_rate": 8.980252618087995e-06, + "loss": 0.1619, + "step": 730000 + }, + { + "epoch": 4.1, + "learning_rate": 8.97463138782554e-06, + "loss": 0.164, + "step": 730100 + }, + { + "epoch": 4.1, + "learning_rate": 8.969010157563084e-06, + "loss": 0.1654, + "step": 730200 + }, + { + "epoch": 4.11, + "learning_rate": 8.96338892730063e-06, + "loss": 0.1656, + "step": 730300 + }, + { + "epoch": 4.11, + "learning_rate": 8.957767697038175e-06, + "loss": 0.166, + "step": 730400 + }, + { + "epoch": 4.11, + "learning_rate": 8.95214646677572e-06, + "loss": 0.1669, + "step": 730500 + }, + { + "epoch": 4.11, + "learning_rate": 8.946525236513262e-06, + "loss": 0.1641, + "step": 730600 + }, + { + "epoch": 4.11, + "learning_rate": 8.940904006250809e-06, + "loss": 0.1675, + "step": 730700 + }, + { + "epoch": 4.11, + "learning_rate": 8.935282775988353e-06, + "loss": 0.1595, + "step": 730800 + }, + { + "epoch": 4.11, + "learning_rate": 8.929661545725898e-06, + "loss": 0.1701, + "step": 730900 + }, + { + "epoch": 4.11, + "learning_rate": 8.924040315463442e-06, + "loss": 0.1604, + "step": 731000 + }, + { + "epoch": 4.11, + "learning_rate": 8.918419085200989e-06, + "loss": 0.1678, + "step": 731100 + }, + { + "epoch": 4.11, + "learning_rate": 8.912797854938532e-06, + "loss": 0.1666, + "step": 731200 + }, + { + "epoch": 4.11, + "learning_rate": 8.907176624676076e-06, + "loss": 0.1615, + "step": 731300 + }, + { + "epoch": 4.11, + "learning_rate": 8.90155539441362e-06, + "loss": 0.1682, + "step": 731400 + }, + { + "epoch": 4.11, + "learning_rate": 8.895934164151167e-06, + "loss": 0.1646, + "step": 731500 + }, + { + "epoch": 4.11, + "learning_rate": 8.890312933888712e-06, + "loss": 0.162, + "step": 731600 + }, + { + "epoch": 4.11, + "learning_rate": 8.884691703626256e-06, + "loss": 0.1649, + "step": 731700 + }, + { + "epoch": 4.11, + "learning_rate": 8.8790704733638e-06, + "loss": 0.1664, + "step": 731800 + }, + { + "epoch": 4.11, + "learning_rate": 8.873449243101345e-06, + "loss": 0.1665, + "step": 731900 + }, + { + "epoch": 4.11, + "learning_rate": 8.86782801283889e-06, + "loss": 0.1691, + "step": 732000 + }, + { + "epoch": 4.12, + "learning_rate": 8.862206782576435e-06, + "loss": 0.1638, + "step": 732100 + }, + { + "epoch": 4.12, + "learning_rate": 8.85658555231398e-06, + "loss": 0.1667, + "step": 732200 + }, + { + "epoch": 4.12, + "learning_rate": 8.850964322051525e-06, + "loss": 0.1609, + "step": 732300 + }, + { + "epoch": 4.12, + "learning_rate": 8.84534309178907e-06, + "loss": 0.1634, + "step": 732400 + }, + { + "epoch": 4.12, + "learning_rate": 8.839721861526615e-06, + "loss": 0.1651, + "step": 732500 + }, + { + "epoch": 4.12, + "learning_rate": 8.834100631264158e-06, + "loss": 0.1678, + "step": 732600 + }, + { + "epoch": 4.12, + "learning_rate": 8.828479401001704e-06, + "loss": 0.1704, + "step": 732700 + }, + { + "epoch": 4.12, + "learning_rate": 8.822858170739248e-06, + "loss": 0.1636, + "step": 732800 + }, + { + "epoch": 4.12, + "learning_rate": 8.817236940476793e-06, + "loss": 0.1708, + "step": 732900 + }, + { + "epoch": 4.12, + "learning_rate": 8.811615710214338e-06, + "loss": 0.1621, + "step": 733000 + }, + { + "epoch": 4.12, + "learning_rate": 8.805994479951884e-06, + "loss": 0.17, + "step": 733100 + }, + { + "epoch": 4.12, + "learning_rate": 8.800373249689428e-06, + "loss": 0.1684, + "step": 733200 + }, + { + "epoch": 4.12, + "learning_rate": 8.794752019426971e-06, + "loss": 0.1601, + "step": 733300 + }, + { + "epoch": 4.12, + "learning_rate": 8.789130789164516e-06, + "loss": 0.169, + "step": 733400 + }, + { + "epoch": 4.12, + "learning_rate": 8.783509558902062e-06, + "loss": 0.1656, + "step": 733500 + }, + { + "epoch": 4.12, + "learning_rate": 8.777888328639607e-06, + "loss": 0.1671, + "step": 733600 + }, + { + "epoch": 4.12, + "learning_rate": 8.772267098377151e-06, + "loss": 0.1682, + "step": 733700 + }, + { + "epoch": 4.12, + "learning_rate": 8.766645868114696e-06, + "loss": 0.1657, + "step": 733800 + }, + { + "epoch": 4.13, + "learning_rate": 8.76102463785224e-06, + "loss": 0.1661, + "step": 733900 + }, + { + "epoch": 4.13, + "learning_rate": 8.755403407589785e-06, + "loss": 0.1622, + "step": 734000 + }, + { + "epoch": 4.13, + "learning_rate": 8.74978217732733e-06, + "loss": 0.1678, + "step": 734100 + }, + { + "epoch": 4.13, + "learning_rate": 8.744160947064874e-06, + "loss": 0.1639, + "step": 734200 + }, + { + "epoch": 4.13, + "learning_rate": 8.73853971680242e-06, + "loss": 0.1655, + "step": 734300 + }, + { + "epoch": 4.13, + "learning_rate": 8.732918486539965e-06, + "loss": 0.1651, + "step": 734400 + }, + { + "epoch": 4.13, + "learning_rate": 8.72729725627751e-06, + "loss": 0.1712, + "step": 734500 + }, + { + "epoch": 4.13, + "learning_rate": 8.721676026015054e-06, + "loss": 0.1676, + "step": 734600 + }, + { + "epoch": 4.13, + "learning_rate": 8.716054795752599e-06, + "loss": 0.1637, + "step": 734700 + }, + { + "epoch": 4.13, + "learning_rate": 8.710433565490143e-06, + "loss": 0.1689, + "step": 734800 + }, + { + "epoch": 4.13, + "learning_rate": 8.704812335227688e-06, + "loss": 0.1701, + "step": 734900 + }, + { + "epoch": 4.13, + "learning_rate": 8.699191104965233e-06, + "loss": 0.1683, + "step": 735000 + }, + { + "epoch": 4.13, + "learning_rate": 8.693569874702779e-06, + "loss": 0.1639, + "step": 735100 + }, + { + "epoch": 4.13, + "learning_rate": 8.687948644440323e-06, + "loss": 0.171, + "step": 735200 + }, + { + "epoch": 4.13, + "learning_rate": 8.682383626480493e-06, + "loss": 0.1678, + "step": 735300 + }, + { + "epoch": 4.13, + "learning_rate": 8.676762396218037e-06, + "loss": 0.1673, + "step": 735400 + }, + { + "epoch": 4.13, + "learning_rate": 8.671141165955582e-06, + "loss": 0.162, + "step": 735500 + }, + { + "epoch": 4.13, + "learning_rate": 8.665519935693127e-06, + "loss": 0.1668, + "step": 735600 + }, + { + "epoch": 4.14, + "learning_rate": 8.659898705430671e-06, + "loss": 0.1612, + "step": 735700 + }, + { + "epoch": 4.14, + "learning_rate": 8.654277475168216e-06, + "loss": 0.1662, + "step": 735800 + }, + { + "epoch": 4.14, + "learning_rate": 8.64865624490576e-06, + "loss": 0.1671, + "step": 735900 + }, + { + "epoch": 4.14, + "learning_rate": 8.643035014643305e-06, + "loss": 0.1662, + "step": 736000 + }, + { + "epoch": 4.14, + "learning_rate": 8.637413784380851e-06, + "loss": 0.1606, + "step": 736100 + }, + { + "epoch": 4.14, + "learning_rate": 8.631792554118396e-06, + "loss": 0.1657, + "step": 736200 + }, + { + "epoch": 4.14, + "learning_rate": 8.626171323855939e-06, + "loss": 0.1652, + "step": 736300 + }, + { + "epoch": 4.14, + "learning_rate": 8.620550093593483e-06, + "loss": 0.1635, + "step": 736400 + }, + { + "epoch": 4.14, + "learning_rate": 8.61492886333103e-06, + "loss": 0.1684, + "step": 736500 + }, + { + "epoch": 4.14, + "learning_rate": 8.609307633068574e-06, + "loss": 0.1651, + "step": 736600 + }, + { + "epoch": 4.14, + "learning_rate": 8.603686402806119e-06, + "loss": 0.165, + "step": 736700 + }, + { + "epoch": 4.14, + "learning_rate": 8.598065172543663e-06, + "loss": 0.1623, + "step": 736800 + }, + { + "epoch": 4.14, + "learning_rate": 8.59244394228121e-06, + "loss": 0.1649, + "step": 736900 + }, + { + "epoch": 4.14, + "learning_rate": 8.586822712018752e-06, + "loss": 0.167, + "step": 737000 + }, + { + "epoch": 4.14, + "learning_rate": 8.581201481756297e-06, + "loss": 0.1623, + "step": 737100 + }, + { + "epoch": 4.14, + "learning_rate": 8.575580251493842e-06, + "loss": 0.1654, + "step": 737200 + }, + { + "epoch": 4.14, + "learning_rate": 8.569959021231388e-06, + "loss": 0.1638, + "step": 737300 + }, + { + "epoch": 4.15, + "learning_rate": 8.564337790968932e-06, + "loss": 0.1689, + "step": 737400 + }, + { + "epoch": 4.15, + "learning_rate": 8.558716560706477e-06, + "loss": 0.1677, + "step": 737500 + }, + { + "epoch": 4.15, + "learning_rate": 8.553095330444022e-06, + "loss": 0.1605, + "step": 737600 + }, + { + "epoch": 4.15, + "learning_rate": 8.547530312484191e-06, + "loss": 0.1614, + "step": 737700 + }, + { + "epoch": 4.15, + "learning_rate": 8.541909082221735e-06, + "loss": 0.1622, + "step": 737800 + }, + { + "epoch": 4.15, + "learning_rate": 8.53628785195928e-06, + "loss": 0.1659, + "step": 737900 + }, + { + "epoch": 4.15, + "learning_rate": 8.530666621696826e-06, + "loss": 0.1637, + "step": 738000 + }, + { + "epoch": 4.15, + "learning_rate": 8.52504539143437e-06, + "loss": 0.1633, + "step": 738100 + }, + { + "epoch": 4.15, + "learning_rate": 8.519424161171914e-06, + "loss": 0.1643, + "step": 738200 + }, + { + "epoch": 4.15, + "learning_rate": 8.513802930909458e-06, + "loss": 0.1631, + "step": 738300 + }, + { + "epoch": 4.15, + "learning_rate": 8.508181700647005e-06, + "loss": 0.1648, + "step": 738400 + }, + { + "epoch": 4.15, + "learning_rate": 8.50256047038455e-06, + "loss": 0.161, + "step": 738500 + }, + { + "epoch": 4.15, + "learning_rate": 8.496939240122094e-06, + "loss": 0.1612, + "step": 738600 + }, + { + "epoch": 4.15, + "learning_rate": 8.491318009859638e-06, + "loss": 0.1644, + "step": 738700 + }, + { + "epoch": 4.15, + "learning_rate": 8.485696779597183e-06, + "loss": 0.1675, + "step": 738800 + }, + { + "epoch": 4.15, + "learning_rate": 8.480075549334728e-06, + "loss": 0.1661, + "step": 738900 + }, + { + "epoch": 4.15, + "learning_rate": 8.474510531374897e-06, + "loss": 0.1701, + "step": 739000 + }, + { + "epoch": 4.15, + "learning_rate": 8.468889301112441e-06, + "loss": 0.1649, + "step": 739100 + }, + { + "epoch": 4.16, + "learning_rate": 8.463268070849986e-06, + "loss": 0.1655, + "step": 739200 + }, + { + "epoch": 4.16, + "learning_rate": 8.45764684058753e-06, + "loss": 0.1656, + "step": 739300 + }, + { + "epoch": 4.16, + "learning_rate": 8.452025610325077e-06, + "loss": 0.1652, + "step": 739400 + }, + { + "epoch": 4.16, + "learning_rate": 8.446404380062622e-06, + "loss": 0.1676, + "step": 739500 + }, + { + "epoch": 4.16, + "learning_rate": 8.440783149800166e-06, + "loss": 0.1661, + "step": 739600 + }, + { + "epoch": 4.16, + "learning_rate": 8.43516191953771e-06, + "loss": 0.1643, + "step": 739700 + }, + { + "epoch": 4.16, + "learning_rate": 8.429540689275255e-06, + "loss": 0.164, + "step": 739800 + }, + { + "epoch": 4.16, + "learning_rate": 8.4239194590128e-06, + "loss": 0.1641, + "step": 739900 + }, + { + "epoch": 4.16, + "learning_rate": 8.418298228750344e-06, + "loss": 0.1625, + "step": 740000 + }, + { + "epoch": 4.16, + "learning_rate": 8.412676998487889e-06, + "loss": 0.1638, + "step": 740100 + }, + { + "epoch": 4.16, + "learning_rate": 8.407055768225435e-06, + "loss": 0.1638, + "step": 740200 + }, + { + "epoch": 4.16, + "learning_rate": 8.40143453796298e-06, + "loss": 0.1614, + "step": 740300 + }, + { + "epoch": 4.16, + "learning_rate": 8.395813307700524e-06, + "loss": 0.1579, + "step": 740400 + }, + { + "epoch": 4.16, + "learning_rate": 8.390192077438067e-06, + "loss": 0.163, + "step": 740500 + }, + { + "epoch": 4.16, + "learning_rate": 8.384570847175614e-06, + "loss": 0.1622, + "step": 740600 + }, + { + "epoch": 4.16, + "learning_rate": 8.378949616913158e-06, + "loss": 0.1617, + "step": 740700 + }, + { + "epoch": 4.16, + "learning_rate": 8.373328386650703e-06, + "loss": 0.1664, + "step": 740800 + }, + { + "epoch": 4.16, + "learning_rate": 8.367707156388247e-06, + "loss": 0.1674, + "step": 740900 + }, + { + "epoch": 4.17, + "learning_rate": 8.362085926125792e-06, + "loss": 0.1687, + "step": 741000 + }, + { + "epoch": 4.17, + "learning_rate": 8.356520908165961e-06, + "loss": 0.1636, + "step": 741100 + }, + { + "epoch": 4.17, + "learning_rate": 8.350899677903506e-06, + "loss": 0.163, + "step": 741200 + }, + { + "epoch": 4.17, + "learning_rate": 8.345278447641052e-06, + "loss": 0.1642, + "step": 741300 + }, + { + "epoch": 4.17, + "learning_rate": 8.339657217378597e-06, + "loss": 0.1598, + "step": 741400 + }, + { + "epoch": 4.17, + "learning_rate": 8.33403598711614e-06, + "loss": 0.1674, + "step": 741500 + }, + { + "epoch": 4.17, + "learning_rate": 8.328414756853684e-06, + "loss": 0.165, + "step": 741600 + }, + { + "epoch": 4.17, + "learning_rate": 8.32279352659123e-06, + "loss": 0.1625, + "step": 741700 + }, + { + "epoch": 4.17, + "learning_rate": 8.3172285086314e-06, + "loss": 0.1595, + "step": 741800 + }, + { + "epoch": 4.17, + "learning_rate": 8.311607278368944e-06, + "loss": 0.1617, + "step": 741900 + }, + { + "epoch": 4.17, + "learning_rate": 8.305986048106489e-06, + "loss": 0.1617, + "step": 742000 + }, + { + "epoch": 4.17, + "learning_rate": 8.300364817844034e-06, + "loss": 0.1685, + "step": 742100 + }, + { + "epoch": 4.17, + "learning_rate": 8.294743587581578e-06, + "loss": 0.1678, + "step": 742200 + }, + { + "epoch": 4.17, + "learning_rate": 8.289122357319123e-06, + "loss": 0.1636, + "step": 742300 + }, + { + "epoch": 4.17, + "learning_rate": 8.283501127056669e-06, + "loss": 0.1666, + "step": 742400 + }, + { + "epoch": 4.17, + "learning_rate": 8.277879896794214e-06, + "loss": 0.162, + "step": 742500 + }, + { + "epoch": 4.17, + "learning_rate": 8.272258666531756e-06, + "loss": 0.1676, + "step": 742600 + }, + { + "epoch": 4.17, + "learning_rate": 8.266637436269303e-06, + "loss": 0.1681, + "step": 742700 + }, + { + "epoch": 4.18, + "learning_rate": 8.261016206006847e-06, + "loss": 0.1634, + "step": 742800 + }, + { + "epoch": 4.18, + "learning_rate": 8.255394975744392e-06, + "loss": 0.1593, + "step": 742900 + }, + { + "epoch": 4.18, + "learning_rate": 8.249773745481936e-06, + "loss": 0.1632, + "step": 743000 + }, + { + "epoch": 4.18, + "learning_rate": 8.244152515219481e-06, + "loss": 0.1635, + "step": 743100 + }, + { + "epoch": 4.18, + "learning_rate": 8.238531284957027e-06, + "loss": 0.1638, + "step": 743200 + }, + { + "epoch": 4.18, + "learning_rate": 8.23291005469457e-06, + "loss": 0.1674, + "step": 743300 + }, + { + "epoch": 4.18, + "learning_rate": 8.227288824432115e-06, + "loss": 0.1606, + "step": 743400 + }, + { + "epoch": 4.18, + "learning_rate": 8.221667594169661e-06, + "loss": 0.1588, + "step": 743500 + }, + { + "epoch": 4.18, + "learning_rate": 8.216046363907206e-06, + "loss": 0.1608, + "step": 743600 + }, + { + "epoch": 4.18, + "learning_rate": 8.21042513364475e-06, + "loss": 0.1676, + "step": 743700 + }, + { + "epoch": 4.18, + "learning_rate": 8.204803903382295e-06, + "loss": 0.1703, + "step": 743800 + }, + { + "epoch": 4.18, + "learning_rate": 8.19918267311984e-06, + "loss": 0.1652, + "step": 743900 + }, + { + "epoch": 4.18, + "learning_rate": 8.193561442857384e-06, + "loss": 0.1676, + "step": 744000 + }, + { + "epoch": 4.18, + "learning_rate": 8.187940212594929e-06, + "loss": 0.1652, + "step": 744100 + }, + { + "epoch": 4.18, + "learning_rate": 8.182318982332473e-06, + "loss": 0.1639, + "step": 744200 + }, + { + "epoch": 4.18, + "learning_rate": 8.176697752070018e-06, + "loss": 0.1692, + "step": 744300 + }, + { + "epoch": 4.18, + "learning_rate": 8.171076521807564e-06, + "loss": 0.1675, + "step": 744400 + }, + { + "epoch": 4.19, + "learning_rate": 8.165455291545109e-06, + "loss": 0.1711, + "step": 744500 + }, + { + "epoch": 4.19, + "learning_rate": 8.159834061282651e-06, + "loss": 0.156, + "step": 744600 + }, + { + "epoch": 4.19, + "learning_rate": 8.154212831020198e-06, + "loss": 0.165, + "step": 744700 + }, + { + "epoch": 4.19, + "learning_rate": 8.148591600757742e-06, + "loss": 0.1592, + "step": 744800 + }, + { + "epoch": 4.19, + "learning_rate": 8.142970370495287e-06, + "loss": 0.1668, + "step": 744900 + }, + { + "epoch": 4.19, + "learning_rate": 8.137349140232832e-06, + "loss": 0.1619, + "step": 745000 + }, + { + "epoch": 4.19, + "learning_rate": 8.131727909970376e-06, + "loss": 0.1741, + "step": 745100 + }, + { + "epoch": 4.19, + "learning_rate": 8.126106679707922e-06, + "loss": 0.1673, + "step": 745200 + }, + { + "epoch": 4.19, + "learning_rate": 8.120485449445465e-06, + "loss": 0.1617, + "step": 745300 + }, + { + "epoch": 4.19, + "learning_rate": 8.11486421918301e-06, + "loss": 0.1676, + "step": 745400 + }, + { + "epoch": 4.19, + "learning_rate": 8.109242988920556e-06, + "loss": 0.1671, + "step": 745500 + }, + { + "epoch": 4.19, + "learning_rate": 8.1036217586581e-06, + "loss": 0.1693, + "step": 745600 + }, + { + "epoch": 4.19, + "learning_rate": 8.098000528395645e-06, + "loss": 0.1657, + "step": 745700 + }, + { + "epoch": 4.19, + "learning_rate": 8.09237929813319e-06, + "loss": 0.1679, + "step": 745800 + }, + { + "epoch": 4.19, + "learning_rate": 8.086758067870734e-06, + "loss": 0.1657, + "step": 745900 + }, + { + "epoch": 4.19, + "learning_rate": 8.081136837608279e-06, + "loss": 0.1664, + "step": 746000 + }, + { + "epoch": 4.19, + "learning_rate": 8.075515607345824e-06, + "loss": 0.1592, + "step": 746100 + }, + { + "epoch": 4.19, + "learning_rate": 8.069894377083368e-06, + "loss": 0.1724, + "step": 746200 + }, + { + "epoch": 4.2, + "learning_rate": 8.064273146820914e-06, + "loss": 0.1659, + "step": 746300 + }, + { + "epoch": 4.2, + "learning_rate": 8.058651916558459e-06, + "loss": 0.168, + "step": 746400 + }, + { + "epoch": 4.2, + "learning_rate": 8.053030686296004e-06, + "loss": 0.1623, + "step": 746500 + }, + { + "epoch": 4.2, + "learning_rate": 8.047409456033547e-06, + "loss": 0.1626, + "step": 746600 + }, + { + "epoch": 4.2, + "learning_rate": 8.041844438073718e-06, + "loss": 0.1622, + "step": 746700 + }, + { + "epoch": 4.2, + "learning_rate": 8.036223207811262e-06, + "loss": 0.1682, + "step": 746800 + }, + { + "epoch": 4.2, + "learning_rate": 8.030601977548807e-06, + "loss": 0.1621, + "step": 746900 + }, + { + "epoch": 4.2, + "learning_rate": 8.024980747286351e-06, + "loss": 0.167, + "step": 747000 + }, + { + "epoch": 4.2, + "learning_rate": 8.019359517023896e-06, + "loss": 0.1636, + "step": 747100 + }, + { + "epoch": 4.2, + "learning_rate": 8.01373828676144e-06, + "loss": 0.1656, + "step": 747200 + }, + { + "epoch": 4.2, + "learning_rate": 8.008117056498985e-06, + "loss": 0.1682, + "step": 747300 + }, + { + "epoch": 4.2, + "learning_rate": 8.002495826236531e-06, + "loss": 0.1665, + "step": 747400 + }, + { + "epoch": 4.2, + "learning_rate": 7.996874595974076e-06, + "loss": 0.1607, + "step": 747500 + }, + { + "epoch": 4.2, + "learning_rate": 7.99125336571162e-06, + "loss": 0.168, + "step": 747600 + }, + { + "epoch": 4.2, + "learning_rate": 7.985632135449163e-06, + "loss": 0.165, + "step": 747700 + }, + { + "epoch": 4.2, + "learning_rate": 7.98001090518671e-06, + "loss": 0.1689, + "step": 747800 + }, + { + "epoch": 4.2, + "learning_rate": 7.974389674924254e-06, + "loss": 0.1631, + "step": 747900 + }, + { + "epoch": 4.2, + "learning_rate": 7.968768444661799e-06, + "loss": 0.1685, + "step": 748000 + }, + { + "epoch": 4.21, + "learning_rate": 7.963147214399343e-06, + "loss": 0.1635, + "step": 748100 + }, + { + "epoch": 4.21, + "learning_rate": 7.95752598413689e-06, + "loss": 0.1668, + "step": 748200 + }, + { + "epoch": 4.21, + "learning_rate": 7.951904753874434e-06, + "loss": 0.1625, + "step": 748300 + }, + { + "epoch": 4.21, + "learning_rate": 7.946283523611977e-06, + "loss": 0.1694, + "step": 748400 + }, + { + "epoch": 4.21, + "learning_rate": 7.940662293349522e-06, + "loss": 0.1675, + "step": 748500 + }, + { + "epoch": 4.21, + "learning_rate": 7.935041063087068e-06, + "loss": 0.166, + "step": 748600 + }, + { + "epoch": 4.21, + "learning_rate": 7.929419832824613e-06, + "loss": 0.1649, + "step": 748700 + }, + { + "epoch": 4.21, + "learning_rate": 7.923798602562157e-06, + "loss": 0.1698, + "step": 748800 + }, + { + "epoch": 4.21, + "learning_rate": 7.918177372299702e-06, + "loss": 0.1607, + "step": 748900 + }, + { + "epoch": 4.21, + "learning_rate": 7.912556142037246e-06, + "loss": 0.1635, + "step": 749000 + }, + { + "epoch": 4.21, + "learning_rate": 7.906934911774791e-06, + "loss": 0.1607, + "step": 749100 + }, + { + "epoch": 4.21, + "learning_rate": 7.901313681512336e-06, + "loss": 0.1638, + "step": 749200 + }, + { + "epoch": 4.21, + "learning_rate": 7.89569245124988e-06, + "loss": 0.1625, + "step": 749300 + }, + { + "epoch": 4.21, + "learning_rate": 7.890071220987426e-06, + "loss": 0.1609, + "step": 749400 + }, + { + "epoch": 4.21, + "learning_rate": 7.884449990724971e-06, + "loss": 0.1672, + "step": 749500 + }, + { + "epoch": 4.21, + "learning_rate": 7.878828760462516e-06, + "loss": 0.1624, + "step": 749600 + }, + { + "epoch": 4.21, + "learning_rate": 7.87320753020006e-06, + "loss": 0.1615, + "step": 749700 + }, + { + "epoch": 4.21, + "learning_rate": 7.867586299937605e-06, + "loss": 0.1673, + "step": 749800 + }, + { + "epoch": 4.22, + "learning_rate": 7.86196506967515e-06, + "loss": 0.1595, + "step": 749900 + }, + { + "epoch": 4.22, + "learning_rate": 7.856343839412694e-06, + "loss": 0.1654, + "step": 750000 + }, + { + "epoch": 4.22, + "learning_rate": 7.850722609150238e-06, + "loss": 0.1652, + "step": 750100 + }, + { + "epoch": 4.22, + "learning_rate": 7.845101378887785e-06, + "loss": 0.1691, + "step": 750200 + }, + { + "epoch": 4.22, + "learning_rate": 7.83948014862533e-06, + "loss": 0.1654, + "step": 750300 + }, + { + "epoch": 4.22, + "learning_rate": 7.833858918362872e-06, + "loss": 0.1629, + "step": 750400 + }, + { + "epoch": 4.22, + "learning_rate": 7.828237688100418e-06, + "loss": 0.1683, + "step": 750500 + }, + { + "epoch": 4.22, + "learning_rate": 7.822616457837963e-06, + "loss": 0.1666, + "step": 750600 + }, + { + "epoch": 4.22, + "learning_rate": 7.816995227575508e-06, + "loss": 0.1628, + "step": 750700 + }, + { + "epoch": 4.22, + "learning_rate": 7.811373997313052e-06, + "loss": 0.1669, + "step": 750800 + }, + { + "epoch": 4.22, + "learning_rate": 7.805752767050597e-06, + "loss": 0.1714, + "step": 750900 + }, + { + "epoch": 4.22, + "learning_rate": 7.800131536788143e-06, + "loss": 0.1594, + "step": 751000 + }, + { + "epoch": 4.22, + "learning_rate": 7.794510306525686e-06, + "loss": 0.1607, + "step": 751100 + }, + { + "epoch": 4.22, + "learning_rate": 7.78888907626323e-06, + "loss": 0.1694, + "step": 751200 + }, + { + "epoch": 4.22, + "learning_rate": 7.783267846000777e-06, + "loss": 0.1621, + "step": 751300 + }, + { + "epoch": 4.22, + "learning_rate": 7.777646615738321e-06, + "loss": 0.1617, + "step": 751400 + }, + { + "epoch": 4.22, + "learning_rate": 7.772025385475866e-06, + "loss": 0.1667, + "step": 751500 + }, + { + "epoch": 4.22, + "learning_rate": 7.76640415521341e-06, + "loss": 0.1632, + "step": 751600 + }, + { + "epoch": 4.23, + "learning_rate": 7.760782924950955e-06, + "loss": 0.1706, + "step": 751700 + }, + { + "epoch": 4.23, + "learning_rate": 7.7551616946885e-06, + "loss": 0.1674, + "step": 751800 + }, + { + "epoch": 4.23, + "learning_rate": 7.749540464426044e-06, + "loss": 0.1627, + "step": 751900 + }, + { + "epoch": 4.23, + "learning_rate": 7.743919234163589e-06, + "loss": 0.1652, + "step": 752000 + }, + { + "epoch": 4.23, + "learning_rate": 7.738298003901134e-06, + "loss": 0.1682, + "step": 752100 + }, + { + "epoch": 4.23, + "learning_rate": 7.732732985941303e-06, + "loss": 0.1647, + "step": 752200 + }, + { + "epoch": 4.23, + "learning_rate": 7.727111755678847e-06, + "loss": 0.1594, + "step": 752300 + }, + { + "epoch": 4.23, + "learning_rate": 7.721490525416394e-06, + "loss": 0.1612, + "step": 752400 + }, + { + "epoch": 4.23, + "learning_rate": 7.715869295153938e-06, + "loss": 0.1726, + "step": 752500 + }, + { + "epoch": 4.23, + "learning_rate": 7.710248064891483e-06, + "loss": 0.163, + "step": 752600 + }, + { + "epoch": 4.23, + "learning_rate": 7.704626834629027e-06, + "loss": 0.1659, + "step": 752700 + }, + { + "epoch": 4.23, + "learning_rate": 7.699005604366572e-06, + "loss": 0.1683, + "step": 752800 + }, + { + "epoch": 4.23, + "learning_rate": 7.693384374104117e-06, + "loss": 0.1656, + "step": 752900 + }, + { + "epoch": 4.23, + "learning_rate": 7.687763143841661e-06, + "loss": 0.1643, + "step": 753000 + }, + { + "epoch": 4.23, + "learning_rate": 7.682141913579206e-06, + "loss": 0.1624, + "step": 753100 + }, + { + "epoch": 4.23, + "learning_rate": 7.676520683316752e-06, + "loss": 0.1684, + "step": 753200 + }, + { + "epoch": 4.23, + "learning_rate": 7.670899453054297e-06, + "loss": 0.1664, + "step": 753300 + }, + { + "epoch": 4.24, + "learning_rate": 7.665278222791841e-06, + "loss": 0.1684, + "step": 753400 + }, + { + "epoch": 4.24, + "learning_rate": 7.659656992529384e-06, + "loss": 0.1674, + "step": 753500 + }, + { + "epoch": 4.24, + "learning_rate": 7.65403576226693e-06, + "loss": 0.1642, + "step": 753600 + }, + { + "epoch": 4.24, + "learning_rate": 7.648414532004475e-06, + "loss": 0.1698, + "step": 753700 + }, + { + "epoch": 4.24, + "learning_rate": 7.64279330174202e-06, + "loss": 0.1629, + "step": 753800 + }, + { + "epoch": 4.24, + "learning_rate": 7.637172071479564e-06, + "loss": 0.1665, + "step": 753900 + }, + { + "epoch": 4.24, + "learning_rate": 7.63155084121711e-06, + "loss": 0.1675, + "step": 754000 + }, + { + "epoch": 4.24, + "learning_rate": 7.625929610954653e-06, + "loss": 0.171, + "step": 754100 + }, + { + "epoch": 4.24, + "learning_rate": 7.620308380692198e-06, + "loss": 0.1654, + "step": 754200 + }, + { + "epoch": 4.24, + "learning_rate": 7.614743362732368e-06, + "loss": 0.1686, + "step": 754300 + }, + { + "epoch": 4.24, + "learning_rate": 7.6091221324699135e-06, + "loss": 0.1662, + "step": 754400 + }, + { + "epoch": 4.24, + "learning_rate": 7.603500902207457e-06, + "loss": 0.1661, + "step": 754500 + }, + { + "epoch": 4.24, + "learning_rate": 7.597879671945002e-06, + "loss": 0.1667, + "step": 754600 + }, + { + "epoch": 4.24, + "learning_rate": 7.592258441682546e-06, + "loss": 0.1628, + "step": 754700 + }, + { + "epoch": 4.24, + "learning_rate": 7.586637211420092e-06, + "loss": 0.1628, + "step": 754800 + }, + { + "epoch": 4.24, + "learning_rate": 7.581015981157636e-06, + "loss": 0.1636, + "step": 754900 + }, + { + "epoch": 4.24, + "learning_rate": 7.575394750895182e-06, + "loss": 0.1712, + "step": 755000 + }, + { + "epoch": 4.24, + "learning_rate": 7.569773520632726e-06, + "loss": 0.1671, + "step": 755100 + }, + { + "epoch": 4.25, + "learning_rate": 7.564208502672896e-06, + "loss": 0.1667, + "step": 755200 + }, + { + "epoch": 4.25, + "learning_rate": 7.55858727241044e-06, + "loss": 0.1621, + "step": 755300 + }, + { + "epoch": 4.25, + "learning_rate": 7.552966042147985e-06, + "loss": 0.16, + "step": 755400 + }, + { + "epoch": 4.25, + "learning_rate": 7.54734481188553e-06, + "loss": 0.168, + "step": 755500 + }, + { + "epoch": 4.25, + "learning_rate": 7.541723581623074e-06, + "loss": 0.1682, + "step": 755600 + }, + { + "epoch": 4.25, + "learning_rate": 7.536102351360619e-06, + "loss": 0.1648, + "step": 755700 + }, + { + "epoch": 4.25, + "learning_rate": 7.530481121098164e-06, + "loss": 0.1598, + "step": 755800 + }, + { + "epoch": 4.25, + "learning_rate": 7.524859890835709e-06, + "loss": 0.1681, + "step": 755900 + }, + { + "epoch": 4.25, + "learning_rate": 7.519238660573254e-06, + "loss": 0.1677, + "step": 756000 + }, + { + "epoch": 4.25, + "learning_rate": 7.513617430310799e-06, + "loss": 0.1677, + "step": 756100 + }, + { + "epoch": 4.25, + "learning_rate": 7.507996200048342e-06, + "loss": 0.1679, + "step": 756200 + }, + { + "epoch": 4.25, + "learning_rate": 7.502374969785887e-06, + "loss": 0.1663, + "step": 756300 + }, + { + "epoch": 4.25, + "learning_rate": 7.4967537395234324e-06, + "loss": 0.1712, + "step": 756400 + }, + { + "epoch": 4.25, + "learning_rate": 7.491132509260977e-06, + "loss": 0.1685, + "step": 756500 + }, + { + "epoch": 4.25, + "learning_rate": 7.4855112789985224e-06, + "loss": 0.1652, + "step": 756600 + }, + { + "epoch": 4.25, + "learning_rate": 7.479890048736067e-06, + "loss": 0.1667, + "step": 756700 + }, + { + "epoch": 4.25, + "learning_rate": 7.4742688184736125e-06, + "loss": 0.1622, + "step": 756800 + }, + { + "epoch": 4.25, + "learning_rate": 7.468647588211155e-06, + "loss": 0.1636, + "step": 756900 + }, + { + "epoch": 4.26, + "learning_rate": 7.463026357948701e-06, + "loss": 0.1673, + "step": 757000 + }, + { + "epoch": 4.26, + "learning_rate": 7.457405127686245e-06, + "loss": 0.1561, + "step": 757100 + }, + { + "epoch": 4.26, + "learning_rate": 7.451783897423791e-06, + "loss": 0.1683, + "step": 757200 + }, + { + "epoch": 4.26, + "learning_rate": 7.446162667161335e-06, + "loss": 0.1631, + "step": 757300 + }, + { + "epoch": 4.26, + "learning_rate": 7.440541436898881e-06, + "loss": 0.1629, + "step": 757400 + }, + { + "epoch": 4.26, + "learning_rate": 7.434920206636425e-06, + "loss": 0.164, + "step": 757500 + }, + { + "epoch": 4.26, + "learning_rate": 7.429298976373969e-06, + "loss": 0.1691, + "step": 757600 + }, + { + "epoch": 4.26, + "learning_rate": 7.423677746111514e-06, + "loss": 0.1718, + "step": 757700 + }, + { + "epoch": 4.26, + "learning_rate": 7.418056515849059e-06, + "loss": 0.1679, + "step": 757800 + }, + { + "epoch": 4.26, + "learning_rate": 7.412435285586604e-06, + "loss": 0.1645, + "step": 757900 + }, + { + "epoch": 4.26, + "learning_rate": 7.406814055324149e-06, + "loss": 0.1629, + "step": 758000 + }, + { + "epoch": 4.26, + "learning_rate": 7.401192825061694e-06, + "loss": 0.1664, + "step": 758100 + }, + { + "epoch": 4.26, + "learning_rate": 7.395571594799239e-06, + "loss": 0.1668, + "step": 758200 + }, + { + "epoch": 4.26, + "learning_rate": 7.389950364536782e-06, + "loss": 0.1618, + "step": 758300 + }, + { + "epoch": 4.26, + "learning_rate": 7.3843291342743275e-06, + "loss": 0.1642, + "step": 758400 + }, + { + "epoch": 4.26, + "learning_rate": 7.378707904011872e-06, + "loss": 0.1679, + "step": 758500 + }, + { + "epoch": 4.26, + "learning_rate": 7.3730866737494175e-06, + "loss": 0.1622, + "step": 758600 + }, + { + "epoch": 4.26, + "learning_rate": 7.367465443486962e-06, + "loss": 0.1606, + "step": 758700 + }, + { + "epoch": 4.27, + "learning_rate": 7.3618442132245075e-06, + "loss": 0.1582, + "step": 758800 + }, + { + "epoch": 4.27, + "learning_rate": 7.35622298296205e-06, + "loss": 0.1671, + "step": 758900 + }, + { + "epoch": 4.27, + "learning_rate": 7.350601752699596e-06, + "loss": 0.1643, + "step": 759000 + }, + { + "epoch": 4.27, + "learning_rate": 7.34498052243714e-06, + "loss": 0.1666, + "step": 759100 + }, + { + "epoch": 4.27, + "learning_rate": 7.339359292174686e-06, + "loss": 0.1662, + "step": 759200 + }, + { + "epoch": 4.27, + "learning_rate": 7.3337380619122304e-06, + "loss": 0.1654, + "step": 759300 + }, + { + "epoch": 4.27, + "learning_rate": 7.328116831649776e-06, + "loss": 0.1644, + "step": 759400 + }, + { + "epoch": 4.27, + "learning_rate": 7.3224956013873204e-06, + "loss": 0.163, + "step": 759500 + }, + { + "epoch": 4.27, + "learning_rate": 7.316874371124864e-06, + "loss": 0.1734, + "step": 759600 + }, + { + "epoch": 4.27, + "learning_rate": 7.311253140862409e-06, + "loss": 0.1612, + "step": 759700 + }, + { + "epoch": 4.27, + "learning_rate": 7.305631910599954e-06, + "loss": 0.1628, + "step": 759800 + }, + { + "epoch": 4.27, + "learning_rate": 7.300010680337499e-06, + "loss": 0.1646, + "step": 759900 + }, + { + "epoch": 4.27, + "learning_rate": 7.294389450075044e-06, + "loss": 0.1601, + "step": 760000 + }, + { + "epoch": 4.27, + "learning_rate": 7.288768219812589e-06, + "loss": 0.1643, + "step": 760100 + }, + { + "epoch": 4.27, + "learning_rate": 7.283146989550134e-06, + "loss": 0.1675, + "step": 760200 + }, + { + "epoch": 4.27, + "learning_rate": 7.277525759287677e-06, + "loss": 0.1598, + "step": 760300 + }, + { + "epoch": 4.27, + "learning_rate": 7.2719045290252225e-06, + "loss": 0.1645, + "step": 760400 + }, + { + "epoch": 4.27, + "learning_rate": 7.266283298762767e-06, + "loss": 0.1637, + "step": 760500 + }, + { + "epoch": 4.28, + "learning_rate": 7.2606620685003125e-06, + "loss": 0.1621, + "step": 760600 + }, + { + "epoch": 4.28, + "learning_rate": 7.255040838237857e-06, + "loss": 0.1631, + "step": 760700 + }, + { + "epoch": 4.28, + "learning_rate": 7.2494196079754026e-06, + "loss": 0.1693, + "step": 760800 + }, + { + "epoch": 4.28, + "learning_rate": 7.243798377712947e-06, + "loss": 0.1651, + "step": 760900 + }, + { + "epoch": 4.28, + "learning_rate": 7.238177147450491e-06, + "loss": 0.164, + "step": 761000 + }, + { + "epoch": 4.28, + "learning_rate": 7.2325559171880355e-06, + "loss": 0.1641, + "step": 761100 + }, + { + "epoch": 4.28, + "learning_rate": 7.226934686925581e-06, + "loss": 0.1616, + "step": 761200 + }, + { + "epoch": 4.28, + "learning_rate": 7.2213134566631255e-06, + "loss": 0.1656, + "step": 761300 + }, + { + "epoch": 4.28, + "learning_rate": 7.215692226400671e-06, + "loss": 0.1704, + "step": 761400 + }, + { + "epoch": 4.28, + "learning_rate": 7.2100709961382155e-06, + "loss": 0.1663, + "step": 761500 + }, + { + "epoch": 4.28, + "learning_rate": 7.204449765875759e-06, + "loss": 0.1628, + "step": 761600 + }, + { + "epoch": 4.28, + "learning_rate": 7.198828535613304e-06, + "loss": 0.1596, + "step": 761700 + }, + { + "epoch": 4.28, + "learning_rate": 7.193207305350849e-06, + "loss": 0.1634, + "step": 761800 + }, + { + "epoch": 4.28, + "learning_rate": 7.187586075088394e-06, + "loss": 0.1621, + "step": 761900 + }, + { + "epoch": 4.28, + "learning_rate": 7.181964844825939e-06, + "loss": 0.1643, + "step": 762000 + }, + { + "epoch": 4.28, + "learning_rate": 7.176343614563484e-06, + "loss": 0.1603, + "step": 762100 + }, + { + "epoch": 4.28, + "learning_rate": 7.170722384301029e-06, + "loss": 0.167, + "step": 762200 + }, + { + "epoch": 4.29, + "learning_rate": 7.165101154038573e-06, + "loss": 0.1663, + "step": 762300 + }, + { + "epoch": 4.29, + "learning_rate": 7.159479923776118e-06, + "loss": 0.1605, + "step": 762400 + }, + { + "epoch": 4.29, + "learning_rate": 7.153858693513662e-06, + "loss": 0.1612, + "step": 762500 + }, + { + "epoch": 4.29, + "learning_rate": 7.148237463251208e-06, + "loss": 0.165, + "step": 762600 + }, + { + "epoch": 4.29, + "learning_rate": 7.142616232988752e-06, + "loss": 0.1673, + "step": 762700 + }, + { + "epoch": 4.29, + "learning_rate": 7.136995002726298e-06, + "loss": 0.1618, + "step": 762800 + }, + { + "epoch": 4.29, + "learning_rate": 7.131373772463842e-06, + "loss": 0.1682, + "step": 762900 + }, + { + "epoch": 4.29, + "learning_rate": 7.125752542201386e-06, + "loss": 0.16, + "step": 763000 + }, + { + "epoch": 4.29, + "learning_rate": 7.120131311938931e-06, + "loss": 0.167, + "step": 763100 + }, + { + "epoch": 4.29, + "learning_rate": 7.114566293979101e-06, + "loss": 0.1635, + "step": 763200 + }, + { + "epoch": 4.29, + "learning_rate": 7.108945063716646e-06, + "loss": 0.1599, + "step": 763300 + }, + { + "epoch": 4.29, + "learning_rate": 7.10332383345419e-06, + "loss": 0.1612, + "step": 763400 + }, + { + "epoch": 4.29, + "learning_rate": 7.0977026031917344e-06, + "loss": 0.162, + "step": 763500 + }, + { + "epoch": 4.29, + "learning_rate": 7.09208137292928e-06, + "loss": 0.1628, + "step": 763600 + }, + { + "epoch": 4.29, + "learning_rate": 7.0864601426668244e-06, + "loss": 0.165, + "step": 763700 + }, + { + "epoch": 4.29, + "learning_rate": 7.08083891240437e-06, + "loss": 0.1687, + "step": 763800 + }, + { + "epoch": 4.29, + "learning_rate": 7.0752176821419145e-06, + "loss": 0.1669, + "step": 763900 + }, + { + "epoch": 4.29, + "learning_rate": 7.069596451879458e-06, + "loss": 0.1658, + "step": 764000 + }, + { + "epoch": 4.3, + "learning_rate": 7.063975221617003e-06, + "loss": 0.1627, + "step": 764100 + }, + { + "epoch": 4.3, + "learning_rate": 7.058353991354548e-06, + "loss": 0.1684, + "step": 764200 + }, + { + "epoch": 4.3, + "learning_rate": 7.052732761092093e-06, + "loss": 0.1657, + "step": 764300 + }, + { + "epoch": 4.3, + "learning_rate": 7.047111530829638e-06, + "loss": 0.167, + "step": 764400 + }, + { + "epoch": 4.3, + "learning_rate": 7.041490300567183e-06, + "loss": 0.1653, + "step": 764500 + }, + { + "epoch": 4.3, + "learning_rate": 7.035869070304727e-06, + "loss": 0.1664, + "step": 764600 + }, + { + "epoch": 4.3, + "learning_rate": 7.030247840042271e-06, + "loss": 0.1724, + "step": 764700 + }, + { + "epoch": 4.3, + "learning_rate": 7.0246266097798166e-06, + "loss": 0.1648, + "step": 764800 + }, + { + "epoch": 4.3, + "learning_rate": 7.019005379517361e-06, + "loss": 0.1638, + "step": 764900 + }, + { + "epoch": 4.3, + "learning_rate": 7.0133841492549066e-06, + "loss": 0.1636, + "step": 765000 + }, + { + "epoch": 4.3, + "learning_rate": 7.007762918992451e-06, + "loss": 0.1655, + "step": 765100 + }, + { + "epoch": 4.3, + "learning_rate": 7.002141688729997e-06, + "loss": 0.1658, + "step": 765200 + }, + { + "epoch": 4.3, + "learning_rate": 6.996520458467541e-06, + "loss": 0.1622, + "step": 765300 + }, + { + "epoch": 4.3, + "learning_rate": 6.990899228205085e-06, + "loss": 0.1621, + "step": 765400 + }, + { + "epoch": 4.3, + "learning_rate": 6.985334210245255e-06, + "loss": 0.166, + "step": 765500 + }, + { + "epoch": 4.3, + "learning_rate": 6.9797129799828e-06, + "loss": 0.1655, + "step": 765600 + }, + { + "epoch": 4.3, + "learning_rate": 6.974091749720345e-06, + "loss": 0.1649, + "step": 765700 + }, + { + "epoch": 4.3, + "learning_rate": 6.968470519457888e-06, + "loss": 0.168, + "step": 765800 + }, + { + "epoch": 4.31, + "learning_rate": 6.962849289195433e-06, + "loss": 0.1653, + "step": 765900 + }, + { + "epoch": 4.31, + "learning_rate": 6.957228058932978e-06, + "loss": 0.1698, + "step": 766000 + }, + { + "epoch": 4.31, + "learning_rate": 6.951606828670523e-06, + "loss": 0.1646, + "step": 766100 + }, + { + "epoch": 4.31, + "learning_rate": 6.945985598408068e-06, + "loss": 0.1624, + "step": 766200 + }, + { + "epoch": 4.31, + "learning_rate": 6.9403643681456134e-06, + "loss": 0.1633, + "step": 766300 + }, + { + "epoch": 4.31, + "learning_rate": 6.934743137883157e-06, + "loss": 0.1595, + "step": 766400 + }, + { + "epoch": 4.31, + "learning_rate": 6.929121907620702e-06, + "loss": 0.1693, + "step": 766500 + }, + { + "epoch": 4.31, + "learning_rate": 6.923500677358246e-06, + "loss": 0.1603, + "step": 766600 + }, + { + "epoch": 4.31, + "learning_rate": 6.917879447095792e-06, + "loss": 0.1667, + "step": 766700 + }, + { + "epoch": 4.31, + "learning_rate": 6.912258216833336e-06, + "loss": 0.1674, + "step": 766800 + }, + { + "epoch": 4.31, + "learning_rate": 6.906636986570882e-06, + "loss": 0.1633, + "step": 766900 + }, + { + "epoch": 4.31, + "learning_rate": 6.901015756308426e-06, + "loss": 0.1615, + "step": 767000 + }, + { + "epoch": 4.31, + "learning_rate": 6.89539452604597e-06, + "loss": 0.1599, + "step": 767100 + }, + { + "epoch": 4.31, + "learning_rate": 6.8897732957835155e-06, + "loss": 0.1663, + "step": 767200 + }, + { + "epoch": 4.31, + "learning_rate": 6.88415206552106e-06, + "loss": 0.1649, + "step": 767300 + }, + { + "epoch": 4.31, + "learning_rate": 6.878530835258605e-06, + "loss": 0.1662, + "step": 767400 + }, + { + "epoch": 4.31, + "learning_rate": 6.87290960499615e-06, + "loss": 0.1671, + "step": 767500 + }, + { + "epoch": 4.31, + "learning_rate": 6.867344587036319e-06, + "loss": 0.1627, + "step": 767600 + }, + { + "epoch": 4.32, + "learning_rate": 6.861723356773864e-06, + "loss": 0.1651, + "step": 767700 + }, + { + "epoch": 4.32, + "learning_rate": 6.856158338814034e-06, + "loss": 0.1656, + "step": 767800 + }, + { + "epoch": 4.32, + "learning_rate": 6.850537108551577e-06, + "loss": 0.1642, + "step": 767900 + }, + { + "epoch": 4.32, + "learning_rate": 6.8449158782891225e-06, + "loss": 0.161, + "step": 768000 + }, + { + "epoch": 4.32, + "learning_rate": 6.839294648026667e-06, + "loss": 0.1637, + "step": 768100 + }, + { + "epoch": 4.32, + "learning_rate": 6.8336734177642125e-06, + "loss": 0.1655, + "step": 768200 + }, + { + "epoch": 4.32, + "learning_rate": 6.828052187501757e-06, + "loss": 0.1643, + "step": 768300 + }, + { + "epoch": 4.32, + "learning_rate": 6.822487169541926e-06, + "loss": 0.1628, + "step": 768400 + }, + { + "epoch": 4.32, + "learning_rate": 6.816865939279471e-06, + "loss": 0.1653, + "step": 768500 + }, + { + "epoch": 4.32, + "learning_rate": 6.811244709017016e-06, + "loss": 0.1582, + "step": 768600 + }, + { + "epoch": 4.32, + "learning_rate": 6.805623478754561e-06, + "loss": 0.1689, + "step": 768700 + }, + { + "epoch": 4.32, + "learning_rate": 6.800002248492106e-06, + "loss": 0.1662, + "step": 768800 + }, + { + "epoch": 4.32, + "learning_rate": 6.794381018229649e-06, + "loss": 0.1659, + "step": 768900 + }, + { + "epoch": 4.32, + "learning_rate": 6.788759787967195e-06, + "loss": 0.164, + "step": 769000 + }, + { + "epoch": 4.32, + "learning_rate": 6.783138557704739e-06, + "loss": 0.1632, + "step": 769100 + }, + { + "epoch": 4.32, + "learning_rate": 6.777517327442284e-06, + "loss": 0.1635, + "step": 769200 + }, + { + "epoch": 4.32, + "learning_rate": 6.771896097179829e-06, + "loss": 0.1636, + "step": 769300 + }, + { + "epoch": 4.32, + "learning_rate": 6.766274866917374e-06, + "loss": 0.1646, + "step": 769400 + }, + { + "epoch": 4.33, + "learning_rate": 6.760653636654919e-06, + "loss": 0.1663, + "step": 769500 + }, + { + "epoch": 4.33, + "learning_rate": 6.755032406392463e-06, + "loss": 0.1662, + "step": 769600 + }, + { + "epoch": 4.33, + "learning_rate": 6.749411176130008e-06, + "loss": 0.1602, + "step": 769700 + }, + { + "epoch": 4.33, + "learning_rate": 6.743789945867552e-06, + "loss": 0.1671, + "step": 769800 + }, + { + "epoch": 4.33, + "learning_rate": 6.738168715605098e-06, + "loss": 0.1653, + "step": 769900 + }, + { + "epoch": 4.33, + "learning_rate": 6.732547485342642e-06, + "loss": 0.1628, + "step": 770000 + }, + { + "epoch": 4.33, + "learning_rate": 6.726926255080188e-06, + "loss": 0.1597, + "step": 770100 + }, + { + "epoch": 4.33, + "learning_rate": 6.721305024817732e-06, + "loss": 0.1609, + "step": 770200 + }, + { + "epoch": 4.33, + "learning_rate": 6.715683794555276e-06, + "loss": 0.1677, + "step": 770300 + }, + { + "epoch": 4.33, + "learning_rate": 6.7100625642928215e-06, + "loss": 0.1597, + "step": 770400 + }, + { + "epoch": 4.33, + "learning_rate": 6.704441334030366e-06, + "loss": 0.1663, + "step": 770500 + }, + { + "epoch": 4.33, + "learning_rate": 6.698820103767911e-06, + "loss": 0.1642, + "step": 770600 + }, + { + "epoch": 4.33, + "learning_rate": 6.693198873505456e-06, + "loss": 0.1667, + "step": 770700 + }, + { + "epoch": 4.33, + "learning_rate": 6.687577643243001e-06, + "loss": 0.1655, + "step": 770800 + }, + { + "epoch": 4.33, + "learning_rate": 6.681956412980546e-06, + "loss": 0.1687, + "step": 770900 + }, + { + "epoch": 4.33, + "learning_rate": 6.67633518271809e-06, + "loss": 0.1642, + "step": 771000 + }, + { + "epoch": 4.33, + "learning_rate": 6.670713952455634e-06, + "loss": 0.1666, + "step": 771100 + }, + { + "epoch": 4.34, + "learning_rate": 6.66509272219318e-06, + "loss": 0.1636, + "step": 771200 + }, + { + "epoch": 4.34, + "learning_rate": 6.6594714919307244e-06, + "loss": 0.1635, + "step": 771300 + }, + { + "epoch": 4.34, + "learning_rate": 6.653850261668269e-06, + "loss": 0.163, + "step": 771400 + }, + { + "epoch": 4.34, + "learning_rate": 6.6482290314058144e-06, + "loss": 0.1608, + "step": 771500 + }, + { + "epoch": 4.34, + "learning_rate": 6.642607801143358e-06, + "loss": 0.1597, + "step": 771600 + }, + { + "epoch": 4.34, + "learning_rate": 6.636986570880903e-06, + "loss": 0.1641, + "step": 771700 + }, + { + "epoch": 4.34, + "learning_rate": 6.631365340618448e-06, + "loss": 0.1658, + "step": 771800 + }, + { + "epoch": 4.34, + "learning_rate": 6.625744110355993e-06, + "loss": 0.1681, + "step": 771900 + }, + { + "epoch": 4.34, + "learning_rate": 6.620122880093537e-06, + "loss": 0.1639, + "step": 772000 + }, + { + "epoch": 4.34, + "learning_rate": 6.614501649831083e-06, + "loss": 0.1684, + "step": 772100 + }, + { + "epoch": 4.34, + "learning_rate": 6.608880419568627e-06, + "loss": 0.1693, + "step": 772200 + }, + { + "epoch": 4.34, + "learning_rate": 6.603259189306171e-06, + "loss": 0.1715, + "step": 772300 + }, + { + "epoch": 4.34, + "learning_rate": 6.597694171346341e-06, + "loss": 0.1653, + "step": 772400 + }, + { + "epoch": 4.34, + "learning_rate": 6.592072941083887e-06, + "loss": 0.1622, + "step": 772500 + }, + { + "epoch": 4.34, + "learning_rate": 6.586451710821431e-06, + "loss": 0.1686, + "step": 772600 + }, + { + "epoch": 4.34, + "learning_rate": 6.580830480558975e-06, + "loss": 0.167, + "step": 772700 + }, + { + "epoch": 4.34, + "learning_rate": 6.57520925029652e-06, + "loss": 0.1645, + "step": 772800 + }, + { + "epoch": 4.34, + "learning_rate": 6.569588020034065e-06, + "loss": 0.1682, + "step": 772900 + }, + { + "epoch": 4.35, + "learning_rate": 6.56396678977161e-06, + "loss": 0.1649, + "step": 773000 + }, + { + "epoch": 4.35, + "learning_rate": 6.558345559509155e-06, + "loss": 0.1629, + "step": 773100 + }, + { + "epoch": 4.35, + "learning_rate": 6.5527243292467e-06, + "loss": 0.162, + "step": 773200 + }, + { + "epoch": 4.35, + "learning_rate": 6.547103098984245e-06, + "loss": 0.1649, + "step": 773300 + }, + { + "epoch": 4.35, + "learning_rate": 6.541481868721788e-06, + "loss": 0.1666, + "step": 773400 + }, + { + "epoch": 4.35, + "learning_rate": 6.535860638459333e-06, + "loss": 0.1667, + "step": 773500 + }, + { + "epoch": 4.35, + "learning_rate": 6.530239408196878e-06, + "loss": 0.1618, + "step": 773600 + }, + { + "epoch": 4.35, + "learning_rate": 6.524618177934423e-06, + "loss": 0.1621, + "step": 773700 + }, + { + "epoch": 4.35, + "learning_rate": 6.518996947671968e-06, + "loss": 0.1658, + "step": 773800 + }, + { + "epoch": 4.35, + "learning_rate": 6.513375717409513e-06, + "loss": 0.1608, + "step": 773900 + }, + { + "epoch": 4.35, + "learning_rate": 6.507754487147056e-06, + "loss": 0.1632, + "step": 774000 + }, + { + "epoch": 4.35, + "learning_rate": 6.502133256884602e-06, + "loss": 0.1634, + "step": 774100 + }, + { + "epoch": 4.35, + "learning_rate": 6.496512026622146e-06, + "loss": 0.1609, + "step": 774200 + }, + { + "epoch": 4.35, + "learning_rate": 6.490890796359692e-06, + "loss": 0.1666, + "step": 774300 + }, + { + "epoch": 4.35, + "learning_rate": 6.485269566097236e-06, + "loss": 0.1622, + "step": 774400 + }, + { + "epoch": 4.35, + "learning_rate": 6.479648335834782e-06, + "loss": 0.167, + "step": 774500 + }, + { + "epoch": 4.35, + "learning_rate": 6.474027105572326e-06, + "loss": 0.1682, + "step": 774600 + }, + { + "epoch": 4.35, + "learning_rate": 6.46840587530987e-06, + "loss": 0.1676, + "step": 774700 + }, + { + "epoch": 4.36, + "learning_rate": 6.462784645047415e-06, + "loss": 0.1663, + "step": 774800 + }, + { + "epoch": 4.36, + "learning_rate": 6.45716341478496e-06, + "loss": 0.164, + "step": 774900 + }, + { + "epoch": 4.36, + "learning_rate": 6.451542184522505e-06, + "loss": 0.1664, + "step": 775000 + }, + { + "epoch": 4.36, + "learning_rate": 6.44592095426005e-06, + "loss": 0.1669, + "step": 775100 + }, + { + "epoch": 4.36, + "learning_rate": 6.440299723997595e-06, + "loss": 0.168, + "step": 775200 + }, + { + "epoch": 4.36, + "learning_rate": 6.43467849373514e-06, + "loss": 0.1684, + "step": 775300 + }, + { + "epoch": 4.36, + "learning_rate": 6.429057263472683e-06, + "loss": 0.1677, + "step": 775400 + }, + { + "epoch": 4.36, + "learning_rate": 6.4234360332102284e-06, + "loss": 0.1611, + "step": 775500 + }, + { + "epoch": 4.36, + "learning_rate": 6.417814802947773e-06, + "loss": 0.1623, + "step": 775600 + }, + { + "epoch": 4.36, + "learning_rate": 6.4121935726853184e-06, + "loss": 0.1607, + "step": 775700 + }, + { + "epoch": 4.36, + "learning_rate": 6.406572342422863e-06, + "loss": 0.164, + "step": 775800 + }, + { + "epoch": 4.36, + "learning_rate": 6.4009511121604085e-06, + "loss": 0.1659, + "step": 775900 + }, + { + "epoch": 4.36, + "learning_rate": 6.395386094200577e-06, + "loss": 0.1661, + "step": 776000 + }, + { + "epoch": 4.36, + "learning_rate": 6.3897648639381215e-06, + "loss": 0.1616, + "step": 776100 + }, + { + "epoch": 4.36, + "learning_rate": 6.384143633675667e-06, + "loss": 0.1638, + "step": 776200 + }, + { + "epoch": 4.36, + "learning_rate": 6.3785224034132115e-06, + "loss": 0.1635, + "step": 776300 + }, + { + "epoch": 4.36, + "learning_rate": 6.372901173150755e-06, + "loss": 0.1704, + "step": 776400 + }, + { + "epoch": 4.36, + "learning_rate": 6.367279942888301e-06, + "loss": 0.1578, + "step": 776500 + }, + { + "epoch": 4.37, + "learning_rate": 6.361658712625845e-06, + "loss": 0.166, + "step": 776600 + }, + { + "epoch": 4.37, + "learning_rate": 6.356037482363391e-06, + "loss": 0.1623, + "step": 776700 + }, + { + "epoch": 4.37, + "learning_rate": 6.350416252100935e-06, + "loss": 0.1648, + "step": 776800 + }, + { + "epoch": 4.37, + "learning_rate": 6.34479502183848e-06, + "loss": 0.1644, + "step": 776900 + }, + { + "epoch": 4.37, + "learning_rate": 6.339173791576025e-06, + "loss": 0.1589, + "step": 777000 + }, + { + "epoch": 4.37, + "learning_rate": 6.333552561313569e-06, + "loss": 0.1618, + "step": 777100 + }, + { + "epoch": 4.37, + "learning_rate": 6.327931331051114e-06, + "loss": 0.1605, + "step": 777200 + }, + { + "epoch": 4.37, + "learning_rate": 6.322310100788659e-06, + "loss": 0.1686, + "step": 777300 + }, + { + "epoch": 4.37, + "learning_rate": 6.316688870526204e-06, + "loss": 0.1638, + "step": 777400 + }, + { + "epoch": 4.37, + "learning_rate": 6.311067640263748e-06, + "loss": 0.1673, + "step": 777500 + }, + { + "epoch": 4.37, + "learning_rate": 6.305446410001294e-06, + "loss": 0.1703, + "step": 777600 + }, + { + "epoch": 4.37, + "learning_rate": 6.299825179738838e-06, + "loss": 0.1648, + "step": 777700 + }, + { + "epoch": 4.37, + "learning_rate": 6.294203949476382e-06, + "loss": 0.1682, + "step": 777800 + }, + { + "epoch": 4.37, + "learning_rate": 6.288582719213927e-06, + "loss": 0.1617, + "step": 777900 + }, + { + "epoch": 4.37, + "learning_rate": 6.282961488951472e-06, + "loss": 0.1636, + "step": 778000 + }, + { + "epoch": 4.37, + "learning_rate": 6.277340258689017e-06, + "loss": 0.1611, + "step": 778100 + }, + { + "epoch": 4.37, + "learning_rate": 6.271719028426562e-06, + "loss": 0.1624, + "step": 778200 + }, + { + "epoch": 4.38, + "learning_rate": 6.266097798164107e-06, + "loss": 0.1675, + "step": 778300 + }, + { + "epoch": 4.38, + "learning_rate": 6.260476567901652e-06, + "loss": 0.1644, + "step": 778400 + }, + { + "epoch": 4.38, + "learning_rate": 6.254855337639196e-06, + "loss": 0.1659, + "step": 778500 + }, + { + "epoch": 4.38, + "learning_rate": 6.24923410737674e-06, + "loss": 0.1617, + "step": 778600 + }, + { + "epoch": 4.38, + "learning_rate": 6.243612877114286e-06, + "loss": 0.1632, + "step": 778700 + }, + { + "epoch": 4.38, + "learning_rate": 6.23799164685183e-06, + "loss": 0.1714, + "step": 778800 + }, + { + "epoch": 4.38, + "learning_rate": 6.232370416589376e-06, + "loss": 0.1658, + "step": 778900 + }, + { + "epoch": 4.38, + "learning_rate": 6.2267491863269195e-06, + "loss": 0.1648, + "step": 779000 + }, + { + "epoch": 4.38, + "learning_rate": 6.221127956064465e-06, + "loss": 0.1656, + "step": 779100 + }, + { + "epoch": 4.38, + "learning_rate": 6.2155067258020095e-06, + "loss": 0.16, + "step": 779200 + }, + { + "epoch": 4.38, + "learning_rate": 6.209885495539554e-06, + "loss": 0.1625, + "step": 779300 + }, + { + "epoch": 4.38, + "learning_rate": 6.204264265277099e-06, + "loss": 0.1618, + "step": 779400 + }, + { + "epoch": 4.38, + "learning_rate": 6.198643035014644e-06, + "loss": 0.1649, + "step": 779500 + }, + { + "epoch": 4.38, + "learning_rate": 6.193021804752188e-06, + "loss": 0.1681, + "step": 779600 + }, + { + "epoch": 4.38, + "learning_rate": 6.187400574489733e-06, + "loss": 0.1626, + "step": 779700 + }, + { + "epoch": 4.38, + "learning_rate": 6.181779344227278e-06, + "loss": 0.1643, + "step": 779800 + }, + { + "epoch": 4.38, + "learning_rate": 6.176158113964823e-06, + "loss": 0.1707, + "step": 779900 + }, + { + "epoch": 4.38, + "learning_rate": 6.170593096004992e-06, + "loss": 0.1617, + "step": 780000 + }, + { + "epoch": 4.39, + "learning_rate": 6.164971865742536e-06, + "loss": 0.164, + "step": 780100 + }, + { + "epoch": 4.39, + "learning_rate": 6.159350635480082e-06, + "loss": 0.1655, + "step": 780200 + }, + { + "epoch": 4.39, + "learning_rate": 6.153729405217626e-06, + "loss": 0.168, + "step": 780300 + }, + { + "epoch": 4.39, + "learning_rate": 6.148108174955171e-06, + "loss": 0.1689, + "step": 780400 + }, + { + "epoch": 4.39, + "learning_rate": 6.1424869446927155e-06, + "loss": 0.1602, + "step": 780500 + }, + { + "epoch": 4.39, + "learning_rate": 6.136865714430261e-06, + "loss": 0.1634, + "step": 780600 + }, + { + "epoch": 4.39, + "learning_rate": 6.131244484167805e-06, + "loss": 0.1598, + "step": 780700 + }, + { + "epoch": 4.39, + "learning_rate": 6.125679466207975e-06, + "loss": 0.1641, + "step": 780800 + }, + { + "epoch": 4.39, + "learning_rate": 6.1200582359455195e-06, + "loss": 0.1655, + "step": 780900 + }, + { + "epoch": 4.39, + "learning_rate": 6.114437005683064e-06, + "loss": 0.1619, + "step": 781000 + }, + { + "epoch": 4.39, + "learning_rate": 6.108815775420609e-06, + "loss": 0.1626, + "step": 781100 + }, + { + "epoch": 4.39, + "learning_rate": 6.103194545158153e-06, + "loss": 0.1646, + "step": 781200 + }, + { + "epoch": 4.39, + "learning_rate": 6.097573314895699e-06, + "loss": 0.161, + "step": 781300 + }, + { + "epoch": 4.39, + "learning_rate": 6.091952084633243e-06, + "loss": 0.163, + "step": 781400 + }, + { + "epoch": 4.39, + "learning_rate": 6.086330854370788e-06, + "loss": 0.1639, + "step": 781500 + }, + { + "epoch": 4.39, + "learning_rate": 6.080709624108332e-06, + "loss": 0.163, + "step": 781600 + }, + { + "epoch": 4.39, + "learning_rate": 6.075088393845878e-06, + "loss": 0.1605, + "step": 781700 + }, + { + "epoch": 4.39, + "learning_rate": 6.069467163583422e-06, + "loss": 0.1711, + "step": 781800 + }, + { + "epoch": 4.4, + "learning_rate": 6.063845933320967e-06, + "loss": 0.1638, + "step": 781900 + }, + { + "epoch": 4.4, + "learning_rate": 6.0582247030585116e-06, + "loss": 0.1635, + "step": 782000 + }, + { + "epoch": 4.4, + "learning_rate": 6.052603472796056e-06, + "loss": 0.164, + "step": 782100 + }, + { + "epoch": 4.4, + "learning_rate": 6.0469822425336016e-06, + "loss": 0.1637, + "step": 782200 + }, + { + "epoch": 4.4, + "learning_rate": 6.041361012271146e-06, + "loss": 0.162, + "step": 782300 + }, + { + "epoch": 4.4, + "learning_rate": 6.035739782008691e-06, + "loss": 0.166, + "step": 782400 + }, + { + "epoch": 4.4, + "learning_rate": 6.030118551746235e-06, + "loss": 0.1616, + "step": 782500 + }, + { + "epoch": 4.4, + "learning_rate": 6.024497321483781e-06, + "loss": 0.1667, + "step": 782600 + }, + { + "epoch": 4.4, + "learning_rate": 6.018876091221325e-06, + "loss": 0.165, + "step": 782700 + }, + { + "epoch": 4.4, + "learning_rate": 6.01325486095887e-06, + "loss": 0.1675, + "step": 782800 + }, + { + "epoch": 4.4, + "learning_rate": 6.0076336306964145e-06, + "loss": 0.1665, + "step": 782900 + }, + { + "epoch": 4.4, + "learning_rate": 6.00201240043396e-06, + "loss": 0.1655, + "step": 783000 + }, + { + "epoch": 4.4, + "learning_rate": 5.996447382474128e-06, + "loss": 0.1688, + "step": 783100 + }, + { + "epoch": 4.4, + "learning_rate": 5.990826152211673e-06, + "loss": 0.1633, + "step": 783200 + }, + { + "epoch": 4.4, + "learning_rate": 5.985204921949218e-06, + "loss": 0.1691, + "step": 783300 + }, + { + "epoch": 4.4, + "learning_rate": 5.979583691686763e-06, + "loss": 0.1674, + "step": 783400 + }, + { + "epoch": 4.4, + "learning_rate": 5.973962461424308e-06, + "loss": 0.1661, + "step": 783500 + }, + { + "epoch": 4.4, + "learning_rate": 5.968341231161852e-06, + "loss": 0.1654, + "step": 783600 + }, + { + "epoch": 4.41, + "learning_rate": 5.962720000899398e-06, + "loss": 0.1646, + "step": 783700 + }, + { + "epoch": 4.41, + "learning_rate": 5.957098770636941e-06, + "loss": 0.1619, + "step": 783800 + }, + { + "epoch": 4.41, + "learning_rate": 5.951477540374487e-06, + "loss": 0.1609, + "step": 783900 + }, + { + "epoch": 4.41, + "learning_rate": 5.945856310112031e-06, + "loss": 0.1626, + "step": 784000 + }, + { + "epoch": 4.41, + "learning_rate": 5.940235079849577e-06, + "loss": 0.1624, + "step": 784100 + }, + { + "epoch": 4.41, + "learning_rate": 5.9346138495871205e-06, + "loss": 0.168, + "step": 784200 + }, + { + "epoch": 4.41, + "learning_rate": 5.928992619324666e-06, + "loss": 0.1602, + "step": 784300 + }, + { + "epoch": 4.41, + "learning_rate": 5.9233713890622105e-06, + "loss": 0.1612, + "step": 784400 + }, + { + "epoch": 4.41, + "learning_rate": 5.917750158799755e-06, + "loss": 0.1602, + "step": 784500 + }, + { + "epoch": 4.41, + "learning_rate": 5.9121289285373e-06, + "loss": 0.1678, + "step": 784600 + }, + { + "epoch": 4.41, + "learning_rate": 5.906507698274845e-06, + "loss": 0.1622, + "step": 784700 + }, + { + "epoch": 4.41, + "learning_rate": 5.900886468012389e-06, + "loss": 0.1628, + "step": 784800 + }, + { + "epoch": 4.41, + "learning_rate": 5.895265237749934e-06, + "loss": 0.1678, + "step": 784900 + }, + { + "epoch": 4.41, + "learning_rate": 5.889644007487479e-06, + "loss": 0.1624, + "step": 785000 + }, + { + "epoch": 4.41, + "learning_rate": 5.884022777225024e-06, + "loss": 0.1685, + "step": 785100 + }, + { + "epoch": 4.41, + "learning_rate": 5.878401546962568e-06, + "loss": 0.1663, + "step": 785200 + }, + { + "epoch": 4.41, + "learning_rate": 5.8727803167001135e-06, + "loss": 0.165, + "step": 785300 + }, + { + "epoch": 4.41, + "learning_rate": 5.867159086437658e-06, + "loss": 0.1587, + "step": 785400 + }, + { + "epoch": 4.42, + "learning_rate": 5.861537856175203e-06, + "loss": 0.1592, + "step": 785500 + }, + { + "epoch": 4.42, + "learning_rate": 5.855916625912747e-06, + "loss": 0.1631, + "step": 785600 + }, + { + "epoch": 4.42, + "learning_rate": 5.850295395650293e-06, + "loss": 0.1649, + "step": 785700 + }, + { + "epoch": 4.42, + "learning_rate": 5.844674165387836e-06, + "loss": 0.1643, + "step": 785800 + }, + { + "epoch": 4.42, + "learning_rate": 5.839052935125382e-06, + "loss": 0.1652, + "step": 785900 + }, + { + "epoch": 4.42, + "learning_rate": 5.833431704862926e-06, + "loss": 0.1656, + "step": 786000 + }, + { + "epoch": 4.42, + "learning_rate": 5.827810474600472e-06, + "loss": 0.1662, + "step": 786100 + }, + { + "epoch": 4.42, + "learning_rate": 5.8221892443380156e-06, + "loss": 0.1633, + "step": 786200 + }, + { + "epoch": 4.42, + "learning_rate": 5.816568014075561e-06, + "loss": 0.1653, + "step": 786300 + }, + { + "epoch": 4.42, + "learning_rate": 5.810946783813106e-06, + "loss": 0.1633, + "step": 786400 + }, + { + "epoch": 4.42, + "learning_rate": 5.80532555355065e-06, + "loss": 0.1618, + "step": 786500 + }, + { + "epoch": 4.42, + "learning_rate": 5.799704323288195e-06, + "loss": 0.1658, + "step": 786600 + }, + { + "epoch": 4.42, + "learning_rate": 5.79408309302574e-06, + "loss": 0.1677, + "step": 786700 + }, + { + "epoch": 4.42, + "learning_rate": 5.788461862763284e-06, + "loss": 0.1661, + "step": 786800 + }, + { + "epoch": 4.42, + "learning_rate": 5.782840632500829e-06, + "loss": 0.1685, + "step": 786900 + }, + { + "epoch": 4.42, + "learning_rate": 5.777219402238374e-06, + "loss": 0.1646, + "step": 787000 + }, + { + "epoch": 4.42, + "learning_rate": 5.771654384278543e-06, + "loss": 0.1623, + "step": 787100 + }, + { + "epoch": 4.43, + "learning_rate": 5.766033154016088e-06, + "loss": 0.1675, + "step": 787200 + }, + { + "epoch": 4.43, + "learning_rate": 5.760411923753633e-06, + "loss": 0.161, + "step": 787300 + }, + { + "epoch": 4.43, + "learning_rate": 5.754846905793803e-06, + "loss": 0.1653, + "step": 787400 + }, + { + "epoch": 4.43, + "learning_rate": 5.749225675531347e-06, + "loss": 0.1627, + "step": 787500 + }, + { + "epoch": 4.43, + "learning_rate": 5.743604445268892e-06, + "loss": 0.1599, + "step": 787600 + }, + { + "epoch": 4.43, + "learning_rate": 5.737983215006436e-06, + "loss": 0.165, + "step": 787700 + }, + { + "epoch": 4.43, + "learning_rate": 5.732361984743982e-06, + "loss": 0.1631, + "step": 787800 + }, + { + "epoch": 4.43, + "learning_rate": 5.726740754481526e-06, + "loss": 0.1671, + "step": 787900 + }, + { + "epoch": 4.43, + "learning_rate": 5.721119524219071e-06, + "loss": 0.1642, + "step": 788000 + }, + { + "epoch": 4.43, + "learning_rate": 5.7154982939566155e-06, + "loss": 0.1651, + "step": 788100 + }, + { + "epoch": 4.43, + "learning_rate": 5.709877063694161e-06, + "loss": 0.158, + "step": 788200 + }, + { + "epoch": 4.43, + "learning_rate": 5.704255833431705e-06, + "loss": 0.1639, + "step": 788300 + }, + { + "epoch": 4.43, + "learning_rate": 5.69863460316925e-06, + "loss": 0.1648, + "step": 788400 + }, + { + "epoch": 4.43, + "learning_rate": 5.693013372906795e-06, + "loss": 0.1625, + "step": 788500 + }, + { + "epoch": 4.43, + "learning_rate": 5.687392142644339e-06, + "loss": 0.169, + "step": 788600 + }, + { + "epoch": 4.43, + "learning_rate": 5.681770912381884e-06, + "loss": 0.1579, + "step": 788700 + }, + { + "epoch": 4.43, + "learning_rate": 5.676149682119429e-06, + "loss": 0.1605, + "step": 788800 + }, + { + "epoch": 4.43, + "learning_rate": 5.670528451856974e-06, + "loss": 0.1604, + "step": 788900 + }, + { + "epoch": 4.44, + "learning_rate": 5.6649072215945185e-06, + "loss": 0.1625, + "step": 789000 + }, + { + "epoch": 4.44, + "learning_rate": 5.659285991332063e-06, + "loss": 0.1682, + "step": 789100 + }, + { + "epoch": 4.44, + "learning_rate": 5.6536647610696085e-06, + "loss": 0.1604, + "step": 789200 + }, + { + "epoch": 4.44, + "learning_rate": 5.648043530807152e-06, + "loss": 0.1624, + "step": 789300 + }, + { + "epoch": 4.44, + "learning_rate": 5.642422300544698e-06, + "loss": 0.1628, + "step": 789400 + }, + { + "epoch": 4.44, + "learning_rate": 5.636801070282242e-06, + "loss": 0.1665, + "step": 789500 + }, + { + "epoch": 4.44, + "learning_rate": 5.6312360523224115e-06, + "loss": 0.1678, + "step": 789600 + }, + { + "epoch": 4.44, + "learning_rate": 5.625614822059956e-06, + "loss": 0.165, + "step": 789700 + }, + { + "epoch": 4.44, + "learning_rate": 5.619993591797501e-06, + "loss": 0.1604, + "step": 789800 + }, + { + "epoch": 4.44, + "learning_rate": 5.614372361535046e-06, + "loss": 0.1689, + "step": 789900 + }, + { + "epoch": 4.44, + "learning_rate": 5.608751131272591e-06, + "loss": 0.1612, + "step": 790000 + }, + { + "epoch": 4.44, + "learning_rate": 5.603129901010135e-06, + "loss": 0.1626, + "step": 790100 + }, + { + "epoch": 4.44, + "learning_rate": 5.59750867074768e-06, + "loss": 0.1631, + "step": 790200 + }, + { + "epoch": 4.44, + "learning_rate": 5.591887440485225e-06, + "loss": 0.1589, + "step": 790300 + }, + { + "epoch": 4.44, + "learning_rate": 5.586266210222769e-06, + "loss": 0.1612, + "step": 790400 + }, + { + "epoch": 4.44, + "learning_rate": 5.5806449799603145e-06, + "loss": 0.1653, + "step": 790500 + }, + { + "epoch": 4.44, + "learning_rate": 5.575023749697859e-06, + "loss": 0.1605, + "step": 790600 + }, + { + "epoch": 4.44, + "learning_rate": 5.569402519435404e-06, + "loss": 0.1607, + "step": 790700 + }, + { + "epoch": 4.45, + "learning_rate": 5.563781289172948e-06, + "loss": 0.1681, + "step": 790800 + }, + { + "epoch": 4.45, + "learning_rate": 5.558160058910494e-06, + "loss": 0.1632, + "step": 790900 + }, + { + "epoch": 4.45, + "learning_rate": 5.552538828648038e-06, + "loss": 0.1636, + "step": 791000 + }, + { + "epoch": 4.45, + "learning_rate": 5.546917598385583e-06, + "loss": 0.1626, + "step": 791100 + }, + { + "epoch": 4.45, + "learning_rate": 5.541296368123127e-06, + "loss": 0.165, + "step": 791200 + }, + { + "epoch": 4.45, + "learning_rate": 5.535675137860673e-06, + "loss": 0.1659, + "step": 791300 + }, + { + "epoch": 4.45, + "learning_rate": 5.5300539075982174e-06, + "loss": 0.166, + "step": 791400 + }, + { + "epoch": 4.45, + "learning_rate": 5.524432677335762e-06, + "loss": 0.1634, + "step": 791500 + }, + { + "epoch": 4.45, + "learning_rate": 5.518811447073307e-06, + "loss": 0.1672, + "step": 791600 + }, + { + "epoch": 4.45, + "learning_rate": 5.513190216810851e-06, + "loss": 0.1654, + "step": 791700 + }, + { + "epoch": 4.45, + "learning_rate": 5.507568986548397e-06, + "loss": 0.1653, + "step": 791800 + }, + { + "epoch": 4.45, + "learning_rate": 5.501947756285941e-06, + "loss": 0.1622, + "step": 791900 + }, + { + "epoch": 4.45, + "learning_rate": 5.496326526023486e-06, + "loss": 0.1638, + "step": 792000 + }, + { + "epoch": 4.45, + "learning_rate": 5.49070529576103e-06, + "loss": 0.1638, + "step": 792100 + }, + { + "epoch": 4.45, + "learning_rate": 5.485084065498576e-06, + "loss": 0.1638, + "step": 792200 + }, + { + "epoch": 4.45, + "learning_rate": 5.47946283523612e-06, + "loss": 0.1627, + "step": 792300 + }, + { + "epoch": 4.45, + "learning_rate": 5.473841604973665e-06, + "loss": 0.1622, + "step": 792400 + }, + { + "epoch": 4.45, + "learning_rate": 5.4682203747112095e-06, + "loss": 0.1637, + "step": 792500 + }, + { + "epoch": 4.46, + "learning_rate": 5.462599144448754e-06, + "loss": 0.1695, + "step": 792600 + }, + { + "epoch": 4.46, + "learning_rate": 5.456977914186299e-06, + "loss": 0.1685, + "step": 792700 + }, + { + "epoch": 4.46, + "learning_rate": 5.451356683923844e-06, + "loss": 0.1644, + "step": 792800 + }, + { + "epoch": 4.46, + "learning_rate": 5.445735453661389e-06, + "loss": 0.161, + "step": 792900 + }, + { + "epoch": 4.46, + "learning_rate": 5.440114223398933e-06, + "loss": 0.165, + "step": 793000 + }, + { + "epoch": 4.46, + "learning_rate": 5.434492993136478e-06, + "loss": 0.1621, + "step": 793100 + }, + { + "epoch": 4.46, + "learning_rate": 5.428871762874023e-06, + "loss": 0.1639, + "step": 793200 + }, + { + "epoch": 4.46, + "learning_rate": 5.423250532611568e-06, + "loss": 0.1664, + "step": 793300 + }, + { + "epoch": 4.46, + "learning_rate": 5.4176293023491125e-06, + "loss": 0.1642, + "step": 793400 + }, + { + "epoch": 4.46, + "learning_rate": 5.412008072086657e-06, + "loss": 0.1617, + "step": 793500 + }, + { + "epoch": 4.46, + "learning_rate": 5.4063868418242025e-06, + "loss": 0.1636, + "step": 793600 + }, + { + "epoch": 4.46, + "learning_rate": 5.400821823864372e-06, + "loss": 0.1676, + "step": 793700 + }, + { + "epoch": 4.46, + "learning_rate": 5.3952005936019155e-06, + "loss": 0.1694, + "step": 793800 + }, + { + "epoch": 4.46, + "learning_rate": 5.389579363339461e-06, + "loss": 0.1625, + "step": 793900 + }, + { + "epoch": 4.46, + "learning_rate": 5.3839581330770056e-06, + "loss": 0.1607, + "step": 794000 + }, + { + "epoch": 4.46, + "learning_rate": 5.37833690281455e-06, + "loss": 0.1634, + "step": 794100 + }, + { + "epoch": 4.46, + "learning_rate": 5.372715672552095e-06, + "loss": 0.1634, + "step": 794200 + }, + { + "epoch": 4.46, + "learning_rate": 5.36709444228964e-06, + "loss": 0.164, + "step": 794300 + }, + { + "epoch": 4.47, + "learning_rate": 5.361473212027184e-06, + "loss": 0.1626, + "step": 794400 + }, + { + "epoch": 4.47, + "learning_rate": 5.355851981764729e-06, + "loss": 0.1644, + "step": 794500 + }, + { + "epoch": 4.47, + "learning_rate": 5.350230751502274e-06, + "loss": 0.1681, + "step": 794600 + }, + { + "epoch": 4.47, + "learning_rate": 5.344609521239819e-06, + "loss": 0.1659, + "step": 794700 + }, + { + "epoch": 4.47, + "learning_rate": 5.338988290977363e-06, + "loss": 0.1667, + "step": 794800 + }, + { + "epoch": 4.47, + "learning_rate": 5.3333670607149085e-06, + "loss": 0.1675, + "step": 794900 + }, + { + "epoch": 4.47, + "learning_rate": 5.327745830452453e-06, + "loss": 0.1623, + "step": 795000 + }, + { + "epoch": 4.47, + "learning_rate": 5.322124600189998e-06, + "loss": 0.1696, + "step": 795100 + }, + { + "epoch": 4.47, + "learning_rate": 5.316503369927542e-06, + "loss": 0.1569, + "step": 795200 + }, + { + "epoch": 4.47, + "learning_rate": 5.310882139665088e-06, + "loss": 0.1576, + "step": 795300 + }, + { + "epoch": 4.47, + "learning_rate": 5.305260909402632e-06, + "loss": 0.1661, + "step": 795400 + }, + { + "epoch": 4.47, + "learning_rate": 5.299639679140177e-06, + "loss": 0.1645, + "step": 795500 + }, + { + "epoch": 4.47, + "learning_rate": 5.2940184488777214e-06, + "loss": 0.1632, + "step": 795600 + }, + { + "epoch": 4.47, + "learning_rate": 5.288397218615267e-06, + "loss": 0.159, + "step": 795700 + }, + { + "epoch": 4.47, + "learning_rate": 5.282775988352811e-06, + "loss": 0.1667, + "step": 795800 + }, + { + "epoch": 4.47, + "learning_rate": 5.277154758090356e-06, + "loss": 0.1614, + "step": 795900 + }, + { + "epoch": 4.47, + "learning_rate": 5.271533527827901e-06, + "loss": 0.1652, + "step": 796000 + }, + { + "epoch": 4.48, + "learning_rate": 5.265912297565445e-06, + "loss": 0.1671, + "step": 796100 + }, + { + "epoch": 4.48, + "learning_rate": 5.26029106730299e-06, + "loss": 0.1664, + "step": 796200 + }, + { + "epoch": 4.48, + "learning_rate": 5.254669837040535e-06, + "loss": 0.1666, + "step": 796300 + }, + { + "epoch": 4.48, + "learning_rate": 5.24904860677808e-06, + "loss": 0.1638, + "step": 796400 + }, + { + "epoch": 4.48, + "learning_rate": 5.243427376515624e-06, + "loss": 0.1619, + "step": 796500 + }, + { + "epoch": 4.48, + "learning_rate": 5.237862358555794e-06, + "loss": 0.1621, + "step": 796600 + }, + { + "epoch": 4.48, + "learning_rate": 5.232241128293338e-06, + "loss": 0.1651, + "step": 796700 + }, + { + "epoch": 4.48, + "learning_rate": 5.226619898030883e-06, + "loss": 0.1656, + "step": 796800 + }, + { + "epoch": 4.48, + "learning_rate": 5.220998667768428e-06, + "loss": 0.1622, + "step": 796900 + }, + { + "epoch": 4.48, + "learning_rate": 5.215377437505973e-06, + "loss": 0.1653, + "step": 797000 + }, + { + "epoch": 4.48, + "learning_rate": 5.2097562072435175e-06, + "loss": 0.162, + "step": 797100 + }, + { + "epoch": 4.48, + "learning_rate": 5.204134976981062e-06, + "loss": 0.1641, + "step": 797200 + }, + { + "epoch": 4.48, + "learning_rate": 5.1985137467186075e-06, + "loss": 0.1655, + "step": 797300 + }, + { + "epoch": 4.48, + "learning_rate": 5.192892516456152e-06, + "loss": 0.1579, + "step": 797400 + }, + { + "epoch": 4.48, + "learning_rate": 5.187271286193697e-06, + "loss": 0.1696, + "step": 797500 + }, + { + "epoch": 4.48, + "learning_rate": 5.181650055931241e-06, + "loss": 0.166, + "step": 797600 + }, + { + "epoch": 4.48, + "learning_rate": 5.176028825668787e-06, + "loss": 0.1641, + "step": 797700 + }, + { + "epoch": 4.48, + "learning_rate": 5.170407595406331e-06, + "loss": 0.1664, + "step": 797800 + }, + { + "epoch": 4.49, + "learning_rate": 5.164786365143876e-06, + "loss": 0.1591, + "step": 797900 + }, + { + "epoch": 4.49, + "learning_rate": 5.15916513488142e-06, + "loss": 0.1617, + "step": 798000 + }, + { + "epoch": 4.49, + "learning_rate": 5.153543904618966e-06, + "loss": 0.163, + "step": 798100 + }, + { + "epoch": 4.49, + "learning_rate": 5.1479226743565096e-06, + "loss": 0.1675, + "step": 798200 + }, + { + "epoch": 4.49, + "learning_rate": 5.142301444094055e-06, + "loss": 0.1624, + "step": 798300 + }, + { + "epoch": 4.49, + "learning_rate": 5.136736426134224e-06, + "loss": 0.164, + "step": 798400 + }, + { + "epoch": 4.49, + "learning_rate": 5.131115195871769e-06, + "loss": 0.1625, + "step": 798500 + }, + { + "epoch": 4.49, + "learning_rate": 5.1254939656093135e-06, + "loss": 0.1667, + "step": 798600 + }, + { + "epoch": 4.49, + "learning_rate": 5.119872735346858e-06, + "loss": 0.1601, + "step": 798700 + }, + { + "epoch": 4.49, + "learning_rate": 5.1142515050844035e-06, + "loss": 0.1638, + "step": 798800 + }, + { + "epoch": 4.49, + "learning_rate": 5.108630274821947e-06, + "loss": 0.1673, + "step": 798900 + }, + { + "epoch": 4.49, + "learning_rate": 5.103009044559493e-06, + "loss": 0.1691, + "step": 799000 + }, + { + "epoch": 4.49, + "learning_rate": 5.097387814297037e-06, + "loss": 0.1666, + "step": 799100 + }, + { + "epoch": 4.49, + "learning_rate": 5.091766584034583e-06, + "loss": 0.1681, + "step": 799200 + }, + { + "epoch": 4.49, + "learning_rate": 5.086145353772126e-06, + "loss": 0.1619, + "step": 799300 + }, + { + "epoch": 4.49, + "learning_rate": 5.080524123509672e-06, + "loss": 0.167, + "step": 799400 + }, + { + "epoch": 4.49, + "learning_rate": 5.0749028932472164e-06, + "loss": 0.1662, + "step": 799500 + }, + { + "epoch": 4.49, + "learning_rate": 5.069281662984761e-06, + "loss": 0.1639, + "step": 799600 + }, + { + "epoch": 4.5, + "learning_rate": 5.063660432722306e-06, + "loss": 0.1618, + "step": 799700 + }, + { + "epoch": 4.5, + "learning_rate": 5.058039202459851e-06, + "loss": 0.1638, + "step": 799800 + }, + { + "epoch": 4.5, + "learning_rate": 5.052417972197395e-06, + "loss": 0.1691, + "step": 799900 + }, + { + "epoch": 4.5, + "learning_rate": 5.04679674193494e-06, + "loss": 0.1617, + "step": 800000 + }, + { + "epoch": 4.5, + "learning_rate": 5.041175511672485e-06, + "loss": 0.1621, + "step": 800100 + }, + { + "epoch": 4.5, + "learning_rate": 5.03555428141003e-06, + "loss": 0.1584, + "step": 800200 + }, + { + "epoch": 4.5, + "learning_rate": 5.029933051147574e-06, + "loss": 0.1636, + "step": 800300 + }, + { + "epoch": 4.5, + "learning_rate": 5.024311820885119e-06, + "loss": 0.1698, + "step": 800400 + }, + { + "epoch": 4.5, + "learning_rate": 5.018690590622664e-06, + "loss": 0.1688, + "step": 800500 + }, + { + "epoch": 4.5, + "learning_rate": 5.0130693603602085e-06, + "loss": 0.1644, + "step": 800600 + }, + { + "epoch": 4.5, + "learning_rate": 5.007448130097753e-06, + "loss": 0.1625, + "step": 800700 + }, + { + "epoch": 4.5, + "learning_rate": 5.0018268998352985e-06, + "loss": 0.1637, + "step": 800800 + }, + { + "epoch": 4.5, + "learning_rate": 4.996205669572842e-06, + "loss": 0.1625, + "step": 800900 + }, + { + "epoch": 4.5, + "learning_rate": 4.990584439310388e-06, + "loss": 0.1608, + "step": 801000 + }, + { + "epoch": 4.5, + "learning_rate": 4.984963209047932e-06, + "loss": 0.1651, + "step": 801100 + }, + { + "epoch": 4.5, + "learning_rate": 4.979341978785478e-06, + "loss": 0.1673, + "step": 801200 + }, + { + "epoch": 4.5, + "learning_rate": 4.9737207485230215e-06, + "loss": 0.1634, + "step": 801300 + }, + { + "epoch": 4.5, + "learning_rate": 4.968099518260567e-06, + "loss": 0.1586, + "step": 801400 + }, + { + "epoch": 4.51, + "learning_rate": 4.9624782879981115e-06, + "loss": 0.1633, + "step": 801500 + }, + { + "epoch": 4.51, + "learning_rate": 4.956857057735656e-06, + "loss": 0.1651, + "step": 801600 + }, + { + "epoch": 4.51, + "learning_rate": 4.951235827473201e-06, + "loss": 0.1634, + "step": 801700 + }, + { + "epoch": 4.51, + "learning_rate": 4.945614597210746e-06, + "loss": 0.1623, + "step": 801800 + }, + { + "epoch": 4.51, + "learning_rate": 4.93999336694829e-06, + "loss": 0.167, + "step": 801900 + }, + { + "epoch": 4.51, + "learning_rate": 4.934372136685835e-06, + "loss": 0.1595, + "step": 802000 + }, + { + "epoch": 4.51, + "learning_rate": 4.92875090642338e-06, + "loss": 0.1602, + "step": 802100 + }, + { + "epoch": 4.51, + "learning_rate": 4.923129676160925e-06, + "loss": 0.1685, + "step": 802200 + }, + { + "epoch": 4.51, + "learning_rate": 4.917508445898469e-06, + "loss": 0.1593, + "step": 802300 + }, + { + "epoch": 4.51, + "learning_rate": 4.911887215636014e-06, + "loss": 0.1625, + "step": 802400 + }, + { + "epoch": 4.51, + "learning_rate": 4.906265985373559e-06, + "loss": 0.1549, + "step": 802500 + }, + { + "epoch": 4.51, + "learning_rate": 4.900700967413728e-06, + "loss": 0.167, + "step": 802600 + }, + { + "epoch": 4.51, + "learning_rate": 4.895079737151273e-06, + "loss": 0.1687, + "step": 802700 + }, + { + "epoch": 4.51, + "learning_rate": 4.889458506888818e-06, + "loss": 0.1644, + "step": 802800 + }, + { + "epoch": 4.51, + "learning_rate": 4.883837276626363e-06, + "loss": 0.1595, + "step": 802900 + }, + { + "epoch": 4.51, + "learning_rate": 4.8782160463639075e-06, + "loss": 0.1634, + "step": 803000 + }, + { + "epoch": 4.51, + "learning_rate": 4.872594816101452e-06, + "loss": 0.162, + "step": 803100 + }, + { + "epoch": 4.51, + "learning_rate": 4.8669735858389975e-06, + "loss": 0.1638, + "step": 803200 + }, + { + "epoch": 4.52, + "learning_rate": 4.861352355576541e-06, + "loss": 0.157, + "step": 803300 + }, + { + "epoch": 4.52, + "learning_rate": 4.855731125314087e-06, + "loss": 0.1671, + "step": 803400 + }, + { + "epoch": 4.52, + "learning_rate": 4.850109895051631e-06, + "loss": 0.1607, + "step": 803500 + }, + { + "epoch": 4.52, + "learning_rate": 4.844488664789177e-06, + "loss": 0.1627, + "step": 803600 + }, + { + "epoch": 4.52, + "learning_rate": 4.8388674345267204e-06, + "loss": 0.1613, + "step": 803700 + }, + { + "epoch": 4.52, + "learning_rate": 4.833246204264266e-06, + "loss": 0.1667, + "step": 803800 + }, + { + "epoch": 4.52, + "learning_rate": 4.8276249740018104e-06, + "loss": 0.167, + "step": 803900 + }, + { + "epoch": 4.52, + "learning_rate": 4.822003743739355e-06, + "loss": 0.1675, + "step": 804000 + }, + { + "epoch": 4.52, + "learning_rate": 4.8163825134769e-06, + "loss": 0.1632, + "step": 804100 + }, + { + "epoch": 4.52, + "learning_rate": 4.810761283214445e-06, + "loss": 0.1661, + "step": 804200 + }, + { + "epoch": 4.52, + "learning_rate": 4.805140052951989e-06, + "loss": 0.1634, + "step": 804300 + }, + { + "epoch": 4.52, + "learning_rate": 4.799518822689534e-06, + "loss": 0.1614, + "step": 804400 + }, + { + "epoch": 4.52, + "learning_rate": 4.793897592427079e-06, + "loss": 0.1619, + "step": 804500 + }, + { + "epoch": 4.52, + "learning_rate": 4.788276362164624e-06, + "loss": 0.1643, + "step": 804600 + }, + { + "epoch": 4.52, + "learning_rate": 4.782711344204793e-06, + "loss": 0.1659, + "step": 804700 + }, + { + "epoch": 4.52, + "learning_rate": 4.777090113942337e-06, + "loss": 0.1639, + "step": 804800 + }, + { + "epoch": 4.52, + "learning_rate": 4.771468883679883e-06, + "loss": 0.158, + "step": 804900 + }, + { + "epoch": 4.53, + "learning_rate": 4.765847653417427e-06, + "loss": 0.162, + "step": 805000 + }, + { + "epoch": 4.53, + "learning_rate": 4.760226423154972e-06, + "loss": 0.1634, + "step": 805100 + }, + { + "epoch": 4.53, + "learning_rate": 4.7546051928925165e-06, + "loss": 0.1625, + "step": 805200 + }, + { + "epoch": 4.53, + "learning_rate": 4.748983962630062e-06, + "loss": 0.163, + "step": 805300 + }, + { + "epoch": 4.53, + "learning_rate": 4.743362732367606e-06, + "loss": 0.161, + "step": 805400 + }, + { + "epoch": 4.53, + "learning_rate": 4.737741502105151e-06, + "loss": 0.1628, + "step": 805500 + }, + { + "epoch": 4.53, + "learning_rate": 4.732120271842696e-06, + "loss": 0.1657, + "step": 805600 + }, + { + "epoch": 4.53, + "learning_rate": 4.72649904158024e-06, + "loss": 0.1613, + "step": 805700 + }, + { + "epoch": 4.53, + "learning_rate": 4.720877811317785e-06, + "loss": 0.161, + "step": 805800 + }, + { + "epoch": 4.53, + "learning_rate": 4.71525658105533e-06, + "loss": 0.1698, + "step": 805900 + }, + { + "epoch": 4.53, + "learning_rate": 4.709635350792875e-06, + "loss": 0.164, + "step": 806000 + }, + { + "epoch": 4.53, + "learning_rate": 4.704014120530419e-06, + "loss": 0.1714, + "step": 806100 + }, + { + "epoch": 4.53, + "learning_rate": 4.698392890267964e-06, + "loss": 0.165, + "step": 806200 + }, + { + "epoch": 4.53, + "learning_rate": 4.692771660005509e-06, + "loss": 0.1654, + "step": 806300 + }, + { + "epoch": 4.53, + "learning_rate": 4.687150429743053e-06, + "loss": 0.1587, + "step": 806400 + }, + { + "epoch": 4.53, + "learning_rate": 4.681529199480599e-06, + "loss": 0.1617, + "step": 806500 + }, + { + "epoch": 4.53, + "learning_rate": 4.675907969218143e-06, + "loss": 0.1578, + "step": 806600 + }, + { + "epoch": 4.53, + "learning_rate": 4.670286738955689e-06, + "loss": 0.1613, + "step": 806700 + }, + { + "epoch": 4.54, + "learning_rate": 4.664665508693232e-06, + "loss": 0.1582, + "step": 806800 + }, + { + "epoch": 4.54, + "learning_rate": 4.659044278430778e-06, + "loss": 0.1645, + "step": 806900 + }, + { + "epoch": 4.54, + "learning_rate": 4.653423048168322e-06, + "loss": 0.1639, + "step": 807000 + }, + { + "epoch": 4.54, + "learning_rate": 4.647801817905867e-06, + "loss": 0.1646, + "step": 807100 + }, + { + "epoch": 4.54, + "learning_rate": 4.6421805876434115e-06, + "loss": 0.1548, + "step": 807200 + }, + { + "epoch": 4.54, + "learning_rate": 4.636615569683582e-06, + "loss": 0.1639, + "step": 807300 + }, + { + "epoch": 4.54, + "learning_rate": 4.630994339421126e-06, + "loss": 0.1666, + "step": 807400 + }, + { + "epoch": 4.54, + "learning_rate": 4.625373109158671e-06, + "loss": 0.1623, + "step": 807500 + }, + { + "epoch": 4.54, + "learning_rate": 4.6197518788962154e-06, + "loss": 0.1647, + "step": 807600 + }, + { + "epoch": 4.54, + "learning_rate": 4.61413064863376e-06, + "loss": 0.1671, + "step": 807700 + }, + { + "epoch": 4.54, + "learning_rate": 4.608509418371305e-06, + "loss": 0.1617, + "step": 807800 + }, + { + "epoch": 4.54, + "learning_rate": 4.60288818810885e-06, + "loss": 0.1588, + "step": 807900 + }, + { + "epoch": 4.54, + "learning_rate": 4.597266957846395e-06, + "loss": 0.1636, + "step": 808000 + }, + { + "epoch": 4.54, + "learning_rate": 4.591645727583939e-06, + "loss": 0.1644, + "step": 808100 + }, + { + "epoch": 4.54, + "learning_rate": 4.586024497321484e-06, + "loss": 0.1652, + "step": 808200 + }, + { + "epoch": 4.54, + "learning_rate": 4.580403267059029e-06, + "loss": 0.1605, + "step": 808300 + }, + { + "epoch": 4.54, + "learning_rate": 4.574782036796574e-06, + "loss": 0.1692, + "step": 808400 + }, + { + "epoch": 4.54, + "learning_rate": 4.569160806534118e-06, + "loss": 0.1618, + "step": 808500 + }, + { + "epoch": 4.55, + "learning_rate": 4.563539576271663e-06, + "loss": 0.1651, + "step": 808600 + }, + { + "epoch": 4.55, + "learning_rate": 4.557918346009208e-06, + "loss": 0.165, + "step": 808700 + }, + { + "epoch": 4.55, + "learning_rate": 4.552297115746752e-06, + "loss": 0.1595, + "step": 808800 + }, + { + "epoch": 4.55, + "learning_rate": 4.5466758854842975e-06, + "loss": 0.1565, + "step": 808900 + }, + { + "epoch": 4.55, + "learning_rate": 4.541054655221842e-06, + "loss": 0.17, + "step": 809000 + }, + { + "epoch": 4.55, + "learning_rate": 4.5354334249593876e-06, + "loss": 0.1589, + "step": 809100 + }, + { + "epoch": 4.55, + "learning_rate": 4.529812194696931e-06, + "loss": 0.1727, + "step": 809200 + }, + { + "epoch": 4.55, + "learning_rate": 4.524190964434477e-06, + "loss": 0.1642, + "step": 809300 + }, + { + "epoch": 4.55, + "learning_rate": 4.518569734172021e-06, + "loss": 0.1585, + "step": 809400 + }, + { + "epoch": 4.55, + "learning_rate": 4.512948503909566e-06, + "loss": 0.1697, + "step": 809500 + }, + { + "epoch": 4.55, + "learning_rate": 4.5073272736471105e-06, + "loss": 0.165, + "step": 809600 + }, + { + "epoch": 4.55, + "learning_rate": 4.501706043384656e-06, + "loss": 0.1607, + "step": 809700 + }, + { + "epoch": 4.55, + "learning_rate": 4.4960848131222e-06, + "loss": 0.1608, + "step": 809800 + }, + { + "epoch": 4.55, + "learning_rate": 4.490519795162369e-06, + "loss": 0.163, + "step": 809900 + }, + { + "epoch": 4.55, + "learning_rate": 4.484898564899914e-06, + "loss": 0.1658, + "step": 810000 + }, + { + "epoch": 4.55, + "learning_rate": 4.479277334637459e-06, + "loss": 0.1649, + "step": 810100 + }, + { + "epoch": 4.55, + "learning_rate": 4.4736561043750036e-06, + "loss": 0.1673, + "step": 810200 + }, + { + "epoch": 4.55, + "learning_rate": 4.468034874112548e-06, + "loss": 0.1615, + "step": 810300 + }, + { + "epoch": 4.56, + "learning_rate": 4.4624136438500936e-06, + "loss": 0.1661, + "step": 810400 + }, + { + "epoch": 4.56, + "learning_rate": 4.456792413587637e-06, + "loss": 0.1618, + "step": 810500 + }, + { + "epoch": 4.56, + "learning_rate": 4.451171183325183e-06, + "loss": 0.1651, + "step": 810600 + }, + { + "epoch": 4.56, + "learning_rate": 4.445549953062727e-06, + "loss": 0.1623, + "step": 810700 + }, + { + "epoch": 4.56, + "learning_rate": 4.439928722800273e-06, + "loss": 0.1662, + "step": 810800 + }, + { + "epoch": 4.56, + "learning_rate": 4.4343074925378165e-06, + "loss": 0.1622, + "step": 810900 + }, + { + "epoch": 4.56, + "learning_rate": 4.428686262275362e-06, + "loss": 0.1678, + "step": 811000 + }, + { + "epoch": 4.56, + "learning_rate": 4.4230650320129065e-06, + "loss": 0.1614, + "step": 811100 + }, + { + "epoch": 4.56, + "learning_rate": 4.417443801750451e-06, + "loss": 0.1663, + "step": 811200 + }, + { + "epoch": 4.56, + "learning_rate": 4.411822571487996e-06, + "loss": 0.1648, + "step": 811300 + }, + { + "epoch": 4.56, + "learning_rate": 4.406201341225541e-06, + "loss": 0.1621, + "step": 811400 + }, + { + "epoch": 4.56, + "learning_rate": 4.400580110963086e-06, + "loss": 0.1619, + "step": 811500 + }, + { + "epoch": 4.56, + "learning_rate": 4.39495888070063e-06, + "loss": 0.1639, + "step": 811600 + }, + { + "epoch": 4.56, + "learning_rate": 4.389337650438175e-06, + "loss": 0.1639, + "step": 811700 + }, + { + "epoch": 4.56, + "learning_rate": 4.38371642017572e-06, + "loss": 0.1697, + "step": 811800 + }, + { + "epoch": 4.56, + "learning_rate": 4.378151402215889e-06, + "loss": 0.1682, + "step": 811900 + }, + { + "epoch": 4.56, + "learning_rate": 4.372530171953434e-06, + "loss": 0.1647, + "step": 812000 + }, + { + "epoch": 4.57, + "learning_rate": 4.366908941690979e-06, + "loss": 0.1649, + "step": 812100 + }, + { + "epoch": 4.57, + "learning_rate": 4.361287711428523e-06, + "loss": 0.1646, + "step": 812200 + }, + { + "epoch": 4.57, + "learning_rate": 4.355666481166068e-06, + "loss": 0.1666, + "step": 812300 + }, + { + "epoch": 4.57, + "learning_rate": 4.350045250903613e-06, + "loss": 0.1629, + "step": 812400 + }, + { + "epoch": 4.57, + "learning_rate": 4.344424020641158e-06, + "loss": 0.1648, + "step": 812500 + }, + { + "epoch": 4.57, + "learning_rate": 4.3388027903787025e-06, + "loss": 0.1661, + "step": 812600 + }, + { + "epoch": 4.57, + "learning_rate": 4.333181560116247e-06, + "loss": 0.1649, + "step": 812700 + }, + { + "epoch": 4.57, + "learning_rate": 4.3275603298537925e-06, + "loss": 0.1636, + "step": 812800 + }, + { + "epoch": 4.57, + "learning_rate": 4.321939099591337e-06, + "loss": 0.1625, + "step": 812900 + }, + { + "epoch": 4.57, + "learning_rate": 4.316317869328882e-06, + "loss": 0.1648, + "step": 813000 + }, + { + "epoch": 4.57, + "learning_rate": 4.310696639066426e-06, + "loss": 0.1621, + "step": 813100 + }, + { + "epoch": 4.57, + "learning_rate": 4.305075408803972e-06, + "loss": 0.1706, + "step": 813200 + }, + { + "epoch": 4.57, + "learning_rate": 4.2994541785415155e-06, + "loss": 0.1609, + "step": 813300 + }, + { + "epoch": 4.57, + "learning_rate": 4.293832948279061e-06, + "loss": 0.1593, + "step": 813400 + }, + { + "epoch": 4.57, + "learning_rate": 4.2882117180166055e-06, + "loss": 0.166, + "step": 813500 + }, + { + "epoch": 4.57, + "learning_rate": 4.28259048775415e-06, + "loss": 0.1667, + "step": 813600 + }, + { + "epoch": 4.57, + "learning_rate": 4.276969257491695e-06, + "loss": 0.163, + "step": 813700 + }, + { + "epoch": 4.57, + "learning_rate": 4.27134802722924e-06, + "loss": 0.1617, + "step": 813800 + }, + { + "epoch": 4.58, + "learning_rate": 4.265783009269409e-06, + "loss": 0.171, + "step": 813900 + }, + { + "epoch": 4.58, + "learning_rate": 4.260161779006953e-06, + "loss": 0.1638, + "step": 814000 + }, + { + "epoch": 4.58, + "learning_rate": 4.2545405487444986e-06, + "loss": 0.1574, + "step": 814100 + }, + { + "epoch": 4.58, + "learning_rate": 4.248919318482043e-06, + "loss": 0.1628, + "step": 814200 + }, + { + "epoch": 4.58, + "learning_rate": 4.243298088219588e-06, + "loss": 0.1619, + "step": 814300 + }, + { + "epoch": 4.58, + "learning_rate": 4.237676857957132e-06, + "loss": 0.1649, + "step": 814400 + }, + { + "epoch": 4.58, + "learning_rate": 4.232055627694678e-06, + "loss": 0.1675, + "step": 814500 + }, + { + "epoch": 4.58, + "learning_rate": 4.226434397432222e-06, + "loss": 0.1652, + "step": 814600 + }, + { + "epoch": 4.58, + "learning_rate": 4.220869379472392e-06, + "loss": 0.1667, + "step": 814700 + }, + { + "epoch": 4.58, + "learning_rate": 4.215248149209936e-06, + "loss": 0.1686, + "step": 814800 + }, + { + "epoch": 4.58, + "learning_rate": 4.209626918947481e-06, + "loss": 0.1608, + "step": 814900 + }, + { + "epoch": 4.58, + "learning_rate": 4.204005688685026e-06, + "loss": 0.161, + "step": 815000 + }, + { + "epoch": 4.58, + "learning_rate": 4.19838445842257e-06, + "loss": 0.1586, + "step": 815100 + }, + { + "epoch": 4.58, + "learning_rate": 4.192763228160115e-06, + "loss": 0.1691, + "step": 815200 + }, + { + "epoch": 4.58, + "learning_rate": 4.18714199789766e-06, + "loss": 0.1628, + "step": 815300 + }, + { + "epoch": 4.58, + "learning_rate": 4.1815207676352046e-06, + "loss": 0.1638, + "step": 815400 + }, + { + "epoch": 4.58, + "learning_rate": 4.175899537372749e-06, + "loss": 0.1599, + "step": 815500 + }, + { + "epoch": 4.58, + "learning_rate": 4.170278307110295e-06, + "loss": 0.1681, + "step": 815600 + }, + { + "epoch": 4.59, + "learning_rate": 4.164657076847839e-06, + "loss": 0.1644, + "step": 815700 + }, + { + "epoch": 4.59, + "learning_rate": 4.159035846585384e-06, + "loss": 0.1653, + "step": 815800 + }, + { + "epoch": 4.59, + "learning_rate": 4.153414616322928e-06, + "loss": 0.1646, + "step": 815900 + }, + { + "epoch": 4.59, + "learning_rate": 4.147793386060474e-06, + "loss": 0.1595, + "step": 816000 + }, + { + "epoch": 4.59, + "learning_rate": 4.142172155798018e-06, + "loss": 0.1598, + "step": 816100 + }, + { + "epoch": 4.59, + "learning_rate": 4.136550925535563e-06, + "loss": 0.1608, + "step": 816200 + }, + { + "epoch": 4.59, + "learning_rate": 4.1309296952731075e-06, + "loss": 0.1626, + "step": 816300 + }, + { + "epoch": 4.59, + "learning_rate": 4.125308465010652e-06, + "loss": 0.1637, + "step": 816400 + }, + { + "epoch": 4.59, + "learning_rate": 4.1196872347481975e-06, + "loss": 0.1638, + "step": 816500 + }, + { + "epoch": 4.59, + "learning_rate": 4.114066004485742e-06, + "loss": 0.1648, + "step": 816600 + }, + { + "epoch": 4.59, + "learning_rate": 4.108444774223287e-06, + "loss": 0.1649, + "step": 816700 + }, + { + "epoch": 4.59, + "learning_rate": 4.102823543960831e-06, + "loss": 0.1701, + "step": 816800 + }, + { + "epoch": 4.59, + "learning_rate": 4.097202313698377e-06, + "loss": 0.1664, + "step": 816900 + }, + { + "epoch": 4.59, + "learning_rate": 4.091581083435921e-06, + "loss": 0.1629, + "step": 817000 + }, + { + "epoch": 4.59, + "learning_rate": 4.085959853173466e-06, + "loss": 0.1601, + "step": 817100 + }, + { + "epoch": 4.59, + "learning_rate": 4.0803386229110105e-06, + "loss": 0.1594, + "step": 817200 + }, + { + "epoch": 4.59, + "learning_rate": 4.074717392648555e-06, + "loss": 0.1633, + "step": 817300 + }, + { + "epoch": 4.59, + "learning_rate": 4.0690961623861e-06, + "loss": 0.166, + "step": 817400 + }, + { + "epoch": 4.6, + "learning_rate": 4.063474932123645e-06, + "loss": 0.1602, + "step": 817500 + }, + { + "epoch": 4.6, + "learning_rate": 4.05785370186119e-06, + "loss": 0.1619, + "step": 817600 + }, + { + "epoch": 4.6, + "learning_rate": 4.052232471598734e-06, + "loss": 0.1658, + "step": 817700 + }, + { + "epoch": 4.6, + "learning_rate": 4.046611241336279e-06, + "loss": 0.163, + "step": 817800 + }, + { + "epoch": 4.6, + "learning_rate": 4.040990011073824e-06, + "loss": 0.1636, + "step": 817900 + }, + { + "epoch": 4.6, + "learning_rate": 4.035368780811369e-06, + "loss": 0.161, + "step": 818000 + }, + { + "epoch": 4.6, + "learning_rate": 4.029747550548913e-06, + "loss": 0.1665, + "step": 818100 + }, + { + "epoch": 4.6, + "learning_rate": 4.024126320286458e-06, + "loss": 0.1657, + "step": 818200 + }, + { + "epoch": 4.6, + "learning_rate": 4.018561302326627e-06, + "loss": 0.1652, + "step": 818300 + }, + { + "epoch": 4.6, + "learning_rate": 4.012940072064173e-06, + "loss": 0.1644, + "step": 818400 + }, + { + "epoch": 4.6, + "learning_rate": 4.0073188418017165e-06, + "loss": 0.1618, + "step": 818500 + }, + { + "epoch": 4.6, + "learning_rate": 4.001697611539262e-06, + "loss": 0.1627, + "step": 818600 + }, + { + "epoch": 4.6, + "learning_rate": 3.9960763812768065e-06, + "loss": 0.1632, + "step": 818700 + }, + { + "epoch": 4.6, + "learning_rate": 3.990455151014351e-06, + "loss": 0.165, + "step": 818800 + }, + { + "epoch": 4.6, + "learning_rate": 3.984833920751896e-06, + "loss": 0.1644, + "step": 818900 + }, + { + "epoch": 4.6, + "learning_rate": 3.979212690489441e-06, + "loss": 0.1658, + "step": 819000 + }, + { + "epoch": 4.6, + "learning_rate": 3.973591460226986e-06, + "loss": 0.1677, + "step": 819100 + }, + { + "epoch": 4.6, + "learning_rate": 3.96797022996453e-06, + "loss": 0.1658, + "step": 819200 + }, + { + "epoch": 4.61, + "learning_rate": 3.962348999702075e-06, + "loss": 0.1599, + "step": 819300 + }, + { + "epoch": 4.61, + "learning_rate": 3.95672776943962e-06, + "loss": 0.1632, + "step": 819400 + }, + { + "epoch": 4.61, + "learning_rate": 3.951106539177164e-06, + "loss": 0.1592, + "step": 819500 + }, + { + "epoch": 4.61, + "learning_rate": 3.9454853089147094e-06, + "loss": 0.1626, + "step": 819600 + }, + { + "epoch": 4.61, + "learning_rate": 3.939864078652254e-06, + "loss": 0.1544, + "step": 819700 + }, + { + "epoch": 4.61, + "learning_rate": 3.934242848389799e-06, + "loss": 0.1602, + "step": 819800 + }, + { + "epoch": 4.61, + "learning_rate": 3.928621618127343e-06, + "loss": 0.1677, + "step": 819900 + }, + { + "epoch": 4.61, + "learning_rate": 3.923000387864889e-06, + "loss": 0.1617, + "step": 820000 + }, + { + "epoch": 4.61, + "learning_rate": 3.917379157602433e-06, + "loss": 0.1626, + "step": 820100 + }, + { + "epoch": 4.61, + "learning_rate": 3.911757927339978e-06, + "loss": 0.1586, + "step": 820200 + }, + { + "epoch": 4.61, + "learning_rate": 3.906136697077522e-06, + "loss": 0.1593, + "step": 820300 + }, + { + "epoch": 4.61, + "learning_rate": 3.900515466815068e-06, + "loss": 0.1612, + "step": 820400 + }, + { + "epoch": 4.61, + "learning_rate": 3.8948942365526115e-06, + "loss": 0.1669, + "step": 820500 + }, + { + "epoch": 4.61, + "learning_rate": 3.889273006290157e-06, + "loss": 0.1606, + "step": 820600 + }, + { + "epoch": 4.61, + "learning_rate": 3.8836517760277015e-06, + "loss": 0.1644, + "step": 820700 + }, + { + "epoch": 4.61, + "learning_rate": 3.878030545765246e-06, + "loss": 0.17, + "step": 820800 + }, + { + "epoch": 4.61, + "learning_rate": 3.872409315502791e-06, + "loss": 0.1609, + "step": 820900 + }, + { + "epoch": 4.62, + "learning_rate": 3.866788085240336e-06, + "loss": 0.1602, + "step": 821000 + }, + { + "epoch": 4.62, + "learning_rate": 3.861166854977881e-06, + "loss": 0.1602, + "step": 821100 + }, + { + "epoch": 4.62, + "learning_rate": 3.855545624715425e-06, + "loss": 0.1688, + "step": 821200 + }, + { + "epoch": 4.62, + "learning_rate": 3.84992439445297e-06, + "loss": 0.164, + "step": 821300 + }, + { + "epoch": 4.62, + "learning_rate": 3.844303164190515e-06, + "loss": 0.1656, + "step": 821400 + }, + { + "epoch": 4.62, + "learning_rate": 3.838681933928059e-06, + "loss": 0.1628, + "step": 821500 + }, + { + "epoch": 4.62, + "learning_rate": 3.8330607036656045e-06, + "loss": 0.1606, + "step": 821600 + }, + { + "epoch": 4.62, + "learning_rate": 3.827439473403149e-06, + "loss": 0.1666, + "step": 821700 + }, + { + "epoch": 4.62, + "learning_rate": 3.821818243140694e-06, + "loss": 0.1649, + "step": 821800 + }, + { + "epoch": 4.62, + "learning_rate": 3.816197012878238e-06, + "loss": 0.1669, + "step": 821900 + }, + { + "epoch": 4.62, + "learning_rate": 3.8105757826157837e-06, + "loss": 0.164, + "step": 822000 + }, + { + "epoch": 4.62, + "learning_rate": 3.8049545523533287e-06, + "loss": 0.1616, + "step": 822100 + }, + { + "epoch": 4.62, + "learning_rate": 3.799333322090873e-06, + "loss": 0.1644, + "step": 822200 + }, + { + "epoch": 4.62, + "learning_rate": 3.793712091828418e-06, + "loss": 0.1667, + "step": 822300 + }, + { + "epoch": 4.62, + "learning_rate": 3.788090861565963e-06, + "loss": 0.1602, + "step": 822400 + }, + { + "epoch": 4.62, + "learning_rate": 3.782469631303507e-06, + "loss": 0.1601, + "step": 822500 + }, + { + "epoch": 4.62, + "learning_rate": 3.776848401041052e-06, + "loss": 0.1665, + "step": 822600 + }, + { + "epoch": 4.62, + "learning_rate": 3.771227170778597e-06, + "loss": 0.1657, + "step": 822700 + }, + { + "epoch": 4.63, + "learning_rate": 3.765605940516142e-06, + "loss": 0.163, + "step": 822800 + }, + { + "epoch": 4.63, + "learning_rate": 3.759984710253686e-06, + "loss": 0.1631, + "step": 822900 + }, + { + "epoch": 4.63, + "learning_rate": 3.7544196922938555e-06, + "loss": 0.1595, + "step": 823000 + }, + { + "epoch": 4.63, + "learning_rate": 3.7487984620314005e-06, + "loss": 0.1663, + "step": 823100 + }, + { + "epoch": 4.63, + "learning_rate": 3.7431772317689447e-06, + "loss": 0.163, + "step": 823200 + }, + { + "epoch": 4.63, + "learning_rate": 3.7375560015064897e-06, + "loss": 0.1697, + "step": 823300 + }, + { + "epoch": 4.63, + "learning_rate": 3.7319347712440347e-06, + "loss": 0.164, + "step": 823400 + }, + { + "epoch": 4.63, + "learning_rate": 3.7263135409815797e-06, + "loss": 0.1599, + "step": 823500 + }, + { + "epoch": 4.63, + "learning_rate": 3.720692310719124e-06, + "loss": 0.1631, + "step": 823600 + }, + { + "epoch": 4.63, + "learning_rate": 3.715071080456669e-06, + "loss": 0.1648, + "step": 823700 + }, + { + "epoch": 4.63, + "learning_rate": 3.709449850194214e-06, + "loss": 0.1658, + "step": 823800 + }, + { + "epoch": 4.63, + "learning_rate": 3.703828619931758e-06, + "loss": 0.1618, + "step": 823900 + }, + { + "epoch": 4.63, + "learning_rate": 3.698207389669303e-06, + "loss": 0.1569, + "step": 824000 + }, + { + "epoch": 4.63, + "learning_rate": 3.692586159406848e-06, + "loss": 0.1599, + "step": 824100 + }, + { + "epoch": 4.63, + "learning_rate": 3.686964929144393e-06, + "loss": 0.1598, + "step": 824200 + }, + { + "epoch": 4.63, + "learning_rate": 3.681343698881937e-06, + "loss": 0.1655, + "step": 824300 + }, + { + "epoch": 4.63, + "learning_rate": 3.675722468619482e-06, + "loss": 0.1579, + "step": 824400 + }, + { + "epoch": 4.63, + "learning_rate": 3.670101238357027e-06, + "loss": 0.1606, + "step": 824500 + }, + { + "epoch": 4.64, + "learning_rate": 3.6644800080945714e-06, + "loss": 0.1637, + "step": 824600 + }, + { + "epoch": 4.64, + "learning_rate": 3.6588587778321164e-06, + "loss": 0.1622, + "step": 824700 + }, + { + "epoch": 4.64, + "learning_rate": 3.6532375475696614e-06, + "loss": 0.1588, + "step": 824800 + }, + { + "epoch": 4.64, + "learning_rate": 3.6476163173072055e-06, + "loss": 0.1641, + "step": 824900 + }, + { + "epoch": 4.64, + "learning_rate": 3.6419950870447505e-06, + "loss": 0.1639, + "step": 825000 + }, + { + "epoch": 4.64, + "learning_rate": 3.6363738567822956e-06, + "loss": 0.1653, + "step": 825100 + }, + { + "epoch": 4.64, + "learning_rate": 3.6307526265198406e-06, + "loss": 0.1646, + "step": 825200 + }, + { + "epoch": 4.64, + "learning_rate": 3.6251313962573847e-06, + "loss": 0.1602, + "step": 825300 + }, + { + "epoch": 4.64, + "learning_rate": 3.6195663782975545e-06, + "loss": 0.1627, + "step": 825400 + }, + { + "epoch": 4.64, + "learning_rate": 3.6139451480350995e-06, + "loss": 0.1607, + "step": 825500 + }, + { + "epoch": 4.64, + "learning_rate": 3.6083239177726436e-06, + "loss": 0.1693, + "step": 825600 + }, + { + "epoch": 4.64, + "learning_rate": 3.6027026875101886e-06, + "loss": 0.1623, + "step": 825700 + }, + { + "epoch": 4.64, + "learning_rate": 3.5970814572477336e-06, + "loss": 0.1633, + "step": 825800 + }, + { + "epoch": 4.64, + "learning_rate": 3.5914602269852786e-06, + "loss": 0.1643, + "step": 825900 + }, + { + "epoch": 4.64, + "learning_rate": 3.585838996722823e-06, + "loss": 0.1623, + "step": 826000 + }, + { + "epoch": 4.64, + "learning_rate": 3.580217766460368e-06, + "loss": 0.1641, + "step": 826100 + }, + { + "epoch": 4.64, + "learning_rate": 3.574596536197913e-06, + "loss": 0.1639, + "step": 826200 + }, + { + "epoch": 4.64, + "learning_rate": 3.568975305935457e-06, + "loss": 0.1665, + "step": 826300 + }, + { + "epoch": 4.65, + "learning_rate": 3.563354075673002e-06, + "loss": 0.1662, + "step": 826400 + }, + { + "epoch": 4.65, + "learning_rate": 3.557732845410547e-06, + "loss": 0.159, + "step": 826500 + }, + { + "epoch": 4.65, + "learning_rate": 3.552111615148092e-06, + "loss": 0.1675, + "step": 826600 + }, + { + "epoch": 4.65, + "learning_rate": 3.546490384885636e-06, + "loss": 0.1624, + "step": 826700 + }, + { + "epoch": 4.65, + "learning_rate": 3.540869154623181e-06, + "loss": 0.1617, + "step": 826800 + }, + { + "epoch": 4.65, + "learning_rate": 3.535247924360726e-06, + "loss": 0.1676, + "step": 826900 + }, + { + "epoch": 4.65, + "learning_rate": 3.5296266940982703e-06, + "loss": 0.1601, + "step": 827000 + }, + { + "epoch": 4.65, + "learning_rate": 3.5240054638358153e-06, + "loss": 0.1636, + "step": 827100 + }, + { + "epoch": 4.65, + "learning_rate": 3.5183842335733603e-06, + "loss": 0.1572, + "step": 827200 + }, + { + "epoch": 4.65, + "learning_rate": 3.5127630033109045e-06, + "loss": 0.1557, + "step": 827300 + }, + { + "epoch": 4.65, + "learning_rate": 3.5071417730484495e-06, + "loss": 0.1588, + "step": 827400 + }, + { + "epoch": 4.65, + "learning_rate": 3.5015205427859945e-06, + "loss": 0.164, + "step": 827500 + }, + { + "epoch": 4.65, + "learning_rate": 3.4958993125235395e-06, + "loss": 0.1687, + "step": 827600 + }, + { + "epoch": 4.65, + "learning_rate": 3.4902780822610837e-06, + "loss": 0.162, + "step": 827700 + }, + { + "epoch": 4.65, + "learning_rate": 3.4846568519986287e-06, + "loss": 0.1628, + "step": 827800 + }, + { + "epoch": 4.65, + "learning_rate": 3.4790356217361737e-06, + "loss": 0.162, + "step": 827900 + }, + { + "epoch": 4.65, + "learning_rate": 3.473414391473718e-06, + "loss": 0.1698, + "step": 828000 + }, + { + "epoch": 4.65, + "learning_rate": 3.467793161211263e-06, + "loss": 0.1644, + "step": 828100 + }, + { + "epoch": 4.66, + "learning_rate": 3.462171930948808e-06, + "loss": 0.1601, + "step": 828200 + }, + { + "epoch": 4.66, + "learning_rate": 3.456550700686352e-06, + "loss": 0.1611, + "step": 828300 + }, + { + "epoch": 4.66, + "learning_rate": 3.450929470423897e-06, + "loss": 0.1581, + "step": 828400 + }, + { + "epoch": 4.66, + "learning_rate": 3.445308240161442e-06, + "loss": 0.163, + "step": 828500 + }, + { + "epoch": 4.66, + "learning_rate": 3.439687009898987e-06, + "loss": 0.1619, + "step": 828600 + }, + { + "epoch": 4.66, + "learning_rate": 3.4341219919391555e-06, + "loss": 0.1649, + "step": 828700 + }, + { + "epoch": 4.66, + "learning_rate": 3.4285007616767005e-06, + "loss": 0.1625, + "step": 828800 + }, + { + "epoch": 4.66, + "learning_rate": 3.4228795314142455e-06, + "loss": 0.1667, + "step": 828900 + }, + { + "epoch": 4.66, + "learning_rate": 3.4172583011517905e-06, + "loss": 0.159, + "step": 829000 + }, + { + "epoch": 4.66, + "learning_rate": 3.4116370708893347e-06, + "loss": 0.1634, + "step": 829100 + }, + { + "epoch": 4.66, + "learning_rate": 3.4060158406268797e-06, + "loss": 0.1672, + "step": 829200 + }, + { + "epoch": 4.66, + "learning_rate": 3.4003946103644247e-06, + "loss": 0.1628, + "step": 829300 + }, + { + "epoch": 4.66, + "learning_rate": 3.394773380101969e-06, + "loss": 0.1633, + "step": 829400 + }, + { + "epoch": 4.66, + "learning_rate": 3.389152149839514e-06, + "loss": 0.1635, + "step": 829500 + }, + { + "epoch": 4.66, + "learning_rate": 3.383530919577059e-06, + "loss": 0.1586, + "step": 829600 + }, + { + "epoch": 4.66, + "learning_rate": 3.377909689314603e-06, + "loss": 0.1652, + "step": 829700 + }, + { + "epoch": 4.66, + "learning_rate": 3.372288459052148e-06, + "loss": 0.1582, + "step": 829800 + }, + { + "epoch": 4.67, + "learning_rate": 3.366667228789693e-06, + "loss": 0.1616, + "step": 829900 + }, + { + "epoch": 4.67, + "learning_rate": 3.361045998527238e-06, + "loss": 0.1651, + "step": 830000 + }, + { + "epoch": 4.67, + "learning_rate": 3.3554247682647822e-06, + "loss": 0.1614, + "step": 830100 + }, + { + "epoch": 4.67, + "learning_rate": 3.3498035380023272e-06, + "loss": 0.1621, + "step": 830200 + }, + { + "epoch": 4.67, + "learning_rate": 3.3441823077398722e-06, + "loss": 0.1631, + "step": 830300 + }, + { + "epoch": 4.67, + "learning_rate": 3.3385610774774164e-06, + "loss": 0.1702, + "step": 830400 + }, + { + "epoch": 4.67, + "learning_rate": 3.332996059517586e-06, + "loss": 0.1626, + "step": 830500 + }, + { + "epoch": 4.67, + "learning_rate": 3.327374829255131e-06, + "loss": 0.1655, + "step": 830600 + }, + { + "epoch": 4.67, + "learning_rate": 3.321753598992676e-06, + "loss": 0.1637, + "step": 830700 + }, + { + "epoch": 4.67, + "learning_rate": 3.3161323687302203e-06, + "loss": 0.1624, + "step": 830800 + }, + { + "epoch": 4.67, + "learning_rate": 3.3105111384677653e-06, + "loss": 0.1609, + "step": 830900 + }, + { + "epoch": 4.67, + "learning_rate": 3.3048899082053103e-06, + "loss": 0.1584, + "step": 831000 + }, + { + "epoch": 4.67, + "learning_rate": 3.2992686779428545e-06, + "loss": 0.1618, + "step": 831100 + }, + { + "epoch": 4.67, + "learning_rate": 3.2936474476803995e-06, + "loss": 0.1595, + "step": 831200 + }, + { + "epoch": 4.67, + "learning_rate": 3.2880262174179445e-06, + "loss": 0.1647, + "step": 831300 + }, + { + "epoch": 4.67, + "learning_rate": 3.2824049871554895e-06, + "loss": 0.1603, + "step": 831400 + }, + { + "epoch": 4.67, + "learning_rate": 3.2767837568930337e-06, + "loss": 0.1663, + "step": 831500 + }, + { + "epoch": 4.67, + "learning_rate": 3.2711625266305787e-06, + "loss": 0.1621, + "step": 831600 + }, + { + "epoch": 4.68, + "learning_rate": 3.2655412963681237e-06, + "loss": 0.1654, + "step": 831700 + }, + { + "epoch": 4.68, + "learning_rate": 3.259920066105668e-06, + "loss": 0.1649, + "step": 831800 + }, + { + "epoch": 4.68, + "learning_rate": 3.254298835843213e-06, + "loss": 0.1626, + "step": 831900 + }, + { + "epoch": 4.68, + "learning_rate": 3.248677605580758e-06, + "loss": 0.1596, + "step": 832000 + }, + { + "epoch": 4.68, + "learning_rate": 3.243056375318302e-06, + "loss": 0.1595, + "step": 832100 + }, + { + "epoch": 4.68, + "learning_rate": 3.237435145055847e-06, + "loss": 0.1609, + "step": 832200 + }, + { + "epoch": 4.68, + "learning_rate": 3.231813914793392e-06, + "loss": 0.165, + "step": 832300 + }, + { + "epoch": 4.68, + "learning_rate": 3.226192684530937e-06, + "loss": 0.1581, + "step": 832400 + }, + { + "epoch": 4.68, + "learning_rate": 3.220571454268481e-06, + "loss": 0.1626, + "step": 832500 + }, + { + "epoch": 4.68, + "learning_rate": 3.2150064363086505e-06, + "loss": 0.1633, + "step": 832600 + }, + { + "epoch": 4.68, + "learning_rate": 3.2093852060461955e-06, + "loss": 0.16, + "step": 832700 + }, + { + "epoch": 4.68, + "learning_rate": 3.2037639757837405e-06, + "loss": 0.1611, + "step": 832800 + }, + { + "epoch": 4.68, + "learning_rate": 3.1981427455212847e-06, + "loss": 0.1648, + "step": 832900 + }, + { + "epoch": 4.68, + "learning_rate": 3.1925215152588297e-06, + "loss": 0.1653, + "step": 833000 + }, + { + "epoch": 4.68, + "learning_rate": 3.1869002849963747e-06, + "loss": 0.1643, + "step": 833100 + }, + { + "epoch": 4.68, + "learning_rate": 3.181279054733919e-06, + "loss": 0.1646, + "step": 833200 + }, + { + "epoch": 4.68, + "learning_rate": 3.175657824471464e-06, + "loss": 0.1638, + "step": 833300 + }, + { + "epoch": 4.68, + "learning_rate": 3.170036594209009e-06, + "loss": 0.1644, + "step": 833400 + }, + { + "epoch": 4.69, + "learning_rate": 3.164415363946553e-06, + "loss": 0.1666, + "step": 833500 + }, + { + "epoch": 4.69, + "learning_rate": 3.158794133684098e-06, + "loss": 0.1615, + "step": 833600 + }, + { + "epoch": 4.69, + "learning_rate": 3.153172903421643e-06, + "loss": 0.1617, + "step": 833700 + }, + { + "epoch": 4.69, + "learning_rate": 3.147551673159188e-06, + "loss": 0.157, + "step": 833800 + }, + { + "epoch": 4.69, + "learning_rate": 3.1419304428967322e-06, + "loss": 0.1621, + "step": 833900 + }, + { + "epoch": 4.69, + "learning_rate": 3.1363092126342772e-06, + "loss": 0.1652, + "step": 834000 + }, + { + "epoch": 4.69, + "learning_rate": 3.1306879823718222e-06, + "loss": 0.1627, + "step": 834100 + }, + { + "epoch": 4.69, + "learning_rate": 3.1250667521093664e-06, + "loss": 0.1605, + "step": 834200 + }, + { + "epoch": 4.69, + "learning_rate": 3.1194455218469114e-06, + "loss": 0.1627, + "step": 834300 + }, + { + "epoch": 4.69, + "learning_rate": 3.1138242915844564e-06, + "loss": 0.1607, + "step": 834400 + }, + { + "epoch": 4.69, + "learning_rate": 3.108203061322001e-06, + "loss": 0.1616, + "step": 834500 + }, + { + "epoch": 4.69, + "learning_rate": 3.1025818310595456e-06, + "loss": 0.1644, + "step": 834600 + }, + { + "epoch": 4.69, + "learning_rate": 3.0969606007970906e-06, + "loss": 0.1597, + "step": 834700 + }, + { + "epoch": 4.69, + "learning_rate": 3.091339370534635e-06, + "loss": 0.1608, + "step": 834800 + }, + { + "epoch": 4.69, + "learning_rate": 3.08571814027218e-06, + "loss": 0.1619, + "step": 834900 + }, + { + "epoch": 4.69, + "learning_rate": 3.0800969100097248e-06, + "loss": 0.1613, + "step": 835000 + }, + { + "epoch": 4.69, + "learning_rate": 3.0744756797472693e-06, + "loss": 0.1592, + "step": 835100 + }, + { + "epoch": 4.69, + "learning_rate": 3.0688544494848143e-06, + "loss": 0.1607, + "step": 835200 + }, + { + "epoch": 4.7, + "learning_rate": 3.0632894315249837e-06, + "loss": 0.1589, + "step": 835300 + }, + { + "epoch": 4.7, + "learning_rate": 3.0576682012625287e-06, + "loss": 0.1635, + "step": 835400 + }, + { + "epoch": 4.7, + "learning_rate": 3.0520469710000733e-06, + "loss": 0.1582, + "step": 835500 + }, + { + "epoch": 4.7, + "learning_rate": 3.0464257407376183e-06, + "loss": 0.163, + "step": 835600 + }, + { + "epoch": 4.7, + "learning_rate": 3.040804510475163e-06, + "loss": 0.1669, + "step": 835700 + }, + { + "epoch": 4.7, + "learning_rate": 3.0351832802127074e-06, + "loss": 0.1614, + "step": 835800 + }, + { + "epoch": 4.7, + "learning_rate": 3.0295620499502524e-06, + "loss": 0.1685, + "step": 835900 + }, + { + "epoch": 4.7, + "learning_rate": 3.023940819687797e-06, + "loss": 0.1592, + "step": 836000 + }, + { + "epoch": 4.7, + "learning_rate": 3.018319589425342e-06, + "loss": 0.159, + "step": 836100 + }, + { + "epoch": 4.7, + "learning_rate": 3.0126983591628866e-06, + "loss": 0.164, + "step": 836200 + }, + { + "epoch": 4.7, + "learning_rate": 3.0070771289004316e-06, + "loss": 0.1624, + "step": 836300 + }, + { + "epoch": 4.7, + "learning_rate": 3.001455898637976e-06, + "loss": 0.1614, + "step": 836400 + }, + { + "epoch": 4.7, + "learning_rate": 2.9958346683755208e-06, + "loss": 0.1663, + "step": 836500 + }, + { + "epoch": 4.7, + "learning_rate": 2.9902134381130658e-06, + "loss": 0.1576, + "step": 836600 + }, + { + "epoch": 4.7, + "learning_rate": 2.9845922078506104e-06, + "loss": 0.1685, + "step": 836700 + }, + { + "epoch": 4.7, + "learning_rate": 2.9789709775881554e-06, + "loss": 0.16, + "step": 836800 + }, + { + "epoch": 4.7, + "learning_rate": 2.9733497473257e-06, + "loss": 0.1637, + "step": 836900 + }, + { + "epoch": 4.7, + "learning_rate": 2.9677285170632445e-06, + "loss": 0.1656, + "step": 837000 + }, + { + "epoch": 4.71, + "learning_rate": 2.9621072868007895e-06, + "loss": 0.1612, + "step": 837100 + }, + { + "epoch": 4.71, + "learning_rate": 2.956486056538334e-06, + "loss": 0.1593, + "step": 837200 + }, + { + "epoch": 4.71, + "learning_rate": 2.950864826275879e-06, + "loss": 0.1621, + "step": 837300 + }, + { + "epoch": 4.71, + "learning_rate": 2.9452435960134237e-06, + "loss": 0.1657, + "step": 837400 + }, + { + "epoch": 4.71, + "learning_rate": 2.9396223657509683e-06, + "loss": 0.1571, + "step": 837500 + }, + { + "epoch": 4.71, + "learning_rate": 2.9340011354885133e-06, + "loss": 0.16, + "step": 837600 + }, + { + "epoch": 4.71, + "learning_rate": 2.928379905226058e-06, + "loss": 0.1645, + "step": 837700 + }, + { + "epoch": 4.71, + "learning_rate": 2.922758674963603e-06, + "loss": 0.1699, + "step": 837800 + }, + { + "epoch": 4.71, + "learning_rate": 2.9171374447011475e-06, + "loss": 0.1649, + "step": 837900 + }, + { + "epoch": 4.71, + "learning_rate": 2.911516214438692e-06, + "loss": 0.1602, + "step": 838000 + }, + { + "epoch": 4.71, + "learning_rate": 2.905894984176237e-06, + "loss": 0.1662, + "step": 838100 + }, + { + "epoch": 4.71, + "learning_rate": 2.9002737539137817e-06, + "loss": 0.1608, + "step": 838200 + }, + { + "epoch": 4.71, + "learning_rate": 2.8946525236513267e-06, + "loss": 0.1647, + "step": 838300 + }, + { + "epoch": 4.71, + "learning_rate": 2.8890312933888712e-06, + "loss": 0.1635, + "step": 838400 + }, + { + "epoch": 4.71, + "learning_rate": 2.883410063126416e-06, + "loss": 0.1624, + "step": 838500 + }, + { + "epoch": 4.71, + "learning_rate": 2.877845045166585e-06, + "loss": 0.1662, + "step": 838600 + }, + { + "epoch": 4.71, + "learning_rate": 2.87222381490413e-06, + "loss": 0.1631, + "step": 838700 + }, + { + "epoch": 4.72, + "learning_rate": 2.8666025846416747e-06, + "loss": 0.1562, + "step": 838800 + }, + { + "epoch": 4.72, + "learning_rate": 2.8609813543792193e-06, + "loss": 0.1634, + "step": 838900 + }, + { + "epoch": 4.72, + "learning_rate": 2.8553601241167643e-06, + "loss": 0.1636, + "step": 839000 + }, + { + "epoch": 4.72, + "learning_rate": 2.849738893854309e-06, + "loss": 0.1605, + "step": 839100 + }, + { + "epoch": 4.72, + "learning_rate": 2.844117663591854e-06, + "loss": 0.1654, + "step": 839200 + }, + { + "epoch": 4.72, + "learning_rate": 2.8384964333293985e-06, + "loss": 0.1616, + "step": 839300 + }, + { + "epoch": 4.72, + "learning_rate": 2.832875203066943e-06, + "loss": 0.1637, + "step": 839400 + }, + { + "epoch": 4.72, + "learning_rate": 2.827253972804488e-06, + "loss": 0.1629, + "step": 839500 + }, + { + "epoch": 4.72, + "learning_rate": 2.8216327425420327e-06, + "loss": 0.1591, + "step": 839600 + }, + { + "epoch": 4.72, + "learning_rate": 2.8160115122795777e-06, + "loss": 0.1624, + "step": 839700 + }, + { + "epoch": 4.72, + "learning_rate": 2.8103902820171223e-06, + "loss": 0.1656, + "step": 839800 + }, + { + "epoch": 4.72, + "learning_rate": 2.804769051754667e-06, + "loss": 0.16, + "step": 839900 + }, + { + "epoch": 4.72, + "learning_rate": 2.799147821492212e-06, + "loss": 0.161, + "step": 840000 + }, + { + "epoch": 4.72, + "learning_rate": 2.7935265912297564e-06, + "loss": 0.1659, + "step": 840100 + }, + { + "epoch": 4.72, + "learning_rate": 2.7879053609673014e-06, + "loss": 0.166, + "step": 840200 + }, + { + "epoch": 4.72, + "learning_rate": 2.782284130704846e-06, + "loss": 0.1629, + "step": 840300 + }, + { + "epoch": 4.72, + "learning_rate": 2.7766629004423906e-06, + "loss": 0.1617, + "step": 840400 + }, + { + "epoch": 4.72, + "learning_rate": 2.7710416701799356e-06, + "loss": 0.163, + "step": 840500 + }, + { + "epoch": 4.73, + "learning_rate": 2.76542043991748e-06, + "loss": 0.1623, + "step": 840600 + }, + { + "epoch": 4.73, + "learning_rate": 2.759799209655025e-06, + "loss": 0.1605, + "step": 840700 + }, + { + "epoch": 4.73, + "learning_rate": 2.75417797939257e-06, + "loss": 0.161, + "step": 840800 + }, + { + "epoch": 4.73, + "learning_rate": 2.7485567491301144e-06, + "loss": 0.1617, + "step": 840900 + }, + { + "epoch": 4.73, + "learning_rate": 2.7429355188676594e-06, + "loss": 0.1611, + "step": 841000 + }, + { + "epoch": 4.73, + "learning_rate": 2.737314288605204e-06, + "loss": 0.1672, + "step": 841100 + }, + { + "epoch": 4.73, + "learning_rate": 2.731693058342749e-06, + "loss": 0.1566, + "step": 841200 + }, + { + "epoch": 4.73, + "learning_rate": 2.7260718280802936e-06, + "loss": 0.1584, + "step": 841300 + }, + { + "epoch": 4.73, + "learning_rate": 2.7204505978178386e-06, + "loss": 0.1663, + "step": 841400 + }, + { + "epoch": 4.73, + "learning_rate": 2.714829367555383e-06, + "loss": 0.1631, + "step": 841500 + }, + { + "epoch": 4.73, + "learning_rate": 2.709208137292928e-06, + "loss": 0.164, + "step": 841600 + }, + { + "epoch": 4.73, + "learning_rate": 2.7035869070304727e-06, + "loss": 0.1613, + "step": 841700 + }, + { + "epoch": 4.73, + "learning_rate": 2.6979656767680177e-06, + "loss": 0.1639, + "step": 841800 + }, + { + "epoch": 4.73, + "learning_rate": 2.6923444465055623e-06, + "loss": 0.1596, + "step": 841900 + }, + { + "epoch": 4.73, + "learning_rate": 2.6867232162431073e-06, + "loss": 0.1605, + "step": 842000 + }, + { + "epoch": 4.73, + "learning_rate": 2.681101985980652e-06, + "loss": 0.1643, + "step": 842100 + }, + { + "epoch": 4.73, + "learning_rate": 2.6754807557181965e-06, + "loss": 0.1668, + "step": 842200 + }, + { + "epoch": 4.73, + "learning_rate": 2.6698595254557415e-06, + "loss": 0.1619, + "step": 842300 + }, + { + "epoch": 4.74, + "learning_rate": 2.664238295193286e-06, + "loss": 0.1653, + "step": 842400 + }, + { + "epoch": 4.74, + "learning_rate": 2.658617064930831e-06, + "loss": 0.161, + "step": 842500 + }, + { + "epoch": 4.74, + "learning_rate": 2.6529958346683757e-06, + "loss": 0.1678, + "step": 842600 + }, + { + "epoch": 4.74, + "learning_rate": 2.6473746044059207e-06, + "loss": 0.1607, + "step": 842700 + }, + { + "epoch": 4.74, + "learning_rate": 2.6417533741434653e-06, + "loss": 0.1628, + "step": 842800 + }, + { + "epoch": 4.74, + "learning_rate": 2.6361321438810103e-06, + "loss": 0.1678, + "step": 842900 + }, + { + "epoch": 4.74, + "learning_rate": 2.630510913618555e-06, + "loss": 0.1656, + "step": 843000 + }, + { + "epoch": 4.74, + "learning_rate": 2.6248896833560994e-06, + "loss": 0.168, + "step": 843100 + }, + { + "epoch": 4.74, + "learning_rate": 2.6192684530936444e-06, + "loss": 0.1625, + "step": 843200 + }, + { + "epoch": 4.74, + "learning_rate": 2.613647222831189e-06, + "loss": 0.1636, + "step": 843300 + }, + { + "epoch": 4.74, + "learning_rate": 2.6080822048713583e-06, + "loss": 0.1584, + "step": 843400 + }, + { + "epoch": 4.74, + "learning_rate": 2.602460974608903e-06, + "loss": 0.168, + "step": 843500 + }, + { + "epoch": 4.74, + "learning_rate": 2.596839744346448e-06, + "loss": 0.1606, + "step": 843600 + }, + { + "epoch": 4.74, + "learning_rate": 2.5912185140839925e-06, + "loss": 0.1657, + "step": 843700 + }, + { + "epoch": 4.74, + "learning_rate": 2.5855972838215375e-06, + "loss": 0.1603, + "step": 843800 + }, + { + "epoch": 4.74, + "learning_rate": 2.579976053559082e-06, + "loss": 0.1599, + "step": 843900 + }, + { + "epoch": 4.74, + "learning_rate": 2.5743548232966267e-06, + "loss": 0.1629, + "step": 844000 + }, + { + "epoch": 4.74, + "learning_rate": 2.5687335930341717e-06, + "loss": 0.1592, + "step": 844100 + }, + { + "epoch": 4.75, + "learning_rate": 2.5631123627717163e-06, + "loss": 0.1635, + "step": 844200 + }, + { + "epoch": 4.75, + "learning_rate": 2.5574911325092613e-06, + "loss": 0.163, + "step": 844300 + }, + { + "epoch": 4.75, + "learning_rate": 2.551869902246806e-06, + "loss": 0.1612, + "step": 844400 + }, + { + "epoch": 4.75, + "learning_rate": 2.5462486719843505e-06, + "loss": 0.1613, + "step": 844500 + }, + { + "epoch": 4.75, + "learning_rate": 2.5406274417218955e-06, + "loss": 0.1641, + "step": 844600 + }, + { + "epoch": 4.75, + "learning_rate": 2.53500621145944e-06, + "loss": 0.1645, + "step": 844700 + }, + { + "epoch": 4.75, + "learning_rate": 2.529384981196985e-06, + "loss": 0.1637, + "step": 844800 + }, + { + "epoch": 4.75, + "learning_rate": 2.5237637509345296e-06, + "loss": 0.1604, + "step": 844900 + }, + { + "epoch": 4.75, + "learning_rate": 2.5181425206720742e-06, + "loss": 0.165, + "step": 845000 + }, + { + "epoch": 4.75, + "learning_rate": 2.5125212904096192e-06, + "loss": 0.1637, + "step": 845100 + }, + { + "epoch": 4.75, + "learning_rate": 2.506900060147164e-06, + "loss": 0.1599, + "step": 845200 + }, + { + "epoch": 4.75, + "learning_rate": 2.501278829884709e-06, + "loss": 0.165, + "step": 845300 + }, + { + "epoch": 4.75, + "learning_rate": 2.4956575996222534e-06, + "loss": 0.1555, + "step": 845400 + }, + { + "epoch": 4.75, + "learning_rate": 2.490036369359798e-06, + "loss": 0.1629, + "step": 845500 + }, + { + "epoch": 4.75, + "learning_rate": 2.484415139097343e-06, + "loss": 0.1655, + "step": 845600 + }, + { + "epoch": 4.75, + "learning_rate": 2.4787939088348876e-06, + "loss": 0.1576, + "step": 845700 + }, + { + "epoch": 4.75, + "learning_rate": 2.4731726785724326e-06, + "loss": 0.1614, + "step": 845800 + }, + { + "epoch": 4.75, + "learning_rate": 2.467551448309977e-06, + "loss": 0.1578, + "step": 845900 + }, + { + "epoch": 4.76, + "learning_rate": 2.4619302180475217e-06, + "loss": 0.1638, + "step": 846000 + }, + { + "epoch": 4.76, + "learning_rate": 2.4563089877850668e-06, + "loss": 0.1628, + "step": 846100 + }, + { + "epoch": 4.76, + "learning_rate": 2.4506877575226113e-06, + "loss": 0.1559, + "step": 846200 + }, + { + "epoch": 4.76, + "learning_rate": 2.4451227395627807e-06, + "loss": 0.1603, + "step": 846300 + }, + { + "epoch": 4.76, + "learning_rate": 2.4395015093003257e-06, + "loss": 0.1632, + "step": 846400 + }, + { + "epoch": 4.76, + "learning_rate": 2.4338802790378702e-06, + "loss": 0.1561, + "step": 846500 + }, + { + "epoch": 4.76, + "learning_rate": 2.4282590487754153e-06, + "loss": 0.1648, + "step": 846600 + }, + { + "epoch": 4.76, + "learning_rate": 2.42263781851296e-06, + "loss": 0.1612, + "step": 846700 + }, + { + "epoch": 4.76, + "learning_rate": 2.4170165882505044e-06, + "loss": 0.1644, + "step": 846800 + }, + { + "epoch": 4.76, + "learning_rate": 2.4113953579880494e-06, + "loss": 0.1599, + "step": 846900 + }, + { + "epoch": 4.76, + "learning_rate": 2.405774127725594e-06, + "loss": 0.1595, + "step": 847000 + }, + { + "epoch": 4.76, + "learning_rate": 2.400152897463139e-06, + "loss": 0.1594, + "step": 847100 + }, + { + "epoch": 4.76, + "learning_rate": 2.3945316672006836e-06, + "loss": 0.1624, + "step": 847200 + }, + { + "epoch": 4.76, + "learning_rate": 2.3889104369382286e-06, + "loss": 0.1574, + "step": 847300 + }, + { + "epoch": 4.76, + "learning_rate": 2.383289206675773e-06, + "loss": 0.1571, + "step": 847400 + }, + { + "epoch": 4.76, + "learning_rate": 2.377667976413318e-06, + "loss": 0.16, + "step": 847500 + }, + { + "epoch": 4.76, + "learning_rate": 2.3720467461508628e-06, + "loss": 0.1604, + "step": 847600 + }, + { + "epoch": 4.77, + "learning_rate": 2.3664255158884078e-06, + "loss": 0.1673, + "step": 847700 + }, + { + "epoch": 4.77, + "learning_rate": 2.3608042856259524e-06, + "loss": 0.1603, + "step": 847800 + }, + { + "epoch": 4.77, + "learning_rate": 2.355183055363497e-06, + "loss": 0.1577, + "step": 847900 + }, + { + "epoch": 4.77, + "learning_rate": 2.349561825101042e-06, + "loss": 0.1643, + "step": 848000 + }, + { + "epoch": 4.77, + "learning_rate": 2.3439405948385865e-06, + "loss": 0.1639, + "step": 848100 + }, + { + "epoch": 4.77, + "learning_rate": 2.3383193645761315e-06, + "loss": 0.1668, + "step": 848200 + }, + { + "epoch": 4.77, + "learning_rate": 2.332698134313676e-06, + "loss": 0.1638, + "step": 848300 + }, + { + "epoch": 4.77, + "learning_rate": 2.3271331163538455e-06, + "loss": 0.1617, + "step": 848400 + }, + { + "epoch": 4.77, + "learning_rate": 2.32151188609139e-06, + "loss": 0.1673, + "step": 848500 + }, + { + "epoch": 4.77, + "learning_rate": 2.315890655828935e-06, + "loss": 0.1604, + "step": 848600 + }, + { + "epoch": 4.77, + "learning_rate": 2.3102694255664796e-06, + "loss": 0.1661, + "step": 848700 + }, + { + "epoch": 4.77, + "learning_rate": 2.304648195304024e-06, + "loss": 0.1643, + "step": 848800 + }, + { + "epoch": 4.77, + "learning_rate": 2.2990269650415692e-06, + "loss": 0.1622, + "step": 848900 + }, + { + "epoch": 4.77, + "learning_rate": 2.293461947081738e-06, + "loss": 0.1601, + "step": 849000 + }, + { + "epoch": 4.77, + "learning_rate": 2.287840716819283e-06, + "loss": 0.1664, + "step": 849100 + }, + { + "epoch": 4.77, + "learning_rate": 2.2822194865568277e-06, + "loss": 0.1632, + "step": 849200 + }, + { + "epoch": 4.77, + "learning_rate": 2.2765982562943727e-06, + "loss": 0.1629, + "step": 849300 + }, + { + "epoch": 4.77, + "learning_rate": 2.2709770260319173e-06, + "loss": 0.1623, + "step": 849400 + }, + { + "epoch": 4.78, + "learning_rate": 2.2653557957694623e-06, + "loss": 0.1569, + "step": 849500 + }, + { + "epoch": 4.78, + "learning_rate": 2.259734565507007e-06, + "loss": 0.1615, + "step": 849600 + }, + { + "epoch": 4.78, + "learning_rate": 2.2541133352445515e-06, + "loss": 0.1634, + "step": 849700 + }, + { + "epoch": 4.78, + "learning_rate": 2.2484921049820965e-06, + "loss": 0.1633, + "step": 849800 + }, + { + "epoch": 4.78, + "learning_rate": 2.242870874719641e-06, + "loss": 0.1638, + "step": 849900 + }, + { + "epoch": 4.78, + "learning_rate": 2.237249644457186e-06, + "loss": 0.1585, + "step": 850000 + }, + { + "epoch": 4.78, + "learning_rate": 2.2316284141947306e-06, + "loss": 0.1597, + "step": 850100 + }, + { + "epoch": 4.78, + "learning_rate": 2.2260071839322752e-06, + "loss": 0.1593, + "step": 850200 + }, + { + "epoch": 4.78, + "learning_rate": 2.2203859536698202e-06, + "loss": 0.1597, + "step": 850300 + }, + { + "epoch": 4.78, + "learning_rate": 2.214764723407365e-06, + "loss": 0.1664, + "step": 850400 + }, + { + "epoch": 4.78, + "learning_rate": 2.20914349314491e-06, + "loss": 0.1624, + "step": 850500 + }, + { + "epoch": 4.78, + "learning_rate": 2.2035222628824544e-06, + "loss": 0.1654, + "step": 850600 + }, + { + "epoch": 4.78, + "learning_rate": 2.197901032619999e-06, + "loss": 0.164, + "step": 850700 + }, + { + "epoch": 4.78, + "learning_rate": 2.192279802357544e-06, + "loss": 0.1648, + "step": 850800 + }, + { + "epoch": 4.78, + "learning_rate": 2.1866585720950886e-06, + "loss": 0.161, + "step": 850900 + }, + { + "epoch": 4.78, + "learning_rate": 2.1810373418326336e-06, + "loss": 0.1579, + "step": 851000 + }, + { + "epoch": 4.78, + "learning_rate": 2.175416111570178e-06, + "loss": 0.1688, + "step": 851100 + }, + { + "epoch": 4.78, + "learning_rate": 2.169794881307723e-06, + "loss": 0.1607, + "step": 851200 + }, + { + "epoch": 4.79, + "learning_rate": 2.1642298633478925e-06, + "loss": 0.1697, + "step": 851300 + }, + { + "epoch": 4.79, + "learning_rate": 2.158608633085437e-06, + "loss": 0.1632, + "step": 851400 + }, + { + "epoch": 4.79, + "learning_rate": 2.152987402822982e-06, + "loss": 0.1618, + "step": 851500 + }, + { + "epoch": 4.79, + "learning_rate": 2.1473661725605267e-06, + "loss": 0.1615, + "step": 851600 + }, + { + "epoch": 4.79, + "learning_rate": 2.1417449422980717e-06, + "loss": 0.1596, + "step": 851700 + }, + { + "epoch": 4.79, + "learning_rate": 2.1361237120356163e-06, + "loss": 0.1632, + "step": 851800 + }, + { + "epoch": 4.79, + "learning_rate": 2.1305024817731613e-06, + "loss": 0.1636, + "step": 851900 + }, + { + "epoch": 4.79, + "learning_rate": 2.124881251510706e-06, + "loss": 0.1664, + "step": 852000 + }, + { + "epoch": 4.79, + "learning_rate": 2.1192600212482504e-06, + "loss": 0.1667, + "step": 852100 + }, + { + "epoch": 4.79, + "learning_rate": 2.1136387909857954e-06, + "loss": 0.1565, + "step": 852200 + }, + { + "epoch": 4.79, + "learning_rate": 2.10801756072334e-06, + "loss": 0.1589, + "step": 852300 + }, + { + "epoch": 4.79, + "learning_rate": 2.102396330460885e-06, + "loss": 0.1608, + "step": 852400 + }, + { + "epoch": 4.79, + "learning_rate": 2.0967751001984296e-06, + "loss": 0.1595, + "step": 852500 + }, + { + "epoch": 4.79, + "learning_rate": 2.091153869935974e-06, + "loss": 0.1646, + "step": 852600 + }, + { + "epoch": 4.79, + "learning_rate": 2.085532639673519e-06, + "loss": 0.1594, + "step": 852700 + }, + { + "epoch": 4.79, + "learning_rate": 2.079911409411064e-06, + "loss": 0.1636, + "step": 852800 + }, + { + "epoch": 4.79, + "learning_rate": 2.074290179148609e-06, + "loss": 0.1627, + "step": 852900 + }, + { + "epoch": 4.79, + "learning_rate": 2.0686689488861534e-06, + "loss": 0.1555, + "step": 853000 + }, + { + "epoch": 4.8, + "learning_rate": 2.063047718623698e-06, + "loss": 0.1631, + "step": 853100 + }, + { + "epoch": 4.8, + "learning_rate": 2.057426488361243e-06, + "loss": 0.1633, + "step": 853200 + }, + { + "epoch": 4.8, + "learning_rate": 2.0518052580987876e-06, + "loss": 0.1651, + "step": 853300 + }, + { + "epoch": 4.8, + "learning_rate": 2.0461840278363326e-06, + "loss": 0.161, + "step": 853400 + }, + { + "epoch": 4.8, + "learning_rate": 2.040562797573877e-06, + "loss": 0.1645, + "step": 853500 + }, + { + "epoch": 4.8, + "learning_rate": 2.0349415673114217e-06, + "loss": 0.1614, + "step": 853600 + }, + { + "epoch": 4.8, + "learning_rate": 2.0293203370489667e-06, + "loss": 0.1547, + "step": 853700 + }, + { + "epoch": 4.8, + "learning_rate": 2.0236991067865113e-06, + "loss": 0.1594, + "step": 853800 + }, + { + "epoch": 4.8, + "learning_rate": 2.0180778765240563e-06, + "loss": 0.1615, + "step": 853900 + }, + { + "epoch": 4.8, + "learning_rate": 2.012456646261601e-06, + "loss": 0.157, + "step": 854000 + }, + { + "epoch": 4.8, + "learning_rate": 2.0068354159991455e-06, + "loss": 0.161, + "step": 854100 + }, + { + "epoch": 4.8, + "learning_rate": 2.0012141857366905e-06, + "loss": 0.1635, + "step": 854200 + }, + { + "epoch": 4.8, + "learning_rate": 1.995592955474235e-06, + "loss": 0.1609, + "step": 854300 + }, + { + "epoch": 4.8, + "learning_rate": 1.98997172521178e-06, + "loss": 0.1592, + "step": 854400 + }, + { + "epoch": 4.8, + "learning_rate": 1.9843504949493247e-06, + "loss": 0.1616, + "step": 854500 + }, + { + "epoch": 4.8, + "learning_rate": 1.9787292646868692e-06, + "loss": 0.1607, + "step": 854600 + }, + { + "epoch": 4.8, + "learning_rate": 1.9731080344244143e-06, + "loss": 0.1607, + "step": 854700 + }, + { + "epoch": 4.81, + "learning_rate": 1.967486804161959e-06, + "loss": 0.1641, + "step": 854800 + }, + { + "epoch": 4.81, + "learning_rate": 1.961865573899504e-06, + "loss": 0.1635, + "step": 854900 + }, + { + "epoch": 4.81, + "learning_rate": 1.9562443436370484e-06, + "loss": 0.1608, + "step": 855000 + }, + { + "epoch": 4.81, + "learning_rate": 1.9506231133745934e-06, + "loss": 0.1634, + "step": 855100 + }, + { + "epoch": 4.81, + "learning_rate": 1.945001883112138e-06, + "loss": 0.1632, + "step": 855200 + }, + { + "epoch": 4.81, + "learning_rate": 1.9393806528496826e-06, + "loss": 0.1592, + "step": 855300 + }, + { + "epoch": 4.81, + "learning_rate": 1.9337594225872276e-06, + "loss": 0.1648, + "step": 855400 + }, + { + "epoch": 4.81, + "learning_rate": 1.928138192324772e-06, + "loss": 0.1608, + "step": 855500 + }, + { + "epoch": 4.81, + "learning_rate": 1.9225731743649415e-06, + "loss": 0.1626, + "step": 855600 + }, + { + "epoch": 4.81, + "learning_rate": 1.916951944102486e-06, + "loss": 0.1638, + "step": 855700 + }, + { + "epoch": 4.81, + "learning_rate": 1.911330713840031e-06, + "loss": 0.1648, + "step": 855800 + }, + { + "epoch": 4.81, + "learning_rate": 1.905709483577576e-06, + "loss": 0.1589, + "step": 855900 + }, + { + "epoch": 4.81, + "learning_rate": 1.9000882533151205e-06, + "loss": 0.167, + "step": 856000 + }, + { + "epoch": 4.81, + "learning_rate": 1.8944670230526655e-06, + "loss": 0.1613, + "step": 856100 + }, + { + "epoch": 4.81, + "learning_rate": 1.88884579279021e-06, + "loss": 0.1591, + "step": 856200 + }, + { + "epoch": 4.81, + "learning_rate": 1.883224562527755e-06, + "loss": 0.1584, + "step": 856300 + }, + { + "epoch": 4.81, + "learning_rate": 1.8776033322652997e-06, + "loss": 0.1582, + "step": 856400 + }, + { + "epoch": 4.81, + "learning_rate": 1.8719821020028442e-06, + "loss": 0.1565, + "step": 856500 + }, + { + "epoch": 4.82, + "learning_rate": 1.8663608717403893e-06, + "loss": 0.1613, + "step": 856600 + }, + { + "epoch": 4.82, + "learning_rate": 1.8607396414779338e-06, + "loss": 0.1671, + "step": 856700 + }, + { + "epoch": 4.82, + "learning_rate": 1.8551184112154788e-06, + "loss": 0.1647, + "step": 856800 + }, + { + "epoch": 4.82, + "learning_rate": 1.8494971809530234e-06, + "loss": 0.1633, + "step": 856900 + }, + { + "epoch": 4.82, + "learning_rate": 1.8438759506905684e-06, + "loss": 0.1597, + "step": 857000 + }, + { + "epoch": 4.82, + "learning_rate": 1.838254720428113e-06, + "loss": 0.1595, + "step": 857100 + }, + { + "epoch": 4.82, + "learning_rate": 1.8326334901656576e-06, + "loss": 0.1561, + "step": 857200 + }, + { + "epoch": 4.82, + "learning_rate": 1.8270122599032026e-06, + "loss": 0.1633, + "step": 857300 + }, + { + "epoch": 4.82, + "learning_rate": 1.8213910296407472e-06, + "loss": 0.1589, + "step": 857400 + }, + { + "epoch": 4.82, + "learning_rate": 1.8157697993782922e-06, + "loss": 0.1573, + "step": 857500 + }, + { + "epoch": 4.82, + "learning_rate": 1.8101485691158368e-06, + "loss": 0.1633, + "step": 857600 + }, + { + "epoch": 4.82, + "learning_rate": 1.8045273388533814e-06, + "loss": 0.1671, + "step": 857700 + }, + { + "epoch": 4.82, + "learning_rate": 1.7989061085909264e-06, + "loss": 0.1621, + "step": 857800 + }, + { + "epoch": 4.82, + "learning_rate": 1.793284878328471e-06, + "loss": 0.1671, + "step": 857900 + }, + { + "epoch": 4.82, + "learning_rate": 1.787663648066016e-06, + "loss": 0.1605, + "step": 858000 + }, + { + "epoch": 4.82, + "learning_rate": 1.782098630106185e-06, + "loss": 0.1701, + "step": 858100 + }, + { + "epoch": 4.82, + "learning_rate": 1.77647739984373e-06, + "loss": 0.163, + "step": 858200 + }, + { + "epoch": 4.82, + "learning_rate": 1.7708561695812747e-06, + "loss": 0.158, + "step": 858300 + }, + { + "epoch": 4.83, + "learning_rate": 1.7652349393188192e-06, + "loss": 0.1589, + "step": 858400 + }, + { + "epoch": 4.83, + "learning_rate": 1.7596137090563642e-06, + "loss": 0.1644, + "step": 858500 + }, + { + "epoch": 4.83, + "learning_rate": 1.7539924787939088e-06, + "loss": 0.1575, + "step": 858600 + }, + { + "epoch": 4.83, + "learning_rate": 1.7483712485314538e-06, + "loss": 0.1624, + "step": 858700 + }, + { + "epoch": 4.83, + "learning_rate": 1.7427500182689984e-06, + "loss": 0.1611, + "step": 858800 + }, + { + "epoch": 4.83, + "learning_rate": 1.737128788006543e-06, + "loss": 0.1591, + "step": 858900 + }, + { + "epoch": 4.83, + "learning_rate": 1.731507557744088e-06, + "loss": 0.1672, + "step": 859000 + }, + { + "epoch": 4.83, + "learning_rate": 1.7258863274816326e-06, + "loss": 0.1631, + "step": 859100 + }, + { + "epoch": 4.83, + "learning_rate": 1.7202650972191776e-06, + "loss": 0.1613, + "step": 859200 + }, + { + "epoch": 4.83, + "learning_rate": 1.7146438669567222e-06, + "loss": 0.1639, + "step": 859300 + }, + { + "epoch": 4.83, + "learning_rate": 1.7090226366942672e-06, + "loss": 0.1669, + "step": 859400 + }, + { + "epoch": 4.83, + "learning_rate": 1.7034014064318118e-06, + "loss": 0.162, + "step": 859500 + }, + { + "epoch": 4.83, + "learning_rate": 1.6977801761693564e-06, + "loss": 0.1617, + "step": 859600 + }, + { + "epoch": 4.83, + "learning_rate": 1.6921589459069014e-06, + "loss": 0.1624, + "step": 859700 + }, + { + "epoch": 4.83, + "learning_rate": 1.686537715644446e-06, + "loss": 0.1632, + "step": 859800 + }, + { + "epoch": 4.83, + "learning_rate": 1.680916485381991e-06, + "loss": 0.159, + "step": 859900 + }, + { + "epoch": 4.83, + "learning_rate": 1.6752952551195355e-06, + "loss": 0.1566, + "step": 860000 + }, + { + "epoch": 4.83, + "learning_rate": 1.6696740248570801e-06, + "loss": 0.1608, + "step": 860100 + }, + { + "epoch": 4.84, + "learning_rate": 1.6640527945946251e-06, + "loss": 0.1634, + "step": 860200 + }, + { + "epoch": 4.84, + "learning_rate": 1.6584877766347942e-06, + "loss": 0.1597, + "step": 860300 + }, + { + "epoch": 4.84, + "learning_rate": 1.6528665463723392e-06, + "loss": 0.166, + "step": 860400 + }, + { + "epoch": 4.84, + "learning_rate": 1.6472453161098838e-06, + "loss": 0.1583, + "step": 860500 + }, + { + "epoch": 4.84, + "learning_rate": 1.6416240858474288e-06, + "loss": 0.1613, + "step": 860600 + }, + { + "epoch": 4.84, + "learning_rate": 1.6360028555849734e-06, + "loss": 0.1637, + "step": 860700 + }, + { + "epoch": 4.84, + "learning_rate": 1.630381625322518e-06, + "loss": 0.1565, + "step": 860800 + }, + { + "epoch": 4.84, + "learning_rate": 1.624760395060063e-06, + "loss": 0.1603, + "step": 860900 + }, + { + "epoch": 4.84, + "learning_rate": 1.6191391647976076e-06, + "loss": 0.1632, + "step": 861000 + }, + { + "epoch": 4.84, + "learning_rate": 1.6135179345351526e-06, + "loss": 0.1634, + "step": 861100 + }, + { + "epoch": 4.84, + "learning_rate": 1.6078967042726972e-06, + "loss": 0.1599, + "step": 861200 + }, + { + "epoch": 4.84, + "learning_rate": 1.6022754740102422e-06, + "loss": 0.1569, + "step": 861300 + }, + { + "epoch": 4.84, + "learning_rate": 1.5966542437477868e-06, + "loss": 0.166, + "step": 861400 + }, + { + "epoch": 4.84, + "learning_rate": 1.5910330134853313e-06, + "loss": 0.1545, + "step": 861500 + }, + { + "epoch": 4.84, + "learning_rate": 1.5854117832228764e-06, + "loss": 0.1615, + "step": 861600 + }, + { + "epoch": 4.84, + "learning_rate": 1.579790552960421e-06, + "loss": 0.1627, + "step": 861700 + }, + { + "epoch": 4.84, + "learning_rate": 1.574169322697966e-06, + "loss": 0.1643, + "step": 861800 + }, + { + "epoch": 4.84, + "learning_rate": 1.5685480924355105e-06, + "loss": 0.1575, + "step": 861900 + }, + { + "epoch": 4.85, + "learning_rate": 1.5629268621730551e-06, + "loss": 0.1602, + "step": 862000 + }, + { + "epoch": 4.85, + "learning_rate": 1.5573056319106001e-06, + "loss": 0.1603, + "step": 862100 + }, + { + "epoch": 4.85, + "learning_rate": 1.5516844016481447e-06, + "loss": 0.1659, + "step": 862200 + }, + { + "epoch": 4.85, + "learning_rate": 1.5460631713856895e-06, + "loss": 0.1626, + "step": 862300 + }, + { + "epoch": 4.85, + "learning_rate": 1.5404419411232343e-06, + "loss": 0.1669, + "step": 862400 + }, + { + "epoch": 4.85, + "learning_rate": 1.534820710860779e-06, + "loss": 0.1673, + "step": 862500 + }, + { + "epoch": 4.85, + "learning_rate": 1.5291994805983239e-06, + "loss": 0.1542, + "step": 862600 + }, + { + "epoch": 4.85, + "learning_rate": 1.5235782503358685e-06, + "loss": 0.1604, + "step": 862700 + }, + { + "epoch": 4.85, + "learning_rate": 1.5179570200734133e-06, + "loss": 0.1607, + "step": 862800 + }, + { + "epoch": 4.85, + "learning_rate": 1.512335789810958e-06, + "loss": 0.1636, + "step": 862900 + }, + { + "epoch": 4.85, + "learning_rate": 1.5067145595485028e-06, + "loss": 0.1612, + "step": 863000 + }, + { + "epoch": 4.85, + "learning_rate": 1.5010933292860476e-06, + "loss": 0.1598, + "step": 863100 + }, + { + "epoch": 4.85, + "learning_rate": 1.4954720990235922e-06, + "loss": 0.1629, + "step": 863200 + }, + { + "epoch": 4.85, + "learning_rate": 1.489850868761137e-06, + "loss": 0.1583, + "step": 863300 + }, + { + "epoch": 4.85, + "learning_rate": 1.4842296384986818e-06, + "loss": 0.1634, + "step": 863400 + }, + { + "epoch": 4.85, + "learning_rate": 1.4786084082362266e-06, + "loss": 0.1616, + "step": 863500 + }, + { + "epoch": 4.85, + "learning_rate": 1.4729871779737714e-06, + "loss": 0.1609, + "step": 863600 + }, + { + "epoch": 4.86, + "learning_rate": 1.4673659477113162e-06, + "loss": 0.1603, + "step": 863700 + }, + { + "epoch": 4.86, + "learning_rate": 1.4617447174488608e-06, + "loss": 0.1634, + "step": 863800 + }, + { + "epoch": 4.86, + "learning_rate": 1.4561234871864056e-06, + "loss": 0.1661, + "step": 863900 + }, + { + "epoch": 4.86, + "learning_rate": 1.4505022569239504e-06, + "loss": 0.1634, + "step": 864000 + }, + { + "epoch": 4.86, + "learning_rate": 1.4448810266614952e-06, + "loss": 0.1606, + "step": 864100 + }, + { + "epoch": 4.86, + "learning_rate": 1.43925979639904e-06, + "loss": 0.1562, + "step": 864200 + }, + { + "epoch": 4.86, + "learning_rate": 1.4336947784392093e-06, + "loss": 0.1647, + "step": 864300 + }, + { + "epoch": 4.86, + "learning_rate": 1.428073548176754e-06, + "loss": 0.1659, + "step": 864400 + }, + { + "epoch": 4.86, + "learning_rate": 1.4224523179142989e-06, + "loss": 0.1648, + "step": 864500 + }, + { + "epoch": 4.86, + "learning_rate": 1.4168310876518435e-06, + "loss": 0.1594, + "step": 864600 + }, + { + "epoch": 4.86, + "learning_rate": 1.4112098573893883e-06, + "loss": 0.1561, + "step": 864700 + }, + { + "epoch": 4.86, + "learning_rate": 1.4056448394295578e-06, + "loss": 0.1555, + "step": 864800 + }, + { + "epoch": 4.86, + "learning_rate": 1.4000236091671024e-06, + "loss": 0.1579, + "step": 864900 + }, + { + "epoch": 4.86, + "learning_rate": 1.3944023789046472e-06, + "loss": 0.158, + "step": 865000 + }, + { + "epoch": 4.86, + "learning_rate": 1.388781148642192e-06, + "loss": 0.1677, + "step": 865100 + }, + { + "epoch": 4.86, + "learning_rate": 1.3831599183797368e-06, + "loss": 0.1569, + "step": 865200 + }, + { + "epoch": 4.86, + "learning_rate": 1.3775386881172815e-06, + "loss": 0.162, + "step": 865300 + }, + { + "epoch": 4.86, + "learning_rate": 1.3719174578548261e-06, + "loss": 0.1667, + "step": 865400 + }, + { + "epoch": 4.87, + "learning_rate": 1.366296227592371e-06, + "loss": 0.1636, + "step": 865500 + }, + { + "epoch": 4.87, + "learning_rate": 1.3606749973299157e-06, + "loss": 0.1596, + "step": 865600 + }, + { + "epoch": 4.87, + "learning_rate": 1.3550537670674605e-06, + "loss": 0.161, + "step": 865700 + }, + { + "epoch": 4.87, + "learning_rate": 1.3494325368050053e-06, + "loss": 0.1605, + "step": 865800 + }, + { + "epoch": 4.87, + "learning_rate": 1.3438113065425501e-06, + "loss": 0.1591, + "step": 865900 + }, + { + "epoch": 4.87, + "learning_rate": 1.3381900762800947e-06, + "loss": 0.1623, + "step": 866000 + }, + { + "epoch": 4.87, + "learning_rate": 1.3325688460176395e-06, + "loss": 0.1594, + "step": 866100 + }, + { + "epoch": 4.87, + "learning_rate": 1.3269476157551843e-06, + "loss": 0.1648, + "step": 866200 + }, + { + "epoch": 4.87, + "learning_rate": 1.321326385492729e-06, + "loss": 0.1617, + "step": 866300 + }, + { + "epoch": 4.87, + "learning_rate": 1.3157051552302739e-06, + "loss": 0.157, + "step": 866400 + }, + { + "epoch": 4.87, + "learning_rate": 1.3100839249678185e-06, + "loss": 0.1551, + "step": 866500 + }, + { + "epoch": 4.87, + "learning_rate": 1.3044626947053632e-06, + "loss": 0.1653, + "step": 866600 + }, + { + "epoch": 4.87, + "learning_rate": 1.298841464442908e-06, + "loss": 0.1583, + "step": 866700 + }, + { + "epoch": 4.87, + "learning_rate": 1.2932202341804528e-06, + "loss": 0.1606, + "step": 866800 + }, + { + "epoch": 4.87, + "learning_rate": 1.2875990039179976e-06, + "loss": 0.1643, + "step": 866900 + }, + { + "epoch": 4.87, + "learning_rate": 1.2819777736555422e-06, + "loss": 0.1641, + "step": 867000 + }, + { + "epoch": 4.87, + "learning_rate": 1.276356543393087e-06, + "loss": 0.1609, + "step": 867100 + }, + { + "epoch": 4.87, + "learning_rate": 1.2707353131306318e-06, + "loss": 0.1688, + "step": 867200 + }, + { + "epoch": 4.88, + "learning_rate": 1.2651140828681766e-06, + "loss": 0.1656, + "step": 867300 + }, + { + "epoch": 4.88, + "learning_rate": 1.2594928526057214e-06, + "loss": 0.1641, + "step": 867400 + }, + { + "epoch": 4.88, + "learning_rate": 1.2539278346458907e-06, + "loss": 0.1634, + "step": 867500 + }, + { + "epoch": 4.88, + "learning_rate": 1.2483066043834355e-06, + "loss": 0.1671, + "step": 867600 + }, + { + "epoch": 4.88, + "learning_rate": 1.2426853741209803e-06, + "loss": 0.1607, + "step": 867700 + }, + { + "epoch": 4.88, + "learning_rate": 1.2370641438585249e-06, + "loss": 0.1632, + "step": 867800 + }, + { + "epoch": 4.88, + "learning_rate": 1.2314429135960697e-06, + "loss": 0.1611, + "step": 867900 + }, + { + "epoch": 4.88, + "learning_rate": 1.2258216833336145e-06, + "loss": 0.1624, + "step": 868000 + }, + { + "epoch": 4.88, + "learning_rate": 1.2202004530711593e-06, + "loss": 0.1614, + "step": 868100 + }, + { + "epoch": 4.88, + "learning_rate": 1.214579222808704e-06, + "loss": 0.1629, + "step": 868200 + }, + { + "epoch": 4.88, + "learning_rate": 1.2089579925462489e-06, + "loss": 0.1622, + "step": 868300 + }, + { + "epoch": 4.88, + "learning_rate": 1.2033367622837934e-06, + "loss": 0.1642, + "step": 868400 + }, + { + "epoch": 4.88, + "learning_rate": 1.1977155320213382e-06, + "loss": 0.1573, + "step": 868500 + }, + { + "epoch": 4.88, + "learning_rate": 1.192094301758883e-06, + "loss": 0.1649, + "step": 868600 + }, + { + "epoch": 4.88, + "learning_rate": 1.1864730714964278e-06, + "loss": 0.1595, + "step": 868700 + }, + { + "epoch": 4.88, + "learning_rate": 1.1808518412339726e-06, + "loss": 0.1615, + "step": 868800 + }, + { + "epoch": 4.88, + "learning_rate": 1.1752306109715172e-06, + "loss": 0.16, + "step": 868900 + }, + { + "epoch": 4.88, + "learning_rate": 1.169609380709062e-06, + "loss": 0.1614, + "step": 869000 + }, + { + "epoch": 4.89, + "learning_rate": 1.1639881504466068e-06, + "loss": 0.164, + "step": 869100 + }, + { + "epoch": 4.89, + "learning_rate": 1.1583669201841516e-06, + "loss": 0.1645, + "step": 869200 + }, + { + "epoch": 4.89, + "learning_rate": 1.1527456899216964e-06, + "loss": 0.1647, + "step": 869300 + }, + { + "epoch": 4.89, + "learning_rate": 1.147124459659241e-06, + "loss": 0.1577, + "step": 869400 + }, + { + "epoch": 4.89, + "learning_rate": 1.1415032293967858e-06, + "loss": 0.159, + "step": 869500 + }, + { + "epoch": 4.89, + "learning_rate": 1.1358819991343306e-06, + "loss": 0.1631, + "step": 869600 + }, + { + "epoch": 4.89, + "learning_rate": 1.1302607688718754e-06, + "loss": 0.153, + "step": 869700 + }, + { + "epoch": 4.89, + "learning_rate": 1.1246395386094202e-06, + "loss": 0.1625, + "step": 869800 + }, + { + "epoch": 4.89, + "learning_rate": 1.119018308346965e-06, + "loss": 0.1645, + "step": 869900 + }, + { + "epoch": 4.89, + "learning_rate": 1.1133970780845095e-06, + "loss": 0.1602, + "step": 870000 + }, + { + "epoch": 4.89, + "learning_rate": 1.1077758478220543e-06, + "loss": 0.1613, + "step": 870100 + }, + { + "epoch": 4.89, + "learning_rate": 1.1021546175595991e-06, + "loss": 0.1611, + "step": 870200 + }, + { + "epoch": 4.89, + "learning_rate": 1.096533387297144e-06, + "loss": 0.1664, + "step": 870300 + }, + { + "epoch": 4.89, + "learning_rate": 1.0909121570346887e-06, + "loss": 0.1614, + "step": 870400 + }, + { + "epoch": 4.89, + "learning_rate": 1.0852909267722333e-06, + "loss": 0.1612, + "step": 870500 + }, + { + "epoch": 4.89, + "learning_rate": 1.079669696509778e-06, + "loss": 0.1558, + "step": 870600 + }, + { + "epoch": 4.89, + "learning_rate": 1.0740484662473229e-06, + "loss": 0.166, + "step": 870700 + }, + { + "epoch": 4.89, + "learning_rate": 1.0684272359848677e-06, + "loss": 0.1566, + "step": 870800 + }, + { + "epoch": 4.9, + "learning_rate": 1.062862218025037e-06, + "loss": 0.1582, + "step": 870900 + }, + { + "epoch": 4.9, + "learning_rate": 1.0572409877625818e-06, + "loss": 0.1643, + "step": 871000 + }, + { + "epoch": 4.9, + "learning_rate": 1.0516197575001266e-06, + "loss": 0.161, + "step": 871100 + }, + { + "epoch": 4.9, + "learning_rate": 1.0459985272376714e-06, + "loss": 0.157, + "step": 871200 + }, + { + "epoch": 4.9, + "learning_rate": 1.040377296975216e-06, + "loss": 0.1593, + "step": 871300 + }, + { + "epoch": 4.9, + "learning_rate": 1.0347560667127608e-06, + "loss": 0.1644, + "step": 871400 + }, + { + "epoch": 4.9, + "learning_rate": 1.0291348364503056e-06, + "loss": 0.1684, + "step": 871500 + }, + { + "epoch": 4.9, + "learning_rate": 1.0235698184904749e-06, + "loss": 0.159, + "step": 871600 + }, + { + "epoch": 4.9, + "learning_rate": 1.0179485882280197e-06, + "loss": 0.1617, + "step": 871700 + }, + { + "epoch": 4.9, + "learning_rate": 1.0123273579655645e-06, + "loss": 0.1617, + "step": 871800 + }, + { + "epoch": 4.9, + "learning_rate": 1.0067061277031093e-06, + "loss": 0.1632, + "step": 871900 + }, + { + "epoch": 4.9, + "learning_rate": 1.001084897440654e-06, + "loss": 0.1639, + "step": 872000 + }, + { + "epoch": 4.9, + "learning_rate": 9.954636671781989e-07, + "loss": 0.1659, + "step": 872100 + }, + { + "epoch": 4.9, + "learning_rate": 9.898424369157434e-07, + "loss": 0.1605, + "step": 872200 + }, + { + "epoch": 4.9, + "learning_rate": 9.842212066532882e-07, + "loss": 0.1559, + "step": 872300 + }, + { + "epoch": 4.9, + "learning_rate": 9.78599976390833e-07, + "loss": 0.1638, + "step": 872400 + }, + { + "epoch": 4.9, + "learning_rate": 9.729787461283778e-07, + "loss": 0.1598, + "step": 872500 + }, + { + "epoch": 4.91, + "learning_rate": 9.673575158659226e-07, + "loss": 0.1592, + "step": 872600 + }, + { + "epoch": 4.91, + "learning_rate": 9.617362856034672e-07, + "loss": 0.1575, + "step": 872700 + }, + { + "epoch": 4.91, + "learning_rate": 9.56115055341012e-07, + "loss": 0.1622, + "step": 872800 + }, + { + "epoch": 4.91, + "learning_rate": 9.504938250785567e-07, + "loss": 0.1598, + "step": 872900 + }, + { + "epoch": 4.91, + "learning_rate": 9.448725948161015e-07, + "loss": 0.1584, + "step": 873000 + }, + { + "epoch": 4.91, + "learning_rate": 9.392513645536463e-07, + "loss": 0.166, + "step": 873100 + }, + { + "epoch": 4.91, + "learning_rate": 9.33630134291191e-07, + "loss": 0.1642, + "step": 873200 + }, + { + "epoch": 4.91, + "learning_rate": 9.280089040287358e-07, + "loss": 0.1643, + "step": 873300 + }, + { + "epoch": 4.91, + "learning_rate": 9.223876737662806e-07, + "loss": 0.1568, + "step": 873400 + }, + { + "epoch": 4.91, + "learning_rate": 9.167664435038253e-07, + "loss": 0.1599, + "step": 873500 + }, + { + "epoch": 4.91, + "learning_rate": 9.111452132413701e-07, + "loss": 0.1656, + "step": 873600 + }, + { + "epoch": 4.91, + "learning_rate": 9.055239829789149e-07, + "loss": 0.1609, + "step": 873700 + }, + { + "epoch": 4.91, + "learning_rate": 8.999027527164595e-07, + "loss": 0.1544, + "step": 873800 + }, + { + "epoch": 4.91, + "learning_rate": 8.942815224540043e-07, + "loss": 0.167, + "step": 873900 + }, + { + "epoch": 4.91, + "learning_rate": 8.886602921915491e-07, + "loss": 0.1594, + "step": 874000 + }, + { + "epoch": 4.91, + "learning_rate": 8.830390619290939e-07, + "loss": 0.1597, + "step": 874100 + }, + { + "epoch": 4.91, + "learning_rate": 8.774178316666387e-07, + "loss": 0.1679, + "step": 874200 + }, + { + "epoch": 4.91, + "learning_rate": 8.717966014041833e-07, + "loss": 0.1594, + "step": 874300 + }, + { + "epoch": 4.92, + "learning_rate": 8.661753711417281e-07, + "loss": 0.1617, + "step": 874400 + }, + { + "epoch": 4.92, + "learning_rate": 8.605541408792729e-07, + "loss": 0.1578, + "step": 874500 + }, + { + "epoch": 4.92, + "learning_rate": 8.549329106168177e-07, + "loss": 0.1602, + "step": 874600 + }, + { + "epoch": 4.92, + "learning_rate": 8.493116803543625e-07, + "loss": 0.1602, + "step": 874700 + }, + { + "epoch": 4.92, + "learning_rate": 8.43690450091907e-07, + "loss": 0.1631, + "step": 874800 + }, + { + "epoch": 4.92, + "learning_rate": 8.380692198294518e-07, + "loss": 0.1645, + "step": 874900 + }, + { + "epoch": 4.92, + "learning_rate": 8.324479895669966e-07, + "loss": 0.1618, + "step": 875000 + }, + { + "epoch": 4.92, + "learning_rate": 8.268267593045414e-07, + "loss": 0.1661, + "step": 875100 + }, + { + "epoch": 4.92, + "learning_rate": 8.212055290420862e-07, + "loss": 0.1585, + "step": 875200 + }, + { + "epoch": 4.92, + "learning_rate": 8.156405110822554e-07, + "loss": 0.1649, + "step": 875300 + }, + { + "epoch": 4.92, + "learning_rate": 8.100192808198002e-07, + "loss": 0.1657, + "step": 875400 + }, + { + "epoch": 4.92, + "learning_rate": 8.04398050557345e-07, + "loss": 0.1697, + "step": 875500 + }, + { + "epoch": 4.92, + "learning_rate": 7.987768202948897e-07, + "loss": 0.1596, + "step": 875600 + }, + { + "epoch": 4.92, + "learning_rate": 7.931555900324345e-07, + "loss": 0.1642, + "step": 875700 + }, + { + "epoch": 4.92, + "learning_rate": 7.875343597699793e-07, + "loss": 0.1641, + "step": 875800 + }, + { + "epoch": 4.92, + "learning_rate": 7.819131295075241e-07, + "loss": 0.1568, + "step": 875900 + }, + { + "epoch": 4.92, + "learning_rate": 7.762918992450688e-07, + "loss": 0.1565, + "step": 876000 + }, + { + "epoch": 4.92, + "learning_rate": 7.706706689826136e-07, + "loss": 0.1602, + "step": 876100 + }, + { + "epoch": 4.93, + "learning_rate": 7.650494387201584e-07, + "loss": 0.1624, + "step": 876200 + }, + { + "epoch": 4.93, + "learning_rate": 7.594282084577031e-07, + "loss": 0.1607, + "step": 876300 + }, + { + "epoch": 4.93, + "learning_rate": 7.538069781952479e-07, + "loss": 0.1669, + "step": 876400 + }, + { + "epoch": 4.93, + "learning_rate": 7.481857479327927e-07, + "loss": 0.1629, + "step": 876500 + }, + { + "epoch": 4.93, + "learning_rate": 7.425645176703373e-07, + "loss": 0.1552, + "step": 876600 + }, + { + "epoch": 4.93, + "learning_rate": 7.369432874078821e-07, + "loss": 0.1604, + "step": 876700 + }, + { + "epoch": 4.93, + "learning_rate": 7.313220571454268e-07, + "loss": 0.1594, + "step": 876800 + }, + { + "epoch": 4.93, + "learning_rate": 7.257008268829716e-07, + "loss": 0.1597, + "step": 876900 + }, + { + "epoch": 4.93, + "learning_rate": 7.200795966205164e-07, + "loss": 0.1666, + "step": 877000 + }, + { + "epoch": 4.93, + "learning_rate": 7.144583663580611e-07, + "loss": 0.1646, + "step": 877100 + }, + { + "epoch": 4.93, + "learning_rate": 7.088371360956059e-07, + "loss": 0.1632, + "step": 877200 + }, + { + "epoch": 4.93, + "learning_rate": 7.032159058331507e-07, + "loss": 0.1605, + "step": 877300 + }, + { + "epoch": 4.93, + "learning_rate": 6.975946755706954e-07, + "loss": 0.1564, + "step": 877400 + }, + { + "epoch": 4.93, + "learning_rate": 6.919734453082402e-07, + "loss": 0.161, + "step": 877500 + }, + { + "epoch": 4.93, + "learning_rate": 6.863522150457849e-07, + "loss": 0.1659, + "step": 877600 + }, + { + "epoch": 4.93, + "learning_rate": 6.807309847833297e-07, + "loss": 0.1626, + "step": 877700 + }, + { + "epoch": 4.93, + "learning_rate": 6.751097545208745e-07, + "loss": 0.1607, + "step": 877800 + }, + { + "epoch": 4.93, + "learning_rate": 6.694885242584192e-07, + "loss": 0.1575, + "step": 877900 + }, + { + "epoch": 4.94, + "learning_rate": 6.638672939959639e-07, + "loss": 0.1634, + "step": 878000 + }, + { + "epoch": 4.94, + "learning_rate": 6.582460637335087e-07, + "loss": 0.1603, + "step": 878100 + }, + { + "epoch": 4.94, + "learning_rate": 6.526248334710535e-07, + "loss": 0.1603, + "step": 878200 + }, + { + "epoch": 4.94, + "learning_rate": 6.470036032085983e-07, + "loss": 0.1615, + "step": 878300 + }, + { + "epoch": 4.94, + "learning_rate": 6.41382372946143e-07, + "loss": 0.1655, + "step": 878400 + }, + { + "epoch": 4.94, + "learning_rate": 6.358173549863123e-07, + "loss": 0.1591, + "step": 878500 + }, + { + "epoch": 4.94, + "learning_rate": 6.301961247238571e-07, + "loss": 0.1623, + "step": 878600 + }, + { + "epoch": 4.94, + "learning_rate": 6.245748944614018e-07, + "loss": 0.1628, + "step": 878700 + }, + { + "epoch": 4.94, + "learning_rate": 6.189536641989466e-07, + "loss": 0.1633, + "step": 878800 + }, + { + "epoch": 4.94, + "learning_rate": 6.133324339364914e-07, + "loss": 0.1564, + "step": 878900 + }, + { + "epoch": 4.94, + "learning_rate": 6.077112036740361e-07, + "loss": 0.1632, + "step": 879000 + }, + { + "epoch": 4.94, + "learning_rate": 6.020899734115809e-07, + "loss": 0.1649, + "step": 879100 + }, + { + "epoch": 4.94, + "learning_rate": 5.964687431491257e-07, + "loss": 0.1645, + "step": 879200 + }, + { + "epoch": 4.94, + "learning_rate": 5.908475128866704e-07, + "loss": 0.1635, + "step": 879300 + }, + { + "epoch": 4.94, + "learning_rate": 5.852262826242152e-07, + "loss": 0.1627, + "step": 879400 + }, + { + "epoch": 4.94, + "learning_rate": 5.796050523617599e-07, + "loss": 0.1606, + "step": 879500 + }, + { + "epoch": 4.94, + "learning_rate": 5.739838220993047e-07, + "loss": 0.1598, + "step": 879600 + }, + { + "epoch": 4.94, + "learning_rate": 5.683625918368495e-07, + "loss": 0.1617, + "step": 879700 + }, + { + "epoch": 4.95, + "learning_rate": 5.627413615743941e-07, + "loss": 0.1596, + "step": 879800 + }, + { + "epoch": 4.95, + "learning_rate": 5.571201313119389e-07, + "loss": 0.1625, + "step": 879900 + }, + { + "epoch": 4.95, + "learning_rate": 5.514989010494837e-07, + "loss": 0.1622, + "step": 880000 + }, + { + "epoch": 4.95, + "learning_rate": 5.458776707870284e-07, + "loss": 0.1602, + "step": 880100 + }, + { + "epoch": 4.95, + "learning_rate": 5.402564405245732e-07, + "loss": 0.1608, + "step": 880200 + }, + { + "epoch": 4.95, + "learning_rate": 5.346352102621179e-07, + "loss": 0.1628, + "step": 880300 + }, + { + "epoch": 4.95, + "learning_rate": 5.290139799996627e-07, + "loss": 0.1631, + "step": 880400 + }, + { + "epoch": 4.95, + "learning_rate": 5.233927497372075e-07, + "loss": 0.1643, + "step": 880500 + }, + { + "epoch": 4.95, + "learning_rate": 5.177715194747523e-07, + "loss": 0.1642, + "step": 880600 + }, + { + "epoch": 4.95, + "learning_rate": 5.121502892122971e-07, + "loss": 0.1564, + "step": 880700 + }, + { + "epoch": 4.95, + "learning_rate": 5.065290589498418e-07, + "loss": 0.1564, + "step": 880800 + }, + { + "epoch": 4.95, + "learning_rate": 5.009078286873866e-07, + "loss": 0.1587, + "step": 880900 + }, + { + "epoch": 4.95, + "learning_rate": 4.952865984249314e-07, + "loss": 0.164, + "step": 881000 + }, + { + "epoch": 4.95, + "learning_rate": 4.896653681624761e-07, + "loss": 0.1615, + "step": 881100 + }, + { + "epoch": 4.95, + "learning_rate": 4.840441379000209e-07, + "loss": 0.1643, + "step": 881200 + }, + { + "epoch": 4.95, + "learning_rate": 4.784229076375656e-07, + "loss": 0.1597, + "step": 881300 + }, + { + "epoch": 4.95, + "learning_rate": 4.7280167737511033e-07, + "loss": 0.1618, + "step": 881400 + }, + { + "epoch": 4.96, + "learning_rate": 4.6718044711265513e-07, + "loss": 0.1629, + "step": 881500 + }, + { + "epoch": 4.96, + "learning_rate": 4.615592168501999e-07, + "loss": 0.1605, + "step": 881600 + }, + { + "epoch": 4.96, + "learning_rate": 4.559379865877446e-07, + "loss": 0.1597, + "step": 881700 + }, + { + "epoch": 4.96, + "learning_rate": 4.503167563252894e-07, + "loss": 0.16, + "step": 881800 + }, + { + "epoch": 4.96, + "learning_rate": 4.446955260628341e-07, + "loss": 0.165, + "step": 881900 + }, + { + "epoch": 4.96, + "learning_rate": 4.390742958003789e-07, + "loss": 0.1692, + "step": 882000 + }, + { + "epoch": 4.96, + "learning_rate": 4.334530655379237e-07, + "loss": 0.1613, + "step": 882100 + }, + { + "epoch": 4.96, + "learning_rate": 4.278318352754684e-07, + "loss": 0.1608, + "step": 882200 + }, + { + "epoch": 4.96, + "learning_rate": 4.2221060501301317e-07, + "loss": 0.1648, + "step": 882300 + }, + { + "epoch": 4.96, + "learning_rate": 4.1658937475055786e-07, + "loss": 0.1612, + "step": 882400 + }, + { + "epoch": 4.96, + "learning_rate": 4.1096814448810265e-07, + "loss": 0.1579, + "step": 882500 + }, + { + "epoch": 4.96, + "learning_rate": 4.0534691422564745e-07, + "loss": 0.167, + "step": 882600 + }, + { + "epoch": 4.96, + "learning_rate": 3.997256839631922e-07, + "loss": 0.1602, + "step": 882700 + }, + { + "epoch": 4.96, + "learning_rate": 3.94104453700737e-07, + "loss": 0.1605, + "step": 882800 + }, + { + "epoch": 4.96, + "learning_rate": 3.885394357409063e-07, + "loss": 0.1603, + "step": 882900 + }, + { + "epoch": 4.96, + "learning_rate": 3.8291820547845105e-07, + "loss": 0.1614, + "step": 883000 + }, + { + "epoch": 4.96, + "learning_rate": 3.772969752159958e-07, + "loss": 0.1607, + "step": 883100 + }, + { + "epoch": 4.96, + "learning_rate": 3.7167574495354053e-07, + "loss": 0.1592, + "step": 883200 + }, + { + "epoch": 4.97, + "learning_rate": 3.660545146910853e-07, + "loss": 0.1612, + "step": 883300 + }, + { + "epoch": 4.97, + "learning_rate": 3.6043328442863007e-07, + "loss": 0.1604, + "step": 883400 + }, + { + "epoch": 4.97, + "learning_rate": 3.548120541661748e-07, + "loss": 0.1621, + "step": 883500 + }, + { + "epoch": 4.97, + "learning_rate": 3.4919082390371955e-07, + "loss": 0.1626, + "step": 883600 + }, + { + "epoch": 4.97, + "learning_rate": 3.435695936412643e-07, + "loss": 0.1598, + "step": 883700 + }, + { + "epoch": 4.97, + "learning_rate": 3.379483633788091e-07, + "loss": 0.1714, + "step": 883800 + }, + { + "epoch": 4.97, + "learning_rate": 3.323271331163539e-07, + "loss": 0.1615, + "step": 883900 + }, + { + "epoch": 4.97, + "learning_rate": 3.2670590285389863e-07, + "loss": 0.1634, + "step": 884000 + }, + { + "epoch": 4.97, + "learning_rate": 3.2108467259144337e-07, + "loss": 0.1596, + "step": 884100 + }, + { + "epoch": 4.97, + "learning_rate": 3.1546344232898816e-07, + "loss": 0.1569, + "step": 884200 + }, + { + "epoch": 4.97, + "learning_rate": 3.098422120665329e-07, + "loss": 0.1605, + "step": 884300 + }, + { + "epoch": 4.97, + "learning_rate": 3.0422098180407765e-07, + "loss": 0.1578, + "step": 884400 + }, + { + "epoch": 4.97, + "learning_rate": 2.9865596384424697e-07, + "loss": 0.1642, + "step": 884500 + }, + { + "epoch": 4.97, + "learning_rate": 2.930347335817917e-07, + "loss": 0.1595, + "step": 884600 + }, + { + "epoch": 4.97, + "learning_rate": 2.8741350331933646e-07, + "loss": 0.1583, + "step": 884700 + }, + { + "epoch": 4.97, + "learning_rate": 2.817922730568812e-07, + "loss": 0.162, + "step": 884800 + }, + { + "epoch": 4.97, + "learning_rate": 2.76171042794426e-07, + "loss": 0.1574, + "step": 884900 + }, + { + "epoch": 4.97, + "learning_rate": 2.705498125319708e-07, + "loss": 0.1534, + "step": 885000 + }, + { + "epoch": 4.98, + "learning_rate": 2.6492858226951553e-07, + "loss": 0.1582, + "step": 885100 + }, + { + "epoch": 4.98, + "learning_rate": 2.5930735200706027e-07, + "loss": 0.1624, + "step": 885200 + }, + { + "epoch": 4.98, + "learning_rate": 2.5368612174460507e-07, + "loss": 0.1587, + "step": 885300 + }, + { + "epoch": 4.98, + "learning_rate": 2.480648914821498e-07, + "loss": 0.1563, + "step": 885400 + }, + { + "epoch": 4.98, + "learning_rate": 2.4244366121969455e-07, + "loss": 0.1654, + "step": 885500 + }, + { + "epoch": 4.98, + "learning_rate": 2.368224309572393e-07, + "loss": 0.1621, + "step": 885600 + }, + { + "epoch": 4.98, + "learning_rate": 2.3120120069478409e-07, + "loss": 0.1618, + "step": 885700 + }, + { + "epoch": 4.98, + "learning_rate": 2.2557997043232883e-07, + "loss": 0.1595, + "step": 885800 + }, + { + "epoch": 4.98, + "learning_rate": 2.1995874016987357e-07, + "loss": 0.1623, + "step": 885900 + }, + { + "epoch": 4.98, + "learning_rate": 2.1433750990741834e-07, + "loss": 0.1588, + "step": 886000 + }, + { + "epoch": 4.98, + "learning_rate": 2.0871627964496313e-07, + "loss": 0.1607, + "step": 886100 + }, + { + "epoch": 4.98, + "learning_rate": 2.0309504938250788e-07, + "loss": 0.1542, + "step": 886200 + }, + { + "epoch": 4.98, + "learning_rate": 1.9747381912005262e-07, + "loss": 0.1623, + "step": 886300 + }, + { + "epoch": 4.98, + "learning_rate": 1.9185258885759739e-07, + "loss": 0.1628, + "step": 886400 + }, + { + "epoch": 4.98, + "learning_rate": 1.8623135859514213e-07, + "loss": 0.1673, + "step": 886500 + }, + { + "epoch": 4.98, + "learning_rate": 1.806101283326869e-07, + "loss": 0.1613, + "step": 886600 + }, + { + "epoch": 4.98, + "learning_rate": 1.7498889807023166e-07, + "loss": 0.1571, + "step": 886700 + }, + { + "epoch": 4.98, + "learning_rate": 1.6936766780777643e-07, + "loss": 0.1616, + "step": 886800 + }, + { + "epoch": 4.99, + "learning_rate": 1.6374643754532117e-07, + "loss": 0.1603, + "step": 886900 + }, + { + "epoch": 4.99, + "learning_rate": 1.5818141958549047e-07, + "loss": 0.1647, + "step": 887000 + }, + { + "epoch": 4.99, + "learning_rate": 1.5256018932303524e-07, + "loss": 0.1655, + "step": 887100 + }, + { + "epoch": 4.99, + "learning_rate": 1.4693895906058e-07, + "loss": 0.1589, + "step": 887200 + }, + { + "epoch": 4.99, + "learning_rate": 1.4131772879812478e-07, + "loss": 0.1656, + "step": 887300 + }, + { + "epoch": 4.99, + "learning_rate": 1.3569649853566952e-07, + "loss": 0.1586, + "step": 887400 + }, + { + "epoch": 4.99, + "learning_rate": 1.3007526827321429e-07, + "loss": 0.1642, + "step": 887500 + }, + { + "epoch": 4.99, + "learning_rate": 1.2445403801075903e-07, + "loss": 0.1574, + "step": 887600 + }, + { + "epoch": 4.99, + "learning_rate": 1.1883280774830381e-07, + "loss": 0.1584, + "step": 887700 + }, + { + "epoch": 4.99, + "learning_rate": 1.1321157748584855e-07, + "loss": 0.1642, + "step": 887800 + }, + { + "epoch": 4.99, + "learning_rate": 1.0759034722339332e-07, + "loss": 0.1608, + "step": 887900 + }, + { + "epoch": 4.99, + "learning_rate": 1.0196911696093808e-07, + "loss": 0.1634, + "step": 888000 + }, + { + "epoch": 4.99, + "learning_rate": 9.634788669848283e-08, + "loss": 0.1654, + "step": 888100 + }, + { + "epoch": 4.99, + "learning_rate": 9.072665643602759e-08, + "loss": 0.158, + "step": 888200 + }, + { + "epoch": 4.99, + "learning_rate": 8.510542617357235e-08, + "loss": 0.1569, + "step": 888300 + }, + { + "epoch": 4.99, + "learning_rate": 7.948419591111711e-08, + "loss": 0.1644, + "step": 888400 + }, + { + "epoch": 4.99, + "learning_rate": 7.386296564866186e-08, + "loss": 0.1607, + "step": 888500 + }, + { + "epoch": 5.0, + "learning_rate": 6.824173538620662e-08, + "loss": 0.1668, + "step": 888600 + }, + { + "epoch": 5.0, + "learning_rate": 6.262050512375139e-08, + "loss": 0.1691, + "step": 888700 + }, + { + "epoch": 5.0, + "learning_rate": 5.699927486129614e-08, + "loss": 0.1632, + "step": 888800 + }, + { + "epoch": 5.0, + "learning_rate": 5.1378044598840904e-08, + "loss": 0.1616, + "step": 888900 + }, + { + "epoch": 5.0, + "learning_rate": 4.575681433638566e-08, + "loss": 0.1613, + "step": 889000 + }, + { + "epoch": 5.0, + "learning_rate": 4.013558407393042e-08, + "loss": 0.159, + "step": 889100 + }, + { + "epoch": 5.0, + "learning_rate": 3.4514353811475176e-08, + "loss": 0.1656, + "step": 889200 + }, + { + "epoch": 5.0, + "learning_rate": 2.8893123549019938e-08, + "loss": 0.1598, + "step": 889300 + }, + { + "epoch": 5.0, + "learning_rate": 2.32718932865647e-08, + "loss": 0.1655, + "step": 889400 + }, + { + "epoch": 5.0, + "eval_bleu": 78.2542, + "eval_cer": 2.1669, + "eval_chrF": 95.73544319509031, + "eval_gen_len": 16.777936, + "eval_loss": 0.48148927092552185, + "eval_runtime": 7460.8529, + "eval_samples_per_second": 33.508, + "eval_steps_per_second": 0.524, + "eval_wer": 12.1294, + "step": 889485 + }, + { + "epoch": 5.0, + "step": 889485, + "total_flos": 9.809053433870746e+17, + "train_loss": 0.2851236777235189, + "train_runtime": 119967.4266, + "train_samples_per_second": 474.518, + "train_steps_per_second": 7.414 + } + ], + "logging_steps": 100, + "max_steps": 889485, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "total_flos": 9.809053433870746e+17, + "trial_name": null, + "trial_params": null +}