diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,90025 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0447579417436312, + "global_step": 15000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.444444444444445e-08, + "loss": 4.4104, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.88888888888889e-08, + "loss": 4.7311, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.3333333333333336e-07, + "loss": 4.4087, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 1.777777777777778e-07, + "loss": 3.7874, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2.2222222222222224e-07, + "loss": 3.4691, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 2.666666666666667e-07, + "loss": 3.3043, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 3.111111111111111e-07, + "loss": 3.2606, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 3.555555555555556e-07, + "loss": 3.2142, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 4.0000000000000003e-07, + "loss": 3.0412, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 4.444444444444445e-07, + "loss": 2.9603, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.88888888888889e-07, + "loss": 2.9296, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 5.333333333333335e-07, + "loss": 2.9142, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 5.777777777777778e-07, + "loss": 2.8472, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 6.222222222222223e-07, + "loss": 2.8519, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 6.666666666666667e-07, + "loss": 2.827, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 7.111111111111112e-07, + "loss": 2.7273, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 7.555555555555556e-07, + "loss": 2.6759, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-07, + "loss": 2.7779, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 8.444444444444445e-07, + "loss": 2.6916, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 8.88888888888889e-07, + "loss": 2.7232, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 9.333333333333334e-07, + "loss": 2.6514, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 9.77777777777778e-07, + "loss": 2.6538, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 1.0222222222222223e-06, + "loss": 2.6245, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 1.066666666666667e-06, + "loss": 2.5824, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 1.111111111111111e-06, + "loss": 2.7241, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 1.1555555555555556e-06, + "loss": 2.5888, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 1.2000000000000002e-06, + "loss": 2.6742, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 1.2444444444444445e-06, + "loss": 2.6171, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 1.288888888888889e-06, + "loss": 2.5888, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 1.3333333333333334e-06, + "loss": 2.599, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 1.377777777777778e-06, + "loss": 2.5183, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 1.4222222222222223e-06, + "loss": 2.561, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 1.4666666666666669e-06, + "loss": 2.5267, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 1.5111111111111112e-06, + "loss": 2.5259, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 1.5555555555555558e-06, + "loss": 2.5245, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000001e-06, + "loss": 2.4725, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 1.6444444444444447e-06, + "loss": 2.4586, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 1.688888888888889e-06, + "loss": 2.4661, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 1.7333333333333336e-06, + "loss": 2.5591, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 1.777777777777778e-06, + "loss": 2.4832, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 1.8222222222222225e-06, + "loss": 2.4917, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 1.8666666666666669e-06, + "loss": 2.5252, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 1.9111111111111112e-06, + "loss": 2.4689, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 1.955555555555556e-06, + "loss": 2.4951, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 2.0000000000000003e-06, + "loss": 2.4628, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 2.0444444444444447e-06, + "loss": 2.4817, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 2.088888888888889e-06, + "loss": 2.4257, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 2.133333333333334e-06, + "loss": 2.5135, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 2.1777777777777777e-06, + "loss": 2.3578, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 2.222222222222222e-06, + "loss": 2.461, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2.266666666666667e-06, + "loss": 2.4099, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 2.311111111111111e-06, + "loss": 2.4331, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 2.3555555555555555e-06, + "loss": 2.4437, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 2.4000000000000003e-06, + "loss": 2.4993, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 2.4444444444444447e-06, + "loss": 2.3919, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 2.488888888888889e-06, + "loss": 2.5015, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 2.5333333333333338e-06, + "loss": 2.3989, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 2.577777777777778e-06, + "loss": 2.402, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 2.6222222222222225e-06, + "loss": 2.4046, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 2.666666666666667e-06, + "loss": 2.4419, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 2.7111111111111116e-06, + "loss": 2.4573, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 2.755555555555556e-06, + "loss": 2.3891, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-06, + "loss": 2.3936, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 2.8444444444444446e-06, + "loss": 2.4293, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 2.888888888888889e-06, + "loss": 2.3764, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 2.9333333333333338e-06, + "loss": 2.3835, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 2.9777777777777777e-06, + "loss": 2.3791, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 3.0222222222222225e-06, + "loss": 2.4402, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 3.066666666666667e-06, + "loss": 2.3952, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 3.1111111111111116e-06, + "loss": 2.3626, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 3.1555555555555555e-06, + "loss": 2.3848, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 3.2000000000000003e-06, + "loss": 2.3911, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 3.2444444444444446e-06, + "loss": 2.4376, + "step": 73 + }, + { + "epoch": 0.02, + "learning_rate": 3.2888888888888894e-06, + "loss": 2.4535, + "step": 74 + }, + { + "epoch": 0.02, + "learning_rate": 3.3333333333333333e-06, + "loss": 2.4319, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 3.377777777777778e-06, + "loss": 2.407, + "step": 76 + }, + { + "epoch": 0.02, + "learning_rate": 3.4222222222222224e-06, + "loss": 2.447, + "step": 77 + }, + { + "epoch": 0.02, + "learning_rate": 3.4666666666666672e-06, + "loss": 2.2612, + "step": 78 + }, + { + "epoch": 0.02, + "learning_rate": 3.511111111111111e-06, + "loss": 2.4062, + "step": 79 + }, + { + "epoch": 0.02, + "learning_rate": 3.555555555555556e-06, + "loss": 2.4107, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 3.6000000000000003e-06, + "loss": 2.4206, + "step": 81 + }, + { + "epoch": 0.02, + "learning_rate": 3.644444444444445e-06, + "loss": 2.3087, + "step": 82 + }, + { + "epoch": 0.02, + "learning_rate": 3.688888888888889e-06, + "loss": 2.3087, + "step": 83 + }, + { + "epoch": 0.02, + "learning_rate": 3.7333333333333337e-06, + "loss": 2.3544, + "step": 84 + }, + { + "epoch": 0.02, + "learning_rate": 3.777777777777778e-06, + "loss": 2.4208, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 3.8222222222222224e-06, + "loss": 2.424, + "step": 86 + }, + { + "epoch": 0.02, + "learning_rate": 3.866666666666667e-06, + "loss": 2.3243, + "step": 87 + }, + { + "epoch": 0.02, + "learning_rate": 3.911111111111112e-06, + "loss": 2.4105, + "step": 88 + }, + { + "epoch": 0.02, + "learning_rate": 3.955555555555556e-06, + "loss": 2.2982, + "step": 89 + }, + { + "epoch": 0.02, + "learning_rate": 4.000000000000001e-06, + "loss": 2.4061, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 4.044444444444445e-06, + "loss": 2.3698, + "step": 91 + }, + { + "epoch": 0.02, + "learning_rate": 4.088888888888889e-06, + "loss": 2.3651, + "step": 92 + }, + { + "epoch": 0.02, + "learning_rate": 4.133333333333333e-06, + "loss": 2.3414, + "step": 93 + }, + { + "epoch": 0.02, + "learning_rate": 4.177777777777778e-06, + "loss": 2.435, + "step": 94 + }, + { + "epoch": 0.02, + "learning_rate": 4.222222222222223e-06, + "loss": 2.3679, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 4.266666666666668e-06, + "loss": 2.3888, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 4.3111111111111115e-06, + "loss": 2.4288, + "step": 97 + }, + { + "epoch": 0.02, + "learning_rate": 4.3555555555555555e-06, + "loss": 2.2927, + "step": 98 + }, + { + "epoch": 0.02, + "learning_rate": 4.4e-06, + "loss": 2.3606, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 4.444444444444444e-06, + "loss": 2.3484, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 4.488888888888889e-06, + "loss": 2.3345, + "step": 101 + }, + { + "epoch": 0.02, + "learning_rate": 4.533333333333334e-06, + "loss": 2.2628, + "step": 102 + }, + { + "epoch": 0.02, + "learning_rate": 4.5777777777777785e-06, + "loss": 2.3345, + "step": 103 + }, + { + "epoch": 0.02, + "learning_rate": 4.622222222222222e-06, + "loss": 2.3478, + "step": 104 + }, + { + "epoch": 0.02, + "learning_rate": 4.666666666666667e-06, + "loss": 2.2823, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 4.711111111111111e-06, + "loss": 2.3245, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 4.755555555555556e-06, + "loss": 2.3692, + "step": 107 + }, + { + "epoch": 0.02, + "learning_rate": 4.800000000000001e-06, + "loss": 2.3607, + "step": 108 + }, + { + "epoch": 0.02, + "learning_rate": 4.8444444444444446e-06, + "loss": 2.3551, + "step": 109 + }, + { + "epoch": 0.02, + "learning_rate": 4.888888888888889e-06, + "loss": 2.4453, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 4.933333333333334e-06, + "loss": 2.351, + "step": 111 + }, + { + "epoch": 0.02, + "learning_rate": 4.977777777777778e-06, + "loss": 2.2924, + "step": 112 + }, + { + "epoch": 0.02, + "learning_rate": 5.022222222222223e-06, + "loss": 2.3471, + "step": 113 + }, + { + "epoch": 0.02, + "learning_rate": 5.0666666666666676e-06, + "loss": 2.3138, + "step": 114 + }, + { + "epoch": 0.02, + "learning_rate": 5.1111111111111115e-06, + "loss": 2.355, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 5.155555555555556e-06, + "loss": 2.4289, + "step": 116 + }, + { + "epoch": 0.02, + "learning_rate": 5.2e-06, + "loss": 2.3339, + "step": 117 + }, + { + "epoch": 0.02, + "learning_rate": 5.244444444444445e-06, + "loss": 2.3107, + "step": 118 + }, + { + "epoch": 0.02, + "learning_rate": 5.288888888888889e-06, + "loss": 2.2676, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 5.333333333333334e-06, + "loss": 2.3388, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 5.3777777777777784e-06, + "loss": 2.3541, + "step": 121 + }, + { + "epoch": 0.02, + "learning_rate": 5.422222222222223e-06, + "loss": 2.2991, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 5.466666666666667e-06, + "loss": 2.3427, + "step": 123 + }, + { + "epoch": 0.03, + "learning_rate": 5.511111111111112e-06, + "loss": 2.3871, + "step": 124 + }, + { + "epoch": 0.03, + "learning_rate": 5.555555555555557e-06, + "loss": 2.3833, + "step": 125 + }, + { + "epoch": 0.03, + "learning_rate": 5.600000000000001e-06, + "loss": 2.3598, + "step": 126 + }, + { + "epoch": 0.03, + "learning_rate": 5.6444444444444445e-06, + "loss": 2.3622, + "step": 127 + }, + { + "epoch": 0.03, + "learning_rate": 5.688888888888889e-06, + "loss": 2.3487, + "step": 128 + }, + { + "epoch": 0.03, + "learning_rate": 5.733333333333334e-06, + "loss": 2.3378, + "step": 129 + }, + { + "epoch": 0.03, + "learning_rate": 5.777777777777778e-06, + "loss": 2.4014, + "step": 130 + }, + { + "epoch": 0.03, + "learning_rate": 5.822222222222223e-06, + "loss": 2.3588, + "step": 131 + }, + { + "epoch": 0.03, + "learning_rate": 5.8666666666666675e-06, + "loss": 2.3098, + "step": 132 + }, + { + "epoch": 0.03, + "learning_rate": 5.911111111111112e-06, + "loss": 2.3438, + "step": 133 + }, + { + "epoch": 0.03, + "learning_rate": 5.955555555555555e-06, + "loss": 2.3602, + "step": 134 + }, + { + "epoch": 0.03, + "learning_rate": 6e-06, + "loss": 2.3273, + "step": 135 + }, + { + "epoch": 0.03, + "learning_rate": 6.044444444444445e-06, + "loss": 2.2099, + "step": 136 + }, + { + "epoch": 0.03, + "learning_rate": 6.08888888888889e-06, + "loss": 2.3072, + "step": 137 + }, + { + "epoch": 0.03, + "learning_rate": 6.133333333333334e-06, + "loss": 2.2971, + "step": 138 + }, + { + "epoch": 0.03, + "learning_rate": 6.177777777777778e-06, + "loss": 2.2635, + "step": 139 + }, + { + "epoch": 0.03, + "learning_rate": 6.222222222222223e-06, + "loss": 2.2499, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 6.266666666666668e-06, + "loss": 2.392, + "step": 141 + }, + { + "epoch": 0.03, + "learning_rate": 6.311111111111111e-06, + "loss": 2.361, + "step": 142 + }, + { + "epoch": 0.03, + "learning_rate": 6.355555555555556e-06, + "loss": 2.3518, + "step": 143 + }, + { + "epoch": 0.03, + "learning_rate": 6.4000000000000006e-06, + "loss": 2.3061, + "step": 144 + }, + { + "epoch": 0.03, + "learning_rate": 6.444444444444445e-06, + "loss": 2.3045, + "step": 145 + }, + { + "epoch": 0.03, + "learning_rate": 6.488888888888889e-06, + "loss": 2.2516, + "step": 146 + }, + { + "epoch": 0.03, + "learning_rate": 6.533333333333334e-06, + "loss": 2.2343, + "step": 147 + }, + { + "epoch": 0.03, + "learning_rate": 6.577777777777779e-06, + "loss": 2.3422, + "step": 148 + }, + { + "epoch": 0.03, + "learning_rate": 6.6222222222222236e-06, + "loss": 2.3534, + "step": 149 + }, + { + "epoch": 0.03, + "learning_rate": 6.666666666666667e-06, + "loss": 2.3005, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 6.711111111111111e-06, + "loss": 2.3148, + "step": 151 + }, + { + "epoch": 0.03, + "learning_rate": 6.755555555555556e-06, + "loss": 2.2966, + "step": 152 + }, + { + "epoch": 0.03, + "learning_rate": 6.800000000000001e-06, + "loss": 2.3215, + "step": 153 + }, + { + "epoch": 0.03, + "learning_rate": 6.844444444444445e-06, + "loss": 2.3737, + "step": 154 + }, + { + "epoch": 0.03, + "learning_rate": 6.88888888888889e-06, + "loss": 2.4356, + "step": 155 + }, + { + "epoch": 0.03, + "learning_rate": 6.9333333333333344e-06, + "loss": 2.2767, + "step": 156 + }, + { + "epoch": 0.03, + "learning_rate": 6.977777777777779e-06, + "loss": 2.3723, + "step": 157 + }, + { + "epoch": 0.03, + "learning_rate": 7.022222222222222e-06, + "loss": 2.21, + "step": 158 + }, + { + "epoch": 0.03, + "learning_rate": 7.066666666666667e-06, + "loss": 2.3073, + "step": 159 + }, + { + "epoch": 0.03, + "learning_rate": 7.111111111111112e-06, + "loss": 2.2555, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 7.155555555555556e-06, + "loss": 2.2868, + "step": 161 + }, + { + "epoch": 0.03, + "learning_rate": 7.2000000000000005e-06, + "loss": 2.3065, + "step": 162 + }, + { + "epoch": 0.03, + "learning_rate": 7.244444444444445e-06, + "loss": 2.2769, + "step": 163 + }, + { + "epoch": 0.03, + "learning_rate": 7.28888888888889e-06, + "loss": 2.2627, + "step": 164 + }, + { + "epoch": 0.03, + "learning_rate": 7.333333333333333e-06, + "loss": 2.3454, + "step": 165 + }, + { + "epoch": 0.03, + "learning_rate": 7.377777777777778e-06, + "loss": 2.3309, + "step": 166 + }, + { + "epoch": 0.03, + "learning_rate": 7.422222222222223e-06, + "loss": 2.3452, + "step": 167 + }, + { + "epoch": 0.03, + "learning_rate": 7.4666666666666675e-06, + "loss": 2.3356, + "step": 168 + }, + { + "epoch": 0.03, + "learning_rate": 7.511111111111111e-06, + "loss": 2.3396, + "step": 169 + }, + { + "epoch": 0.03, + "learning_rate": 7.555555555555556e-06, + "loss": 2.3131, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 7.600000000000001e-06, + "loss": 2.2454, + "step": 171 + }, + { + "epoch": 0.03, + "learning_rate": 7.644444444444445e-06, + "loss": 2.2748, + "step": 172 + }, + { + "epoch": 0.04, + "learning_rate": 7.68888888888889e-06, + "loss": 2.2758, + "step": 173 + }, + { + "epoch": 0.04, + "learning_rate": 7.733333333333334e-06, + "loss": 2.2792, + "step": 174 + }, + { + "epoch": 0.04, + "learning_rate": 7.77777777777778e-06, + "loss": 2.2847, + "step": 175 + }, + { + "epoch": 0.04, + "learning_rate": 7.822222222222224e-06, + "loss": 2.3566, + "step": 176 + }, + { + "epoch": 0.04, + "learning_rate": 7.866666666666667e-06, + "loss": 2.2576, + "step": 177 + }, + { + "epoch": 0.04, + "learning_rate": 7.911111111111112e-06, + "loss": 2.3183, + "step": 178 + }, + { + "epoch": 0.04, + "learning_rate": 7.955555555555557e-06, + "loss": 2.3016, + "step": 179 + }, + { + "epoch": 0.04, + "learning_rate": 8.000000000000001e-06, + "loss": 2.3268, + "step": 180 + }, + { + "epoch": 0.04, + "learning_rate": 8.044444444444444e-06, + "loss": 2.3519, + "step": 181 + }, + { + "epoch": 0.04, + "learning_rate": 8.08888888888889e-06, + "loss": 2.3268, + "step": 182 + }, + { + "epoch": 0.04, + "learning_rate": 8.133333333333334e-06, + "loss": 2.285, + "step": 183 + }, + { + "epoch": 0.04, + "learning_rate": 8.177777777777779e-06, + "loss": 2.2955, + "step": 184 + }, + { + "epoch": 0.04, + "learning_rate": 8.222222222222222e-06, + "loss": 2.3034, + "step": 185 + }, + { + "epoch": 0.04, + "learning_rate": 8.266666666666667e-06, + "loss": 2.3217, + "step": 186 + }, + { + "epoch": 0.04, + "learning_rate": 8.311111111111111e-06, + "loss": 2.2932, + "step": 187 + }, + { + "epoch": 0.04, + "learning_rate": 8.355555555555556e-06, + "loss": 2.2486, + "step": 188 + }, + { + "epoch": 0.04, + "learning_rate": 8.400000000000001e-06, + "loss": 2.2597, + "step": 189 + }, + { + "epoch": 0.04, + "learning_rate": 8.444444444444446e-06, + "loss": 2.3797, + "step": 190 + }, + { + "epoch": 0.04, + "learning_rate": 8.48888888888889e-06, + "loss": 2.3087, + "step": 191 + }, + { + "epoch": 0.04, + "learning_rate": 8.533333333333335e-06, + "loss": 2.2966, + "step": 192 + }, + { + "epoch": 0.04, + "learning_rate": 8.577777777777778e-06, + "loss": 2.2333, + "step": 193 + }, + { + "epoch": 0.04, + "learning_rate": 8.622222222222223e-06, + "loss": 2.2859, + "step": 194 + }, + { + "epoch": 0.04, + "learning_rate": 8.666666666666668e-06, + "loss": 2.3147, + "step": 195 + }, + { + "epoch": 0.04, + "learning_rate": 8.711111111111111e-06, + "loss": 2.2746, + "step": 196 + }, + { + "epoch": 0.04, + "learning_rate": 8.755555555555556e-06, + "loss": 2.365, + "step": 197 + }, + { + "epoch": 0.04, + "learning_rate": 8.8e-06, + "loss": 2.2635, + "step": 198 + }, + { + "epoch": 0.04, + "learning_rate": 8.844444444444445e-06, + "loss": 2.3151, + "step": 199 + }, + { + "epoch": 0.04, + "learning_rate": 8.888888888888888e-06, + "loss": 2.2947, + "step": 200 + }, + { + "epoch": 0.04, + "learning_rate": 8.933333333333333e-06, + "loss": 2.3216, + "step": 201 + }, + { + "epoch": 0.04, + "learning_rate": 8.977777777777778e-06, + "loss": 2.2109, + "step": 202 + }, + { + "epoch": 0.04, + "learning_rate": 9.022222222222223e-06, + "loss": 2.3098, + "step": 203 + }, + { + "epoch": 0.04, + "learning_rate": 9.066666666666667e-06, + "loss": 2.3722, + "step": 204 + }, + { + "epoch": 0.04, + "learning_rate": 9.111111111111112e-06, + "loss": 2.363, + "step": 205 + }, + { + "epoch": 0.04, + "learning_rate": 9.155555555555557e-06, + "loss": 2.3435, + "step": 206 + }, + { + "epoch": 0.04, + "learning_rate": 9.200000000000002e-06, + "loss": 2.2546, + "step": 207 + }, + { + "epoch": 0.04, + "learning_rate": 9.244444444444445e-06, + "loss": 2.2715, + "step": 208 + }, + { + "epoch": 0.04, + "learning_rate": 9.28888888888889e-06, + "loss": 2.3358, + "step": 209 + }, + { + "epoch": 0.04, + "learning_rate": 9.333333333333334e-06, + "loss": 2.3181, + "step": 210 + }, + { + "epoch": 0.04, + "learning_rate": 9.377777777777779e-06, + "loss": 2.348, + "step": 211 + }, + { + "epoch": 0.04, + "learning_rate": 9.422222222222222e-06, + "loss": 2.2654, + "step": 212 + }, + { + "epoch": 0.04, + "learning_rate": 9.466666666666667e-06, + "loss": 2.2877, + "step": 213 + }, + { + "epoch": 0.04, + "learning_rate": 9.511111111111112e-06, + "loss": 2.3036, + "step": 214 + }, + { + "epoch": 0.04, + "learning_rate": 9.555555555555556e-06, + "loss": 2.308, + "step": 215 + }, + { + "epoch": 0.04, + "learning_rate": 9.600000000000001e-06, + "loss": 2.2552, + "step": 216 + }, + { + "epoch": 0.04, + "learning_rate": 9.644444444444444e-06, + "loss": 2.2423, + "step": 217 + }, + { + "epoch": 0.04, + "learning_rate": 9.688888888888889e-06, + "loss": 2.2643, + "step": 218 + }, + { + "epoch": 0.04, + "learning_rate": 9.733333333333334e-06, + "loss": 2.3047, + "step": 219 + }, + { + "epoch": 0.04, + "learning_rate": 9.777777777777779e-06, + "loss": 2.2961, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 9.822222222222223e-06, + "loss": 2.3513, + "step": 221 + }, + { + "epoch": 0.05, + "learning_rate": 9.866666666666668e-06, + "loss": 2.3006, + "step": 222 + }, + { + "epoch": 0.05, + "learning_rate": 9.911111111111113e-06, + "loss": 2.3253, + "step": 223 + }, + { + "epoch": 0.05, + "learning_rate": 9.955555555555556e-06, + "loss": 2.2636, + "step": 224 + }, + { + "epoch": 0.05, + "learning_rate": 1e-05, + "loss": 2.2784, + "step": 225 + }, + { + "epoch": 0.05, + "learning_rate": 1.0044444444444446e-05, + "loss": 2.2225, + "step": 226 + }, + { + "epoch": 0.05, + "learning_rate": 1.008888888888889e-05, + "loss": 2.3477, + "step": 227 + }, + { + "epoch": 0.05, + "learning_rate": 1.0133333333333335e-05, + "loss": 2.161, + "step": 228 + }, + { + "epoch": 0.05, + "learning_rate": 1.0177777777777778e-05, + "loss": 2.3363, + "step": 229 + }, + { + "epoch": 0.05, + "learning_rate": 1.0222222222222223e-05, + "loss": 2.283, + "step": 230 + }, + { + "epoch": 0.05, + "learning_rate": 1.0266666666666668e-05, + "loss": 2.2389, + "step": 231 + }, + { + "epoch": 0.05, + "learning_rate": 1.0311111111111113e-05, + "loss": 2.2745, + "step": 232 + }, + { + "epoch": 0.05, + "learning_rate": 1.0355555555555557e-05, + "loss": 2.2835, + "step": 233 + }, + { + "epoch": 0.05, + "learning_rate": 1.04e-05, + "loss": 2.2165, + "step": 234 + }, + { + "epoch": 0.05, + "learning_rate": 1.0444444444444445e-05, + "loss": 2.345, + "step": 235 + }, + { + "epoch": 0.05, + "learning_rate": 1.048888888888889e-05, + "loss": 2.295, + "step": 236 + }, + { + "epoch": 0.05, + "learning_rate": 1.0533333333333333e-05, + "loss": 2.3146, + "step": 237 + }, + { + "epoch": 0.05, + "learning_rate": 1.0577777777777778e-05, + "loss": 2.2874, + "step": 238 + }, + { + "epoch": 0.05, + "learning_rate": 1.0622222222222223e-05, + "loss": 2.3309, + "step": 239 + }, + { + "epoch": 0.05, + "learning_rate": 1.0666666666666667e-05, + "loss": 2.2414, + "step": 240 + }, + { + "epoch": 0.05, + "learning_rate": 1.0711111111111112e-05, + "loss": 2.226, + "step": 241 + }, + { + "epoch": 0.05, + "learning_rate": 1.0755555555555557e-05, + "loss": 2.2894, + "step": 242 + }, + { + "epoch": 0.05, + "learning_rate": 1.0800000000000002e-05, + "loss": 2.2193, + "step": 243 + }, + { + "epoch": 0.05, + "learning_rate": 1.0844444444444446e-05, + "loss": 2.2964, + "step": 244 + }, + { + "epoch": 0.05, + "learning_rate": 1.088888888888889e-05, + "loss": 2.2472, + "step": 245 + }, + { + "epoch": 0.05, + "learning_rate": 1.0933333333333334e-05, + "loss": 2.3213, + "step": 246 + }, + { + "epoch": 0.05, + "learning_rate": 1.0977777777777779e-05, + "loss": 2.2658, + "step": 247 + }, + { + "epoch": 0.05, + "learning_rate": 1.1022222222222224e-05, + "loss": 2.3517, + "step": 248 + }, + { + "epoch": 0.05, + "learning_rate": 1.1066666666666669e-05, + "loss": 2.2687, + "step": 249 + }, + { + "epoch": 0.05, + "learning_rate": 1.1111111111111113e-05, + "loss": 2.2633, + "step": 250 + }, + { + "epoch": 0.05, + "learning_rate": 1.1155555555555556e-05, + "loss": 2.291, + "step": 251 + }, + { + "epoch": 0.05, + "learning_rate": 1.1200000000000001e-05, + "loss": 2.3223, + "step": 252 + }, + { + "epoch": 0.05, + "learning_rate": 1.1244444444444444e-05, + "loss": 2.2822, + "step": 253 + }, + { + "epoch": 0.05, + "learning_rate": 1.1288888888888889e-05, + "loss": 2.3195, + "step": 254 + }, + { + "epoch": 0.05, + "learning_rate": 1.1333333333333334e-05, + "loss": 2.3378, + "step": 255 + }, + { + "epoch": 0.05, + "learning_rate": 1.1377777777777779e-05, + "loss": 2.3096, + "step": 256 + }, + { + "epoch": 0.05, + "learning_rate": 1.1422222222222223e-05, + "loss": 2.2773, + "step": 257 + }, + { + "epoch": 0.05, + "learning_rate": 1.1466666666666668e-05, + "loss": 2.2341, + "step": 258 + }, + { + "epoch": 0.05, + "learning_rate": 1.1511111111111113e-05, + "loss": 2.2244, + "step": 259 + }, + { + "epoch": 0.05, + "learning_rate": 1.1555555555555556e-05, + "loss": 2.3304, + "step": 260 + }, + { + "epoch": 0.05, + "learning_rate": 1.16e-05, + "loss": 2.3489, + "step": 261 + }, + { + "epoch": 0.05, + "learning_rate": 1.1644444444444446e-05, + "loss": 2.2439, + "step": 262 + }, + { + "epoch": 0.05, + "learning_rate": 1.168888888888889e-05, + "loss": 2.2736, + "step": 263 + }, + { + "epoch": 0.05, + "learning_rate": 1.1733333333333335e-05, + "loss": 2.3522, + "step": 264 + }, + { + "epoch": 0.05, + "learning_rate": 1.177777777777778e-05, + "loss": 2.3146, + "step": 265 + }, + { + "epoch": 0.05, + "learning_rate": 1.1822222222222225e-05, + "loss": 2.2632, + "step": 266 + }, + { + "epoch": 0.05, + "learning_rate": 1.186666666666667e-05, + "loss": 2.2624, + "step": 267 + }, + { + "epoch": 0.05, + "learning_rate": 1.191111111111111e-05, + "loss": 2.2736, + "step": 268 + }, + { + "epoch": 0.05, + "learning_rate": 1.1955555555555556e-05, + "loss": 2.2702, + "step": 269 + }, + { + "epoch": 0.05, + "learning_rate": 1.2e-05, + "loss": 2.281, + "step": 270 + }, + { + "epoch": 0.06, + "learning_rate": 1.2044444444444445e-05, + "loss": 2.1915, + "step": 271 + }, + { + "epoch": 0.06, + "learning_rate": 1.208888888888889e-05, + "loss": 2.3112, + "step": 272 + }, + { + "epoch": 0.06, + "learning_rate": 1.2133333333333335e-05, + "loss": 2.2431, + "step": 273 + }, + { + "epoch": 0.06, + "learning_rate": 1.217777777777778e-05, + "loss": 2.3157, + "step": 274 + }, + { + "epoch": 0.06, + "learning_rate": 1.2222222222222224e-05, + "loss": 2.3411, + "step": 275 + }, + { + "epoch": 0.06, + "learning_rate": 1.2266666666666667e-05, + "loss": 2.3427, + "step": 276 + }, + { + "epoch": 0.06, + "learning_rate": 1.2311111111111112e-05, + "loss": 2.2928, + "step": 277 + }, + { + "epoch": 0.06, + "learning_rate": 1.2355555555555557e-05, + "loss": 2.2816, + "step": 278 + }, + { + "epoch": 0.06, + "learning_rate": 1.2400000000000002e-05, + "loss": 2.1884, + "step": 279 + }, + { + "epoch": 0.06, + "learning_rate": 1.2444444444444446e-05, + "loss": 2.3104, + "step": 280 + }, + { + "epoch": 0.06, + "learning_rate": 1.2488888888888891e-05, + "loss": 2.2724, + "step": 281 + }, + { + "epoch": 0.06, + "learning_rate": 1.2533333333333336e-05, + "loss": 2.2366, + "step": 282 + }, + { + "epoch": 0.06, + "learning_rate": 1.257777777777778e-05, + "loss": 2.2804, + "step": 283 + }, + { + "epoch": 0.06, + "learning_rate": 1.2622222222222222e-05, + "loss": 2.2889, + "step": 284 + }, + { + "epoch": 0.06, + "learning_rate": 1.2666666666666667e-05, + "loss": 2.2733, + "step": 285 + }, + { + "epoch": 0.06, + "learning_rate": 1.2711111111111112e-05, + "loss": 2.2486, + "step": 286 + }, + { + "epoch": 0.06, + "learning_rate": 1.2755555555555556e-05, + "loss": 2.2404, + "step": 287 + }, + { + "epoch": 0.06, + "learning_rate": 1.2800000000000001e-05, + "loss": 2.2808, + "step": 288 + }, + { + "epoch": 0.06, + "learning_rate": 1.2844444444444446e-05, + "loss": 2.2866, + "step": 289 + }, + { + "epoch": 0.06, + "learning_rate": 1.288888888888889e-05, + "loss": 2.2782, + "step": 290 + }, + { + "epoch": 0.06, + "learning_rate": 1.2933333333333334e-05, + "loss": 2.2684, + "step": 291 + }, + { + "epoch": 0.06, + "learning_rate": 1.2977777777777779e-05, + "loss": 2.2419, + "step": 292 + }, + { + "epoch": 0.06, + "learning_rate": 1.3022222222222223e-05, + "loss": 2.2703, + "step": 293 + }, + { + "epoch": 0.06, + "learning_rate": 1.3066666666666668e-05, + "loss": 2.2271, + "step": 294 + }, + { + "epoch": 0.06, + "learning_rate": 1.3111111111111113e-05, + "loss": 2.2386, + "step": 295 + }, + { + "epoch": 0.06, + "learning_rate": 1.3155555555555558e-05, + "loss": 2.2071, + "step": 296 + }, + { + "epoch": 0.06, + "learning_rate": 1.3200000000000002e-05, + "loss": 2.2721, + "step": 297 + }, + { + "epoch": 0.06, + "learning_rate": 1.3244444444444447e-05, + "loss": 2.322, + "step": 298 + }, + { + "epoch": 0.06, + "learning_rate": 1.3288888888888889e-05, + "loss": 2.3625, + "step": 299 + }, + { + "epoch": 0.06, + "learning_rate": 1.3333333333333333e-05, + "loss": 2.2301, + "step": 300 + }, + { + "epoch": 0.06, + "learning_rate": 1.3377777777777778e-05, + "loss": 2.2856, + "step": 301 + }, + { + "epoch": 0.06, + "learning_rate": 1.3422222222222223e-05, + "loss": 2.297, + "step": 302 + }, + { + "epoch": 0.06, + "learning_rate": 1.3466666666666668e-05, + "loss": 2.3471, + "step": 303 + }, + { + "epoch": 0.06, + "learning_rate": 1.3511111111111112e-05, + "loss": 2.2344, + "step": 304 + }, + { + "epoch": 0.06, + "learning_rate": 1.3555555555555557e-05, + "loss": 2.2555, + "step": 305 + }, + { + "epoch": 0.06, + "learning_rate": 1.3600000000000002e-05, + "loss": 2.2384, + "step": 306 + }, + { + "epoch": 0.06, + "learning_rate": 1.3644444444444445e-05, + "loss": 2.3158, + "step": 307 + }, + { + "epoch": 0.06, + "learning_rate": 1.368888888888889e-05, + "loss": 2.2425, + "step": 308 + }, + { + "epoch": 0.06, + "learning_rate": 1.3733333333333335e-05, + "loss": 2.2616, + "step": 309 + }, + { + "epoch": 0.06, + "learning_rate": 1.377777777777778e-05, + "loss": 2.3088, + "step": 310 + }, + { + "epoch": 0.06, + "learning_rate": 1.3822222222222224e-05, + "loss": 2.273, + "step": 311 + }, + { + "epoch": 0.06, + "learning_rate": 1.3866666666666669e-05, + "loss": 2.2236, + "step": 312 + }, + { + "epoch": 0.06, + "learning_rate": 1.3911111111111114e-05, + "loss": 2.2751, + "step": 313 + }, + { + "epoch": 0.06, + "learning_rate": 1.3955555555555558e-05, + "loss": 2.2538, + "step": 314 + }, + { + "epoch": 0.06, + "learning_rate": 1.4e-05, + "loss": 2.288, + "step": 315 + }, + { + "epoch": 0.06, + "learning_rate": 1.4044444444444445e-05, + "loss": 2.2277, + "step": 316 + }, + { + "epoch": 0.06, + "learning_rate": 1.408888888888889e-05, + "loss": 2.2954, + "step": 317 + }, + { + "epoch": 0.06, + "learning_rate": 1.4133333333333334e-05, + "loss": 2.2792, + "step": 318 + }, + { + "epoch": 0.06, + "learning_rate": 1.4177777777777779e-05, + "loss": 2.2249, + "step": 319 + }, + { + "epoch": 0.06, + "learning_rate": 1.4222222222222224e-05, + "loss": 2.2958, + "step": 320 + }, + { + "epoch": 0.07, + "learning_rate": 1.4266666666666668e-05, + "loss": 2.2689, + "step": 321 + }, + { + "epoch": 0.07, + "learning_rate": 1.4311111111111111e-05, + "loss": 2.2898, + "step": 322 + }, + { + "epoch": 0.07, + "learning_rate": 1.4355555555555556e-05, + "loss": 2.2728, + "step": 323 + }, + { + "epoch": 0.07, + "learning_rate": 1.4400000000000001e-05, + "loss": 2.2758, + "step": 324 + }, + { + "epoch": 0.07, + "learning_rate": 1.4444444444444446e-05, + "loss": 2.1893, + "step": 325 + }, + { + "epoch": 0.07, + "learning_rate": 1.448888888888889e-05, + "loss": 2.3245, + "step": 326 + }, + { + "epoch": 0.07, + "learning_rate": 1.4533333333333335e-05, + "loss": 2.2772, + "step": 327 + }, + { + "epoch": 0.07, + "learning_rate": 1.457777777777778e-05, + "loss": 2.2813, + "step": 328 + }, + { + "epoch": 0.07, + "learning_rate": 1.4622222222222225e-05, + "loss": 2.2867, + "step": 329 + }, + { + "epoch": 0.07, + "learning_rate": 1.4666666666666666e-05, + "loss": 2.2052, + "step": 330 + }, + { + "epoch": 0.07, + "learning_rate": 1.4711111111111111e-05, + "loss": 2.2782, + "step": 331 + }, + { + "epoch": 0.07, + "learning_rate": 1.4755555555555556e-05, + "loss": 2.2591, + "step": 332 + }, + { + "epoch": 0.07, + "learning_rate": 1.48e-05, + "loss": 2.2637, + "step": 333 + }, + { + "epoch": 0.07, + "learning_rate": 1.4844444444444445e-05, + "loss": 2.3195, + "step": 334 + }, + { + "epoch": 0.07, + "learning_rate": 1.488888888888889e-05, + "loss": 2.1918, + "step": 335 + }, + { + "epoch": 0.07, + "learning_rate": 1.4933333333333335e-05, + "loss": 2.2769, + "step": 336 + }, + { + "epoch": 0.07, + "learning_rate": 1.497777777777778e-05, + "loss": 2.188, + "step": 337 + }, + { + "epoch": 0.07, + "learning_rate": 1.5022222222222223e-05, + "loss": 2.2737, + "step": 338 + }, + { + "epoch": 0.07, + "learning_rate": 1.5066666666666668e-05, + "loss": 2.293, + "step": 339 + }, + { + "epoch": 0.07, + "learning_rate": 1.5111111111111112e-05, + "loss": 2.2043, + "step": 340 + }, + { + "epoch": 0.07, + "learning_rate": 1.5155555555555557e-05, + "loss": 2.2421, + "step": 341 + }, + { + "epoch": 0.07, + "learning_rate": 1.5200000000000002e-05, + "loss": 2.3279, + "step": 342 + }, + { + "epoch": 0.07, + "learning_rate": 1.5244444444444447e-05, + "loss": 2.1843, + "step": 343 + }, + { + "epoch": 0.07, + "learning_rate": 1.528888888888889e-05, + "loss": 2.3248, + "step": 344 + }, + { + "epoch": 0.07, + "learning_rate": 1.5333333333333334e-05, + "loss": 2.2223, + "step": 345 + }, + { + "epoch": 0.07, + "learning_rate": 1.537777777777778e-05, + "loss": 2.1891, + "step": 346 + }, + { + "epoch": 0.07, + "learning_rate": 1.5422222222222224e-05, + "loss": 2.3165, + "step": 347 + }, + { + "epoch": 0.07, + "learning_rate": 1.546666666666667e-05, + "loss": 2.2562, + "step": 348 + }, + { + "epoch": 0.07, + "learning_rate": 1.5511111111111114e-05, + "loss": 2.2469, + "step": 349 + }, + { + "epoch": 0.07, + "learning_rate": 1.555555555555556e-05, + "loss": 2.1994, + "step": 350 + }, + { + "epoch": 0.07, + "learning_rate": 1.5600000000000003e-05, + "loss": 2.2611, + "step": 351 + }, + { + "epoch": 0.07, + "learning_rate": 1.5644444444444448e-05, + "loss": 2.2356, + "step": 352 + }, + { + "epoch": 0.07, + "learning_rate": 1.5688888888888893e-05, + "loss": 2.2247, + "step": 353 + }, + { + "epoch": 0.07, + "learning_rate": 1.5733333333333334e-05, + "loss": 2.2717, + "step": 354 + }, + { + "epoch": 0.07, + "learning_rate": 1.577777777777778e-05, + "loss": 2.1981, + "step": 355 + }, + { + "epoch": 0.07, + "learning_rate": 1.5822222222222224e-05, + "loss": 2.3089, + "step": 356 + }, + { + "epoch": 0.07, + "learning_rate": 1.586666666666667e-05, + "loss": 2.1798, + "step": 357 + }, + { + "epoch": 0.07, + "learning_rate": 1.5911111111111113e-05, + "loss": 2.2363, + "step": 358 + }, + { + "epoch": 0.07, + "learning_rate": 1.5955555555555558e-05, + "loss": 2.2579, + "step": 359 + }, + { + "epoch": 0.07, + "learning_rate": 1.6000000000000003e-05, + "loss": 2.2976, + "step": 360 + }, + { + "epoch": 0.07, + "learning_rate": 1.6044444444444444e-05, + "loss": 2.289, + "step": 361 + }, + { + "epoch": 0.07, + "learning_rate": 1.608888888888889e-05, + "loss": 2.1725, + "step": 362 + }, + { + "epoch": 0.07, + "learning_rate": 1.6133333333333334e-05, + "loss": 2.2695, + "step": 363 + }, + { + "epoch": 0.07, + "learning_rate": 1.617777777777778e-05, + "loss": 2.2762, + "step": 364 + }, + { + "epoch": 0.07, + "learning_rate": 1.6222222222222223e-05, + "loss": 2.2613, + "step": 365 + }, + { + "epoch": 0.07, + "learning_rate": 1.6266666666666668e-05, + "loss": 2.2945, + "step": 366 + }, + { + "epoch": 0.07, + "learning_rate": 1.6311111111111113e-05, + "loss": 2.2495, + "step": 367 + }, + { + "epoch": 0.07, + "learning_rate": 1.6355555555555557e-05, + "loss": 2.2834, + "step": 368 + }, + { + "epoch": 0.07, + "learning_rate": 1.64e-05, + "loss": 2.2623, + "step": 369 + }, + { + "epoch": 0.08, + "learning_rate": 1.6444444444444444e-05, + "loss": 2.3004, + "step": 370 + }, + { + "epoch": 0.08, + "learning_rate": 1.648888888888889e-05, + "loss": 2.2807, + "step": 371 + }, + { + "epoch": 0.08, + "learning_rate": 1.6533333333333333e-05, + "loss": 2.148, + "step": 372 + }, + { + "epoch": 0.08, + "learning_rate": 1.6577777777777778e-05, + "loss": 2.2127, + "step": 373 + }, + { + "epoch": 0.08, + "learning_rate": 1.6622222222222223e-05, + "loss": 2.186, + "step": 374 + }, + { + "epoch": 0.08, + "learning_rate": 1.6666666666666667e-05, + "loss": 2.192, + "step": 375 + }, + { + "epoch": 0.08, + "learning_rate": 1.6711111111111112e-05, + "loss": 2.2806, + "step": 376 + }, + { + "epoch": 0.08, + "learning_rate": 1.6755555555555557e-05, + "loss": 2.2432, + "step": 377 + }, + { + "epoch": 0.08, + "learning_rate": 1.6800000000000002e-05, + "loss": 2.3456, + "step": 378 + }, + { + "epoch": 0.08, + "learning_rate": 1.6844444444444447e-05, + "loss": 2.1574, + "step": 379 + }, + { + "epoch": 0.08, + "learning_rate": 1.688888888888889e-05, + "loss": 2.2422, + "step": 380 + }, + { + "epoch": 0.08, + "learning_rate": 1.6933333333333336e-05, + "loss": 2.1977, + "step": 381 + }, + { + "epoch": 0.08, + "learning_rate": 1.697777777777778e-05, + "loss": 2.2756, + "step": 382 + }, + { + "epoch": 0.08, + "learning_rate": 1.7022222222222226e-05, + "loss": 2.2504, + "step": 383 + }, + { + "epoch": 0.08, + "learning_rate": 1.706666666666667e-05, + "loss": 2.1757, + "step": 384 + }, + { + "epoch": 0.08, + "learning_rate": 1.7111111111111112e-05, + "loss": 2.183, + "step": 385 + }, + { + "epoch": 0.08, + "learning_rate": 1.7155555555555557e-05, + "loss": 2.278, + "step": 386 + }, + { + "epoch": 0.08, + "learning_rate": 1.72e-05, + "loss": 2.2446, + "step": 387 + }, + { + "epoch": 0.08, + "learning_rate": 1.7244444444444446e-05, + "loss": 2.1889, + "step": 388 + }, + { + "epoch": 0.08, + "learning_rate": 1.728888888888889e-05, + "loss": 2.1945, + "step": 389 + }, + { + "epoch": 0.08, + "learning_rate": 1.7333333333333336e-05, + "loss": 2.2273, + "step": 390 + }, + { + "epoch": 0.08, + "learning_rate": 1.737777777777778e-05, + "loss": 2.3213, + "step": 391 + }, + { + "epoch": 0.08, + "learning_rate": 1.7422222222222222e-05, + "loss": 2.2001, + "step": 392 + }, + { + "epoch": 0.08, + "learning_rate": 1.7466666666666667e-05, + "loss": 2.289, + "step": 393 + }, + { + "epoch": 0.08, + "learning_rate": 1.751111111111111e-05, + "loss": 2.2416, + "step": 394 + }, + { + "epoch": 0.08, + "learning_rate": 1.7555555555555556e-05, + "loss": 2.2664, + "step": 395 + }, + { + "epoch": 0.08, + "learning_rate": 1.76e-05, + "loss": 2.2502, + "step": 396 + }, + { + "epoch": 0.08, + "learning_rate": 1.7644444444444446e-05, + "loss": 2.1794, + "step": 397 + }, + { + "epoch": 0.08, + "learning_rate": 1.768888888888889e-05, + "loss": 2.2681, + "step": 398 + }, + { + "epoch": 0.08, + "learning_rate": 1.7733333333333335e-05, + "loss": 2.2515, + "step": 399 + }, + { + "epoch": 0.08, + "learning_rate": 1.7777777777777777e-05, + "loss": 2.2114, + "step": 400 + }, + { + "epoch": 0.08, + "learning_rate": 1.782222222222222e-05, + "loss": 2.3267, + "step": 401 + }, + { + "epoch": 0.08, + "learning_rate": 1.7866666666666666e-05, + "loss": 2.2699, + "step": 402 + }, + { + "epoch": 0.08, + "learning_rate": 1.791111111111111e-05, + "loss": 2.2843, + "step": 403 + }, + { + "epoch": 0.08, + "learning_rate": 1.7955555555555556e-05, + "loss": 2.3071, + "step": 404 + }, + { + "epoch": 0.08, + "learning_rate": 1.8e-05, + "loss": 2.1794, + "step": 405 + }, + { + "epoch": 0.08, + "learning_rate": 1.8044444444444445e-05, + "loss": 2.2548, + "step": 406 + }, + { + "epoch": 0.08, + "learning_rate": 1.808888888888889e-05, + "loss": 2.1928, + "step": 407 + }, + { + "epoch": 0.08, + "learning_rate": 1.8133333333333335e-05, + "loss": 2.1618, + "step": 408 + }, + { + "epoch": 0.08, + "learning_rate": 1.817777777777778e-05, + "loss": 2.2159, + "step": 409 + }, + { + "epoch": 0.08, + "learning_rate": 1.8222222222222224e-05, + "loss": 2.2898, + "step": 410 + }, + { + "epoch": 0.08, + "learning_rate": 1.826666666666667e-05, + "loss": 2.2999, + "step": 411 + }, + { + "epoch": 0.08, + "learning_rate": 1.8311111111111114e-05, + "loss": 2.236, + "step": 412 + }, + { + "epoch": 0.08, + "learning_rate": 1.835555555555556e-05, + "loss": 2.1895, + "step": 413 + }, + { + "epoch": 0.08, + "learning_rate": 1.8400000000000003e-05, + "loss": 2.2692, + "step": 414 + }, + { + "epoch": 0.08, + "learning_rate": 1.8444444444444448e-05, + "loss": 2.168, + "step": 415 + }, + { + "epoch": 0.08, + "learning_rate": 1.848888888888889e-05, + "loss": 2.223, + "step": 416 + }, + { + "epoch": 0.08, + "learning_rate": 1.8533333333333334e-05, + "loss": 2.2726, + "step": 417 + }, + { + "epoch": 0.08, + "learning_rate": 1.857777777777778e-05, + "loss": 2.2643, + "step": 418 + }, + { + "epoch": 0.09, + "learning_rate": 1.8622222222222224e-05, + "loss": 2.315, + "step": 419 + }, + { + "epoch": 0.09, + "learning_rate": 1.866666666666667e-05, + "loss": 2.3022, + "step": 420 + }, + { + "epoch": 0.09, + "learning_rate": 1.8711111111111113e-05, + "loss": 2.233, + "step": 421 + }, + { + "epoch": 0.09, + "learning_rate": 1.8755555555555558e-05, + "loss": 2.2109, + "step": 422 + }, + { + "epoch": 0.09, + "learning_rate": 1.88e-05, + "loss": 2.2722, + "step": 423 + }, + { + "epoch": 0.09, + "learning_rate": 1.8844444444444444e-05, + "loss": 2.1968, + "step": 424 + }, + { + "epoch": 0.09, + "learning_rate": 1.888888888888889e-05, + "loss": 2.215, + "step": 425 + }, + { + "epoch": 0.09, + "learning_rate": 1.8933333333333334e-05, + "loss": 2.1681, + "step": 426 + }, + { + "epoch": 0.09, + "learning_rate": 1.897777777777778e-05, + "loss": 2.2078, + "step": 427 + }, + { + "epoch": 0.09, + "learning_rate": 1.9022222222222223e-05, + "loss": 2.2237, + "step": 428 + }, + { + "epoch": 0.09, + "learning_rate": 1.9066666666666668e-05, + "loss": 2.2933, + "step": 429 + }, + { + "epoch": 0.09, + "learning_rate": 1.9111111111111113e-05, + "loss": 2.2677, + "step": 430 + }, + { + "epoch": 0.09, + "learning_rate": 1.9155555555555558e-05, + "loss": 2.2134, + "step": 431 + }, + { + "epoch": 0.09, + "learning_rate": 1.9200000000000003e-05, + "loss": 2.2304, + "step": 432 + }, + { + "epoch": 0.09, + "learning_rate": 1.9244444444444444e-05, + "loss": 2.2146, + "step": 433 + }, + { + "epoch": 0.09, + "learning_rate": 1.928888888888889e-05, + "loss": 2.1302, + "step": 434 + }, + { + "epoch": 0.09, + "learning_rate": 1.9333333333333333e-05, + "loss": 2.2149, + "step": 435 + }, + { + "epoch": 0.09, + "learning_rate": 1.9377777777777778e-05, + "loss": 2.2246, + "step": 436 + }, + { + "epoch": 0.09, + "learning_rate": 1.9422222222222223e-05, + "loss": 2.142, + "step": 437 + }, + { + "epoch": 0.09, + "learning_rate": 1.9466666666666668e-05, + "loss": 2.2521, + "step": 438 + }, + { + "epoch": 0.09, + "learning_rate": 1.9511111111111113e-05, + "loss": 2.2227, + "step": 439 + }, + { + "epoch": 0.09, + "learning_rate": 1.9555555555555557e-05, + "loss": 2.1902, + "step": 440 + }, + { + "epoch": 0.09, + "learning_rate": 1.9600000000000002e-05, + "loss": 2.199, + "step": 441 + }, + { + "epoch": 0.09, + "learning_rate": 1.9644444444444447e-05, + "loss": 2.2279, + "step": 442 + }, + { + "epoch": 0.09, + "learning_rate": 1.968888888888889e-05, + "loss": 2.2465, + "step": 443 + }, + { + "epoch": 0.09, + "learning_rate": 1.9733333333333336e-05, + "loss": 2.25, + "step": 444 + }, + { + "epoch": 0.09, + "learning_rate": 1.977777777777778e-05, + "loss": 2.1882, + "step": 445 + }, + { + "epoch": 0.09, + "learning_rate": 1.9822222222222226e-05, + "loss": 2.2283, + "step": 446 + }, + { + "epoch": 0.09, + "learning_rate": 1.9866666666666667e-05, + "loss": 2.2139, + "step": 447 + }, + { + "epoch": 0.09, + "learning_rate": 1.9911111111111112e-05, + "loss": 2.2699, + "step": 448 + }, + { + "epoch": 0.09, + "learning_rate": 1.9955555555555557e-05, + "loss": 2.2683, + "step": 449 + }, + { + "epoch": 0.09, + "learning_rate": 2e-05, + "loss": 2.162, + "step": 450 + }, + { + "epoch": 0.09, + "learning_rate": 1.999999985081549e-05, + "loss": 2.2555, + "step": 451 + }, + { + "epoch": 0.09, + "learning_rate": 1.999999940326196e-05, + "loss": 2.226, + "step": 452 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999998657339427e-05, + "loss": 2.2241, + "step": 453 + }, + { + "epoch": 0.09, + "learning_rate": 1.999999761304791e-05, + "loss": 2.1683, + "step": 454 + }, + { + "epoch": 0.09, + "learning_rate": 1.999999627038744e-05, + "loss": 2.2076, + "step": 455 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999994629358062e-05, + "loss": 2.2572, + "step": 456 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999992689959818e-05, + "loss": 2.2557, + "step": 457 + }, + { + "epoch": 0.09, + "learning_rate": 1.999999045219277e-05, + "loss": 2.2576, + "step": 458 + }, + { + "epoch": 0.09, + "learning_rate": 1.999998791605699e-05, + "loss": 2.2076, + "step": 459 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999985081552543e-05, + "loss": 2.2312, + "step": 460 + }, + { + "epoch": 0.09, + "learning_rate": 1.999998194867952e-05, + "loss": 2.2406, + "step": 461 + }, + { + "epoch": 0.09, + "learning_rate": 1.999997851743801e-05, + "loss": 2.275, + "step": 462 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999974787828123e-05, + "loss": 2.2354, + "step": 463 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999970759849963e-05, + "loss": 2.193, + "step": 464 + }, + { + "epoch": 0.09, + "learning_rate": 1.999996643350365e-05, + "loss": 2.1726, + "step": 465 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999961808789323e-05, + "loss": 2.2347, + "step": 466 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999956885707107e-05, + "loss": 2.2465, + "step": 467 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999951664257157e-05, + "loss": 2.2175, + "step": 468 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999946144439626e-05, + "loss": 2.2105, + "step": 469 + }, + { + "epoch": 0.1, + "learning_rate": 1.999994032625468e-05, + "loss": 2.2836, + "step": 470 + }, + { + "epoch": 0.1, + "learning_rate": 1.999993420970249e-05, + "loss": 2.2842, + "step": 471 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999927794783244e-05, + "loss": 2.1839, + "step": 472 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999921081497128e-05, + "loss": 2.2287, + "step": 473 + }, + { + "epoch": 0.1, + "learning_rate": 1.999991406984434e-05, + "loss": 2.2095, + "step": 474 + }, + { + "epoch": 0.1, + "learning_rate": 1.99999067598251e-05, + "loss": 2.2182, + "step": 475 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999899151439613e-05, + "loss": 2.2114, + "step": 476 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999891244688117e-05, + "loss": 2.1972, + "step": 477 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999883039570843e-05, + "loss": 2.3088, + "step": 478 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999874536088037e-05, + "loss": 2.2275, + "step": 479 + }, + { + "epoch": 0.1, + "learning_rate": 1.999986573423995e-05, + "loss": 2.2504, + "step": 480 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999856634026846e-05, + "loss": 2.2269, + "step": 481 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999847235448996e-05, + "loss": 2.2805, + "step": 482 + }, + { + "epoch": 0.1, + "learning_rate": 1.999983753850668e-05, + "loss": 2.2457, + "step": 483 + }, + { + "epoch": 0.1, + "learning_rate": 1.999982754320019e-05, + "loss": 2.1873, + "step": 484 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999817249529826e-05, + "loss": 2.2113, + "step": 485 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999806657495892e-05, + "loss": 2.2936, + "step": 486 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999795767098703e-05, + "loss": 2.1877, + "step": 487 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999784578338586e-05, + "loss": 2.207, + "step": 488 + }, + { + "epoch": 0.1, + "learning_rate": 1.999977309121587e-05, + "loss": 2.2493, + "step": 489 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999761305730906e-05, + "loss": 2.2057, + "step": 490 + }, + { + "epoch": 0.1, + "learning_rate": 1.999974922188404e-05, + "loss": 2.2224, + "step": 491 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999736839675635e-05, + "loss": 2.2557, + "step": 492 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999724159106056e-05, + "loss": 2.2338, + "step": 493 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999711180175686e-05, + "loss": 2.1483, + "step": 494 + }, + { + "epoch": 0.1, + "learning_rate": 1.999969790288491e-05, + "loss": 2.2977, + "step": 495 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999684327234127e-05, + "loss": 2.2978, + "step": 496 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999670453223737e-05, + "loss": 2.236, + "step": 497 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999656280854164e-05, + "loss": 2.2114, + "step": 498 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999641810125817e-05, + "loss": 2.1677, + "step": 499 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999627041039137e-05, + "loss": 2.1462, + "step": 500 + }, + { + "epoch": 0.1, + "learning_rate": 1.999961197359456e-05, + "loss": 2.2035, + "step": 501 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999596607792543e-05, + "loss": 2.253, + "step": 502 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999580943633537e-05, + "loss": 2.1986, + "step": 503 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999564981118008e-05, + "loss": 2.2803, + "step": 504 + }, + { + "epoch": 0.1, + "learning_rate": 1.999954872024644e-05, + "loss": 2.268, + "step": 505 + }, + { + "epoch": 0.1, + "learning_rate": 1.999953216101931e-05, + "loss": 2.1761, + "step": 506 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999515303437124e-05, + "loss": 2.1871, + "step": 507 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999498147500374e-05, + "loss": 2.2231, + "step": 508 + }, + { + "epoch": 0.1, + "learning_rate": 1.999948069320957e-05, + "loss": 2.293, + "step": 509 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999462940565242e-05, + "loss": 2.2838, + "step": 510 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999444889567917e-05, + "loss": 2.264, + "step": 511 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999426540218133e-05, + "loss": 2.2951, + "step": 512 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999407892516434e-05, + "loss": 2.2651, + "step": 513 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999388946463378e-05, + "loss": 2.2288, + "step": 514 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999369702059534e-05, + "loss": 2.181, + "step": 515 + }, + { + "epoch": 0.1, + "learning_rate": 1.999935015930547e-05, + "loss": 2.2066, + "step": 516 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999330318201776e-05, + "loss": 2.1466, + "step": 517 + }, + { + "epoch": 0.11, + "learning_rate": 1.999931017874904e-05, + "loss": 2.2116, + "step": 518 + }, + { + "epoch": 0.11, + "learning_rate": 1.999928974094786e-05, + "loss": 2.1422, + "step": 519 + }, + { + "epoch": 0.11, + "learning_rate": 1.9999269004798854e-05, + "loss": 2.2345, + "step": 520 + }, + { + "epoch": 0.11, + "learning_rate": 1.9999247970302633e-05, + "loss": 2.1677, + "step": 521 + }, + { + "epoch": 0.11, + "learning_rate": 1.999922663745983e-05, + "loss": 2.1646, + "step": 522 + }, + { + "epoch": 0.11, + "learning_rate": 1.999920500627108e-05, + "loss": 2.2046, + "step": 523 + }, + { + "epoch": 0.11, + "learning_rate": 1.9999183076737024e-05, + "loss": 2.3291, + "step": 524 + }, + { + "epoch": 0.11, + "learning_rate": 1.999916084885832e-05, + "loss": 2.2484, + "step": 525 + }, + { + "epoch": 0.11, + "learning_rate": 1.9999138322635633e-05, + "loss": 2.2731, + "step": 526 + }, + { + "epoch": 0.11, + "learning_rate": 1.999911549806963e-05, + "loss": 2.251, + "step": 527 + }, + { + "epoch": 0.11, + "learning_rate": 1.9999092375161e-05, + "loss": 2.2721, + "step": 528 + }, + { + "epoch": 0.11, + "learning_rate": 1.9999068953910425e-05, + "loss": 2.2102, + "step": 529 + }, + { + "epoch": 0.11, + "learning_rate": 1.999904523431861e-05, + "loss": 2.2366, + "step": 530 + }, + { + "epoch": 0.11, + "learning_rate": 1.9999021216386256e-05, + "loss": 2.2113, + "step": 531 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998996900114085e-05, + "loss": 2.2136, + "step": 532 + }, + { + "epoch": 0.11, + "learning_rate": 1.999897228550282e-05, + "loss": 2.2534, + "step": 533 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998947372553196e-05, + "loss": 2.2636, + "step": 534 + }, + { + "epoch": 0.11, + "learning_rate": 1.999892216126596e-05, + "loss": 2.281, + "step": 535 + }, + { + "epoch": 0.11, + "learning_rate": 1.999889665164186e-05, + "loss": 2.2387, + "step": 536 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998870843681652e-05, + "loss": 2.195, + "step": 537 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998844737386114e-05, + "loss": 2.2299, + "step": 538 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998818332756028e-05, + "loss": 2.1775, + "step": 539 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998791629792172e-05, + "loss": 2.2436, + "step": 540 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998764628495347e-05, + "loss": 2.2626, + "step": 541 + }, + { + "epoch": 0.11, + "learning_rate": 1.999873732886636e-05, + "loss": 2.297, + "step": 542 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998709730906025e-05, + "loss": 2.187, + "step": 543 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998681834615167e-05, + "loss": 2.2465, + "step": 544 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998653639994615e-05, + "loss": 2.1838, + "step": 545 + }, + { + "epoch": 0.11, + "learning_rate": 1.999862514704521e-05, + "loss": 2.1917, + "step": 546 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998596355767805e-05, + "loss": 2.2436, + "step": 547 + }, + { + "epoch": 0.11, + "learning_rate": 1.999856726616326e-05, + "loss": 2.2102, + "step": 548 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998537878232436e-05, + "loss": 2.1306, + "step": 549 + }, + { + "epoch": 0.11, + "learning_rate": 1.999850819197622e-05, + "loss": 2.2293, + "step": 550 + }, + { + "epoch": 0.11, + "learning_rate": 1.999847820739549e-05, + "loss": 2.2128, + "step": 551 + }, + { + "epoch": 0.11, + "learning_rate": 1.999844792449114e-05, + "loss": 2.1947, + "step": 552 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998417343264078e-05, + "loss": 2.172, + "step": 553 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998386463715217e-05, + "loss": 2.1386, + "step": 554 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998355285845473e-05, + "loss": 2.1511, + "step": 555 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998323809655783e-05, + "loss": 2.196, + "step": 556 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998292035147083e-05, + "loss": 2.2871, + "step": 557 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998259962320317e-05, + "loss": 2.2191, + "step": 558 + }, + { + "epoch": 0.11, + "learning_rate": 1.999822759117645e-05, + "loss": 2.2087, + "step": 559 + }, + { + "epoch": 0.11, + "learning_rate": 1.999819492171644e-05, + "loss": 2.1886, + "step": 560 + }, + { + "epoch": 0.11, + "learning_rate": 1.999816195394127e-05, + "loss": 2.1977, + "step": 561 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998128687851913e-05, + "loss": 2.2919, + "step": 562 + }, + { + "epoch": 0.11, + "learning_rate": 1.999809512344937e-05, + "loss": 2.2445, + "step": 563 + }, + { + "epoch": 0.11, + "learning_rate": 1.999806126073464e-05, + "loss": 2.2538, + "step": 564 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998027099708735e-05, + "loss": 2.1779, + "step": 565 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997992640372672e-05, + "loss": 2.2214, + "step": 566 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997957882727478e-05, + "loss": 2.2657, + "step": 567 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997922826774193e-05, + "loss": 2.202, + "step": 568 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997887472513862e-05, + "loss": 2.1828, + "step": 569 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997851819947537e-05, + "loss": 2.2483, + "step": 570 + }, + { + "epoch": 0.12, + "learning_rate": 1.999781586907629e-05, + "loss": 2.2985, + "step": 571 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997779619901184e-05, + "loss": 2.277, + "step": 572 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997743072423303e-05, + "loss": 2.1662, + "step": 573 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997706226643743e-05, + "loss": 2.2616, + "step": 574 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997669082563597e-05, + "loss": 2.3006, + "step": 575 + }, + { + "epoch": 0.12, + "learning_rate": 1.999763164018398e-05, + "loss": 2.2188, + "step": 576 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997593899506002e-05, + "loss": 2.1691, + "step": 577 + }, + { + "epoch": 0.12, + "learning_rate": 1.999755586053079e-05, + "loss": 2.2342, + "step": 578 + }, + { + "epoch": 0.12, + "learning_rate": 1.999751752325948e-05, + "loss": 2.2272, + "step": 579 + }, + { + "epoch": 0.12, + "learning_rate": 1.999747888769322e-05, + "loss": 2.1802, + "step": 580 + }, + { + "epoch": 0.12, + "learning_rate": 1.999743995383316e-05, + "loss": 2.0882, + "step": 581 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997400721680463e-05, + "loss": 2.1861, + "step": 582 + }, + { + "epoch": 0.12, + "learning_rate": 1.999736119123629e-05, + "loss": 2.0787, + "step": 583 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997321362501838e-05, + "loss": 2.2141, + "step": 584 + }, + { + "epoch": 0.12, + "learning_rate": 1.999728123547828e-05, + "loss": 2.2485, + "step": 585 + }, + { + "epoch": 0.12, + "learning_rate": 1.999724081016682e-05, + "loss": 2.1822, + "step": 586 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997200086568663e-05, + "loss": 2.3281, + "step": 587 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997159064685023e-05, + "loss": 2.2037, + "step": 588 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997117744517128e-05, + "loss": 2.2942, + "step": 589 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997076126066207e-05, + "loss": 2.2242, + "step": 590 + }, + { + "epoch": 0.12, + "learning_rate": 1.9997034209333498e-05, + "loss": 2.132, + "step": 591 + }, + { + "epoch": 0.12, + "learning_rate": 1.999699199432026e-05, + "loss": 2.2085, + "step": 592 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996949481027753e-05, + "loss": 2.225, + "step": 593 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996906669457238e-05, + "loss": 2.2488, + "step": 594 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996863559609993e-05, + "loss": 2.2073, + "step": 595 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996820151487312e-05, + "loss": 2.2118, + "step": 596 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996776445090483e-05, + "loss": 2.1802, + "step": 597 + }, + { + "epoch": 0.12, + "learning_rate": 1.999673244042081e-05, + "loss": 2.2284, + "step": 598 + }, + { + "epoch": 0.12, + "learning_rate": 1.999668813747961e-05, + "loss": 2.1914, + "step": 599 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996643536268202e-05, + "loss": 2.288, + "step": 600 + }, + { + "epoch": 0.12, + "learning_rate": 1.999659863678792e-05, + "loss": 2.2218, + "step": 601 + }, + { + "epoch": 0.12, + "learning_rate": 1.99965534390401e-05, + "loss": 2.2533, + "step": 602 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996507943026097e-05, + "loss": 2.2751, + "step": 603 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996462148747256e-05, + "loss": 2.1961, + "step": 604 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996416056204953e-05, + "loss": 2.1968, + "step": 605 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996369665400564e-05, + "loss": 2.1593, + "step": 606 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996322976335468e-05, + "loss": 2.2281, + "step": 607 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996275989011063e-05, + "loss": 2.2673, + "step": 608 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996228703428747e-05, + "loss": 2.1461, + "step": 609 + }, + { + "epoch": 0.12, + "learning_rate": 1.999618111958993e-05, + "loss": 2.2817, + "step": 610 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996133237496034e-05, + "loss": 2.2187, + "step": 611 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996085057148492e-05, + "loss": 2.2201, + "step": 612 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996036578548736e-05, + "loss": 2.1558, + "step": 613 + }, + { + "epoch": 0.12, + "learning_rate": 1.999598780169821e-05, + "loss": 2.2433, + "step": 614 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995938726598374e-05, + "loss": 2.2215, + "step": 615 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995889353250692e-05, + "loss": 2.2093, + "step": 616 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995839681656638e-05, + "loss": 2.2902, + "step": 617 + }, + { + "epoch": 0.13, + "learning_rate": 1.999578971181769e-05, + "loss": 2.1843, + "step": 618 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995739443735342e-05, + "loss": 2.157, + "step": 619 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995688877411092e-05, + "loss": 2.1998, + "step": 620 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995638012846453e-05, + "loss": 2.2324, + "step": 621 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995586850042937e-05, + "loss": 2.2812, + "step": 622 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995535389002074e-05, + "loss": 2.2695, + "step": 623 + }, + { + "epoch": 0.13, + "learning_rate": 1.99954836297254e-05, + "loss": 2.2326, + "step": 624 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995431572214456e-05, + "loss": 2.2192, + "step": 625 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995379216470797e-05, + "loss": 2.2104, + "step": 626 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995326562495985e-05, + "loss": 2.2212, + "step": 627 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995273610291593e-05, + "loss": 2.2781, + "step": 628 + }, + { + "epoch": 0.13, + "learning_rate": 1.99952203598592e-05, + "loss": 2.1727, + "step": 629 + }, + { + "epoch": 0.13, + "learning_rate": 1.999516681120039e-05, + "loss": 2.1923, + "step": 630 + }, + { + "epoch": 0.13, + "learning_rate": 1.999511296431677e-05, + "loss": 2.1556, + "step": 631 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995058819209937e-05, + "loss": 2.2177, + "step": 632 + }, + { + "epoch": 0.13, + "learning_rate": 1.9995004375881514e-05, + "loss": 2.1878, + "step": 633 + }, + { + "epoch": 0.13, + "learning_rate": 1.999494963433312e-05, + "loss": 2.2302, + "step": 634 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994894594566393e-05, + "loss": 2.1557, + "step": 635 + }, + { + "epoch": 0.13, + "learning_rate": 1.999483925658297e-05, + "loss": 2.2024, + "step": 636 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994783620384502e-05, + "loss": 2.1393, + "step": 637 + }, + { + "epoch": 0.13, + "learning_rate": 1.999472768597266e-05, + "loss": 2.2081, + "step": 638 + }, + { + "epoch": 0.13, + "learning_rate": 1.99946714533491e-05, + "loss": 2.2549, + "step": 639 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994614922515504e-05, + "loss": 2.1924, + "step": 640 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994558093473564e-05, + "loss": 2.1787, + "step": 641 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994500966224967e-05, + "loss": 2.212, + "step": 642 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994443540771425e-05, + "loss": 2.2167, + "step": 643 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994385817114644e-05, + "loss": 2.2212, + "step": 644 + }, + { + "epoch": 0.13, + "learning_rate": 1.999432779525635e-05, + "loss": 2.1917, + "step": 645 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994269475198278e-05, + "loss": 2.2979, + "step": 646 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994210856942162e-05, + "loss": 2.188, + "step": 647 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994151940489757e-05, + "loss": 2.1817, + "step": 648 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994092725842814e-05, + "loss": 2.3047, + "step": 649 + }, + { + "epoch": 0.13, + "learning_rate": 1.9994033213003103e-05, + "loss": 2.2262, + "step": 650 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993973401972402e-05, + "loss": 2.1056, + "step": 651 + }, + { + "epoch": 0.13, + "learning_rate": 1.999391329275249e-05, + "loss": 2.1806, + "step": 652 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993852885345166e-05, + "loss": 2.2096, + "step": 653 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993792179752233e-05, + "loss": 2.221, + "step": 654 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993731175975495e-05, + "loss": 2.252, + "step": 655 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993669874016777e-05, + "loss": 2.144, + "step": 656 + }, + { + "epoch": 0.13, + "learning_rate": 1.999360827387791e-05, + "loss": 2.2066, + "step": 657 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993546375560728e-05, + "loss": 2.1693, + "step": 658 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993484179067082e-05, + "loss": 2.2561, + "step": 659 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993421684398825e-05, + "loss": 2.1713, + "step": 660 + }, + { + "epoch": 0.13, + "learning_rate": 1.999335889155782e-05, + "loss": 2.1565, + "step": 661 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993295800545943e-05, + "loss": 2.1851, + "step": 662 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993232411365076e-05, + "loss": 2.2821, + "step": 663 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993168724017108e-05, + "loss": 2.179, + "step": 664 + }, + { + "epoch": 0.13, + "learning_rate": 1.9993104738503945e-05, + "loss": 2.1701, + "step": 665 + }, + { + "epoch": 0.14, + "learning_rate": 1.9993040454827493e-05, + "loss": 2.2487, + "step": 666 + }, + { + "epoch": 0.14, + "learning_rate": 1.9992975872989667e-05, + "loss": 2.1862, + "step": 667 + }, + { + "epoch": 0.14, + "learning_rate": 1.9992910992992398e-05, + "loss": 2.164, + "step": 668 + }, + { + "epoch": 0.14, + "learning_rate": 1.999284581483762e-05, + "loss": 2.1075, + "step": 669 + }, + { + "epoch": 0.14, + "learning_rate": 1.9992780338527276e-05, + "loss": 2.175, + "step": 670 + }, + { + "epoch": 0.14, + "learning_rate": 1.9992714564063327e-05, + "loss": 2.21, + "step": 671 + }, + { + "epoch": 0.14, + "learning_rate": 1.9992648491447727e-05, + "loss": 2.1635, + "step": 672 + }, + { + "epoch": 0.14, + "learning_rate": 1.9992582120682452e-05, + "loss": 2.2087, + "step": 673 + }, + { + "epoch": 0.14, + "learning_rate": 1.9992515451769478e-05, + "loss": 2.1721, + "step": 674 + }, + { + "epoch": 0.14, + "learning_rate": 1.99924484847108e-05, + "loss": 2.2595, + "step": 675 + }, + { + "epoch": 0.14, + "learning_rate": 1.9992381219508412e-05, + "loss": 2.222, + "step": 676 + }, + { + "epoch": 0.14, + "learning_rate": 1.999231365616432e-05, + "loss": 2.1616, + "step": 677 + }, + { + "epoch": 0.14, + "learning_rate": 1.9992245794680546e-05, + "loss": 2.2311, + "step": 678 + }, + { + "epoch": 0.14, + "learning_rate": 1.999217763505911e-05, + "loss": 2.1848, + "step": 679 + }, + { + "epoch": 0.14, + "learning_rate": 1.9992109177302044e-05, + "loss": 2.1996, + "step": 680 + }, + { + "epoch": 0.14, + "learning_rate": 1.9992040421411394e-05, + "loss": 2.1508, + "step": 681 + }, + { + "epoch": 0.14, + "learning_rate": 1.9991971367389213e-05, + "loss": 2.2721, + "step": 682 + }, + { + "epoch": 0.14, + "learning_rate": 1.9991902015237557e-05, + "loss": 2.2798, + "step": 683 + }, + { + "epoch": 0.14, + "learning_rate": 1.9991832364958496e-05, + "loss": 2.2255, + "step": 684 + }, + { + "epoch": 0.14, + "learning_rate": 1.9991762416554113e-05, + "loss": 2.1946, + "step": 685 + }, + { + "epoch": 0.14, + "learning_rate": 1.9991692170026487e-05, + "loss": 2.2438, + "step": 686 + }, + { + "epoch": 0.14, + "learning_rate": 1.999162162537772e-05, + "loss": 2.2645, + "step": 687 + }, + { + "epoch": 0.14, + "learning_rate": 1.9991550782609915e-05, + "loss": 2.2055, + "step": 688 + }, + { + "epoch": 0.14, + "learning_rate": 1.9991479641725187e-05, + "loss": 2.1617, + "step": 689 + }, + { + "epoch": 0.14, + "learning_rate": 1.999140820272566e-05, + "loss": 2.2153, + "step": 690 + }, + { + "epoch": 0.14, + "learning_rate": 1.9991336465613457e-05, + "loss": 2.137, + "step": 691 + }, + { + "epoch": 0.14, + "learning_rate": 1.9991264430390726e-05, + "loss": 2.2879, + "step": 692 + }, + { + "epoch": 0.14, + "learning_rate": 1.999119209705962e-05, + "loss": 2.2227, + "step": 693 + }, + { + "epoch": 0.14, + "learning_rate": 1.9991119465622283e-05, + "loss": 2.249, + "step": 694 + }, + { + "epoch": 0.14, + "learning_rate": 1.99910465360809e-05, + "loss": 2.1828, + "step": 695 + }, + { + "epoch": 0.14, + "learning_rate": 1.999097330843763e-05, + "loss": 2.2037, + "step": 696 + }, + { + "epoch": 0.14, + "learning_rate": 1.9990899782694672e-05, + "loss": 2.1266, + "step": 697 + }, + { + "epoch": 0.14, + "learning_rate": 1.9990825958854212e-05, + "loss": 2.1749, + "step": 698 + }, + { + "epoch": 0.14, + "learning_rate": 1.9990751836918453e-05, + "loss": 2.1076, + "step": 699 + }, + { + "epoch": 0.14, + "learning_rate": 1.999067741688961e-05, + "loss": 2.1567, + "step": 700 + }, + { + "epoch": 0.14, + "learning_rate": 1.99906026987699e-05, + "loss": 2.1553, + "step": 701 + }, + { + "epoch": 0.14, + "learning_rate": 1.9990527682561553e-05, + "loss": 2.1527, + "step": 702 + }, + { + "epoch": 0.14, + "learning_rate": 1.9990452368266808e-05, + "loss": 2.2139, + "step": 703 + }, + { + "epoch": 0.14, + "learning_rate": 1.999037675588791e-05, + "loss": 2.143, + "step": 704 + }, + { + "epoch": 0.14, + "learning_rate": 1.9990300845427123e-05, + "loss": 2.2332, + "step": 705 + }, + { + "epoch": 0.14, + "learning_rate": 1.9990224636886705e-05, + "loss": 2.184, + "step": 706 + }, + { + "epoch": 0.14, + "learning_rate": 1.999014813026893e-05, + "loss": 2.2208, + "step": 707 + }, + { + "epoch": 0.14, + "learning_rate": 1.9990071325576078e-05, + "loss": 2.2166, + "step": 708 + }, + { + "epoch": 0.14, + "learning_rate": 1.998999422281045e-05, + "loss": 2.0907, + "step": 709 + }, + { + "epoch": 0.14, + "learning_rate": 1.9989916821974335e-05, + "loss": 2.1705, + "step": 710 + }, + { + "epoch": 0.14, + "learning_rate": 1.998983912307005e-05, + "loss": 2.2285, + "step": 711 + }, + { + "epoch": 0.14, + "learning_rate": 1.9989761126099915e-05, + "loss": 2.2549, + "step": 712 + }, + { + "epoch": 0.14, + "learning_rate": 1.9989682831066254e-05, + "loss": 2.193, + "step": 713 + }, + { + "epoch": 0.14, + "learning_rate": 1.99896042379714e-05, + "loss": 2.23, + "step": 714 + }, + { + "epoch": 0.15, + "learning_rate": 1.99895253468177e-05, + "loss": 2.2138, + "step": 715 + }, + { + "epoch": 0.15, + "learning_rate": 1.998944615760751e-05, + "loss": 2.1929, + "step": 716 + }, + { + "epoch": 0.15, + "learning_rate": 1.9989366670343188e-05, + "loss": 2.2613, + "step": 717 + }, + { + "epoch": 0.15, + "learning_rate": 1.9989286885027112e-05, + "loss": 2.2885, + "step": 718 + }, + { + "epoch": 0.15, + "learning_rate": 1.998920680166166e-05, + "loss": 2.2373, + "step": 719 + }, + { + "epoch": 0.15, + "learning_rate": 1.998912642024922e-05, + "loss": 2.1991, + "step": 720 + }, + { + "epoch": 0.15, + "learning_rate": 1.998904574079219e-05, + "loss": 2.219, + "step": 721 + }, + { + "epoch": 0.15, + "learning_rate": 1.9988964763292982e-05, + "loss": 2.2586, + "step": 722 + }, + { + "epoch": 0.15, + "learning_rate": 1.9988883487754007e-05, + "loss": 2.2265, + "step": 723 + }, + { + "epoch": 0.15, + "learning_rate": 1.998880191417769e-05, + "loss": 2.1641, + "step": 724 + }, + { + "epoch": 0.15, + "learning_rate": 1.998872004256647e-05, + "loss": 2.2428, + "step": 725 + }, + { + "epoch": 0.15, + "learning_rate": 1.9988637872922785e-05, + "loss": 2.2432, + "step": 726 + }, + { + "epoch": 0.15, + "learning_rate": 1.9988555405249087e-05, + "loss": 2.1931, + "step": 727 + }, + { + "epoch": 0.15, + "learning_rate": 1.998847263954784e-05, + "loss": 2.1984, + "step": 728 + }, + { + "epoch": 0.15, + "learning_rate": 1.998838957582151e-05, + "loss": 2.2223, + "step": 729 + }, + { + "epoch": 0.15, + "learning_rate": 1.9988306214072574e-05, + "loss": 2.0918, + "step": 730 + }, + { + "epoch": 0.15, + "learning_rate": 1.9988222554303524e-05, + "loss": 2.1917, + "step": 731 + }, + { + "epoch": 0.15, + "learning_rate": 1.9988138596516854e-05, + "loss": 2.2193, + "step": 732 + }, + { + "epoch": 0.15, + "learning_rate": 1.9988054340715066e-05, + "loss": 2.1583, + "step": 733 + }, + { + "epoch": 0.15, + "learning_rate": 1.998796978690068e-05, + "loss": 2.1145, + "step": 734 + }, + { + "epoch": 0.15, + "learning_rate": 1.9987884935076213e-05, + "loss": 2.2412, + "step": 735 + }, + { + "epoch": 0.15, + "learning_rate": 1.99877997852442e-05, + "loss": 2.149, + "step": 736 + }, + { + "epoch": 0.15, + "learning_rate": 1.9987714337407186e-05, + "loss": 2.1731, + "step": 737 + }, + { + "epoch": 0.15, + "learning_rate": 1.998762859156771e-05, + "loss": 2.2315, + "step": 738 + }, + { + "epoch": 0.15, + "learning_rate": 1.998754254772834e-05, + "loss": 2.2193, + "step": 739 + }, + { + "epoch": 0.15, + "learning_rate": 1.9987456205891633e-05, + "loss": 2.1641, + "step": 740 + }, + { + "epoch": 0.15, + "learning_rate": 1.998736956606018e-05, + "loss": 2.2337, + "step": 741 + }, + { + "epoch": 0.15, + "learning_rate": 1.998728262823655e-05, + "loss": 2.2466, + "step": 742 + }, + { + "epoch": 0.15, + "learning_rate": 1.9987195392423348e-05, + "loss": 2.2219, + "step": 743 + }, + { + "epoch": 0.15, + "learning_rate": 1.9987107858623173e-05, + "loss": 2.1934, + "step": 744 + }, + { + "epoch": 0.15, + "learning_rate": 1.9987020026838634e-05, + "loss": 2.2543, + "step": 745 + }, + { + "epoch": 0.15, + "learning_rate": 1.9986931897072355e-05, + "loss": 2.1534, + "step": 746 + }, + { + "epoch": 0.15, + "learning_rate": 1.9986843469326967e-05, + "loss": 2.2144, + "step": 747 + }, + { + "epoch": 0.15, + "learning_rate": 1.9986754743605106e-05, + "loss": 2.1361, + "step": 748 + }, + { + "epoch": 0.15, + "learning_rate": 1.998666571990942e-05, + "loss": 2.1692, + "step": 749 + }, + { + "epoch": 0.15, + "learning_rate": 1.9986576398242566e-05, + "loss": 2.2378, + "step": 750 + }, + { + "epoch": 0.15, + "learning_rate": 1.9986486778607208e-05, + "loss": 2.1953, + "step": 751 + }, + { + "epoch": 0.15, + "learning_rate": 1.9986396861006017e-05, + "loss": 2.2698, + "step": 752 + }, + { + "epoch": 0.15, + "learning_rate": 1.9986306645441683e-05, + "loss": 2.2648, + "step": 753 + }, + { + "epoch": 0.15, + "learning_rate": 1.998621613191689e-05, + "loss": 2.2558, + "step": 754 + }, + { + "epoch": 0.15, + "learning_rate": 1.9986125320434343e-05, + "loss": 2.2138, + "step": 755 + }, + { + "epoch": 0.15, + "learning_rate": 1.9986034210996755e-05, + "loss": 2.1874, + "step": 756 + }, + { + "epoch": 0.15, + "learning_rate": 1.9985942803606835e-05, + "loss": 2.1592, + "step": 757 + }, + { + "epoch": 0.15, + "learning_rate": 1.998585109826732e-05, + "loss": 2.1949, + "step": 758 + }, + { + "epoch": 0.15, + "learning_rate": 1.9985759094980937e-05, + "loss": 2.1724, + "step": 759 + }, + { + "epoch": 0.15, + "learning_rate": 1.998566679375044e-05, + "loss": 2.223, + "step": 760 + }, + { + "epoch": 0.15, + "learning_rate": 1.998557419457858e-05, + "loss": 2.2019, + "step": 761 + }, + { + "epoch": 0.15, + "learning_rate": 1.9985481297468113e-05, + "loss": 2.159, + "step": 762 + }, + { + "epoch": 0.15, + "learning_rate": 1.998538810242182e-05, + "loss": 2.1904, + "step": 763 + }, + { + "epoch": 0.16, + "learning_rate": 1.998529460944248e-05, + "loss": 2.2048, + "step": 764 + }, + { + "epoch": 0.16, + "learning_rate": 1.9985200818532873e-05, + "loss": 2.1648, + "step": 765 + }, + { + "epoch": 0.16, + "learning_rate": 1.9985106729695813e-05, + "loss": 2.2579, + "step": 766 + }, + { + "epoch": 0.16, + "learning_rate": 1.9985012342934098e-05, + "loss": 2.2214, + "step": 767 + }, + { + "epoch": 0.16, + "learning_rate": 1.9984917658250542e-05, + "loss": 2.1776, + "step": 768 + }, + { + "epoch": 0.16, + "learning_rate": 1.9984822675647976e-05, + "loss": 2.2217, + "step": 769 + }, + { + "epoch": 0.16, + "learning_rate": 1.9984727395129234e-05, + "loss": 2.2037, + "step": 770 + }, + { + "epoch": 0.16, + "learning_rate": 1.9984631816697157e-05, + "loss": 2.2947, + "step": 771 + }, + { + "epoch": 0.16, + "learning_rate": 1.9984535940354593e-05, + "loss": 2.2102, + "step": 772 + }, + { + "epoch": 0.16, + "learning_rate": 1.9984439766104405e-05, + "loss": 2.2295, + "step": 773 + }, + { + "epoch": 0.16, + "learning_rate": 1.9984343293949468e-05, + "loss": 2.1363, + "step": 774 + }, + { + "epoch": 0.16, + "learning_rate": 1.998424652389265e-05, + "loss": 2.2877, + "step": 775 + }, + { + "epoch": 0.16, + "learning_rate": 1.9984149455936847e-05, + "loss": 2.0879, + "step": 776 + }, + { + "epoch": 0.16, + "learning_rate": 1.9984052090084955e-05, + "loss": 2.2243, + "step": 777 + }, + { + "epoch": 0.16, + "learning_rate": 1.9983954426339876e-05, + "loss": 2.1601, + "step": 778 + }, + { + "epoch": 0.16, + "learning_rate": 1.9983856464704525e-05, + "loss": 2.199, + "step": 779 + }, + { + "epoch": 0.16, + "learning_rate": 1.9983758205181824e-05, + "loss": 2.2093, + "step": 780 + }, + { + "epoch": 0.16, + "learning_rate": 1.9983659647774702e-05, + "loss": 2.175, + "step": 781 + }, + { + "epoch": 0.16, + "learning_rate": 1.9983560792486106e-05, + "loss": 2.2324, + "step": 782 + }, + { + "epoch": 0.16, + "learning_rate": 1.998346163931898e-05, + "loss": 2.1956, + "step": 783 + }, + { + "epoch": 0.16, + "learning_rate": 1.9983362188276286e-05, + "loss": 2.1718, + "step": 784 + }, + { + "epoch": 0.16, + "learning_rate": 1.9983262439360992e-05, + "loss": 2.2854, + "step": 785 + }, + { + "epoch": 0.16, + "learning_rate": 1.998316239257607e-05, + "loss": 2.2228, + "step": 786 + }, + { + "epoch": 0.16, + "learning_rate": 1.998306204792451e-05, + "loss": 2.1573, + "step": 787 + }, + { + "epoch": 0.16, + "learning_rate": 1.99829614054093e-05, + "loss": 2.2379, + "step": 788 + }, + { + "epoch": 0.16, + "learning_rate": 1.998286046503345e-05, + "loss": 2.2476, + "step": 789 + }, + { + "epoch": 0.16, + "learning_rate": 1.9982759226799965e-05, + "loss": 2.1208, + "step": 790 + }, + { + "epoch": 0.16, + "learning_rate": 1.9982657690711868e-05, + "loss": 2.2179, + "step": 791 + }, + { + "epoch": 0.16, + "learning_rate": 1.998255585677219e-05, + "loss": 2.1536, + "step": 792 + }, + { + "epoch": 0.16, + "learning_rate": 1.998245372498397e-05, + "loss": 2.2423, + "step": 793 + }, + { + "epoch": 0.16, + "learning_rate": 1.9982351295350254e-05, + "loss": 2.1299, + "step": 794 + }, + { + "epoch": 0.16, + "learning_rate": 1.9982248567874098e-05, + "loss": 2.1308, + "step": 795 + }, + { + "epoch": 0.16, + "learning_rate": 1.9982145542558565e-05, + "loss": 2.1995, + "step": 796 + }, + { + "epoch": 0.16, + "learning_rate": 1.9982042219406735e-05, + "loss": 2.2616, + "step": 797 + }, + { + "epoch": 0.16, + "learning_rate": 1.9981938598421686e-05, + "loss": 2.2251, + "step": 798 + }, + { + "epoch": 0.16, + "learning_rate": 1.998183467960651e-05, + "loss": 2.1847, + "step": 799 + }, + { + "epoch": 0.16, + "learning_rate": 1.9981730462964303e-05, + "loss": 2.1562, + "step": 800 + }, + { + "epoch": 0.16, + "learning_rate": 1.9981625948498187e-05, + "loss": 2.2199, + "step": 801 + }, + { + "epoch": 0.16, + "learning_rate": 1.998152113621127e-05, + "loss": 2.1929, + "step": 802 + }, + { + "epoch": 0.16, + "learning_rate": 1.9981416026106682e-05, + "loss": 2.2474, + "step": 803 + }, + { + "epoch": 0.16, + "learning_rate": 1.998131061818756e-05, + "loss": 2.1985, + "step": 804 + }, + { + "epoch": 0.16, + "learning_rate": 1.998120491245705e-05, + "loss": 2.1703, + "step": 805 + }, + { + "epoch": 0.16, + "learning_rate": 1.9981098908918305e-05, + "loss": 2.1308, + "step": 806 + }, + { + "epoch": 0.16, + "learning_rate": 1.9980992607574485e-05, + "loss": 2.1765, + "step": 807 + }, + { + "epoch": 0.16, + "learning_rate": 1.9980886008428766e-05, + "loss": 2.1875, + "step": 808 + }, + { + "epoch": 0.16, + "learning_rate": 1.9980779111484324e-05, + "loss": 2.2295, + "step": 809 + }, + { + "epoch": 0.16, + "learning_rate": 1.9980671916744356e-05, + "loss": 2.1748, + "step": 810 + }, + { + "epoch": 0.16, + "learning_rate": 1.998056442421205e-05, + "loss": 2.2185, + "step": 811 + }, + { + "epoch": 0.16, + "learning_rate": 1.998045663389062e-05, + "loss": 2.1959, + "step": 812 + }, + { + "epoch": 0.17, + "learning_rate": 1.9980348545783285e-05, + "loss": 2.1957, + "step": 813 + }, + { + "epoch": 0.17, + "learning_rate": 1.998024015989326e-05, + "loss": 2.1875, + "step": 814 + }, + { + "epoch": 0.17, + "learning_rate": 1.998013147622379e-05, + "loss": 2.2362, + "step": 815 + }, + { + "epoch": 0.17, + "learning_rate": 1.998002249477811e-05, + "loss": 2.2589, + "step": 816 + }, + { + "epoch": 0.17, + "learning_rate": 1.9979913215559477e-05, + "loss": 2.225, + "step": 817 + }, + { + "epoch": 0.17, + "learning_rate": 1.9979803638571145e-05, + "loss": 2.2478, + "step": 818 + }, + { + "epoch": 0.17, + "learning_rate": 1.9979693763816387e-05, + "loss": 2.1288, + "step": 819 + }, + { + "epoch": 0.17, + "learning_rate": 1.9979583591298482e-05, + "loss": 2.1767, + "step": 820 + }, + { + "epoch": 0.17, + "learning_rate": 1.997947312102072e-05, + "loss": 2.2204, + "step": 821 + }, + { + "epoch": 0.17, + "learning_rate": 1.9979362352986395e-05, + "loss": 2.214, + "step": 822 + }, + { + "epoch": 0.17, + "learning_rate": 1.9979251287198806e-05, + "loss": 2.2228, + "step": 823 + }, + { + "epoch": 0.17, + "learning_rate": 1.9979139923661273e-05, + "loss": 2.1414, + "step": 824 + }, + { + "epoch": 0.17, + "learning_rate": 1.997902826237712e-05, + "loss": 2.1974, + "step": 825 + }, + { + "epoch": 0.17, + "learning_rate": 1.997891630334967e-05, + "loss": 2.2843, + "step": 826 + }, + { + "epoch": 0.17, + "learning_rate": 1.9978804046582276e-05, + "loss": 2.1502, + "step": 827 + }, + { + "epoch": 0.17, + "learning_rate": 1.9978691492078276e-05, + "loss": 2.1552, + "step": 828 + }, + { + "epoch": 0.17, + "learning_rate": 1.9978578639841037e-05, + "loss": 2.2404, + "step": 829 + }, + { + "epoch": 0.17, + "learning_rate": 1.9978465489873922e-05, + "loss": 2.2114, + "step": 830 + }, + { + "epoch": 0.17, + "learning_rate": 1.997835204218031e-05, + "loss": 2.2354, + "step": 831 + }, + { + "epoch": 0.17, + "learning_rate": 1.997823829676358e-05, + "loss": 2.1873, + "step": 832 + }, + { + "epoch": 0.17, + "learning_rate": 1.9978124253627126e-05, + "loss": 2.2545, + "step": 833 + }, + { + "epoch": 0.17, + "learning_rate": 1.9978009912774358e-05, + "loss": 2.1076, + "step": 834 + }, + { + "epoch": 0.17, + "learning_rate": 1.9977895274208682e-05, + "loss": 2.1974, + "step": 835 + }, + { + "epoch": 0.17, + "learning_rate": 1.997778033793352e-05, + "loss": 2.2776, + "step": 836 + }, + { + "epoch": 0.17, + "learning_rate": 1.9977665103952303e-05, + "loss": 2.205, + "step": 837 + }, + { + "epoch": 0.17, + "learning_rate": 1.997754957226847e-05, + "loss": 2.1421, + "step": 838 + }, + { + "epoch": 0.17, + "learning_rate": 1.997743374288546e-05, + "loss": 2.1658, + "step": 839 + }, + { + "epoch": 0.17, + "learning_rate": 1.9977317615806738e-05, + "loss": 2.1731, + "step": 840 + }, + { + "epoch": 0.17, + "learning_rate": 1.997720119103576e-05, + "loss": 2.2483, + "step": 841 + }, + { + "epoch": 0.17, + "learning_rate": 1.9977084468576013e-05, + "loss": 2.2583, + "step": 842 + }, + { + "epoch": 0.17, + "learning_rate": 1.9976967448430967e-05, + "loss": 2.2484, + "step": 843 + }, + { + "epoch": 0.17, + "learning_rate": 1.9976850130604117e-05, + "loss": 2.3076, + "step": 844 + }, + { + "epoch": 0.17, + "learning_rate": 1.997673251509897e-05, + "loss": 2.1383, + "step": 845 + }, + { + "epoch": 0.17, + "learning_rate": 1.9976614601919027e-05, + "loss": 2.1885, + "step": 846 + }, + { + "epoch": 0.17, + "learning_rate": 1.997649639106781e-05, + "loss": 2.1773, + "step": 847 + }, + { + "epoch": 0.17, + "learning_rate": 1.9976377882548843e-05, + "loss": 2.2199, + "step": 848 + }, + { + "epoch": 0.17, + "learning_rate": 1.9976259076365667e-05, + "loss": 2.2971, + "step": 849 + }, + { + "epoch": 0.17, + "learning_rate": 1.997613997252182e-05, + "loss": 2.2432, + "step": 850 + }, + { + "epoch": 0.17, + "learning_rate": 1.9976020571020866e-05, + "loss": 2.1466, + "step": 851 + }, + { + "epoch": 0.17, + "learning_rate": 1.9975900871866357e-05, + "loss": 2.2506, + "step": 852 + }, + { + "epoch": 0.17, + "learning_rate": 1.9975780875061872e-05, + "loss": 2.262, + "step": 853 + }, + { + "epoch": 0.17, + "learning_rate": 1.997566058061098e-05, + "loss": 2.2613, + "step": 854 + }, + { + "epoch": 0.17, + "learning_rate": 1.997553998851729e-05, + "loss": 2.2128, + "step": 855 + }, + { + "epoch": 0.17, + "learning_rate": 1.997541909878438e-05, + "loss": 2.2019, + "step": 856 + }, + { + "epoch": 0.17, + "learning_rate": 1.9975297911415867e-05, + "loss": 2.2397, + "step": 857 + }, + { + "epoch": 0.17, + "learning_rate": 1.997517642641537e-05, + "loss": 2.1789, + "step": 858 + }, + { + "epoch": 0.17, + "learning_rate": 1.9975054643786503e-05, + "loss": 2.2227, + "step": 859 + }, + { + "epoch": 0.17, + "learning_rate": 1.997493256353291e-05, + "loss": 2.2041, + "step": 860 + }, + { + "epoch": 0.17, + "learning_rate": 1.9974810185658226e-05, + "loss": 2.1233, + "step": 861 + }, + { + "epoch": 0.17, + "learning_rate": 1.9974687510166106e-05, + "loss": 2.2358, + "step": 862 + }, + { + "epoch": 0.18, + "learning_rate": 1.9974564537060205e-05, + "loss": 2.1897, + "step": 863 + }, + { + "epoch": 0.18, + "learning_rate": 1.9974441266344203e-05, + "loss": 2.18, + "step": 864 + }, + { + "epoch": 0.18, + "learning_rate": 1.997431769802177e-05, + "loss": 2.1495, + "step": 865 + }, + { + "epoch": 0.18, + "learning_rate": 1.9974193832096595e-05, + "loss": 2.1969, + "step": 866 + }, + { + "epoch": 0.18, + "learning_rate": 1.9974069668572373e-05, + "loss": 2.1409, + "step": 867 + }, + { + "epoch": 0.18, + "learning_rate": 1.997394520745281e-05, + "loss": 2.2094, + "step": 868 + }, + { + "epoch": 0.18, + "learning_rate": 1.997382044874162e-05, + "loss": 2.1757, + "step": 869 + }, + { + "epoch": 0.18, + "learning_rate": 1.997369539244252e-05, + "loss": 2.1078, + "step": 870 + }, + { + "epoch": 0.18, + "learning_rate": 1.9973570038559246e-05, + "loss": 2.1155, + "step": 871 + }, + { + "epoch": 0.18, + "learning_rate": 1.9973444387095542e-05, + "loss": 2.215, + "step": 872 + }, + { + "epoch": 0.18, + "learning_rate": 1.997331843805515e-05, + "loss": 2.2198, + "step": 873 + }, + { + "epoch": 0.18, + "learning_rate": 1.997319219144183e-05, + "loss": 2.1809, + "step": 874 + }, + { + "epoch": 0.18, + "learning_rate": 1.997306564725935e-05, + "loss": 2.1711, + "step": 875 + }, + { + "epoch": 0.18, + "learning_rate": 1.9972938805511484e-05, + "loss": 2.1722, + "step": 876 + }, + { + "epoch": 0.18, + "learning_rate": 1.9972811666202018e-05, + "loss": 2.241, + "step": 877 + }, + { + "epoch": 0.18, + "learning_rate": 1.9972684229334745e-05, + "loss": 2.2566, + "step": 878 + }, + { + "epoch": 0.18, + "learning_rate": 1.997255649491347e-05, + "loss": 2.2116, + "step": 879 + }, + { + "epoch": 0.18, + "learning_rate": 1.9972428462941997e-05, + "loss": 2.1864, + "step": 880 + }, + { + "epoch": 0.18, + "learning_rate": 1.9972300133424154e-05, + "loss": 2.1942, + "step": 881 + }, + { + "epoch": 0.18, + "learning_rate": 1.9972171506363766e-05, + "loss": 2.2047, + "step": 882 + }, + { + "epoch": 0.18, + "learning_rate": 1.997204258176467e-05, + "loss": 2.186, + "step": 883 + }, + { + "epoch": 0.18, + "learning_rate": 1.9971913359630718e-05, + "loss": 2.2354, + "step": 884 + }, + { + "epoch": 0.18, + "learning_rate": 1.9971783839965756e-05, + "loss": 2.2403, + "step": 885 + }, + { + "epoch": 0.18, + "learning_rate": 1.9971654022773658e-05, + "loss": 2.1375, + "step": 886 + }, + { + "epoch": 0.18, + "learning_rate": 1.9971523908058293e-05, + "loss": 2.1274, + "step": 887 + }, + { + "epoch": 0.18, + "learning_rate": 1.997139349582354e-05, + "loss": 2.2115, + "step": 888 + }, + { + "epoch": 0.18, + "learning_rate": 1.99712627860733e-05, + "loss": 2.0346, + "step": 889 + }, + { + "epoch": 0.18, + "learning_rate": 1.9971131778811464e-05, + "loss": 2.1983, + "step": 890 + }, + { + "epoch": 0.18, + "learning_rate": 1.9971000474041944e-05, + "loss": 2.0913, + "step": 891 + }, + { + "epoch": 0.18, + "learning_rate": 1.997086887176866e-05, + "loss": 2.2797, + "step": 892 + }, + { + "epoch": 0.18, + "learning_rate": 1.9970736971995532e-05, + "loss": 2.1853, + "step": 893 + }, + { + "epoch": 0.18, + "learning_rate": 1.9970604774726497e-05, + "loss": 2.1881, + "step": 894 + }, + { + "epoch": 0.18, + "learning_rate": 1.997047227996551e-05, + "loss": 2.114, + "step": 895 + }, + { + "epoch": 0.18, + "learning_rate": 1.9970339487716507e-05, + "loss": 2.1521, + "step": 896 + }, + { + "epoch": 0.18, + "learning_rate": 1.9970206397983465e-05, + "loss": 2.1936, + "step": 897 + }, + { + "epoch": 0.18, + "learning_rate": 1.997007301077035e-05, + "loss": 2.2108, + "step": 898 + }, + { + "epoch": 0.18, + "learning_rate": 1.996993932608114e-05, + "loss": 2.2746, + "step": 899 + }, + { + "epoch": 0.18, + "learning_rate": 1.9969805343919822e-05, + "loss": 2.1177, + "step": 900 + }, + { + "epoch": 0.18, + "learning_rate": 1.9969671064290397e-05, + "loss": 2.1316, + "step": 901 + }, + { + "epoch": 0.18, + "learning_rate": 1.9969536487196876e-05, + "loss": 2.1173, + "step": 902 + }, + { + "epoch": 0.18, + "learning_rate": 1.9969401612643262e-05, + "loss": 2.2235, + "step": 903 + }, + { + "epoch": 0.18, + "learning_rate": 1.996926644063359e-05, + "loss": 2.203, + "step": 904 + }, + { + "epoch": 0.18, + "learning_rate": 1.9969130971171887e-05, + "loss": 2.2021, + "step": 905 + }, + { + "epoch": 0.18, + "learning_rate": 1.9968995204262203e-05, + "loss": 2.1522, + "step": 906 + }, + { + "epoch": 0.18, + "learning_rate": 1.996885913990858e-05, + "loss": 2.211, + "step": 907 + }, + { + "epoch": 0.18, + "learning_rate": 1.996872277811508e-05, + "loss": 2.2718, + "step": 908 + }, + { + "epoch": 0.18, + "learning_rate": 1.9968586118885776e-05, + "loss": 2.1995, + "step": 909 + }, + { + "epoch": 0.18, + "learning_rate": 1.996844916222474e-05, + "loss": 2.1644, + "step": 910 + }, + { + "epoch": 0.18, + "learning_rate": 1.996831190813606e-05, + "loss": 2.1928, + "step": 911 + }, + { + "epoch": 0.19, + "learning_rate": 1.9968174356623834e-05, + "loss": 2.2058, + "step": 912 + }, + { + "epoch": 0.19, + "learning_rate": 1.9968036507692163e-05, + "loss": 2.1967, + "step": 913 + }, + { + "epoch": 0.19, + "learning_rate": 1.996789836134516e-05, + "loss": 2.1963, + "step": 914 + }, + { + "epoch": 0.19, + "learning_rate": 1.9967759917586953e-05, + "loss": 2.22, + "step": 915 + }, + { + "epoch": 0.19, + "learning_rate": 1.9967621176421665e-05, + "loss": 2.1619, + "step": 916 + }, + { + "epoch": 0.19, + "learning_rate": 1.9967482137853432e-05, + "loss": 2.167, + "step": 917 + }, + { + "epoch": 0.19, + "learning_rate": 1.9967342801886412e-05, + "loss": 2.2137, + "step": 918 + }, + { + "epoch": 0.19, + "learning_rate": 1.9967203168524763e-05, + "loss": 2.171, + "step": 919 + }, + { + "epoch": 0.19, + "learning_rate": 1.9967063237772644e-05, + "loss": 2.237, + "step": 920 + }, + { + "epoch": 0.19, + "learning_rate": 1.9966923009634237e-05, + "loss": 2.2313, + "step": 921 + }, + { + "epoch": 0.19, + "learning_rate": 1.996678248411372e-05, + "loss": 2.1583, + "step": 922 + }, + { + "epoch": 0.19, + "learning_rate": 1.9966641661215286e-05, + "loss": 2.1676, + "step": 923 + }, + { + "epoch": 0.19, + "learning_rate": 1.9966500540943137e-05, + "loss": 2.2826, + "step": 924 + }, + { + "epoch": 0.19, + "learning_rate": 1.9966359123301492e-05, + "loss": 2.2515, + "step": 925 + }, + { + "epoch": 0.19, + "learning_rate": 1.9966217408294558e-05, + "loss": 2.242, + "step": 926 + }, + { + "epoch": 0.19, + "learning_rate": 1.9966075395926574e-05, + "loss": 2.1837, + "step": 927 + }, + { + "epoch": 0.19, + "learning_rate": 1.996593308620177e-05, + "loss": 2.2682, + "step": 928 + }, + { + "epoch": 0.19, + "learning_rate": 1.9965790479124397e-05, + "loss": 2.1854, + "step": 929 + }, + { + "epoch": 0.19, + "learning_rate": 1.9965647574698705e-05, + "loss": 2.2051, + "step": 930 + }, + { + "epoch": 0.19, + "learning_rate": 1.996550437292896e-05, + "loss": 2.1166, + "step": 931 + }, + { + "epoch": 0.19, + "learning_rate": 1.9965360873819437e-05, + "loss": 2.2181, + "step": 932 + }, + { + "epoch": 0.19, + "learning_rate": 1.9965217077374416e-05, + "loss": 2.209, + "step": 933 + }, + { + "epoch": 0.19, + "learning_rate": 1.9965072983598185e-05, + "loss": 2.1288, + "step": 934 + }, + { + "epoch": 0.19, + "learning_rate": 1.9964928592495046e-05, + "loss": 2.0909, + "step": 935 + }, + { + "epoch": 0.19, + "learning_rate": 1.9964783904069306e-05, + "loss": 2.2472, + "step": 936 + }, + { + "epoch": 0.19, + "learning_rate": 1.9964638918325284e-05, + "loss": 2.1481, + "step": 937 + }, + { + "epoch": 0.19, + "learning_rate": 1.9964493635267303e-05, + "loss": 2.2969, + "step": 938 + }, + { + "epoch": 0.19, + "learning_rate": 1.9964348054899698e-05, + "loss": 2.209, + "step": 939 + }, + { + "epoch": 0.19, + "learning_rate": 1.9964202177226818e-05, + "loss": 2.2194, + "step": 940 + }, + { + "epoch": 0.19, + "learning_rate": 1.996405600225301e-05, + "loss": 2.2552, + "step": 941 + }, + { + "epoch": 0.19, + "learning_rate": 1.9963909529982635e-05, + "loss": 2.2494, + "step": 942 + }, + { + "epoch": 0.19, + "learning_rate": 1.9963762760420067e-05, + "loss": 2.2168, + "step": 943 + }, + { + "epoch": 0.19, + "learning_rate": 1.9963615693569684e-05, + "loss": 2.1793, + "step": 944 + }, + { + "epoch": 0.19, + "learning_rate": 1.9963468329435872e-05, + "loss": 2.15, + "step": 945 + }, + { + "epoch": 0.19, + "learning_rate": 1.996332066802303e-05, + "loss": 2.2157, + "step": 946 + }, + { + "epoch": 0.19, + "learning_rate": 1.996317270933556e-05, + "loss": 2.2038, + "step": 947 + }, + { + "epoch": 0.19, + "learning_rate": 1.9963024453377886e-05, + "loss": 2.1529, + "step": 948 + }, + { + "epoch": 0.19, + "learning_rate": 1.9962875900154422e-05, + "loss": 2.2217, + "step": 949 + }, + { + "epoch": 0.19, + "learning_rate": 1.99627270496696e-05, + "loss": 2.2397, + "step": 950 + }, + { + "epoch": 0.19, + "learning_rate": 1.996257790192787e-05, + "loss": 2.1122, + "step": 951 + }, + { + "epoch": 0.19, + "learning_rate": 1.9962428456933672e-05, + "loss": 2.1748, + "step": 952 + }, + { + "epoch": 0.19, + "learning_rate": 1.9962278714691473e-05, + "loss": 2.1433, + "step": 953 + }, + { + "epoch": 0.19, + "learning_rate": 1.9962128675205737e-05, + "loss": 2.1683, + "step": 954 + }, + { + "epoch": 0.19, + "learning_rate": 1.996197833848094e-05, + "loss": 2.2777, + "step": 955 + }, + { + "epoch": 0.19, + "learning_rate": 1.9961827704521575e-05, + "loss": 2.1109, + "step": 956 + }, + { + "epoch": 0.19, + "learning_rate": 1.9961676773332126e-05, + "loss": 2.1469, + "step": 957 + }, + { + "epoch": 0.19, + "learning_rate": 1.99615255449171e-05, + "loss": 2.2159, + "step": 958 + }, + { + "epoch": 0.19, + "learning_rate": 1.996137401928101e-05, + "loss": 2.1955, + "step": 959 + }, + { + "epoch": 0.19, + "learning_rate": 1.996122219642838e-05, + "loss": 2.2115, + "step": 960 + }, + { + "epoch": 0.2, + "learning_rate": 1.9961070076363733e-05, + "loss": 2.2111, + "step": 961 + }, + { + "epoch": 0.2, + "learning_rate": 1.9960917659091614e-05, + "loss": 2.2301, + "step": 962 + }, + { + "epoch": 0.2, + "learning_rate": 1.9960764944616566e-05, + "loss": 2.1968, + "step": 963 + }, + { + "epoch": 0.2, + "learning_rate": 1.996061193294315e-05, + "loss": 2.2339, + "step": 964 + }, + { + "epoch": 0.2, + "learning_rate": 1.996045862407593e-05, + "loss": 2.1571, + "step": 965 + }, + { + "epoch": 0.2, + "learning_rate": 1.996030501801948e-05, + "loss": 2.1717, + "step": 966 + }, + { + "epoch": 0.2, + "learning_rate": 1.996015111477838e-05, + "loss": 2.1365, + "step": 967 + }, + { + "epoch": 0.2, + "learning_rate": 1.9959996914357224e-05, + "loss": 2.1522, + "step": 968 + }, + { + "epoch": 0.2, + "learning_rate": 1.9959842416760617e-05, + "loss": 2.2051, + "step": 969 + }, + { + "epoch": 0.2, + "learning_rate": 1.9959687621993162e-05, + "loss": 2.2095, + "step": 970 + }, + { + "epoch": 0.2, + "learning_rate": 1.9959532530059485e-05, + "loss": 2.2033, + "step": 971 + }, + { + "epoch": 0.2, + "learning_rate": 1.9959377140964203e-05, + "loss": 2.2117, + "step": 972 + }, + { + "epoch": 0.2, + "learning_rate": 1.9959221454711963e-05, + "loss": 2.188, + "step": 973 + }, + { + "epoch": 0.2, + "learning_rate": 1.9959065471307405e-05, + "loss": 2.159, + "step": 974 + }, + { + "epoch": 0.2, + "learning_rate": 1.995890919075519e-05, + "loss": 2.1758, + "step": 975 + }, + { + "epoch": 0.2, + "learning_rate": 1.9958752613059965e-05, + "loss": 2.2661, + "step": 976 + }, + { + "epoch": 0.2, + "learning_rate": 1.9958595738226417e-05, + "loss": 2.2523, + "step": 977 + }, + { + "epoch": 0.2, + "learning_rate": 1.995843856625922e-05, + "loss": 2.175, + "step": 978 + }, + { + "epoch": 0.2, + "learning_rate": 1.9958281097163064e-05, + "loss": 2.1874, + "step": 979 + }, + { + "epoch": 0.2, + "learning_rate": 1.995812333094265e-05, + "loss": 2.2083, + "step": 980 + }, + { + "epoch": 0.2, + "learning_rate": 1.9957965267602684e-05, + "loss": 2.1749, + "step": 981 + }, + { + "epoch": 0.2, + "learning_rate": 1.995780690714788e-05, + "loss": 2.1311, + "step": 982 + }, + { + "epoch": 0.2, + "learning_rate": 1.9957648249582962e-05, + "loss": 2.1522, + "step": 983 + }, + { + "epoch": 0.2, + "learning_rate": 1.995748929491267e-05, + "loss": 2.2337, + "step": 984 + }, + { + "epoch": 0.2, + "learning_rate": 1.9957330043141744e-05, + "loss": 2.2112, + "step": 985 + }, + { + "epoch": 0.2, + "learning_rate": 1.9957170494274933e-05, + "loss": 2.1919, + "step": 986 + }, + { + "epoch": 0.2, + "learning_rate": 1.9957010648316998e-05, + "loss": 2.0646, + "step": 987 + }, + { + "epoch": 0.2, + "learning_rate": 1.995685050527271e-05, + "loss": 2.1733, + "step": 988 + }, + { + "epoch": 0.2, + "learning_rate": 1.995669006514685e-05, + "loss": 2.1942, + "step": 989 + }, + { + "epoch": 0.2, + "learning_rate": 1.9956529327944198e-05, + "loss": 2.2306, + "step": 990 + }, + { + "epoch": 0.2, + "learning_rate": 1.9956368293669555e-05, + "loss": 2.1644, + "step": 991 + }, + { + "epoch": 0.2, + "learning_rate": 1.995620696232772e-05, + "loss": 2.1738, + "step": 992 + }, + { + "epoch": 0.2, + "learning_rate": 1.995604533392352e-05, + "loss": 2.201, + "step": 993 + }, + { + "epoch": 0.2, + "learning_rate": 1.995588340846176e-05, + "loss": 2.2672, + "step": 994 + }, + { + "epoch": 0.2, + "learning_rate": 1.9955721185947286e-05, + "loss": 2.1632, + "step": 995 + }, + { + "epoch": 0.2, + "learning_rate": 1.9955558666384927e-05, + "loss": 2.1754, + "step": 996 + }, + { + "epoch": 0.2, + "learning_rate": 1.9955395849779538e-05, + "loss": 2.1274, + "step": 997 + }, + { + "epoch": 0.2, + "learning_rate": 1.9955232736135978e-05, + "loss": 2.1525, + "step": 998 + }, + { + "epoch": 0.2, + "learning_rate": 1.995506932545911e-05, + "loss": 2.1566, + "step": 999 + }, + { + "epoch": 0.2, + "learning_rate": 1.9954905617753815e-05, + "loss": 2.2134, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 1.995474161302497e-05, + "loss": 2.2163, + "step": 1001 + }, + { + "epoch": 0.2, + "learning_rate": 1.9954577311277473e-05, + "loss": 2.1541, + "step": 1002 + }, + { + "epoch": 0.2, + "learning_rate": 1.995441271251623e-05, + "loss": 2.2203, + "step": 1003 + }, + { + "epoch": 0.2, + "learning_rate": 1.9954247816746144e-05, + "loss": 2.2486, + "step": 1004 + }, + { + "epoch": 0.2, + "learning_rate": 1.9954082623972143e-05, + "loss": 2.2002, + "step": 1005 + }, + { + "epoch": 0.2, + "learning_rate": 1.9953917134199145e-05, + "loss": 2.1675, + "step": 1006 + }, + { + "epoch": 0.2, + "learning_rate": 1.9953751347432096e-05, + "loss": 2.1553, + "step": 1007 + }, + { + "epoch": 0.2, + "learning_rate": 1.9953585263675946e-05, + "loss": 2.1642, + "step": 1008 + }, + { + "epoch": 0.2, + "learning_rate": 1.995341888293564e-05, + "loss": 2.25, + "step": 1009 + }, + { + "epoch": 0.21, + "learning_rate": 1.995325220521615e-05, + "loss": 2.2267, + "step": 1010 + }, + { + "epoch": 0.21, + "learning_rate": 1.9953085230522445e-05, + "loss": 2.2686, + "step": 1011 + }, + { + "epoch": 0.21, + "learning_rate": 1.995291795885951e-05, + "loss": 2.2067, + "step": 1012 + }, + { + "epoch": 0.21, + "learning_rate": 1.9952750390232337e-05, + "loss": 2.1684, + "step": 1013 + }, + { + "epoch": 0.21, + "learning_rate": 1.995258252464592e-05, + "loss": 2.2434, + "step": 1014 + }, + { + "epoch": 0.21, + "learning_rate": 1.995241436210527e-05, + "loss": 2.2018, + "step": 1015 + }, + { + "epoch": 0.21, + "learning_rate": 1.995224590261541e-05, + "loss": 2.06, + "step": 1016 + }, + { + "epoch": 0.21, + "learning_rate": 1.9952077146181358e-05, + "loss": 2.2344, + "step": 1017 + }, + { + "epoch": 0.21, + "learning_rate": 1.9951908092808153e-05, + "loss": 2.1954, + "step": 1018 + }, + { + "epoch": 0.21, + "learning_rate": 1.995173874250084e-05, + "loss": 2.1215, + "step": 1019 + }, + { + "epoch": 0.21, + "learning_rate": 1.9951569095264473e-05, + "loss": 2.2294, + "step": 1020 + }, + { + "epoch": 0.21, + "learning_rate": 1.995139915110411e-05, + "loss": 2.202, + "step": 1021 + }, + { + "epoch": 0.21, + "learning_rate": 1.995122891002482e-05, + "loss": 2.2684, + "step": 1022 + }, + { + "epoch": 0.21, + "learning_rate": 1.9951058372031688e-05, + "loss": 2.1709, + "step": 1023 + }, + { + "epoch": 0.21, + "learning_rate": 1.9950887537129803e-05, + "loss": 2.2158, + "step": 1024 + }, + { + "epoch": 0.21, + "learning_rate": 1.9950716405324255e-05, + "loss": 2.1723, + "step": 1025 + }, + { + "epoch": 0.21, + "learning_rate": 1.9950544976620154e-05, + "loss": 2.1267, + "step": 1026 + }, + { + "epoch": 0.21, + "learning_rate": 1.9950373251022616e-05, + "loss": 2.2668, + "step": 1027 + }, + { + "epoch": 0.21, + "learning_rate": 1.9950201228536764e-05, + "loss": 2.1947, + "step": 1028 + }, + { + "epoch": 0.21, + "learning_rate": 1.9950028909167734e-05, + "loss": 2.1717, + "step": 1029 + }, + { + "epoch": 0.21, + "learning_rate": 1.994985629292066e-05, + "loss": 2.1634, + "step": 1030 + }, + { + "epoch": 0.21, + "learning_rate": 1.9949683379800697e-05, + "loss": 2.2134, + "step": 1031 + }, + { + "epoch": 0.21, + "learning_rate": 1.9949510169813006e-05, + "loss": 2.1578, + "step": 1032 + }, + { + "epoch": 0.21, + "learning_rate": 1.9949336662962748e-05, + "loss": 2.1654, + "step": 1033 + }, + { + "epoch": 0.21, + "learning_rate": 1.9949162859255106e-05, + "loss": 2.1257, + "step": 1034 + }, + { + "epoch": 0.21, + "learning_rate": 1.9948988758695263e-05, + "loss": 2.2336, + "step": 1035 + }, + { + "epoch": 0.21, + "learning_rate": 1.994881436128842e-05, + "loss": 2.2355, + "step": 1036 + }, + { + "epoch": 0.21, + "learning_rate": 1.994863966703977e-05, + "loss": 2.231, + "step": 1037 + }, + { + "epoch": 0.21, + "learning_rate": 1.994846467595453e-05, + "loss": 2.2294, + "step": 1038 + }, + { + "epoch": 0.21, + "learning_rate": 1.9948289388037926e-05, + "loss": 2.192, + "step": 1039 + }, + { + "epoch": 0.21, + "learning_rate": 1.9948113803295183e-05, + "loss": 2.2624, + "step": 1040 + }, + { + "epoch": 0.21, + "learning_rate": 1.994793792173154e-05, + "loss": 2.1664, + "step": 1041 + }, + { + "epoch": 0.21, + "learning_rate": 1.9947761743352244e-05, + "loss": 2.2642, + "step": 1042 + }, + { + "epoch": 0.21, + "learning_rate": 1.9947585268162555e-05, + "loss": 2.2117, + "step": 1043 + }, + { + "epoch": 0.21, + "learning_rate": 1.9947408496167736e-05, + "loss": 2.23, + "step": 1044 + }, + { + "epoch": 0.21, + "learning_rate": 1.994723142737306e-05, + "loss": 2.1656, + "step": 1045 + }, + { + "epoch": 0.21, + "learning_rate": 1.9947054061783815e-05, + "loss": 2.1542, + "step": 1046 + }, + { + "epoch": 0.21, + "learning_rate": 1.994687639940529e-05, + "loss": 2.1891, + "step": 1047 + }, + { + "epoch": 0.21, + "learning_rate": 1.9946698440242787e-05, + "loss": 2.2264, + "step": 1048 + }, + { + "epoch": 0.21, + "learning_rate": 1.994652018430161e-05, + "loss": 2.1316, + "step": 1049 + }, + { + "epoch": 0.21, + "learning_rate": 1.9946341631587086e-05, + "loss": 2.1684, + "step": 1050 + }, + { + "epoch": 0.21, + "learning_rate": 1.994616278210454e-05, + "loss": 2.2387, + "step": 1051 + }, + { + "epoch": 0.21, + "learning_rate": 1.9945983635859304e-05, + "loss": 2.1689, + "step": 1052 + }, + { + "epoch": 0.21, + "learning_rate": 1.994580419285673e-05, + "loss": 2.1005, + "step": 1053 + }, + { + "epoch": 0.21, + "learning_rate": 1.9945624453102165e-05, + "loss": 2.1699, + "step": 1054 + }, + { + "epoch": 0.21, + "learning_rate": 1.994544441660098e-05, + "loss": 2.2159, + "step": 1055 + }, + { + "epoch": 0.21, + "learning_rate": 1.9945264083358537e-05, + "loss": 2.1652, + "step": 1056 + }, + { + "epoch": 0.21, + "learning_rate": 1.994508345338022e-05, + "loss": 2.1624, + "step": 1057 + }, + { + "epoch": 0.21, + "learning_rate": 1.9944902526671427e-05, + "loss": 2.1896, + "step": 1058 + }, + { + "epoch": 0.21, + "learning_rate": 1.9944721303237544e-05, + "loss": 2.1242, + "step": 1059 + }, + { + "epoch": 0.22, + "learning_rate": 1.9944539783083984e-05, + "loss": 2.1718, + "step": 1060 + }, + { + "epoch": 0.22, + "learning_rate": 1.9944357966216166e-05, + "loss": 2.1497, + "step": 1061 + }, + { + "epoch": 0.22, + "learning_rate": 1.9944175852639505e-05, + "loss": 2.2811, + "step": 1062 + }, + { + "epoch": 0.22, + "learning_rate": 1.9943993442359448e-05, + "loss": 2.2627, + "step": 1063 + }, + { + "epoch": 0.22, + "learning_rate": 1.9943810735381426e-05, + "loss": 2.26, + "step": 1064 + }, + { + "epoch": 0.22, + "learning_rate": 1.9943627731710896e-05, + "loss": 2.1002, + "step": 1065 + }, + { + "epoch": 0.22, + "learning_rate": 1.9943444431353317e-05, + "loss": 2.26, + "step": 1066 + }, + { + "epoch": 0.22, + "learning_rate": 1.994326083431416e-05, + "loss": 2.2257, + "step": 1067 + }, + { + "epoch": 0.22, + "learning_rate": 1.9943076940598903e-05, + "loss": 2.185, + "step": 1068 + }, + { + "epoch": 0.22, + "learning_rate": 1.9942892750213026e-05, + "loss": 2.1989, + "step": 1069 + }, + { + "epoch": 0.22, + "learning_rate": 1.9942708263162034e-05, + "loss": 2.1695, + "step": 1070 + }, + { + "epoch": 0.22, + "learning_rate": 1.9942523479451426e-05, + "loss": 2.0523, + "step": 1071 + }, + { + "epoch": 0.22, + "learning_rate": 1.9942338399086716e-05, + "loss": 2.2032, + "step": 1072 + }, + { + "epoch": 0.22, + "learning_rate": 1.9942153022073426e-05, + "loss": 2.2413, + "step": 1073 + }, + { + "epoch": 0.22, + "learning_rate": 1.9941967348417092e-05, + "loss": 2.0829, + "step": 1074 + }, + { + "epoch": 0.22, + "learning_rate": 1.9941781378123244e-05, + "loss": 2.1169, + "step": 1075 + }, + { + "epoch": 0.22, + "learning_rate": 1.994159511119744e-05, + "loss": 2.2409, + "step": 1076 + }, + { + "epoch": 0.22, + "learning_rate": 1.9941408547645236e-05, + "loss": 2.1793, + "step": 1077 + }, + { + "epoch": 0.22, + "learning_rate": 1.9941221687472193e-05, + "loss": 2.2578, + "step": 1078 + }, + { + "epoch": 0.22, + "learning_rate": 1.9941034530683893e-05, + "loss": 2.1789, + "step": 1079 + }, + { + "epoch": 0.22, + "learning_rate": 1.9940847077285918e-05, + "loss": 2.19, + "step": 1080 + }, + { + "epoch": 0.22, + "learning_rate": 1.994065932728386e-05, + "loss": 2.2335, + "step": 1081 + }, + { + "epoch": 0.22, + "learning_rate": 1.9940471280683317e-05, + "loss": 2.1859, + "step": 1082 + }, + { + "epoch": 0.22, + "learning_rate": 1.9940282937489907e-05, + "loss": 2.2565, + "step": 1083 + }, + { + "epoch": 0.22, + "learning_rate": 1.9940094297709248e-05, + "loss": 2.1859, + "step": 1084 + }, + { + "epoch": 0.22, + "learning_rate": 1.9939905361346965e-05, + "loss": 2.1598, + "step": 1085 + }, + { + "epoch": 0.22, + "learning_rate": 1.99397161284087e-05, + "loss": 2.1812, + "step": 1086 + }, + { + "epoch": 0.22, + "learning_rate": 1.9939526598900092e-05, + "loss": 2.2435, + "step": 1087 + }, + { + "epoch": 0.22, + "learning_rate": 1.9939336772826802e-05, + "loss": 2.1738, + "step": 1088 + }, + { + "epoch": 0.22, + "learning_rate": 1.9939146650194494e-05, + "loss": 2.1844, + "step": 1089 + }, + { + "epoch": 0.22, + "learning_rate": 1.9938956231008836e-05, + "loss": 2.1795, + "step": 1090 + }, + { + "epoch": 0.22, + "learning_rate": 1.9938765515275516e-05, + "loss": 2.2268, + "step": 1091 + }, + { + "epoch": 0.22, + "learning_rate": 1.9938574503000217e-05, + "loss": 2.2512, + "step": 1092 + }, + { + "epoch": 0.22, + "learning_rate": 1.9938383194188648e-05, + "loss": 2.0891, + "step": 1093 + }, + { + "epoch": 0.22, + "learning_rate": 1.9938191588846504e-05, + "loss": 2.2274, + "step": 1094 + }, + { + "epoch": 0.22, + "learning_rate": 1.993799968697951e-05, + "loss": 2.2298, + "step": 1095 + }, + { + "epoch": 0.22, + "learning_rate": 1.993780748859339e-05, + "loss": 2.2105, + "step": 1096 + }, + { + "epoch": 0.22, + "learning_rate": 1.9937614993693882e-05, + "loss": 2.1174, + "step": 1097 + }, + { + "epoch": 0.22, + "learning_rate": 1.9937422202286728e-05, + "loss": 2.1737, + "step": 1098 + }, + { + "epoch": 0.22, + "learning_rate": 1.9937229114377677e-05, + "loss": 2.1841, + "step": 1099 + }, + { + "epoch": 0.22, + "learning_rate": 1.9937035729972494e-05, + "loss": 2.2623, + "step": 1100 + }, + { + "epoch": 0.22, + "learning_rate": 1.9936842049076947e-05, + "loss": 2.1688, + "step": 1101 + }, + { + "epoch": 0.22, + "learning_rate": 1.9936648071696812e-05, + "loss": 2.2309, + "step": 1102 + }, + { + "epoch": 0.22, + "learning_rate": 1.993645379783788e-05, + "loss": 2.1983, + "step": 1103 + }, + { + "epoch": 0.22, + "learning_rate": 1.993625922750595e-05, + "loss": 2.2112, + "step": 1104 + }, + { + "epoch": 0.22, + "learning_rate": 1.993606436070682e-05, + "loss": 2.1977, + "step": 1105 + }, + { + "epoch": 0.22, + "learning_rate": 1.9935869197446313e-05, + "loss": 2.1472, + "step": 1106 + }, + { + "epoch": 0.22, + "learning_rate": 1.993567373773025e-05, + "loss": 2.1297, + "step": 1107 + }, + { + "epoch": 0.22, + "learning_rate": 1.9935477981564456e-05, + "loss": 2.1576, + "step": 1108 + }, + { + "epoch": 0.23, + "learning_rate": 1.9935281928954776e-05, + "loss": 2.1767, + "step": 1109 + }, + { + "epoch": 0.23, + "learning_rate": 1.9935085579907064e-05, + "loss": 2.1708, + "step": 1110 + }, + { + "epoch": 0.23, + "learning_rate": 1.9934888934427172e-05, + "loss": 2.1747, + "step": 1111 + }, + { + "epoch": 0.23, + "learning_rate": 1.993469199252097e-05, + "loss": 2.191, + "step": 1112 + }, + { + "epoch": 0.23, + "learning_rate": 1.9934494754194335e-05, + "loss": 2.1795, + "step": 1113 + }, + { + "epoch": 0.23, + "learning_rate": 1.993429721945315e-05, + "loss": 2.3161, + "step": 1114 + }, + { + "epoch": 0.23, + "learning_rate": 1.993409938830331e-05, + "loss": 2.1314, + "step": 1115 + }, + { + "epoch": 0.23, + "learning_rate": 1.9933901260750723e-05, + "loss": 2.245, + "step": 1116 + }, + { + "epoch": 0.23, + "learning_rate": 1.9933702836801285e-05, + "loss": 2.2243, + "step": 1117 + }, + { + "epoch": 0.23, + "learning_rate": 1.9933504116460934e-05, + "loss": 2.2161, + "step": 1118 + }, + { + "epoch": 0.23, + "learning_rate": 1.993330509973559e-05, + "loss": 2.2021, + "step": 1119 + }, + { + "epoch": 0.23, + "learning_rate": 1.993310578663119e-05, + "loss": 2.1389, + "step": 1120 + }, + { + "epoch": 0.23, + "learning_rate": 1.9932906177153685e-05, + "loss": 2.1434, + "step": 1121 + }, + { + "epoch": 0.23, + "learning_rate": 1.9932706271309027e-05, + "loss": 2.2319, + "step": 1122 + }, + { + "epoch": 0.23, + "learning_rate": 1.9932506069103186e-05, + "loss": 2.304, + "step": 1123 + }, + { + "epoch": 0.23, + "learning_rate": 1.993230557054213e-05, + "loss": 2.1974, + "step": 1124 + }, + { + "epoch": 0.23, + "learning_rate": 1.9932104775631847e-05, + "loss": 2.2039, + "step": 1125 + }, + { + "epoch": 0.23, + "learning_rate": 1.993190368437832e-05, + "loss": 2.1731, + "step": 1126 + }, + { + "epoch": 0.23, + "learning_rate": 1.9931702296787558e-05, + "loss": 2.1842, + "step": 1127 + }, + { + "epoch": 0.23, + "learning_rate": 1.9931500612865563e-05, + "loss": 2.238, + "step": 1128 + }, + { + "epoch": 0.23, + "learning_rate": 1.9931298632618355e-05, + "loss": 2.2509, + "step": 1129 + }, + { + "epoch": 0.23, + "learning_rate": 1.9931096356051958e-05, + "loss": 2.1988, + "step": 1130 + }, + { + "epoch": 0.23, + "learning_rate": 1.9930893783172416e-05, + "loss": 2.1788, + "step": 1131 + }, + { + "epoch": 0.23, + "learning_rate": 1.9930690913985763e-05, + "loss": 2.2236, + "step": 1132 + }, + { + "epoch": 0.23, + "learning_rate": 1.9930487748498055e-05, + "loss": 2.1505, + "step": 1133 + }, + { + "epoch": 0.23, + "learning_rate": 1.9930284286715354e-05, + "loss": 2.1936, + "step": 1134 + }, + { + "epoch": 0.23, + "learning_rate": 1.993008052864373e-05, + "loss": 2.174, + "step": 1135 + }, + { + "epoch": 0.23, + "learning_rate": 1.992987647428927e-05, + "loss": 2.2062, + "step": 1136 + }, + { + "epoch": 0.23, + "learning_rate": 1.9929672123658052e-05, + "loss": 2.2033, + "step": 1137 + }, + { + "epoch": 0.23, + "learning_rate": 1.992946747675618e-05, + "loss": 2.271, + "step": 1138 + }, + { + "epoch": 0.23, + "learning_rate": 1.9929262533589752e-05, + "loss": 2.1277, + "step": 1139 + }, + { + "epoch": 0.23, + "learning_rate": 1.9929057294164894e-05, + "loss": 2.2904, + "step": 1140 + }, + { + "epoch": 0.23, + "learning_rate": 1.992885175848772e-05, + "loss": 2.2215, + "step": 1141 + }, + { + "epoch": 0.23, + "learning_rate": 1.9928645926564368e-05, + "loss": 2.1511, + "step": 1142 + }, + { + "epoch": 0.23, + "learning_rate": 1.9928439798400978e-05, + "loss": 2.1957, + "step": 1143 + }, + { + "epoch": 0.23, + "learning_rate": 1.9928233374003702e-05, + "loss": 2.1555, + "step": 1144 + }, + { + "epoch": 0.23, + "learning_rate": 1.9928026653378695e-05, + "loss": 2.176, + "step": 1145 + }, + { + "epoch": 0.23, + "learning_rate": 1.9927819636532125e-05, + "loss": 2.2061, + "step": 1146 + }, + { + "epoch": 0.23, + "learning_rate": 1.9927612323470175e-05, + "loss": 2.2129, + "step": 1147 + }, + { + "epoch": 0.23, + "learning_rate": 1.9927404714199024e-05, + "loss": 2.1787, + "step": 1148 + }, + { + "epoch": 0.23, + "learning_rate": 1.992719680872487e-05, + "loss": 2.1567, + "step": 1149 + }, + { + "epoch": 0.23, + "learning_rate": 1.9926988607053915e-05, + "loss": 2.1606, + "step": 1150 + }, + { + "epoch": 0.23, + "learning_rate": 1.9926780109192368e-05, + "loss": 2.27, + "step": 1151 + }, + { + "epoch": 0.23, + "learning_rate": 1.992657131514646e-05, + "loss": 2.2117, + "step": 1152 + }, + { + "epoch": 0.23, + "learning_rate": 1.992636222492241e-05, + "loss": 2.1689, + "step": 1153 + }, + { + "epoch": 0.23, + "learning_rate": 1.9926152838526457e-05, + "loss": 2.1649, + "step": 1154 + }, + { + "epoch": 0.23, + "learning_rate": 1.9925943155964857e-05, + "loss": 2.2188, + "step": 1155 + }, + { + "epoch": 0.23, + "learning_rate": 1.9925733177243858e-05, + "loss": 2.2275, + "step": 1156 + }, + { + "epoch": 0.23, + "learning_rate": 1.992552290236973e-05, + "loss": 2.2639, + "step": 1157 + }, + { + "epoch": 0.24, + "learning_rate": 1.992531233134874e-05, + "loss": 2.2141, + "step": 1158 + }, + { + "epoch": 0.24, + "learning_rate": 1.9925101464187184e-05, + "loss": 2.2172, + "step": 1159 + }, + { + "epoch": 0.24, + "learning_rate": 1.9924890300891343e-05, + "loss": 2.2251, + "step": 1160 + }, + { + "epoch": 0.24, + "learning_rate": 1.992467884146752e-05, + "loss": 2.1414, + "step": 1161 + }, + { + "epoch": 0.24, + "learning_rate": 1.9924467085922024e-05, + "loss": 2.1619, + "step": 1162 + }, + { + "epoch": 0.24, + "learning_rate": 1.9924255034261177e-05, + "loss": 2.2165, + "step": 1163 + }, + { + "epoch": 0.24, + "learning_rate": 1.99240426864913e-05, + "loss": 2.1468, + "step": 1164 + }, + { + "epoch": 0.24, + "learning_rate": 1.992383004261873e-05, + "loss": 2.2039, + "step": 1165 + }, + { + "epoch": 0.24, + "learning_rate": 1.9923617102649812e-05, + "loss": 2.2408, + "step": 1166 + }, + { + "epoch": 0.24, + "learning_rate": 1.9923403866590902e-05, + "loss": 2.1438, + "step": 1167 + }, + { + "epoch": 0.24, + "learning_rate": 1.9923190334448362e-05, + "loss": 2.1151, + "step": 1168 + }, + { + "epoch": 0.24, + "learning_rate": 1.992297650622856e-05, + "loss": 2.2639, + "step": 1169 + }, + { + "epoch": 0.24, + "learning_rate": 1.992276238193788e-05, + "loss": 2.1453, + "step": 1170 + }, + { + "epoch": 0.24, + "learning_rate": 1.9922547961582705e-05, + "loss": 2.1757, + "step": 1171 + }, + { + "epoch": 0.24, + "learning_rate": 1.992233324516944e-05, + "loss": 2.2456, + "step": 1172 + }, + { + "epoch": 0.24, + "learning_rate": 1.9922118232704484e-05, + "loss": 2.115, + "step": 1173 + }, + { + "epoch": 0.24, + "learning_rate": 1.992190292419426e-05, + "loss": 2.2665, + "step": 1174 + }, + { + "epoch": 0.24, + "learning_rate": 1.9921687319645183e-05, + "loss": 2.0816, + "step": 1175 + }, + { + "epoch": 0.24, + "learning_rate": 1.9921471419063696e-05, + "loss": 2.2362, + "step": 1176 + }, + { + "epoch": 0.24, + "learning_rate": 1.9921255222456235e-05, + "loss": 2.1749, + "step": 1177 + }, + { + "epoch": 0.24, + "learning_rate": 1.992103872982925e-05, + "loss": 2.242, + "step": 1178 + }, + { + "epoch": 0.24, + "learning_rate": 1.9920821941189206e-05, + "loss": 2.256, + "step": 1179 + }, + { + "epoch": 0.24, + "learning_rate": 1.9920604856542564e-05, + "loss": 2.1637, + "step": 1180 + }, + { + "epoch": 0.24, + "learning_rate": 1.9920387475895806e-05, + "loss": 2.1944, + "step": 1181 + }, + { + "epoch": 0.24, + "learning_rate": 1.9920169799255415e-05, + "loss": 2.1769, + "step": 1182 + }, + { + "epoch": 0.24, + "learning_rate": 1.9919951826627888e-05, + "loss": 2.1943, + "step": 1183 + }, + { + "epoch": 0.24, + "learning_rate": 1.9919733558019727e-05, + "loss": 2.1034, + "step": 1184 + }, + { + "epoch": 0.24, + "learning_rate": 1.9919514993437445e-05, + "loss": 2.2086, + "step": 1185 + }, + { + "epoch": 0.24, + "learning_rate": 1.9919296132887567e-05, + "loss": 2.179, + "step": 1186 + }, + { + "epoch": 0.24, + "learning_rate": 1.9919076976376618e-05, + "loss": 2.2422, + "step": 1187 + }, + { + "epoch": 0.24, + "learning_rate": 1.9918857523911137e-05, + "loss": 2.2, + "step": 1188 + }, + { + "epoch": 0.24, + "learning_rate": 1.9918637775497675e-05, + "loss": 2.2564, + "step": 1189 + }, + { + "epoch": 0.24, + "learning_rate": 1.9918417731142786e-05, + "loss": 2.2066, + "step": 1190 + }, + { + "epoch": 0.24, + "learning_rate": 1.9918197390853037e-05, + "loss": 2.2056, + "step": 1191 + }, + { + "epoch": 0.24, + "learning_rate": 1.9917976754635004e-05, + "loss": 2.2135, + "step": 1192 + }, + { + "epoch": 0.24, + "learning_rate": 1.9917755822495265e-05, + "loss": 2.1371, + "step": 1193 + }, + { + "epoch": 0.24, + "learning_rate": 1.9917534594440418e-05, + "loss": 2.1492, + "step": 1194 + }, + { + "epoch": 0.24, + "learning_rate": 1.9917313070477057e-05, + "loss": 2.1321, + "step": 1195 + }, + { + "epoch": 0.24, + "learning_rate": 1.99170912506118e-05, + "loss": 2.2317, + "step": 1196 + }, + { + "epoch": 0.24, + "learning_rate": 1.9916869134851254e-05, + "loss": 2.1685, + "step": 1197 + }, + { + "epoch": 0.24, + "learning_rate": 1.991664672320206e-05, + "loss": 2.1799, + "step": 1198 + }, + { + "epoch": 0.24, + "learning_rate": 1.991642401567084e-05, + "loss": 2.1695, + "step": 1199 + }, + { + "epoch": 0.24, + "learning_rate": 1.9916201012264255e-05, + "loss": 2.1935, + "step": 1200 + }, + { + "epoch": 0.24, + "learning_rate": 1.9915977712988944e-05, + "loss": 2.2166, + "step": 1201 + }, + { + "epoch": 0.24, + "learning_rate": 1.9915754117851574e-05, + "loss": 2.1998, + "step": 1202 + }, + { + "epoch": 0.24, + "learning_rate": 1.991553022685882e-05, + "loss": 2.175, + "step": 1203 + }, + { + "epoch": 0.24, + "learning_rate": 1.991530604001736e-05, + "loss": 2.161, + "step": 1204 + }, + { + "epoch": 0.24, + "learning_rate": 1.9915081557333876e-05, + "loss": 2.165, + "step": 1205 + }, + { + "epoch": 0.24, + "learning_rate": 1.991485677881508e-05, + "loss": 2.2113, + "step": 1206 + }, + { + "epoch": 0.25, + "learning_rate": 1.9914631704467672e-05, + "loss": 2.2356, + "step": 1207 + }, + { + "epoch": 0.25, + "learning_rate": 1.991440633429836e-05, + "loss": 2.1925, + "step": 1208 + }, + { + "epoch": 0.25, + "learning_rate": 1.991418066831388e-05, + "loss": 2.1895, + "step": 1209 + }, + { + "epoch": 0.25, + "learning_rate": 1.991395470652096e-05, + "loss": 2.203, + "step": 1210 + }, + { + "epoch": 0.25, + "learning_rate": 1.9913728448926343e-05, + "loss": 2.2981, + "step": 1211 + }, + { + "epoch": 0.25, + "learning_rate": 1.991350189553678e-05, + "loss": 2.1897, + "step": 1212 + }, + { + "epoch": 0.25, + "learning_rate": 1.9913275046359028e-05, + "loss": 2.1588, + "step": 1213 + }, + { + "epoch": 0.25, + "learning_rate": 1.9913047901399854e-05, + "loss": 2.1098, + "step": 1214 + }, + { + "epoch": 0.25, + "learning_rate": 1.9912820460666046e-05, + "loss": 2.0784, + "step": 1215 + }, + { + "epoch": 0.25, + "learning_rate": 1.991259272416438e-05, + "loss": 2.219, + "step": 1216 + }, + { + "epoch": 0.25, + "learning_rate": 1.991236469190165e-05, + "loss": 2.2025, + "step": 1217 + }, + { + "epoch": 0.25, + "learning_rate": 1.9912136363884667e-05, + "loss": 2.1206, + "step": 1218 + }, + { + "epoch": 0.25, + "learning_rate": 1.991190774012024e-05, + "loss": 2.1913, + "step": 1219 + }, + { + "epoch": 0.25, + "learning_rate": 1.991167882061519e-05, + "loss": 2.1057, + "step": 1220 + }, + { + "epoch": 0.25, + "learning_rate": 1.9911449605376346e-05, + "loss": 2.1827, + "step": 1221 + }, + { + "epoch": 0.25, + "learning_rate": 1.991122009441055e-05, + "loss": 2.1722, + "step": 1222 + }, + { + "epoch": 0.25, + "learning_rate": 1.9910990287724653e-05, + "loss": 2.179, + "step": 1223 + }, + { + "epoch": 0.25, + "learning_rate": 1.9910760185325503e-05, + "loss": 2.2178, + "step": 1224 + }, + { + "epoch": 0.25, + "learning_rate": 1.991052978721997e-05, + "loss": 2.1327, + "step": 1225 + }, + { + "epoch": 0.25, + "learning_rate": 1.991029909341493e-05, + "loss": 2.1935, + "step": 1226 + }, + { + "epoch": 0.25, + "learning_rate": 1.9910068103917265e-05, + "loss": 2.1311, + "step": 1227 + }, + { + "epoch": 0.25, + "learning_rate": 1.9909836818733863e-05, + "loss": 2.2271, + "step": 1228 + }, + { + "epoch": 0.25, + "learning_rate": 1.990960523787163e-05, + "loss": 2.1509, + "step": 1229 + }, + { + "epoch": 0.25, + "learning_rate": 1.9909373361337475e-05, + "loss": 2.1672, + "step": 1230 + }, + { + "epoch": 0.25, + "learning_rate": 1.9909141189138318e-05, + "loss": 2.2464, + "step": 1231 + }, + { + "epoch": 0.25, + "learning_rate": 1.990890872128108e-05, + "loss": 2.2297, + "step": 1232 + }, + { + "epoch": 0.25, + "learning_rate": 1.99086759577727e-05, + "loss": 2.1583, + "step": 1233 + }, + { + "epoch": 0.25, + "learning_rate": 1.990844289862013e-05, + "loss": 2.1651, + "step": 1234 + }, + { + "epoch": 0.25, + "learning_rate": 1.9908209543830312e-05, + "loss": 2.187, + "step": 1235 + }, + { + "epoch": 0.25, + "learning_rate": 1.9907975893410215e-05, + "loss": 2.1267, + "step": 1236 + }, + { + "epoch": 0.25, + "learning_rate": 1.9907741947366813e-05, + "loss": 2.1141, + "step": 1237 + }, + { + "epoch": 0.25, + "learning_rate": 1.990750770570708e-05, + "loss": 2.2107, + "step": 1238 + }, + { + "epoch": 0.25, + "learning_rate": 1.990727316843801e-05, + "loss": 2.2189, + "step": 1239 + }, + { + "epoch": 0.25, + "learning_rate": 1.9907038335566595e-05, + "loss": 2.2162, + "step": 1240 + }, + { + "epoch": 0.25, + "learning_rate": 1.9906803207099846e-05, + "loss": 2.1877, + "step": 1241 + }, + { + "epoch": 0.25, + "learning_rate": 1.9906567783044785e-05, + "loss": 2.1985, + "step": 1242 + }, + { + "epoch": 0.25, + "learning_rate": 1.9906332063408423e-05, + "loss": 2.2019, + "step": 1243 + }, + { + "epoch": 0.25, + "learning_rate": 1.99060960481978e-05, + "loss": 2.1903, + "step": 1244 + }, + { + "epoch": 0.25, + "learning_rate": 1.990585973741996e-05, + "loss": 2.1471, + "step": 1245 + }, + { + "epoch": 0.25, + "learning_rate": 1.9905623131081944e-05, + "loss": 2.2057, + "step": 1246 + }, + { + "epoch": 0.25, + "learning_rate": 1.9905386229190825e-05, + "loss": 2.2372, + "step": 1247 + }, + { + "epoch": 0.25, + "learning_rate": 1.9905149031753667e-05, + "loss": 2.1827, + "step": 1248 + }, + { + "epoch": 0.25, + "learning_rate": 1.990491153877754e-05, + "loss": 2.1928, + "step": 1249 + }, + { + "epoch": 0.25, + "learning_rate": 1.990467375026954e-05, + "loss": 2.2138, + "step": 1250 + }, + { + "epoch": 0.25, + "learning_rate": 1.9904435666236754e-05, + "loss": 2.1924, + "step": 1251 + }, + { + "epoch": 0.25, + "learning_rate": 1.990419728668629e-05, + "loss": 2.179, + "step": 1252 + }, + { + "epoch": 0.25, + "learning_rate": 1.9903958611625262e-05, + "loss": 2.1899, + "step": 1253 + }, + { + "epoch": 0.25, + "learning_rate": 1.9903719641060788e-05, + "loss": 2.1468, + "step": 1254 + }, + { + "epoch": 0.25, + "learning_rate": 1.9903480374999997e-05, + "loss": 2.0806, + "step": 1255 + }, + { + "epoch": 0.25, + "learning_rate": 1.990324081345003e-05, + "loss": 2.1585, + "step": 1256 + }, + { + "epoch": 0.26, + "learning_rate": 1.9903000956418036e-05, + "loss": 2.1765, + "step": 1257 + }, + { + "epoch": 0.26, + "learning_rate": 1.9902760803911173e-05, + "loss": 2.2464, + "step": 1258 + }, + { + "epoch": 0.26, + "learning_rate": 1.99025203559366e-05, + "loss": 2.1587, + "step": 1259 + }, + { + "epoch": 0.26, + "learning_rate": 1.9902279612501494e-05, + "loss": 2.1831, + "step": 1260 + }, + { + "epoch": 0.26, + "learning_rate": 1.9902038573613044e-05, + "loss": 2.2247, + "step": 1261 + }, + { + "epoch": 0.26, + "learning_rate": 1.990179723927843e-05, + "loss": 2.1769, + "step": 1262 + }, + { + "epoch": 0.26, + "learning_rate": 1.9901555609504864e-05, + "loss": 2.1234, + "step": 1263 + }, + { + "epoch": 0.26, + "learning_rate": 1.9901313684299552e-05, + "loss": 2.1567, + "step": 1264 + }, + { + "epoch": 0.26, + "learning_rate": 1.9901071463669714e-05, + "loss": 2.1848, + "step": 1265 + }, + { + "epoch": 0.26, + "learning_rate": 1.9900828947622566e-05, + "loss": 2.2185, + "step": 1266 + }, + { + "epoch": 0.26, + "learning_rate": 1.990058613616536e-05, + "loss": 2.1793, + "step": 1267 + }, + { + "epoch": 0.26, + "learning_rate": 1.9900343029305328e-05, + "loss": 2.0502, + "step": 1268 + }, + { + "epoch": 0.26, + "learning_rate": 1.990009962704973e-05, + "loss": 2.2185, + "step": 1269 + }, + { + "epoch": 0.26, + "learning_rate": 1.989985592940583e-05, + "loss": 2.1779, + "step": 1270 + }, + { + "epoch": 0.26, + "learning_rate": 1.989961193638089e-05, + "loss": 2.2094, + "step": 1271 + }, + { + "epoch": 0.26, + "learning_rate": 1.98993676479822e-05, + "loss": 2.2062, + "step": 1272 + }, + { + "epoch": 0.26, + "learning_rate": 1.9899123064217048e-05, + "loss": 2.178, + "step": 1273 + }, + { + "epoch": 0.26, + "learning_rate": 1.9898878185092725e-05, + "loss": 2.242, + "step": 1274 + }, + { + "epoch": 0.26, + "learning_rate": 1.989863301061654e-05, + "loss": 2.1367, + "step": 1275 + }, + { + "epoch": 0.26, + "learning_rate": 1.9898387540795815e-05, + "loss": 2.177, + "step": 1276 + }, + { + "epoch": 0.26, + "learning_rate": 1.9898141775637862e-05, + "loss": 2.2076, + "step": 1277 + }, + { + "epoch": 0.26, + "learning_rate": 1.9897895715150023e-05, + "loss": 2.1814, + "step": 1278 + }, + { + "epoch": 0.26, + "learning_rate": 1.9897649359339638e-05, + "loss": 2.1598, + "step": 1279 + }, + { + "epoch": 0.26, + "learning_rate": 1.9897402708214055e-05, + "loss": 2.2444, + "step": 1280 + }, + { + "epoch": 0.26, + "learning_rate": 1.9897155761780636e-05, + "loss": 2.1814, + "step": 1281 + }, + { + "epoch": 0.26, + "learning_rate": 1.9896908520046745e-05, + "loss": 2.17, + "step": 1282 + }, + { + "epoch": 0.26, + "learning_rate": 1.9896660983019763e-05, + "loss": 2.1819, + "step": 1283 + }, + { + "epoch": 0.26, + "learning_rate": 1.9896413150707077e-05, + "loss": 2.2127, + "step": 1284 + }, + { + "epoch": 0.26, + "learning_rate": 1.989616502311608e-05, + "loss": 2.1519, + "step": 1285 + }, + { + "epoch": 0.26, + "learning_rate": 1.989591660025417e-05, + "loss": 2.1906, + "step": 1286 + }, + { + "epoch": 0.26, + "learning_rate": 1.9895667882128764e-05, + "loss": 2.1565, + "step": 1287 + }, + { + "epoch": 0.26, + "learning_rate": 1.9895418868747286e-05, + "loss": 2.1735, + "step": 1288 + }, + { + "epoch": 0.26, + "learning_rate": 1.989516956011716e-05, + "loss": 2.1594, + "step": 1289 + }, + { + "epoch": 0.26, + "learning_rate": 1.9894919956245825e-05, + "loss": 2.1997, + "step": 1290 + }, + { + "epoch": 0.26, + "learning_rate": 1.9894670057140733e-05, + "loss": 2.1411, + "step": 1291 + }, + { + "epoch": 0.26, + "learning_rate": 1.9894419862809338e-05, + "loss": 2.1975, + "step": 1292 + }, + { + "epoch": 0.26, + "learning_rate": 1.9894169373259104e-05, + "loss": 2.2415, + "step": 1293 + }, + { + "epoch": 0.26, + "learning_rate": 1.9893918588497504e-05, + "loss": 2.2611, + "step": 1294 + }, + { + "epoch": 0.26, + "learning_rate": 1.9893667508532024e-05, + "loss": 2.158, + "step": 1295 + }, + { + "epoch": 0.26, + "learning_rate": 1.9893416133370154e-05, + "loss": 2.15, + "step": 1296 + }, + { + "epoch": 0.26, + "learning_rate": 1.9893164463019393e-05, + "loss": 2.1948, + "step": 1297 + }, + { + "epoch": 0.26, + "learning_rate": 1.989291249748725e-05, + "loss": 2.1619, + "step": 1298 + }, + { + "epoch": 0.26, + "learning_rate": 1.9892660236781244e-05, + "loss": 2.1389, + "step": 1299 + }, + { + "epoch": 0.26, + "learning_rate": 1.9892407680908904e-05, + "loss": 2.1131, + "step": 1300 + }, + { + "epoch": 0.26, + "learning_rate": 1.989215482987776e-05, + "loss": 2.1024, + "step": 1301 + }, + { + "epoch": 0.26, + "learning_rate": 1.989190168369536e-05, + "loss": 2.1919, + "step": 1302 + }, + { + "epoch": 0.26, + "learning_rate": 1.9891648242369258e-05, + "loss": 2.2827, + "step": 1303 + }, + { + "epoch": 0.26, + "learning_rate": 1.989139450590701e-05, + "loss": 2.1613, + "step": 1304 + }, + { + "epoch": 0.26, + "learning_rate": 1.9891140474316197e-05, + "loss": 2.1736, + "step": 1305 + }, + { + "epoch": 0.27, + "learning_rate": 1.9890886147604386e-05, + "loss": 2.2151, + "step": 1306 + }, + { + "epoch": 0.27, + "learning_rate": 1.9890631525779175e-05, + "loss": 2.1756, + "step": 1307 + }, + { + "epoch": 0.27, + "learning_rate": 1.9890376608848156e-05, + "loss": 2.1758, + "step": 1308 + }, + { + "epoch": 0.27, + "learning_rate": 1.989012139681894e-05, + "loss": 2.2174, + "step": 1309 + }, + { + "epoch": 0.27, + "learning_rate": 1.9889865889699137e-05, + "loss": 2.202, + "step": 1310 + }, + { + "epoch": 0.27, + "learning_rate": 1.988961008749637e-05, + "loss": 2.1957, + "step": 1311 + }, + { + "epoch": 0.27, + "learning_rate": 1.9889353990218274e-05, + "loss": 2.3097, + "step": 1312 + }, + { + "epoch": 0.27, + "learning_rate": 1.988909759787249e-05, + "loss": 2.1867, + "step": 1313 + }, + { + "epoch": 0.27, + "learning_rate": 1.988884091046667e-05, + "loss": 2.2631, + "step": 1314 + }, + { + "epoch": 0.27, + "learning_rate": 1.988858392800847e-05, + "loss": 2.2217, + "step": 1315 + }, + { + "epoch": 0.27, + "learning_rate": 1.9888326650505554e-05, + "loss": 2.1539, + "step": 1316 + }, + { + "epoch": 0.27, + "learning_rate": 1.9888069077965607e-05, + "loss": 2.1391, + "step": 1317 + }, + { + "epoch": 0.27, + "learning_rate": 1.9887811210396302e-05, + "loss": 2.2024, + "step": 1318 + }, + { + "epoch": 0.27, + "learning_rate": 1.9887553047805347e-05, + "loss": 2.242, + "step": 1319 + }, + { + "epoch": 0.27, + "learning_rate": 1.9887294590200437e-05, + "loss": 2.2472, + "step": 1320 + }, + { + "epoch": 0.27, + "learning_rate": 1.9887035837589286e-05, + "loss": 2.2132, + "step": 1321 + }, + { + "epoch": 0.27, + "learning_rate": 1.988677678997961e-05, + "loss": 2.0944, + "step": 1322 + }, + { + "epoch": 0.27, + "learning_rate": 1.988651744737914e-05, + "loss": 2.109, + "step": 1323 + }, + { + "epoch": 0.27, + "learning_rate": 1.988625780979562e-05, + "loss": 2.1919, + "step": 1324 + }, + { + "epoch": 0.27, + "learning_rate": 1.9885997877236788e-05, + "loss": 2.201, + "step": 1325 + }, + { + "epoch": 0.27, + "learning_rate": 1.9885737649710405e-05, + "loss": 2.1574, + "step": 1326 + }, + { + "epoch": 0.27, + "learning_rate": 1.9885477127224235e-05, + "loss": 2.1709, + "step": 1327 + }, + { + "epoch": 0.27, + "learning_rate": 1.9885216309786052e-05, + "loss": 2.2112, + "step": 1328 + }, + { + "epoch": 0.27, + "learning_rate": 1.988495519740363e-05, + "loss": 2.1264, + "step": 1329 + }, + { + "epoch": 0.27, + "learning_rate": 1.988469379008477e-05, + "loss": 2.2261, + "step": 1330 + }, + { + "epoch": 0.27, + "learning_rate": 1.9884432087837266e-05, + "loss": 2.2437, + "step": 1331 + }, + { + "epoch": 0.27, + "learning_rate": 1.9884170090668927e-05, + "loss": 2.1234, + "step": 1332 + }, + { + "epoch": 0.27, + "learning_rate": 1.9883907798587576e-05, + "loss": 2.2263, + "step": 1333 + }, + { + "epoch": 0.27, + "learning_rate": 1.988364521160103e-05, + "loss": 2.1325, + "step": 1334 + }, + { + "epoch": 0.27, + "learning_rate": 1.988338232971713e-05, + "loss": 2.1966, + "step": 1335 + }, + { + "epoch": 0.27, + "learning_rate": 1.9883119152943716e-05, + "loss": 2.2021, + "step": 1336 + }, + { + "epoch": 0.27, + "learning_rate": 1.988285568128864e-05, + "loss": 2.2432, + "step": 1337 + }, + { + "epoch": 0.27, + "learning_rate": 1.9882591914759767e-05, + "loss": 2.1964, + "step": 1338 + }, + { + "epoch": 0.27, + "learning_rate": 1.9882327853364964e-05, + "loss": 2.2133, + "step": 1339 + }, + { + "epoch": 0.27, + "learning_rate": 1.9882063497112113e-05, + "loss": 2.1205, + "step": 1340 + }, + { + "epoch": 0.27, + "learning_rate": 1.9881798846009096e-05, + "loss": 2.1766, + "step": 1341 + }, + { + "epoch": 0.27, + "learning_rate": 1.9881533900063816e-05, + "loss": 2.1621, + "step": 1342 + }, + { + "epoch": 0.27, + "learning_rate": 1.9881268659284176e-05, + "loss": 2.1677, + "step": 1343 + }, + { + "epoch": 0.27, + "learning_rate": 1.9881003123678086e-05, + "loss": 2.139, + "step": 1344 + }, + { + "epoch": 0.27, + "learning_rate": 1.9880737293253468e-05, + "loss": 2.2429, + "step": 1345 + }, + { + "epoch": 0.27, + "learning_rate": 1.988047116801826e-05, + "loss": 2.2103, + "step": 1346 + }, + { + "epoch": 0.27, + "learning_rate": 1.9880204747980404e-05, + "loss": 2.183, + "step": 1347 + }, + { + "epoch": 0.27, + "learning_rate": 1.9879938033147838e-05, + "loss": 2.1964, + "step": 1348 + }, + { + "epoch": 0.27, + "learning_rate": 1.9879671023528528e-05, + "loss": 2.1625, + "step": 1349 + }, + { + "epoch": 0.27, + "learning_rate": 1.987940371913044e-05, + "loss": 2.1435, + "step": 1350 + }, + { + "epoch": 0.27, + "learning_rate": 1.9879136119961552e-05, + "loss": 2.1962, + "step": 1351 + }, + { + "epoch": 0.27, + "learning_rate": 1.987886822602984e-05, + "loss": 2.1589, + "step": 1352 + }, + { + "epoch": 0.27, + "learning_rate": 1.9878600037343307e-05, + "loss": 2.1121, + "step": 1353 + }, + { + "epoch": 0.27, + "learning_rate": 1.987833155390995e-05, + "loss": 2.1929, + "step": 1354 + }, + { + "epoch": 0.28, + "learning_rate": 1.9878062775737776e-05, + "loss": 2.1475, + "step": 1355 + }, + { + "epoch": 0.28, + "learning_rate": 1.9877793702834813e-05, + "loss": 2.1818, + "step": 1356 + }, + { + "epoch": 0.28, + "learning_rate": 1.987752433520908e-05, + "loss": 2.2374, + "step": 1357 + }, + { + "epoch": 0.28, + "learning_rate": 1.987725467286862e-05, + "loss": 2.2983, + "step": 1358 + }, + { + "epoch": 0.28, + "learning_rate": 1.987698471582148e-05, + "loss": 2.1433, + "step": 1359 + }, + { + "epoch": 0.28, + "learning_rate": 1.9876714464075712e-05, + "loss": 2.1967, + "step": 1360 + }, + { + "epoch": 0.28, + "learning_rate": 1.987644391763938e-05, + "loss": 2.124, + "step": 1361 + }, + { + "epoch": 0.28, + "learning_rate": 1.9876173076520554e-05, + "loss": 2.1592, + "step": 1362 + }, + { + "epoch": 0.28, + "learning_rate": 1.9875901940727317e-05, + "loss": 2.1835, + "step": 1363 + }, + { + "epoch": 0.28, + "learning_rate": 1.987563051026776e-05, + "loss": 2.1701, + "step": 1364 + }, + { + "epoch": 0.28, + "learning_rate": 1.9875358785149982e-05, + "loss": 2.1154, + "step": 1365 + }, + { + "epoch": 0.28, + "learning_rate": 1.987508676538209e-05, + "loss": 2.2182, + "step": 1366 + }, + { + "epoch": 0.28, + "learning_rate": 1.9874814450972196e-05, + "loss": 2.1338, + "step": 1367 + }, + { + "epoch": 0.28, + "learning_rate": 1.9874541841928428e-05, + "loss": 2.1199, + "step": 1368 + }, + { + "epoch": 0.28, + "learning_rate": 1.9874268938258924e-05, + "loss": 2.1623, + "step": 1369 + }, + { + "epoch": 0.28, + "learning_rate": 1.987399573997182e-05, + "loss": 2.2023, + "step": 1370 + }, + { + "epoch": 0.28, + "learning_rate": 1.987372224707527e-05, + "loss": 2.1985, + "step": 1371 + }, + { + "epoch": 0.28, + "learning_rate": 1.9873448459577437e-05, + "loss": 2.113, + "step": 1372 + }, + { + "epoch": 0.28, + "learning_rate": 1.9873174377486488e-05, + "loss": 2.1721, + "step": 1373 + }, + { + "epoch": 0.28, + "learning_rate": 1.9872900000810594e-05, + "loss": 2.22, + "step": 1374 + }, + { + "epoch": 0.28, + "learning_rate": 1.9872625329557957e-05, + "loss": 2.2359, + "step": 1375 + }, + { + "epoch": 0.28, + "learning_rate": 1.9872350363736754e-05, + "loss": 2.2238, + "step": 1376 + }, + { + "epoch": 0.28, + "learning_rate": 1.98720751033552e-05, + "loss": 2.1541, + "step": 1377 + }, + { + "epoch": 0.28, + "learning_rate": 1.987179954842151e-05, + "loss": 2.137, + "step": 1378 + }, + { + "epoch": 0.28, + "learning_rate": 1.98715236989439e-05, + "loss": 2.2024, + "step": 1379 + }, + { + "epoch": 0.28, + "learning_rate": 1.98712475549306e-05, + "loss": 2.1243, + "step": 1380 + }, + { + "epoch": 0.28, + "learning_rate": 1.9870971116389853e-05, + "loss": 2.2227, + "step": 1381 + }, + { + "epoch": 0.28, + "learning_rate": 1.9870694383329906e-05, + "loss": 2.1379, + "step": 1382 + }, + { + "epoch": 0.28, + "learning_rate": 1.9870417355759017e-05, + "loss": 2.0727, + "step": 1383 + }, + { + "epoch": 0.28, + "learning_rate": 1.9870140033685444e-05, + "loss": 2.1062, + "step": 1384 + }, + { + "epoch": 0.28, + "learning_rate": 1.9869862417117475e-05, + "loss": 2.1393, + "step": 1385 + }, + { + "epoch": 0.28, + "learning_rate": 1.986958450606338e-05, + "loss": 2.1735, + "step": 1386 + }, + { + "epoch": 0.28, + "learning_rate": 1.986930630053146e-05, + "loss": 2.2379, + "step": 1387 + }, + { + "epoch": 0.28, + "learning_rate": 1.986902780053001e-05, + "loss": 2.1765, + "step": 1388 + }, + { + "epoch": 0.28, + "learning_rate": 1.9868749006067343e-05, + "loss": 2.1555, + "step": 1389 + }, + { + "epoch": 0.28, + "learning_rate": 1.986846991715178e-05, + "loss": 2.1796, + "step": 1390 + }, + { + "epoch": 0.28, + "learning_rate": 1.9868190533791642e-05, + "loss": 2.3083, + "step": 1391 + }, + { + "epoch": 0.28, + "learning_rate": 1.9867910855995268e-05, + "loss": 2.2135, + "step": 1392 + }, + { + "epoch": 0.28, + "learning_rate": 1.9867630883771004e-05, + "loss": 2.2227, + "step": 1393 + }, + { + "epoch": 0.28, + "learning_rate": 1.9867350617127198e-05, + "loss": 2.222, + "step": 1394 + }, + { + "epoch": 0.28, + "learning_rate": 1.9867070056072215e-05, + "loss": 2.105, + "step": 1395 + }, + { + "epoch": 0.28, + "learning_rate": 1.986678920061443e-05, + "loss": 2.1915, + "step": 1396 + }, + { + "epoch": 0.28, + "learning_rate": 1.9866508050762223e-05, + "loss": 2.2014, + "step": 1397 + }, + { + "epoch": 0.28, + "learning_rate": 1.9866226606523974e-05, + "loss": 2.187, + "step": 1398 + }, + { + "epoch": 0.28, + "learning_rate": 1.986594486790809e-05, + "loss": 2.1796, + "step": 1399 + }, + { + "epoch": 0.28, + "learning_rate": 1.986566283492297e-05, + "loss": 2.1631, + "step": 1400 + }, + { + "epoch": 0.28, + "learning_rate": 1.9865380507577033e-05, + "loss": 2.1762, + "step": 1401 + }, + { + "epoch": 0.28, + "learning_rate": 1.9865097885878704e-05, + "loss": 2.2226, + "step": 1402 + }, + { + "epoch": 0.28, + "learning_rate": 1.9864814969836412e-05, + "loss": 2.2239, + "step": 1403 + }, + { + "epoch": 0.28, + "learning_rate": 1.98645317594586e-05, + "loss": 2.234, + "step": 1404 + }, + { + "epoch": 0.29, + "learning_rate": 1.9864248254753717e-05, + "loss": 2.2032, + "step": 1405 + }, + { + "epoch": 0.29, + "learning_rate": 1.9863964455730227e-05, + "loss": 2.1349, + "step": 1406 + }, + { + "epoch": 0.29, + "learning_rate": 1.986368036239659e-05, + "loss": 2.18, + "step": 1407 + }, + { + "epoch": 0.29, + "learning_rate": 1.9863395974761287e-05, + "loss": 2.1485, + "step": 1408 + }, + { + "epoch": 0.29, + "learning_rate": 1.98631112928328e-05, + "loss": 2.2243, + "step": 1409 + }, + { + "epoch": 0.29, + "learning_rate": 1.986282631661963e-05, + "loss": 2.134, + "step": 1410 + }, + { + "epoch": 0.29, + "learning_rate": 1.9862541046130272e-05, + "loss": 2.192, + "step": 1411 + }, + { + "epoch": 0.29, + "learning_rate": 1.9862255481373244e-05, + "loss": 2.2923, + "step": 1412 + }, + { + "epoch": 0.29, + "learning_rate": 1.986196962235706e-05, + "loss": 2.0886, + "step": 1413 + }, + { + "epoch": 0.29, + "learning_rate": 1.9861683469090255e-05, + "loss": 2.2112, + "step": 1414 + }, + { + "epoch": 0.29, + "learning_rate": 1.986139702158136e-05, + "loss": 2.1671, + "step": 1415 + }, + { + "epoch": 0.29, + "learning_rate": 1.986111027983893e-05, + "loss": 2.211, + "step": 1416 + }, + { + "epoch": 0.29, + "learning_rate": 1.9860823243871513e-05, + "loss": 2.1082, + "step": 1417 + }, + { + "epoch": 0.29, + "learning_rate": 1.9860535913687677e-05, + "loss": 2.1944, + "step": 1418 + }, + { + "epoch": 0.29, + "learning_rate": 1.9860248289295997e-05, + "loss": 2.2208, + "step": 1419 + }, + { + "epoch": 0.29, + "learning_rate": 1.985996037070505e-05, + "loss": 2.1442, + "step": 1420 + }, + { + "epoch": 0.29, + "learning_rate": 1.9859672157923434e-05, + "loss": 2.2169, + "step": 1421 + }, + { + "epoch": 0.29, + "learning_rate": 1.9859383650959736e-05, + "loss": 2.1526, + "step": 1422 + }, + { + "epoch": 0.29, + "learning_rate": 1.9859094849822577e-05, + "loss": 2.1701, + "step": 1423 + }, + { + "epoch": 0.29, + "learning_rate": 1.985880575452057e-05, + "loss": 2.1283, + "step": 1424 + }, + { + "epoch": 0.29, + "learning_rate": 1.9858516365062334e-05, + "loss": 2.0893, + "step": 1425 + }, + { + "epoch": 0.29, + "learning_rate": 1.9858226681456512e-05, + "loss": 2.1516, + "step": 1426 + }, + { + "epoch": 0.29, + "learning_rate": 1.9857936703711744e-05, + "loss": 2.193, + "step": 1427 + }, + { + "epoch": 0.29, + "learning_rate": 1.985764643183668e-05, + "loss": 2.1404, + "step": 1428 + }, + { + "epoch": 0.29, + "learning_rate": 1.9857355865839983e-05, + "loss": 2.2174, + "step": 1429 + }, + { + "epoch": 0.29, + "learning_rate": 1.9857065005730325e-05, + "loss": 2.2085, + "step": 1430 + }, + { + "epoch": 0.29, + "learning_rate": 1.9856773851516383e-05, + "loss": 2.2075, + "step": 1431 + }, + { + "epoch": 0.29, + "learning_rate": 1.985648240320684e-05, + "loss": 2.1909, + "step": 1432 + }, + { + "epoch": 0.29, + "learning_rate": 1.9856190660810393e-05, + "loss": 2.2353, + "step": 1433 + }, + { + "epoch": 0.29, + "learning_rate": 1.9855898624335753e-05, + "loss": 2.1977, + "step": 1434 + }, + { + "epoch": 0.29, + "learning_rate": 1.9855606293791626e-05, + "loss": 2.1041, + "step": 1435 + }, + { + "epoch": 0.29, + "learning_rate": 1.9855313669186737e-05, + "loss": 2.2555, + "step": 1436 + }, + { + "epoch": 0.29, + "learning_rate": 1.985502075052982e-05, + "loss": 2.1266, + "step": 1437 + }, + { + "epoch": 0.29, + "learning_rate": 1.9854727537829613e-05, + "loss": 2.2276, + "step": 1438 + }, + { + "epoch": 0.29, + "learning_rate": 1.985443403109486e-05, + "loss": 2.1241, + "step": 1439 + }, + { + "epoch": 0.29, + "learning_rate": 1.9854140230334323e-05, + "loss": 2.1961, + "step": 1440 + }, + { + "epoch": 0.29, + "learning_rate": 1.985384613555677e-05, + "loss": 2.0848, + "step": 1441 + }, + { + "epoch": 0.29, + "learning_rate": 1.9853551746770967e-05, + "loss": 2.0853, + "step": 1442 + }, + { + "epoch": 0.29, + "learning_rate": 1.985325706398571e-05, + "loss": 2.0933, + "step": 1443 + }, + { + "epoch": 0.29, + "learning_rate": 1.985296208720978e-05, + "loss": 2.1802, + "step": 1444 + }, + { + "epoch": 0.29, + "learning_rate": 1.9852666816451986e-05, + "loss": 2.2131, + "step": 1445 + }, + { + "epoch": 0.29, + "learning_rate": 1.9852371251721133e-05, + "loss": 2.1947, + "step": 1446 + }, + { + "epoch": 0.29, + "learning_rate": 1.9852075393026043e-05, + "loss": 2.2247, + "step": 1447 + }, + { + "epoch": 0.29, + "learning_rate": 1.9851779240375544e-05, + "loss": 2.1727, + "step": 1448 + }, + { + "epoch": 0.29, + "learning_rate": 1.985148279377847e-05, + "loss": 2.1967, + "step": 1449 + }, + { + "epoch": 0.29, + "learning_rate": 1.9851186053243667e-05, + "loss": 2.1573, + "step": 1450 + }, + { + "epoch": 0.29, + "learning_rate": 1.9850889018779987e-05, + "loss": 2.2058, + "step": 1451 + }, + { + "epoch": 0.29, + "learning_rate": 1.9850591690396296e-05, + "loss": 2.1021, + "step": 1452 + }, + { + "epoch": 0.29, + "learning_rate": 1.985029406810146e-05, + "loss": 2.171, + "step": 1453 + }, + { + "epoch": 0.3, + "learning_rate": 1.984999615190437e-05, + "loss": 2.2791, + "step": 1454 + }, + { + "epoch": 0.3, + "learning_rate": 1.98496979418139e-05, + "loss": 2.2247, + "step": 1455 + }, + { + "epoch": 0.3, + "learning_rate": 1.9849399437838958e-05, + "loss": 2.2118, + "step": 1456 + }, + { + "epoch": 0.3, + "learning_rate": 1.9849100639988448e-05, + "loss": 2.1081, + "step": 1457 + }, + { + "epoch": 0.3, + "learning_rate": 1.9848801548271282e-05, + "loss": 2.1582, + "step": 1458 + }, + { + "epoch": 0.3, + "learning_rate": 1.984850216269639e-05, + "loss": 2.1092, + "step": 1459 + }, + { + "epoch": 0.3, + "learning_rate": 1.98482024832727e-05, + "loss": 2.1853, + "step": 1460 + }, + { + "epoch": 0.3, + "learning_rate": 1.9847902510009155e-05, + "loss": 2.1993, + "step": 1461 + }, + { + "epoch": 0.3, + "learning_rate": 1.984760224291471e-05, + "loss": 2.1863, + "step": 1462 + }, + { + "epoch": 0.3, + "learning_rate": 1.9847301681998313e-05, + "loss": 2.2079, + "step": 1463 + }, + { + "epoch": 0.3, + "learning_rate": 1.984700082726894e-05, + "loss": 2.169, + "step": 1464 + }, + { + "epoch": 0.3, + "learning_rate": 1.984669967873557e-05, + "loss": 2.2086, + "step": 1465 + }, + { + "epoch": 0.3, + "learning_rate": 1.9846398236407178e-05, + "loss": 2.1661, + "step": 1466 + }, + { + "epoch": 0.3, + "learning_rate": 1.9846096500292765e-05, + "loss": 2.2603, + "step": 1467 + }, + { + "epoch": 0.3, + "learning_rate": 1.9845794470401335e-05, + "loss": 2.2417, + "step": 1468 + }, + { + "epoch": 0.3, + "learning_rate": 1.9845492146741902e-05, + "loss": 2.1485, + "step": 1469 + }, + { + "epoch": 0.3, + "learning_rate": 1.9845189529323473e-05, + "loss": 2.2261, + "step": 1470 + }, + { + "epoch": 0.3, + "learning_rate": 1.9844886618155092e-05, + "loss": 2.1731, + "step": 1471 + }, + { + "epoch": 0.3, + "learning_rate": 1.984458341324579e-05, + "loss": 2.1617, + "step": 1472 + }, + { + "epoch": 0.3, + "learning_rate": 1.9844279914604616e-05, + "loss": 2.2008, + "step": 1473 + }, + { + "epoch": 0.3, + "learning_rate": 1.9843976122240623e-05, + "loss": 2.1749, + "step": 1474 + }, + { + "epoch": 0.3, + "learning_rate": 1.9843672036162877e-05, + "loss": 2.1599, + "step": 1475 + }, + { + "epoch": 0.3, + "learning_rate": 1.984336765638045e-05, + "loss": 2.1434, + "step": 1476 + }, + { + "epoch": 0.3, + "learning_rate": 1.9843062982902425e-05, + "loss": 2.1506, + "step": 1477 + }, + { + "epoch": 0.3, + "learning_rate": 1.984275801573789e-05, + "loss": 2.1926, + "step": 1478 + }, + { + "epoch": 0.3, + "learning_rate": 1.984245275489595e-05, + "loss": 2.2516, + "step": 1479 + }, + { + "epoch": 0.3, + "learning_rate": 1.9842147200385708e-05, + "loss": 2.1813, + "step": 1480 + }, + { + "epoch": 0.3, + "learning_rate": 1.984184135221628e-05, + "loss": 2.151, + "step": 1481 + }, + { + "epoch": 0.3, + "learning_rate": 1.9841535210396793e-05, + "loss": 2.1467, + "step": 1482 + }, + { + "epoch": 0.3, + "learning_rate": 1.984122877493638e-05, + "loss": 2.1551, + "step": 1483 + }, + { + "epoch": 0.3, + "learning_rate": 1.9840922045844193e-05, + "loss": 2.2486, + "step": 1484 + }, + { + "epoch": 0.3, + "learning_rate": 1.9840615023129372e-05, + "loss": 2.1718, + "step": 1485 + }, + { + "epoch": 0.3, + "learning_rate": 1.9840307706801082e-05, + "loss": 2.1714, + "step": 1486 + }, + { + "epoch": 0.3, + "learning_rate": 1.9840000096868497e-05, + "loss": 2.28, + "step": 1487 + }, + { + "epoch": 0.3, + "learning_rate": 1.9839692193340787e-05, + "loss": 2.1922, + "step": 1488 + }, + { + "epoch": 0.3, + "learning_rate": 1.9839383996227144e-05, + "loss": 2.1842, + "step": 1489 + }, + { + "epoch": 0.3, + "learning_rate": 1.983907550553676e-05, + "loss": 2.1955, + "step": 1490 + }, + { + "epoch": 0.3, + "learning_rate": 1.9838766721278845e-05, + "loss": 2.1581, + "step": 1491 + }, + { + "epoch": 0.3, + "learning_rate": 1.9838457643462608e-05, + "loss": 2.1323, + "step": 1492 + }, + { + "epoch": 0.3, + "learning_rate": 1.983814827209727e-05, + "loss": 2.2256, + "step": 1493 + }, + { + "epoch": 0.3, + "learning_rate": 1.9837838607192067e-05, + "loss": 2.1865, + "step": 1494 + }, + { + "epoch": 0.3, + "learning_rate": 1.9837528648756234e-05, + "loss": 2.1123, + "step": 1495 + }, + { + "epoch": 0.3, + "learning_rate": 1.9837218396799018e-05, + "loss": 2.2019, + "step": 1496 + }, + { + "epoch": 0.3, + "learning_rate": 1.983690785132968e-05, + "loss": 2.1456, + "step": 1497 + }, + { + "epoch": 0.3, + "learning_rate": 1.983659701235748e-05, + "loss": 2.1006, + "step": 1498 + }, + { + "epoch": 0.3, + "learning_rate": 1.98362858798917e-05, + "loss": 2.1593, + "step": 1499 + }, + { + "epoch": 0.3, + "learning_rate": 1.9835974453941623e-05, + "loss": 2.2389, + "step": 1500 + }, + { + "epoch": 0.3, + "learning_rate": 1.9835662734516533e-05, + "loss": 2.2111, + "step": 1501 + }, + { + "epoch": 0.3, + "learning_rate": 1.9835350721625735e-05, + "loss": 2.2187, + "step": 1502 + }, + { + "epoch": 0.31, + "learning_rate": 1.983503841527854e-05, + "loss": 2.1756, + "step": 1503 + }, + { + "epoch": 0.31, + "learning_rate": 1.9834725815484263e-05, + "loss": 2.1833, + "step": 1504 + }, + { + "epoch": 0.31, + "learning_rate": 1.9834412922252238e-05, + "loss": 2.2538, + "step": 1505 + }, + { + "epoch": 0.31, + "learning_rate": 1.983409973559179e-05, + "loss": 2.1739, + "step": 1506 + }, + { + "epoch": 0.31, + "learning_rate": 1.9833786255512274e-05, + "loss": 2.1282, + "step": 1507 + }, + { + "epoch": 0.31, + "learning_rate": 1.9833472482023037e-05, + "loss": 2.1276, + "step": 1508 + }, + { + "epoch": 0.31, + "learning_rate": 1.9833158415133443e-05, + "loss": 2.247, + "step": 1509 + }, + { + "epoch": 0.31, + "learning_rate": 1.9832844054852862e-05, + "loss": 2.2194, + "step": 1510 + }, + { + "epoch": 0.31, + "learning_rate": 1.9832529401190674e-05, + "loss": 2.2285, + "step": 1511 + }, + { + "epoch": 0.31, + "learning_rate": 1.9832214454156265e-05, + "loss": 2.1297, + "step": 1512 + }, + { + "epoch": 0.31, + "learning_rate": 1.983189921375904e-05, + "loss": 2.1766, + "step": 1513 + }, + { + "epoch": 0.31, + "learning_rate": 1.9831583680008396e-05, + "loss": 2.2216, + "step": 1514 + }, + { + "epoch": 0.31, + "learning_rate": 1.983126785291375e-05, + "loss": 2.0989, + "step": 1515 + }, + { + "epoch": 0.31, + "learning_rate": 1.9830951732484528e-05, + "loss": 2.1324, + "step": 1516 + }, + { + "epoch": 0.31, + "learning_rate": 1.9830635318730155e-05, + "loss": 2.1862, + "step": 1517 + }, + { + "epoch": 0.31, + "learning_rate": 1.983031861166008e-05, + "loss": 2.2332, + "step": 1518 + }, + { + "epoch": 0.31, + "learning_rate": 1.983000161128375e-05, + "loss": 2.1823, + "step": 1519 + }, + { + "epoch": 0.31, + "learning_rate": 1.9829684317610623e-05, + "loss": 2.1957, + "step": 1520 + }, + { + "epoch": 0.31, + "learning_rate": 1.9829366730650167e-05, + "loss": 2.1144, + "step": 1521 + }, + { + "epoch": 0.31, + "learning_rate": 1.9829048850411854e-05, + "loss": 2.2001, + "step": 1522 + }, + { + "epoch": 0.31, + "learning_rate": 1.982873067690517e-05, + "loss": 2.1632, + "step": 1523 + }, + { + "epoch": 0.31, + "learning_rate": 1.9828412210139612e-05, + "loss": 2.2309, + "step": 1524 + }, + { + "epoch": 0.31, + "learning_rate": 1.982809345012468e-05, + "loss": 2.2267, + "step": 1525 + }, + { + "epoch": 0.31, + "learning_rate": 1.9827774396869883e-05, + "loss": 2.2154, + "step": 1526 + }, + { + "epoch": 0.31, + "learning_rate": 1.982745505038474e-05, + "loss": 2.1852, + "step": 1527 + }, + { + "epoch": 0.31, + "learning_rate": 1.9827135410678782e-05, + "loss": 2.157, + "step": 1528 + }, + { + "epoch": 0.31, + "learning_rate": 1.9826815477761547e-05, + "loss": 2.1343, + "step": 1529 + }, + { + "epoch": 0.31, + "learning_rate": 1.982649525164258e-05, + "loss": 2.2221, + "step": 1530 + }, + { + "epoch": 0.31, + "learning_rate": 1.9826174732331434e-05, + "loss": 2.1161, + "step": 1531 + }, + { + "epoch": 0.31, + "learning_rate": 1.9825853919837673e-05, + "loss": 2.163, + "step": 1532 + }, + { + "epoch": 0.31, + "learning_rate": 1.9825532814170863e-05, + "loss": 2.1858, + "step": 1533 + }, + { + "epoch": 0.31, + "learning_rate": 1.9825211415340598e-05, + "loss": 2.2185, + "step": 1534 + }, + { + "epoch": 0.31, + "learning_rate": 1.982488972335646e-05, + "loss": 2.2025, + "step": 1535 + }, + { + "epoch": 0.31, + "learning_rate": 1.982456773822804e-05, + "loss": 2.1256, + "step": 1536 + }, + { + "epoch": 0.31, + "learning_rate": 1.982424545996496e-05, + "loss": 2.0736, + "step": 1537 + }, + { + "epoch": 0.31, + "learning_rate": 1.982392288857683e-05, + "loss": 2.1604, + "step": 1538 + }, + { + "epoch": 0.31, + "learning_rate": 1.9823600024073267e-05, + "loss": 2.2189, + "step": 1539 + }, + { + "epoch": 0.31, + "learning_rate": 1.9823276866463915e-05, + "loss": 2.2153, + "step": 1540 + }, + { + "epoch": 0.31, + "learning_rate": 1.982295341575841e-05, + "loss": 2.1503, + "step": 1541 + }, + { + "epoch": 0.31, + "learning_rate": 1.9822629671966404e-05, + "loss": 2.2186, + "step": 1542 + }, + { + "epoch": 0.31, + "learning_rate": 1.9822305635097554e-05, + "loss": 2.1456, + "step": 1543 + }, + { + "epoch": 0.31, + "learning_rate": 1.982198130516153e-05, + "loss": 2.1472, + "step": 1544 + }, + { + "epoch": 0.31, + "learning_rate": 1.9821656682168013e-05, + "loss": 2.0812, + "step": 1545 + }, + { + "epoch": 0.31, + "learning_rate": 1.9821331766126682e-05, + "loss": 2.198, + "step": 1546 + }, + { + "epoch": 0.31, + "learning_rate": 1.982100655704724e-05, + "loss": 2.2081, + "step": 1547 + }, + { + "epoch": 0.31, + "learning_rate": 1.9820681054939383e-05, + "loss": 2.1333, + "step": 1548 + }, + { + "epoch": 0.31, + "learning_rate": 1.9820355259812826e-05, + "loss": 2.1857, + "step": 1549 + }, + { + "epoch": 0.31, + "learning_rate": 1.9820029171677288e-05, + "loss": 2.1565, + "step": 1550 + }, + { + "epoch": 0.31, + "learning_rate": 1.98197027905425e-05, + "loss": 2.2376, + "step": 1551 + }, + { + "epoch": 0.32, + "learning_rate": 1.98193761164182e-05, + "loss": 2.1343, + "step": 1552 + }, + { + "epoch": 0.32, + "learning_rate": 1.9819049149314133e-05, + "loss": 2.1422, + "step": 1553 + }, + { + "epoch": 0.32, + "learning_rate": 1.9818721889240055e-05, + "loss": 2.206, + "step": 1554 + }, + { + "epoch": 0.32, + "learning_rate": 1.9818394336205734e-05, + "loss": 2.1163, + "step": 1555 + }, + { + "epoch": 0.32, + "learning_rate": 1.981806649022094e-05, + "loss": 2.1817, + "step": 1556 + }, + { + "epoch": 0.32, + "learning_rate": 1.981773835129546e-05, + "loss": 2.2555, + "step": 1557 + }, + { + "epoch": 0.32, + "learning_rate": 1.9817409919439074e-05, + "loss": 2.1488, + "step": 1558 + }, + { + "epoch": 0.32, + "learning_rate": 1.981708119466159e-05, + "loss": 2.2611, + "step": 1559 + }, + { + "epoch": 0.32, + "learning_rate": 1.9816752176972815e-05, + "loss": 2.149, + "step": 1560 + }, + { + "epoch": 0.32, + "learning_rate": 1.9816422866382564e-05, + "loss": 2.1887, + "step": 1561 + }, + { + "epoch": 0.32, + "learning_rate": 1.9816093262900662e-05, + "loss": 2.1262, + "step": 1562 + }, + { + "epoch": 0.32, + "learning_rate": 1.9815763366536943e-05, + "loss": 2.1673, + "step": 1563 + }, + { + "epoch": 0.32, + "learning_rate": 1.981543317730126e-05, + "loss": 2.1452, + "step": 1564 + }, + { + "epoch": 0.32, + "learning_rate": 1.981510269520345e-05, + "loss": 2.1867, + "step": 1565 + }, + { + "epoch": 0.32, + "learning_rate": 1.9814771920253378e-05, + "loss": 2.1842, + "step": 1566 + }, + { + "epoch": 0.32, + "learning_rate": 1.9814440852460925e-05, + "loss": 2.2195, + "step": 1567 + }, + { + "epoch": 0.32, + "learning_rate": 1.9814109491835947e-05, + "loss": 2.1792, + "step": 1568 + }, + { + "epoch": 0.32, + "learning_rate": 1.9813777838388356e-05, + "loss": 2.0807, + "step": 1569 + }, + { + "epoch": 0.32, + "learning_rate": 1.9813445892128028e-05, + "loss": 2.1938, + "step": 1570 + }, + { + "epoch": 0.32, + "learning_rate": 1.9813113653064873e-05, + "loss": 2.2191, + "step": 1571 + }, + { + "epoch": 0.32, + "learning_rate": 1.9812781121208808e-05, + "loss": 2.1449, + "step": 1572 + }, + { + "epoch": 0.32, + "learning_rate": 1.981244829656975e-05, + "loss": 2.2138, + "step": 1573 + }, + { + "epoch": 0.32, + "learning_rate": 1.981211517915763e-05, + "loss": 2.2183, + "step": 1574 + }, + { + "epoch": 0.32, + "learning_rate": 1.9811781768982392e-05, + "loss": 2.148, + "step": 1575 + }, + { + "epoch": 0.32, + "learning_rate": 1.9811448066053977e-05, + "loss": 2.1661, + "step": 1576 + }, + { + "epoch": 0.32, + "learning_rate": 1.981111407038235e-05, + "loss": 2.1331, + "step": 1577 + }, + { + "epoch": 0.32, + "learning_rate": 1.981077978197747e-05, + "loss": 2.1217, + "step": 1578 + }, + { + "epoch": 0.32, + "learning_rate": 1.9810445200849305e-05, + "loss": 2.1432, + "step": 1579 + }, + { + "epoch": 0.32, + "learning_rate": 1.9810110327007853e-05, + "loss": 2.1822, + "step": 1580 + }, + { + "epoch": 0.32, + "learning_rate": 1.9809775160463097e-05, + "loss": 2.1853, + "step": 1581 + }, + { + "epoch": 0.32, + "learning_rate": 1.980943970122504e-05, + "loss": 2.2269, + "step": 1582 + }, + { + "epoch": 0.32, + "learning_rate": 1.9809103949303683e-05, + "loss": 2.1433, + "step": 1583 + }, + { + "epoch": 0.32, + "learning_rate": 1.9808767904709056e-05, + "loss": 2.2739, + "step": 1584 + }, + { + "epoch": 0.32, + "learning_rate": 1.9808431567451176e-05, + "loss": 2.189, + "step": 1585 + }, + { + "epoch": 0.32, + "learning_rate": 1.9808094937540082e-05, + "loss": 2.1995, + "step": 1586 + }, + { + "epoch": 0.32, + "learning_rate": 1.9807758014985824e-05, + "loss": 2.1665, + "step": 1587 + }, + { + "epoch": 0.32, + "learning_rate": 1.9807420799798444e-05, + "loss": 2.2024, + "step": 1588 + }, + { + "epoch": 0.32, + "learning_rate": 1.9807083291988007e-05, + "loss": 2.2086, + "step": 1589 + }, + { + "epoch": 0.32, + "learning_rate": 1.9806745491564588e-05, + "loss": 2.0727, + "step": 1590 + }, + { + "epoch": 0.32, + "learning_rate": 1.9806407398538258e-05, + "loss": 2.2293, + "step": 1591 + }, + { + "epoch": 0.32, + "learning_rate": 1.980606901291911e-05, + "loss": 2.1307, + "step": 1592 + }, + { + "epoch": 0.32, + "learning_rate": 1.980573033471724e-05, + "loss": 2.1862, + "step": 1593 + }, + { + "epoch": 0.32, + "learning_rate": 1.9805391363942753e-05, + "loss": 2.2114, + "step": 1594 + }, + { + "epoch": 0.32, + "learning_rate": 1.9805052100605762e-05, + "loss": 2.0798, + "step": 1595 + }, + { + "epoch": 0.32, + "learning_rate": 1.9804712544716392e-05, + "loss": 2.117, + "step": 1596 + }, + { + "epoch": 0.32, + "learning_rate": 1.980437269628477e-05, + "loss": 2.1973, + "step": 1597 + }, + { + "epoch": 0.32, + "learning_rate": 1.9804032555321034e-05, + "loss": 2.1444, + "step": 1598 + }, + { + "epoch": 0.32, + "learning_rate": 1.9803692121835345e-05, + "loss": 2.2829, + "step": 1599 + }, + { + "epoch": 0.32, + "learning_rate": 1.980335139583785e-05, + "loss": 2.2086, + "step": 1600 + }, + { + "epoch": 0.32, + "learning_rate": 1.980301037733871e-05, + "loss": 2.1211, + "step": 1601 + }, + { + "epoch": 0.33, + "learning_rate": 1.9802669066348116e-05, + "loss": 2.1448, + "step": 1602 + }, + { + "epoch": 0.33, + "learning_rate": 1.980232746287624e-05, + "loss": 2.1734, + "step": 1603 + }, + { + "epoch": 0.33, + "learning_rate": 1.980198556693328e-05, + "loss": 2.0773, + "step": 1604 + }, + { + "epoch": 0.33, + "learning_rate": 1.980164337852943e-05, + "loss": 2.1405, + "step": 1605 + }, + { + "epoch": 0.33, + "learning_rate": 1.980130089767491e-05, + "loss": 2.2235, + "step": 1606 + }, + { + "epoch": 0.33, + "learning_rate": 1.980095812437993e-05, + "loss": 2.1803, + "step": 1607 + }, + { + "epoch": 0.33, + "learning_rate": 1.980061505865472e-05, + "loss": 2.1477, + "step": 1608 + }, + { + "epoch": 0.33, + "learning_rate": 1.9800271700509517e-05, + "loss": 2.2567, + "step": 1609 + }, + { + "epoch": 0.33, + "learning_rate": 1.9799928049954564e-05, + "loss": 2.1803, + "step": 1610 + }, + { + "epoch": 0.33, + "learning_rate": 1.9799584107000118e-05, + "loss": 2.1864, + "step": 1611 + }, + { + "epoch": 0.33, + "learning_rate": 1.979923987165644e-05, + "loss": 2.1405, + "step": 1612 + }, + { + "epoch": 0.33, + "learning_rate": 1.9798895343933795e-05, + "loss": 2.1312, + "step": 1613 + }, + { + "epoch": 0.33, + "learning_rate": 1.979855052384247e-05, + "loss": 2.1586, + "step": 1614 + }, + { + "epoch": 0.33, + "learning_rate": 1.979820541139275e-05, + "loss": 2.2045, + "step": 1615 + }, + { + "epoch": 0.33, + "learning_rate": 1.9797860006594934e-05, + "loss": 2.1223, + "step": 1616 + }, + { + "epoch": 0.33, + "learning_rate": 1.9797514309459326e-05, + "loss": 2.1823, + "step": 1617 + }, + { + "epoch": 0.33, + "learning_rate": 1.979716831999624e-05, + "loss": 2.1769, + "step": 1618 + }, + { + "epoch": 0.33, + "learning_rate": 1.9796822038216002e-05, + "loss": 2.158, + "step": 1619 + }, + { + "epoch": 0.33, + "learning_rate": 1.9796475464128943e-05, + "loss": 2.2117, + "step": 1620 + }, + { + "epoch": 0.33, + "learning_rate": 1.97961285977454e-05, + "loss": 2.1462, + "step": 1621 + }, + { + "epoch": 0.33, + "learning_rate": 1.9795781439075726e-05, + "loss": 2.1238, + "step": 1622 + }, + { + "epoch": 0.33, + "learning_rate": 1.979543398813028e-05, + "loss": 2.0526, + "step": 1623 + }, + { + "epoch": 0.33, + "learning_rate": 1.979508624491943e-05, + "loss": 2.1813, + "step": 1624 + }, + { + "epoch": 0.33, + "learning_rate": 1.9794738209453544e-05, + "loss": 2.1698, + "step": 1625 + }, + { + "epoch": 0.33, + "learning_rate": 1.9794389881743017e-05, + "loss": 2.1806, + "step": 1626 + }, + { + "epoch": 0.33, + "learning_rate": 1.9794041261798232e-05, + "loss": 2.1451, + "step": 1627 + }, + { + "epoch": 0.33, + "learning_rate": 1.9793692349629598e-05, + "loss": 2.2378, + "step": 1628 + }, + { + "epoch": 0.33, + "learning_rate": 1.979334314524752e-05, + "loss": 2.16, + "step": 1629 + }, + { + "epoch": 0.33, + "learning_rate": 1.979299364866242e-05, + "loss": 2.2056, + "step": 1630 + }, + { + "epoch": 0.33, + "learning_rate": 1.9792643859884726e-05, + "loss": 2.2018, + "step": 1631 + }, + { + "epoch": 0.33, + "learning_rate": 1.9792293778924876e-05, + "loss": 2.1806, + "step": 1632 + }, + { + "epoch": 0.33, + "learning_rate": 1.9791943405793312e-05, + "loss": 2.1283, + "step": 1633 + }, + { + "epoch": 0.33, + "learning_rate": 1.979159274050049e-05, + "loss": 2.2332, + "step": 1634 + }, + { + "epoch": 0.33, + "learning_rate": 1.9791241783056874e-05, + "loss": 2.2421, + "step": 1635 + }, + { + "epoch": 0.33, + "learning_rate": 1.979089053347293e-05, + "loss": 2.1691, + "step": 1636 + }, + { + "epoch": 0.33, + "learning_rate": 1.9790538991759145e-05, + "loss": 2.1407, + "step": 1637 + }, + { + "epoch": 0.33, + "learning_rate": 1.9790187157926003e-05, + "loss": 2.1316, + "step": 1638 + }, + { + "epoch": 0.33, + "learning_rate": 1.9789835031984005e-05, + "loss": 2.1976, + "step": 1639 + }, + { + "epoch": 0.33, + "learning_rate": 1.9789482613943658e-05, + "loss": 2.1807, + "step": 1640 + }, + { + "epoch": 0.33, + "learning_rate": 1.9789129903815474e-05, + "loss": 2.1717, + "step": 1641 + }, + { + "epoch": 0.33, + "learning_rate": 1.9788776901609977e-05, + "loss": 2.2687, + "step": 1642 + }, + { + "epoch": 0.33, + "learning_rate": 1.97884236073377e-05, + "loss": 2.1098, + "step": 1643 + }, + { + "epoch": 0.33, + "learning_rate": 1.9788070021009184e-05, + "loss": 2.161, + "step": 1644 + }, + { + "epoch": 0.33, + "learning_rate": 1.9787716142634982e-05, + "loss": 2.2369, + "step": 1645 + }, + { + "epoch": 0.33, + "learning_rate": 1.9787361972225646e-05, + "loss": 2.2275, + "step": 1646 + }, + { + "epoch": 0.33, + "learning_rate": 1.9787007509791748e-05, + "loss": 2.1404, + "step": 1647 + }, + { + "epoch": 0.33, + "learning_rate": 1.9786652755343867e-05, + "loss": 2.1301, + "step": 1648 + }, + { + "epoch": 0.33, + "learning_rate": 1.9786297708892582e-05, + "loss": 2.1561, + "step": 1649 + }, + { + "epoch": 0.33, + "learning_rate": 1.978594237044849e-05, + "loss": 2.095, + "step": 1650 + }, + { + "epoch": 0.34, + "learning_rate": 1.978558674002219e-05, + "loss": 2.2086, + "step": 1651 + }, + { + "epoch": 0.34, + "learning_rate": 1.9785230817624296e-05, + "loss": 2.1956, + "step": 1652 + }, + { + "epoch": 0.34, + "learning_rate": 1.9784874603265427e-05, + "loss": 2.1256, + "step": 1653 + }, + { + "epoch": 0.34, + "learning_rate": 1.9784518096956212e-05, + "loss": 2.156, + "step": 1654 + }, + { + "epoch": 0.34, + "learning_rate": 1.9784161298707283e-05, + "loss": 2.1735, + "step": 1655 + }, + { + "epoch": 0.34, + "learning_rate": 1.9783804208529292e-05, + "loss": 2.187, + "step": 1656 + }, + { + "epoch": 0.34, + "learning_rate": 1.9783446826432893e-05, + "loss": 2.1702, + "step": 1657 + }, + { + "epoch": 0.34, + "learning_rate": 1.9783089152428746e-05, + "loss": 2.144, + "step": 1658 + }, + { + "epoch": 0.34, + "learning_rate": 1.9782731186527522e-05, + "loss": 2.1061, + "step": 1659 + }, + { + "epoch": 0.34, + "learning_rate": 1.9782372928739906e-05, + "loss": 2.189, + "step": 1660 + }, + { + "epoch": 0.34, + "learning_rate": 1.978201437907658e-05, + "loss": 2.1667, + "step": 1661 + }, + { + "epoch": 0.34, + "learning_rate": 1.9781655537548255e-05, + "loss": 2.165, + "step": 1662 + }, + { + "epoch": 0.34, + "learning_rate": 1.9781296404165625e-05, + "loss": 2.1079, + "step": 1663 + }, + { + "epoch": 0.34, + "learning_rate": 1.978093697893941e-05, + "loss": 2.1261, + "step": 1664 + }, + { + "epoch": 0.34, + "learning_rate": 1.9780577261880336e-05, + "loss": 2.1477, + "step": 1665 + }, + { + "epoch": 0.34, + "learning_rate": 1.9780217252999137e-05, + "loss": 2.1885, + "step": 1666 + }, + { + "epoch": 0.34, + "learning_rate": 1.9779856952306545e-05, + "loss": 2.1875, + "step": 1667 + }, + { + "epoch": 0.34, + "learning_rate": 1.9779496359813323e-05, + "loss": 2.0775, + "step": 1668 + }, + { + "epoch": 0.34, + "learning_rate": 1.977913547553022e-05, + "loss": 2.1564, + "step": 1669 + }, + { + "epoch": 0.34, + "learning_rate": 1.9778774299468007e-05, + "loss": 2.0911, + "step": 1670 + }, + { + "epoch": 0.34, + "learning_rate": 1.9778412831637466e-05, + "loss": 2.1923, + "step": 1671 + }, + { + "epoch": 0.34, + "learning_rate": 1.9778051072049374e-05, + "loss": 2.1623, + "step": 1672 + }, + { + "epoch": 0.34, + "learning_rate": 1.9777689020714525e-05, + "loss": 2.1342, + "step": 1673 + }, + { + "epoch": 0.34, + "learning_rate": 1.9777326677643728e-05, + "loss": 2.2124, + "step": 1674 + }, + { + "epoch": 0.34, + "learning_rate": 1.977696404284779e-05, + "loss": 2.1126, + "step": 1675 + }, + { + "epoch": 0.34, + "learning_rate": 1.977660111633753e-05, + "loss": 2.2341, + "step": 1676 + }, + { + "epoch": 0.34, + "learning_rate": 1.9776237898123783e-05, + "loss": 2.131, + "step": 1677 + }, + { + "epoch": 0.34, + "learning_rate": 1.9775874388217375e-05, + "loss": 2.2056, + "step": 1678 + }, + { + "epoch": 0.34, + "learning_rate": 1.9775510586629162e-05, + "loss": 2.1945, + "step": 1679 + }, + { + "epoch": 0.34, + "learning_rate": 1.9775146493369996e-05, + "loss": 2.269, + "step": 1680 + }, + { + "epoch": 0.34, + "learning_rate": 1.9774782108450737e-05, + "loss": 2.1324, + "step": 1681 + }, + { + "epoch": 0.34, + "learning_rate": 1.977441743188226e-05, + "loss": 2.1803, + "step": 1682 + }, + { + "epoch": 0.34, + "learning_rate": 1.9774052463675444e-05, + "loss": 2.1529, + "step": 1683 + }, + { + "epoch": 0.34, + "learning_rate": 1.977368720384118e-05, + "loss": 2.257, + "step": 1684 + }, + { + "epoch": 0.34, + "learning_rate": 1.977332165239037e-05, + "loss": 2.1485, + "step": 1685 + }, + { + "epoch": 0.34, + "learning_rate": 1.9772955809333915e-05, + "loss": 2.2201, + "step": 1686 + }, + { + "epoch": 0.34, + "learning_rate": 1.9772589674682733e-05, + "loss": 2.1058, + "step": 1687 + }, + { + "epoch": 0.34, + "learning_rate": 1.9772223248447747e-05, + "loss": 2.2045, + "step": 1688 + }, + { + "epoch": 0.34, + "learning_rate": 1.9771856530639893e-05, + "loss": 2.147, + "step": 1689 + }, + { + "epoch": 0.34, + "learning_rate": 1.977148952127011e-05, + "loss": 2.3301, + "step": 1690 + }, + { + "epoch": 0.34, + "learning_rate": 1.9771122220349344e-05, + "loss": 2.2123, + "step": 1691 + }, + { + "epoch": 0.34, + "learning_rate": 1.9770754627888568e-05, + "loss": 2.0665, + "step": 1692 + }, + { + "epoch": 0.34, + "learning_rate": 1.9770386743898735e-05, + "loss": 2.2022, + "step": 1693 + }, + { + "epoch": 0.34, + "learning_rate": 1.977001856839083e-05, + "loss": 2.254, + "step": 1694 + }, + { + "epoch": 0.34, + "learning_rate": 1.9769650101375835e-05, + "loss": 2.1458, + "step": 1695 + }, + { + "epoch": 0.34, + "learning_rate": 1.9769281342864746e-05, + "loss": 2.1568, + "step": 1696 + }, + { + "epoch": 0.34, + "learning_rate": 1.9768912292868566e-05, + "loss": 2.2345, + "step": 1697 + }, + { + "epoch": 0.34, + "learning_rate": 1.97685429513983e-05, + "loss": 2.1632, + "step": 1698 + }, + { + "epoch": 0.34, + "learning_rate": 1.9768173318464975e-05, + "loss": 2.1959, + "step": 1699 + }, + { + "epoch": 0.35, + "learning_rate": 1.9767803394079618e-05, + "loss": 2.1862, + "step": 1700 + }, + { + "epoch": 0.35, + "learning_rate": 1.9767433178253266e-05, + "loss": 2.1556, + "step": 1701 + }, + { + "epoch": 0.35, + "learning_rate": 1.9767062670996965e-05, + "loss": 2.1464, + "step": 1702 + }, + { + "epoch": 0.35, + "learning_rate": 1.9766691872321767e-05, + "loss": 2.1221, + "step": 1703 + }, + { + "epoch": 0.35, + "learning_rate": 1.976632078223874e-05, + "loss": 2.1595, + "step": 1704 + }, + { + "epoch": 0.35, + "learning_rate": 1.9765949400758955e-05, + "loss": 2.1504, + "step": 1705 + }, + { + "epoch": 0.35, + "learning_rate": 1.9765577727893494e-05, + "loss": 2.1663, + "step": 1706 + }, + { + "epoch": 0.35, + "learning_rate": 1.9765205763653442e-05, + "loss": 2.203, + "step": 1707 + }, + { + "epoch": 0.35, + "learning_rate": 1.9764833508049902e-05, + "loss": 2.2106, + "step": 1708 + }, + { + "epoch": 0.35, + "learning_rate": 1.9764460961093978e-05, + "loss": 2.1455, + "step": 1709 + }, + { + "epoch": 0.35, + "learning_rate": 1.9764088122796785e-05, + "loss": 2.275, + "step": 1710 + }, + { + "epoch": 0.35, + "learning_rate": 1.976371499316945e-05, + "loss": 2.2539, + "step": 1711 + }, + { + "epoch": 0.35, + "learning_rate": 1.9763341572223106e-05, + "loss": 2.2125, + "step": 1712 + }, + { + "epoch": 0.35, + "learning_rate": 1.9762967859968892e-05, + "loss": 2.2116, + "step": 1713 + }, + { + "epoch": 0.35, + "learning_rate": 1.976259385641796e-05, + "loss": 2.1692, + "step": 1714 + }, + { + "epoch": 0.35, + "learning_rate": 1.9762219561581472e-05, + "loss": 2.1198, + "step": 1715 + }, + { + "epoch": 0.35, + "learning_rate": 1.976184497547059e-05, + "loss": 2.1747, + "step": 1716 + }, + { + "epoch": 0.35, + "learning_rate": 1.9761470098096497e-05, + "loss": 2.2009, + "step": 1717 + }, + { + "epoch": 0.35, + "learning_rate": 1.9761094929470374e-05, + "loss": 2.1526, + "step": 1718 + }, + { + "epoch": 0.35, + "learning_rate": 1.9760719469603415e-05, + "loss": 2.1768, + "step": 1719 + }, + { + "epoch": 0.35, + "learning_rate": 1.9760343718506823e-05, + "loss": 2.1444, + "step": 1720 + }, + { + "epoch": 0.35, + "learning_rate": 1.9759967676191813e-05, + "loss": 2.2069, + "step": 1721 + }, + { + "epoch": 0.35, + "learning_rate": 1.9759591342669596e-05, + "loss": 2.234, + "step": 1722 + }, + { + "epoch": 0.35, + "learning_rate": 1.9759214717951405e-05, + "loss": 2.2016, + "step": 1723 + }, + { + "epoch": 0.35, + "learning_rate": 1.9758837802048483e-05, + "loss": 2.1739, + "step": 1724 + }, + { + "epoch": 0.35, + "learning_rate": 1.9758460594972068e-05, + "loss": 2.1348, + "step": 1725 + }, + { + "epoch": 0.35, + "learning_rate": 1.975808309673342e-05, + "loss": 2.138, + "step": 1726 + }, + { + "epoch": 0.35, + "learning_rate": 1.97577053073438e-05, + "loss": 2.1675, + "step": 1727 + }, + { + "epoch": 0.35, + "learning_rate": 1.975732722681448e-05, + "loss": 2.2036, + "step": 1728 + }, + { + "epoch": 0.35, + "learning_rate": 1.975694885515674e-05, + "loss": 2.1499, + "step": 1729 + }, + { + "epoch": 0.35, + "learning_rate": 1.9756570192381875e-05, + "loss": 2.2528, + "step": 1730 + }, + { + "epoch": 0.35, + "learning_rate": 1.9756191238501175e-05, + "loss": 2.183, + "step": 1731 + }, + { + "epoch": 0.35, + "learning_rate": 1.9755811993525952e-05, + "loss": 2.097, + "step": 1732 + }, + { + "epoch": 0.35, + "learning_rate": 1.9755432457467517e-05, + "loss": 2.1698, + "step": 1733 + }, + { + "epoch": 0.35, + "learning_rate": 1.97550526303372e-05, + "loss": 2.168, + "step": 1734 + }, + { + "epoch": 0.35, + "learning_rate": 1.9754672512146334e-05, + "loss": 2.189, + "step": 1735 + }, + { + "epoch": 0.35, + "learning_rate": 1.9754292102906253e-05, + "loss": 2.1882, + "step": 1736 + }, + { + "epoch": 0.35, + "learning_rate": 1.9753911402628313e-05, + "loss": 2.1364, + "step": 1737 + }, + { + "epoch": 0.35, + "learning_rate": 1.975353041132387e-05, + "loss": 2.2272, + "step": 1738 + }, + { + "epoch": 0.35, + "learning_rate": 1.97531491290043e-05, + "loss": 2.1668, + "step": 1739 + }, + { + "epoch": 0.35, + "learning_rate": 1.9752767555680967e-05, + "loss": 2.2118, + "step": 1740 + }, + { + "epoch": 0.35, + "learning_rate": 1.9752385691365262e-05, + "loss": 2.1241, + "step": 1741 + }, + { + "epoch": 0.35, + "learning_rate": 1.975200353606858e-05, + "loss": 2.2425, + "step": 1742 + }, + { + "epoch": 0.35, + "learning_rate": 1.975162108980232e-05, + "loss": 2.1273, + "step": 1743 + }, + { + "epoch": 0.35, + "learning_rate": 1.9751238352577897e-05, + "loss": 2.1729, + "step": 1744 + }, + { + "epoch": 0.35, + "learning_rate": 1.9750855324406725e-05, + "loss": 2.18, + "step": 1745 + }, + { + "epoch": 0.35, + "learning_rate": 1.9750472005300238e-05, + "loss": 2.1979, + "step": 1746 + }, + { + "epoch": 0.35, + "learning_rate": 1.975008839526987e-05, + "loss": 2.1898, + "step": 1747 + }, + { + "epoch": 0.35, + "learning_rate": 1.9749704494327072e-05, + "loss": 2.1661, + "step": 1748 + }, + { + "epoch": 0.36, + "learning_rate": 1.9749320302483287e-05, + "loss": 2.1649, + "step": 1749 + }, + { + "epoch": 0.36, + "learning_rate": 1.9748935819749988e-05, + "loss": 2.1978, + "step": 1750 + }, + { + "epoch": 0.36, + "learning_rate": 1.9748551046138646e-05, + "loss": 2.0861, + "step": 1751 + }, + { + "epoch": 0.36, + "learning_rate": 1.9748165981660733e-05, + "loss": 2.1962, + "step": 1752 + }, + { + "epoch": 0.36, + "learning_rate": 1.974778062632775e-05, + "loss": 2.2095, + "step": 1753 + }, + { + "epoch": 0.36, + "learning_rate": 1.9747394980151185e-05, + "loss": 2.1794, + "step": 1754 + }, + { + "epoch": 0.36, + "learning_rate": 1.9747009043142556e-05, + "loss": 2.1074, + "step": 1755 + }, + { + "epoch": 0.36, + "learning_rate": 1.9746622815313364e-05, + "loss": 2.1779, + "step": 1756 + }, + { + "epoch": 0.36, + "learning_rate": 1.9746236296675142e-05, + "loss": 2.1163, + "step": 1757 + }, + { + "epoch": 0.36, + "learning_rate": 1.974584948723942e-05, + "loss": 2.1538, + "step": 1758 + }, + { + "epoch": 0.36, + "learning_rate": 1.9745462387017738e-05, + "loss": 2.0863, + "step": 1759 + }, + { + "epoch": 0.36, + "learning_rate": 1.974507499602165e-05, + "loss": 2.1901, + "step": 1760 + }, + { + "epoch": 0.36, + "learning_rate": 1.974468731426271e-05, + "loss": 2.1883, + "step": 1761 + }, + { + "epoch": 0.36, + "learning_rate": 1.9744299341752485e-05, + "loss": 2.1626, + "step": 1762 + }, + { + "epoch": 0.36, + "learning_rate": 1.974391107850256e-05, + "loss": 2.172, + "step": 1763 + }, + { + "epoch": 0.36, + "learning_rate": 1.9743522524524504e-05, + "loss": 2.2072, + "step": 1764 + }, + { + "epoch": 0.36, + "learning_rate": 1.9743133679829925e-05, + "loss": 2.1908, + "step": 1765 + }, + { + "epoch": 0.36, + "learning_rate": 1.9742744544430414e-05, + "loss": 2.0677, + "step": 1766 + }, + { + "epoch": 0.36, + "learning_rate": 1.974235511833759e-05, + "loss": 2.2384, + "step": 1767 + }, + { + "epoch": 0.36, + "learning_rate": 1.9741965401563066e-05, + "loss": 2.1878, + "step": 1768 + }, + { + "epoch": 0.36, + "learning_rate": 1.9741575394118477e-05, + "loss": 2.1131, + "step": 1769 + }, + { + "epoch": 0.36, + "learning_rate": 1.974118509601545e-05, + "loss": 2.1703, + "step": 1770 + }, + { + "epoch": 0.36, + "learning_rate": 1.9740794507265636e-05, + "loss": 2.1365, + "step": 1771 + }, + { + "epoch": 0.36, + "learning_rate": 1.9740403627880692e-05, + "loss": 2.2511, + "step": 1772 + }, + { + "epoch": 0.36, + "learning_rate": 1.974001245787227e-05, + "loss": 2.179, + "step": 1773 + }, + { + "epoch": 0.36, + "learning_rate": 1.9739620997252053e-05, + "loss": 1.9961, + "step": 1774 + }, + { + "epoch": 0.36, + "learning_rate": 1.973922924603172e-05, + "loss": 2.1581, + "step": 1775 + }, + { + "epoch": 0.36, + "learning_rate": 1.973883720422295e-05, + "loss": 2.1388, + "step": 1776 + }, + { + "epoch": 0.36, + "learning_rate": 1.9738444871837448e-05, + "loss": 2.2099, + "step": 1777 + }, + { + "epoch": 0.36, + "learning_rate": 1.973805224888692e-05, + "loss": 2.1781, + "step": 1778 + }, + { + "epoch": 0.36, + "learning_rate": 1.9737659335383075e-05, + "loss": 2.2532, + "step": 1779 + }, + { + "epoch": 0.36, + "learning_rate": 1.973726613133764e-05, + "loss": 2.2059, + "step": 1780 + }, + { + "epoch": 0.36, + "learning_rate": 1.9736872636762352e-05, + "loss": 2.2309, + "step": 1781 + }, + { + "epoch": 0.36, + "learning_rate": 1.9736478851668945e-05, + "loss": 2.1041, + "step": 1782 + }, + { + "epoch": 0.36, + "learning_rate": 1.973608477606917e-05, + "loss": 2.1084, + "step": 1783 + }, + { + "epoch": 0.36, + "learning_rate": 1.9735690409974782e-05, + "loss": 2.1867, + "step": 1784 + }, + { + "epoch": 0.36, + "learning_rate": 1.973529575339755e-05, + "loss": 2.1977, + "step": 1785 + }, + { + "epoch": 0.36, + "learning_rate": 1.9734900806349257e-05, + "loss": 2.188, + "step": 1786 + }, + { + "epoch": 0.36, + "learning_rate": 1.9734505568841675e-05, + "loss": 2.2015, + "step": 1787 + }, + { + "epoch": 0.36, + "learning_rate": 1.9734110040886604e-05, + "loss": 2.2256, + "step": 1788 + }, + { + "epoch": 0.36, + "learning_rate": 1.9733714222495842e-05, + "loss": 2.1413, + "step": 1789 + }, + { + "epoch": 0.36, + "learning_rate": 1.97333181136812e-05, + "loss": 2.1246, + "step": 1790 + }, + { + "epoch": 0.36, + "learning_rate": 1.97329217144545e-05, + "loss": 2.211, + "step": 1791 + }, + { + "epoch": 0.36, + "learning_rate": 1.973252502482756e-05, + "loss": 2.14, + "step": 1792 + }, + { + "epoch": 0.36, + "learning_rate": 1.9732128044812224e-05, + "loss": 2.2235, + "step": 1793 + }, + { + "epoch": 0.36, + "learning_rate": 1.9731730774420338e-05, + "loss": 2.1401, + "step": 1794 + }, + { + "epoch": 0.36, + "learning_rate": 1.9731333213663748e-05, + "loss": 2.1604, + "step": 1795 + }, + { + "epoch": 0.36, + "learning_rate": 1.9730935362554323e-05, + "loss": 2.1814, + "step": 1796 + }, + { + "epoch": 0.36, + "learning_rate": 1.9730537221103932e-05, + "loss": 2.1929, + "step": 1797 + }, + { + "epoch": 0.36, + "learning_rate": 1.973013878932445e-05, + "loss": 2.1642, + "step": 1798 + }, + { + "epoch": 0.37, + "learning_rate": 1.972974006722777e-05, + "loss": 2.2404, + "step": 1799 + }, + { + "epoch": 0.37, + "learning_rate": 1.9729341054825783e-05, + "loss": 2.2308, + "step": 1800 + }, + { + "epoch": 0.37, + "learning_rate": 1.97289417521304e-05, + "loss": 2.1501, + "step": 1801 + }, + { + "epoch": 0.37, + "learning_rate": 1.972854215915353e-05, + "loss": 2.1856, + "step": 1802 + }, + { + "epoch": 0.37, + "learning_rate": 1.9728142275907105e-05, + "loss": 2.0708, + "step": 1803 + }, + { + "epoch": 0.37, + "learning_rate": 1.9727742102403047e-05, + "loss": 2.1511, + "step": 1804 + }, + { + "epoch": 0.37, + "learning_rate": 1.9727341638653297e-05, + "loss": 2.1494, + "step": 1805 + }, + { + "epoch": 0.37, + "learning_rate": 1.9726940884669808e-05, + "loss": 2.1673, + "step": 1806 + }, + { + "epoch": 0.37, + "learning_rate": 1.972653984046453e-05, + "loss": 2.0986, + "step": 1807 + }, + { + "epoch": 0.37, + "learning_rate": 1.9726138506049438e-05, + "loss": 2.0845, + "step": 1808 + }, + { + "epoch": 0.37, + "learning_rate": 1.97257368814365e-05, + "loss": 2.1495, + "step": 1809 + }, + { + "epoch": 0.37, + "learning_rate": 1.97253349666377e-05, + "loss": 2.1132, + "step": 1810 + }, + { + "epoch": 0.37, + "learning_rate": 1.9724932761665032e-05, + "loss": 2.1313, + "step": 1811 + }, + { + "epoch": 0.37, + "learning_rate": 1.9724530266530497e-05, + "loss": 2.1386, + "step": 1812 + }, + { + "epoch": 0.37, + "learning_rate": 1.9724127481246103e-05, + "loss": 2.0258, + "step": 1813 + }, + { + "epoch": 0.37, + "learning_rate": 1.9723724405823867e-05, + "loss": 2.1111, + "step": 1814 + }, + { + "epoch": 0.37, + "learning_rate": 1.9723321040275816e-05, + "loss": 2.1422, + "step": 1815 + }, + { + "epoch": 0.37, + "learning_rate": 1.9722917384613985e-05, + "loss": 2.2049, + "step": 1816 + }, + { + "epoch": 0.37, + "learning_rate": 1.9722513438850418e-05, + "loss": 2.1808, + "step": 1817 + }, + { + "epoch": 0.37, + "learning_rate": 1.9722109202997172e-05, + "loss": 2.1033, + "step": 1818 + }, + { + "epoch": 0.37, + "learning_rate": 1.97217046770663e-05, + "loss": 2.0967, + "step": 1819 + }, + { + "epoch": 0.37, + "learning_rate": 1.9721299861069877e-05, + "loss": 2.2119, + "step": 1820 + }, + { + "epoch": 0.37, + "learning_rate": 1.9720894755019978e-05, + "loss": 2.1447, + "step": 1821 + }, + { + "epoch": 0.37, + "learning_rate": 1.9720489358928692e-05, + "loss": 2.1626, + "step": 1822 + }, + { + "epoch": 0.37, + "learning_rate": 1.972008367280812e-05, + "loss": 2.1512, + "step": 1823 + }, + { + "epoch": 0.37, + "learning_rate": 1.9719677696670356e-05, + "loss": 2.1238, + "step": 1824 + }, + { + "epoch": 0.37, + "learning_rate": 1.971927143052752e-05, + "loss": 2.1656, + "step": 1825 + }, + { + "epoch": 0.37, + "learning_rate": 1.9718864874391735e-05, + "loss": 2.1211, + "step": 1826 + }, + { + "epoch": 0.37, + "learning_rate": 1.9718458028275128e-05, + "loss": 2.184, + "step": 1827 + }, + { + "epoch": 0.37, + "learning_rate": 1.9718050892189836e-05, + "loss": 2.1616, + "step": 1828 + }, + { + "epoch": 0.37, + "learning_rate": 1.9717643466148008e-05, + "loss": 2.1909, + "step": 1829 + }, + { + "epoch": 0.37, + "learning_rate": 1.9717235750161808e-05, + "loss": 2.1165, + "step": 1830 + }, + { + "epoch": 0.37, + "learning_rate": 1.971682774424339e-05, + "loss": 2.1339, + "step": 1831 + }, + { + "epoch": 0.37, + "learning_rate": 1.9716419448404936e-05, + "loss": 2.146, + "step": 1832 + }, + { + "epoch": 0.37, + "learning_rate": 1.9716010862658618e-05, + "loss": 2.2319, + "step": 1833 + }, + { + "epoch": 0.37, + "learning_rate": 1.971560198701664e-05, + "loss": 2.094, + "step": 1834 + }, + { + "epoch": 0.37, + "learning_rate": 1.9715192821491193e-05, + "loss": 2.148, + "step": 1835 + }, + { + "epoch": 0.37, + "learning_rate": 1.9714783366094487e-05, + "loss": 2.1834, + "step": 1836 + }, + { + "epoch": 0.37, + "learning_rate": 1.971437362083874e-05, + "loss": 2.209, + "step": 1837 + }, + { + "epoch": 0.37, + "learning_rate": 1.9713963585736175e-05, + "loss": 2.1063, + "step": 1838 + }, + { + "epoch": 0.37, + "learning_rate": 1.9713553260799027e-05, + "loss": 2.2555, + "step": 1839 + }, + { + "epoch": 0.37, + "learning_rate": 1.9713142646039545e-05, + "loss": 2.1539, + "step": 1840 + }, + { + "epoch": 0.37, + "learning_rate": 1.9712731741469972e-05, + "loss": 2.1691, + "step": 1841 + }, + { + "epoch": 0.37, + "learning_rate": 1.971232054710257e-05, + "loss": 2.1765, + "step": 1842 + }, + { + "epoch": 0.37, + "learning_rate": 1.971190906294961e-05, + "loss": 2.1688, + "step": 1843 + }, + { + "epoch": 0.37, + "learning_rate": 1.971149728902337e-05, + "loss": 2.0842, + "step": 1844 + }, + { + "epoch": 0.37, + "learning_rate": 1.971108522533613e-05, + "loss": 2.1624, + "step": 1845 + }, + { + "epoch": 0.37, + "learning_rate": 1.9710672871900196e-05, + "loss": 2.1429, + "step": 1846 + }, + { + "epoch": 0.37, + "learning_rate": 1.971026022872786e-05, + "loss": 2.1267, + "step": 1847 + }, + { + "epoch": 0.38, + "learning_rate": 1.970984729583144e-05, + "loss": 2.125, + "step": 1848 + }, + { + "epoch": 0.38, + "learning_rate": 1.970943407322326e-05, + "loss": 2.1998, + "step": 1849 + }, + { + "epoch": 0.38, + "learning_rate": 1.9709020560915638e-05, + "loss": 2.109, + "step": 1850 + }, + { + "epoch": 0.38, + "learning_rate": 1.9708606758920917e-05, + "loss": 2.1405, + "step": 1851 + }, + { + "epoch": 0.38, + "learning_rate": 1.9708192667251448e-05, + "loss": 2.1867, + "step": 1852 + }, + { + "epoch": 0.38, + "learning_rate": 1.9707778285919584e-05, + "loss": 2.1441, + "step": 1853 + }, + { + "epoch": 0.38, + "learning_rate": 1.970736361493769e-05, + "loss": 2.1424, + "step": 1854 + }, + { + "epoch": 0.38, + "learning_rate": 1.9706948654318133e-05, + "loss": 2.1641, + "step": 1855 + }, + { + "epoch": 0.38, + "learning_rate": 1.9706533404073302e-05, + "loss": 2.1146, + "step": 1856 + }, + { + "epoch": 0.38, + "learning_rate": 1.9706117864215577e-05, + "loss": 2.2254, + "step": 1857 + }, + { + "epoch": 0.38, + "learning_rate": 1.9705702034757362e-05, + "loss": 2.213, + "step": 1858 + }, + { + "epoch": 0.38, + "learning_rate": 1.970528591571107e-05, + "loss": 2.0611, + "step": 1859 + }, + { + "epoch": 0.38, + "learning_rate": 1.9704869507089105e-05, + "loss": 2.1499, + "step": 1860 + }, + { + "epoch": 0.38, + "learning_rate": 1.97044528089039e-05, + "loss": 2.191, + "step": 1861 + }, + { + "epoch": 0.38, + "learning_rate": 1.9704035821167884e-05, + "loss": 2.2249, + "step": 1862 + }, + { + "epoch": 0.38, + "learning_rate": 1.97036185438935e-05, + "loss": 2.1821, + "step": 1863 + }, + { + "epoch": 0.38, + "learning_rate": 1.97032009770932e-05, + "loss": 2.1173, + "step": 1864 + }, + { + "epoch": 0.38, + "learning_rate": 1.9702783120779438e-05, + "loss": 2.2068, + "step": 1865 + }, + { + "epoch": 0.38, + "learning_rate": 1.9702364974964685e-05, + "loss": 2.1901, + "step": 1866 + }, + { + "epoch": 0.38, + "learning_rate": 1.9701946539661416e-05, + "loss": 2.1331, + "step": 1867 + }, + { + "epoch": 0.38, + "learning_rate": 1.970152781488212e-05, + "loss": 2.1727, + "step": 1868 + }, + { + "epoch": 0.38, + "learning_rate": 1.970110880063928e-05, + "loss": 2.2026, + "step": 1869 + }, + { + "epoch": 0.38, + "learning_rate": 1.970068949694541e-05, + "loss": 2.1972, + "step": 1870 + }, + { + "epoch": 0.38, + "learning_rate": 1.9700269903813015e-05, + "loss": 2.0975, + "step": 1871 + }, + { + "epoch": 0.38, + "learning_rate": 1.9699850021254615e-05, + "loss": 2.1326, + "step": 1872 + }, + { + "epoch": 0.38, + "learning_rate": 1.9699429849282737e-05, + "loss": 2.1491, + "step": 1873 + }, + { + "epoch": 0.38, + "learning_rate": 1.9699009387909916e-05, + "loss": 2.134, + "step": 1874 + }, + { + "epoch": 0.38, + "learning_rate": 1.9698588637148705e-05, + "loss": 2.1675, + "step": 1875 + }, + { + "epoch": 0.38, + "learning_rate": 1.969816759701165e-05, + "loss": 2.1672, + "step": 1876 + }, + { + "epoch": 0.38, + "learning_rate": 1.9697746267511316e-05, + "loss": 2.1492, + "step": 1877 + }, + { + "epoch": 0.38, + "learning_rate": 1.9697324648660278e-05, + "loss": 2.2016, + "step": 1878 + }, + { + "epoch": 0.38, + "learning_rate": 1.969690274047111e-05, + "loss": 2.1661, + "step": 1879 + }, + { + "epoch": 0.38, + "learning_rate": 1.96964805429564e-05, + "loss": 2.1379, + "step": 1880 + }, + { + "epoch": 0.38, + "learning_rate": 1.9696058056128748e-05, + "loss": 2.2215, + "step": 1881 + }, + { + "epoch": 0.38, + "learning_rate": 1.969563528000076e-05, + "loss": 2.2761, + "step": 1882 + }, + { + "epoch": 0.38, + "learning_rate": 1.9695212214585054e-05, + "loss": 2.2076, + "step": 1883 + }, + { + "epoch": 0.38, + "learning_rate": 1.969478885989425e-05, + "loss": 2.1405, + "step": 1884 + }, + { + "epoch": 0.38, + "learning_rate": 1.969436521594097e-05, + "loss": 2.1936, + "step": 1885 + }, + { + "epoch": 0.38, + "learning_rate": 1.9693941282737867e-05, + "loss": 2.1636, + "step": 1886 + }, + { + "epoch": 0.38, + "learning_rate": 1.9693517060297586e-05, + "loss": 2.1757, + "step": 1887 + }, + { + "epoch": 0.38, + "learning_rate": 1.9693092548632782e-05, + "loss": 2.2029, + "step": 1888 + }, + { + "epoch": 0.38, + "learning_rate": 1.9692667747756124e-05, + "loss": 2.1868, + "step": 1889 + }, + { + "epoch": 0.38, + "learning_rate": 1.9692242657680286e-05, + "loss": 2.1906, + "step": 1890 + }, + { + "epoch": 0.38, + "learning_rate": 1.9691817278417954e-05, + "loss": 2.2026, + "step": 1891 + }, + { + "epoch": 0.38, + "learning_rate": 1.9691391609981813e-05, + "loss": 2.1225, + "step": 1892 + }, + { + "epoch": 0.38, + "learning_rate": 1.969096565238457e-05, + "loss": 2.1457, + "step": 1893 + }, + { + "epoch": 0.38, + "learning_rate": 1.969053940563893e-05, + "loss": 2.1828, + "step": 1894 + }, + { + "epoch": 0.38, + "learning_rate": 1.969011286975761e-05, + "loss": 2.1513, + "step": 1895 + }, + { + "epoch": 0.38, + "learning_rate": 1.9689686044753346e-05, + "loss": 2.1819, + "step": 1896 + }, + { + "epoch": 0.39, + "learning_rate": 1.9689258930638863e-05, + "loss": 2.1296, + "step": 1897 + }, + { + "epoch": 0.39, + "learning_rate": 1.968883152742691e-05, + "loss": 2.16, + "step": 1898 + }, + { + "epoch": 0.39, + "learning_rate": 1.9688403835130238e-05, + "loss": 2.1617, + "step": 1899 + }, + { + "epoch": 0.39, + "learning_rate": 1.9687975853761604e-05, + "loss": 2.1354, + "step": 1900 + }, + { + "epoch": 0.39, + "learning_rate": 1.9687547583333786e-05, + "loss": 2.2689, + "step": 1901 + }, + { + "epoch": 0.39, + "learning_rate": 1.9687119023859555e-05, + "loss": 2.1956, + "step": 1902 + }, + { + "epoch": 0.39, + "learning_rate": 1.9686690175351702e-05, + "loss": 2.1533, + "step": 1903 + }, + { + "epoch": 0.39, + "learning_rate": 1.968626103782302e-05, + "loss": 2.1192, + "step": 1904 + }, + { + "epoch": 0.39, + "learning_rate": 1.9685831611286312e-05, + "loss": 2.159, + "step": 1905 + }, + { + "epoch": 0.39, + "learning_rate": 1.9685401895754393e-05, + "loss": 2.0888, + "step": 1906 + }, + { + "epoch": 0.39, + "learning_rate": 1.9684971891240087e-05, + "loss": 2.1513, + "step": 1907 + }, + { + "epoch": 0.39, + "learning_rate": 1.9684541597756223e-05, + "loss": 2.1703, + "step": 1908 + }, + { + "epoch": 0.39, + "learning_rate": 1.9684111015315633e-05, + "loss": 2.1528, + "step": 1909 + }, + { + "epoch": 0.39, + "learning_rate": 1.9683680143931174e-05, + "loss": 2.2144, + "step": 1910 + }, + { + "epoch": 0.39, + "learning_rate": 1.968324898361569e-05, + "loss": 2.1768, + "step": 1911 + }, + { + "epoch": 0.39, + "learning_rate": 1.9682817534382062e-05, + "loss": 2.1585, + "step": 1912 + }, + { + "epoch": 0.39, + "learning_rate": 1.968238579624315e-05, + "loss": 2.2545, + "step": 1913 + }, + { + "epoch": 0.39, + "learning_rate": 1.9681953769211833e-05, + "loss": 2.1121, + "step": 1914 + }, + { + "epoch": 0.39, + "learning_rate": 1.9681521453301017e-05, + "loss": 2.125, + "step": 1915 + }, + { + "epoch": 0.39, + "learning_rate": 1.9681088848523587e-05, + "loss": 2.0659, + "step": 1916 + }, + { + "epoch": 0.39, + "learning_rate": 1.9680655954892452e-05, + "loss": 2.1542, + "step": 1917 + }, + { + "epoch": 0.39, + "learning_rate": 1.9680222772420538e-05, + "loss": 2.2097, + "step": 1918 + }, + { + "epoch": 0.39, + "learning_rate": 1.967978930112076e-05, + "loss": 2.1769, + "step": 1919 + }, + { + "epoch": 0.39, + "learning_rate": 1.9679355541006056e-05, + "loss": 2.1398, + "step": 1920 + }, + { + "epoch": 0.39, + "learning_rate": 1.9678921492089364e-05, + "loss": 2.1736, + "step": 1921 + }, + { + "epoch": 0.39, + "learning_rate": 1.967848715438364e-05, + "loss": 2.1799, + "step": 1922 + }, + { + "epoch": 0.39, + "learning_rate": 1.967805252790184e-05, + "loss": 2.0548, + "step": 1923 + }, + { + "epoch": 0.39, + "learning_rate": 1.967761761265693e-05, + "loss": 2.1266, + "step": 1924 + }, + { + "epoch": 0.39, + "learning_rate": 1.9677182408661894e-05, + "loss": 2.1847, + "step": 1925 + }, + { + "epoch": 0.39, + "learning_rate": 1.9676746915929713e-05, + "loss": 2.133, + "step": 1926 + }, + { + "epoch": 0.39, + "learning_rate": 1.9676311134473375e-05, + "loss": 2.2474, + "step": 1927 + }, + { + "epoch": 0.39, + "learning_rate": 1.9675875064305888e-05, + "loss": 2.1287, + "step": 1928 + }, + { + "epoch": 0.39, + "learning_rate": 1.9675438705440266e-05, + "loss": 2.1158, + "step": 1929 + }, + { + "epoch": 0.39, + "learning_rate": 1.9675002057889524e-05, + "loss": 2.1057, + "step": 1930 + }, + { + "epoch": 0.39, + "learning_rate": 1.9674565121666688e-05, + "loss": 2.2047, + "step": 1931 + }, + { + "epoch": 0.39, + "learning_rate": 1.9674127896784802e-05, + "loss": 2.1941, + "step": 1932 + }, + { + "epoch": 0.39, + "learning_rate": 1.9673690383256907e-05, + "loss": 2.2007, + "step": 1933 + }, + { + "epoch": 0.39, + "learning_rate": 1.9673252581096053e-05, + "loss": 2.1866, + "step": 1934 + }, + { + "epoch": 0.39, + "learning_rate": 1.9672814490315312e-05, + "loss": 2.1639, + "step": 1935 + }, + { + "epoch": 0.39, + "learning_rate": 1.967237611092775e-05, + "loss": 2.2196, + "step": 1936 + }, + { + "epoch": 0.39, + "learning_rate": 1.9671937442946446e-05, + "loss": 2.1463, + "step": 1937 + }, + { + "epoch": 0.39, + "learning_rate": 1.967149848638449e-05, + "loss": 2.1761, + "step": 1938 + }, + { + "epoch": 0.39, + "learning_rate": 1.9671059241254977e-05, + "loss": 2.2442, + "step": 1939 + }, + { + "epoch": 0.39, + "learning_rate": 1.967061970757102e-05, + "loss": 2.1804, + "step": 1940 + }, + { + "epoch": 0.39, + "learning_rate": 1.9670179885345724e-05, + "loss": 2.1482, + "step": 1941 + }, + { + "epoch": 0.39, + "learning_rate": 1.9669739774592215e-05, + "loss": 2.1932, + "step": 1942 + }, + { + "epoch": 0.39, + "learning_rate": 1.9669299375323628e-05, + "loss": 2.1763, + "step": 1943 + }, + { + "epoch": 0.39, + "learning_rate": 1.9668858687553102e-05, + "loss": 2.2587, + "step": 1944 + }, + { + "epoch": 0.39, + "learning_rate": 1.966841771129378e-05, + "loss": 2.1439, + "step": 1945 + }, + { + "epoch": 0.4, + "learning_rate": 1.9667976446558826e-05, + "loss": 2.1245, + "step": 1946 + }, + { + "epoch": 0.4, + "learning_rate": 1.96675348933614e-05, + "loss": 2.1863, + "step": 1947 + }, + { + "epoch": 0.4, + "learning_rate": 1.9667093051714686e-05, + "loss": 2.1594, + "step": 1948 + }, + { + "epoch": 0.4, + "learning_rate": 1.966665092163186e-05, + "loss": 2.1507, + "step": 1949 + }, + { + "epoch": 0.4, + "learning_rate": 1.9666208503126115e-05, + "loss": 2.1351, + "step": 1950 + }, + { + "epoch": 0.4, + "learning_rate": 1.966576579621065e-05, + "loss": 2.1713, + "step": 1951 + }, + { + "epoch": 0.4, + "learning_rate": 1.9665322800898678e-05, + "loss": 2.1446, + "step": 1952 + }, + { + "epoch": 0.4, + "learning_rate": 1.966487951720341e-05, + "loss": 2.1507, + "step": 1953 + }, + { + "epoch": 0.4, + "learning_rate": 1.9664435945138082e-05, + "loss": 2.1069, + "step": 1954 + }, + { + "epoch": 0.4, + "learning_rate": 1.966399208471592e-05, + "loss": 2.1796, + "step": 1955 + }, + { + "epoch": 0.4, + "learning_rate": 1.966354793595017e-05, + "loss": 2.2263, + "step": 1956 + }, + { + "epoch": 0.4, + "learning_rate": 1.9663103498854087e-05, + "loss": 2.1847, + "step": 1957 + }, + { + "epoch": 0.4, + "learning_rate": 1.966265877344093e-05, + "loss": 2.1335, + "step": 1958 + }, + { + "epoch": 0.4, + "learning_rate": 1.9662213759723964e-05, + "loss": 2.1558, + "step": 1959 + }, + { + "epoch": 0.4, + "learning_rate": 1.9661768457716474e-05, + "loss": 2.2139, + "step": 1960 + }, + { + "epoch": 0.4, + "learning_rate": 1.966132286743174e-05, + "loss": 2.1424, + "step": 1961 + }, + { + "epoch": 0.4, + "learning_rate": 1.9660876988883066e-05, + "loss": 2.1632, + "step": 1962 + }, + { + "epoch": 0.4, + "learning_rate": 1.9660430822083742e-05, + "loss": 2.0547, + "step": 1963 + }, + { + "epoch": 0.4, + "learning_rate": 1.965998436704709e-05, + "loss": 2.0611, + "step": 1964 + }, + { + "epoch": 0.4, + "learning_rate": 1.9659537623786428e-05, + "loss": 2.1702, + "step": 1965 + }, + { + "epoch": 0.4, + "learning_rate": 1.965909059231509e-05, + "loss": 2.2399, + "step": 1966 + }, + { + "epoch": 0.4, + "learning_rate": 1.9658643272646407e-05, + "loss": 2.2702, + "step": 1967 + }, + { + "epoch": 0.4, + "learning_rate": 1.9658195664793728e-05, + "loss": 2.1041, + "step": 1968 + }, + { + "epoch": 0.4, + "learning_rate": 1.965774776877041e-05, + "loss": 2.2273, + "step": 1969 + }, + { + "epoch": 0.4, + "learning_rate": 1.9657299584589817e-05, + "loss": 2.212, + "step": 1970 + }, + { + "epoch": 0.4, + "learning_rate": 1.9656851112265317e-05, + "loss": 2.1684, + "step": 1971 + }, + { + "epoch": 0.4, + "learning_rate": 1.96564023518103e-05, + "loss": 2.1902, + "step": 1972 + }, + { + "epoch": 0.4, + "learning_rate": 1.965595330323814e-05, + "loss": 2.1498, + "step": 1973 + }, + { + "epoch": 0.4, + "learning_rate": 1.9655503966562252e-05, + "loss": 2.2171, + "step": 1974 + }, + { + "epoch": 0.4, + "learning_rate": 1.9655054341796038e-05, + "loss": 2.1979, + "step": 1975 + }, + { + "epoch": 0.4, + "learning_rate": 1.9654604428952905e-05, + "loss": 2.1305, + "step": 1976 + }, + { + "epoch": 0.4, + "learning_rate": 1.965415422804629e-05, + "loss": 2.1359, + "step": 1977 + }, + { + "epoch": 0.4, + "learning_rate": 1.9653703739089612e-05, + "loss": 2.208, + "step": 1978 + }, + { + "epoch": 0.4, + "learning_rate": 1.9653252962096324e-05, + "loss": 2.187, + "step": 1979 + }, + { + "epoch": 0.4, + "learning_rate": 1.965280189707987e-05, + "loss": 2.1293, + "step": 1980 + }, + { + "epoch": 0.4, + "learning_rate": 1.965235054405371e-05, + "loss": 2.1129, + "step": 1981 + }, + { + "epoch": 0.4, + "learning_rate": 1.9651898903031307e-05, + "loss": 2.1965, + "step": 1982 + }, + { + "epoch": 0.4, + "learning_rate": 1.965144697402614e-05, + "loss": 2.166, + "step": 1983 + }, + { + "epoch": 0.4, + "learning_rate": 1.9650994757051694e-05, + "loss": 2.1572, + "step": 1984 + }, + { + "epoch": 0.4, + "learning_rate": 1.9650542252121463e-05, + "loss": 2.2167, + "step": 1985 + }, + { + "epoch": 0.4, + "learning_rate": 1.9650089459248945e-05, + "loss": 2.0893, + "step": 1986 + }, + { + "epoch": 0.4, + "learning_rate": 1.964963637844765e-05, + "loss": 2.1378, + "step": 1987 + }, + { + "epoch": 0.4, + "learning_rate": 1.9649183009731096e-05, + "loss": 2.1516, + "step": 1988 + }, + { + "epoch": 0.4, + "learning_rate": 1.9648729353112813e-05, + "loss": 2.2058, + "step": 1989 + }, + { + "epoch": 0.4, + "learning_rate": 1.964827540860634e-05, + "loss": 2.206, + "step": 1990 + }, + { + "epoch": 0.4, + "learning_rate": 1.9647821176225207e-05, + "loss": 2.1503, + "step": 1991 + }, + { + "epoch": 0.4, + "learning_rate": 1.9647366655982983e-05, + "loss": 2.1451, + "step": 1992 + }, + { + "epoch": 0.4, + "learning_rate": 1.9646911847893224e-05, + "loss": 2.1706, + "step": 1993 + }, + { + "epoch": 0.4, + "learning_rate": 1.9646456751969496e-05, + "loss": 2.1102, + "step": 1994 + }, + { + "epoch": 0.4, + "learning_rate": 1.9646001368225382e-05, + "loss": 2.2173, + "step": 1995 + }, + { + "epoch": 0.41, + "learning_rate": 1.964554569667447e-05, + "loss": 2.2272, + "step": 1996 + }, + { + "epoch": 0.41, + "learning_rate": 1.964508973733035e-05, + "loss": 2.1572, + "step": 1997 + }, + { + "epoch": 0.41, + "learning_rate": 1.9644633490206632e-05, + "loss": 2.2156, + "step": 1998 + }, + { + "epoch": 0.41, + "learning_rate": 1.9644176955316926e-05, + "loss": 2.217, + "step": 1999 + }, + { + "epoch": 0.41, + "learning_rate": 1.9643720132674854e-05, + "loss": 2.1173, + "step": 2000 + }, + { + "epoch": 0.41, + "learning_rate": 1.964326302229405e-05, + "loss": 2.1391, + "step": 2001 + }, + { + "epoch": 0.41, + "learning_rate": 1.964280562418815e-05, + "loss": 2.2192, + "step": 2002 + }, + { + "epoch": 0.41, + "learning_rate": 1.96423479383708e-05, + "loss": 2.1693, + "step": 2003 + }, + { + "epoch": 0.41, + "learning_rate": 1.9641889964855653e-05, + "loss": 2.1478, + "step": 2004 + }, + { + "epoch": 0.41, + "learning_rate": 1.964143170365638e-05, + "loss": 2.1132, + "step": 2005 + }, + { + "epoch": 0.41, + "learning_rate": 1.964097315478665e-05, + "loss": 2.1994, + "step": 2006 + }, + { + "epoch": 0.41, + "learning_rate": 1.9640514318260153e-05, + "loss": 2.1624, + "step": 2007 + }, + { + "epoch": 0.41, + "learning_rate": 1.9640055194090568e-05, + "loss": 2.1806, + "step": 2008 + }, + { + "epoch": 0.41, + "learning_rate": 1.9639595782291597e-05, + "loss": 2.1881, + "step": 2009 + }, + { + "epoch": 0.41, + "learning_rate": 1.9639136082876954e-05, + "loss": 2.149, + "step": 2010 + }, + { + "epoch": 0.41, + "learning_rate": 1.9638676095860345e-05, + "loss": 2.1476, + "step": 2011 + }, + { + "epoch": 0.41, + "learning_rate": 1.96382158212555e-05, + "loss": 2.2481, + "step": 2012 + }, + { + "epoch": 0.41, + "learning_rate": 1.9637755259076154e-05, + "loss": 2.1821, + "step": 2013 + }, + { + "epoch": 0.41, + "learning_rate": 1.9637294409336044e-05, + "loss": 2.0932, + "step": 2014 + }, + { + "epoch": 0.41, + "learning_rate": 1.9636833272048922e-05, + "loss": 2.1224, + "step": 2015 + }, + { + "epoch": 0.41, + "learning_rate": 1.963637184722855e-05, + "loss": 2.0983, + "step": 2016 + }, + { + "epoch": 0.41, + "learning_rate": 1.9635910134888693e-05, + "loss": 2.1387, + "step": 2017 + }, + { + "epoch": 0.41, + "learning_rate": 1.9635448135043123e-05, + "loss": 2.2139, + "step": 2018 + }, + { + "epoch": 0.41, + "learning_rate": 1.9634985847705634e-05, + "loss": 2.144, + "step": 2019 + }, + { + "epoch": 0.41, + "learning_rate": 1.963452327289001e-05, + "loss": 2.201, + "step": 2020 + }, + { + "epoch": 0.41, + "learning_rate": 1.963406041061006e-05, + "loss": 2.1558, + "step": 2021 + }, + { + "epoch": 0.41, + "learning_rate": 1.9633597260879588e-05, + "loss": 2.1077, + "step": 2022 + }, + { + "epoch": 0.41, + "learning_rate": 1.963313382371242e-05, + "loss": 2.2081, + "step": 2023 + }, + { + "epoch": 0.41, + "learning_rate": 1.9632670099122374e-05, + "loss": 2.1401, + "step": 2024 + }, + { + "epoch": 0.41, + "learning_rate": 1.9632206087123296e-05, + "loss": 2.1483, + "step": 2025 + }, + { + "epoch": 0.41, + "learning_rate": 1.9631741787729026e-05, + "loss": 2.1337, + "step": 2026 + }, + { + "epoch": 0.41, + "learning_rate": 1.9631277200953417e-05, + "loss": 2.1469, + "step": 2027 + }, + { + "epoch": 0.41, + "learning_rate": 1.963081232681033e-05, + "loss": 2.1257, + "step": 2028 + }, + { + "epoch": 0.41, + "learning_rate": 1.963034716531364e-05, + "loss": 2.1423, + "step": 2029 + }, + { + "epoch": 0.41, + "learning_rate": 1.9629881716477223e-05, + "loss": 2.1248, + "step": 2030 + }, + { + "epoch": 0.41, + "learning_rate": 1.9629415980314964e-05, + "loss": 2.1366, + "step": 2031 + }, + { + "epoch": 0.41, + "learning_rate": 1.9628949956840762e-05, + "loss": 2.2086, + "step": 2032 + }, + { + "epoch": 0.41, + "learning_rate": 1.9628483646068523e-05, + "loss": 2.1762, + "step": 2033 + }, + { + "epoch": 0.41, + "learning_rate": 1.9628017048012154e-05, + "loss": 2.2283, + "step": 2034 + }, + { + "epoch": 0.41, + "learning_rate": 1.9627550162685588e-05, + "loss": 2.256, + "step": 2035 + }, + { + "epoch": 0.41, + "learning_rate": 1.9627082990102743e-05, + "loss": 2.2101, + "step": 2036 + }, + { + "epoch": 0.41, + "learning_rate": 1.9626615530277567e-05, + "loss": 2.0878, + "step": 2037 + }, + { + "epoch": 0.41, + "learning_rate": 1.9626147783224004e-05, + "loss": 2.2026, + "step": 2038 + }, + { + "epoch": 0.41, + "learning_rate": 1.962567974895601e-05, + "loss": 2.1695, + "step": 2039 + }, + { + "epoch": 0.41, + "learning_rate": 1.962521142748755e-05, + "loss": 2.1136, + "step": 2040 + }, + { + "epoch": 0.41, + "learning_rate": 1.9624742818832598e-05, + "loss": 2.2506, + "step": 2041 + }, + { + "epoch": 0.41, + "learning_rate": 1.9624273923005135e-05, + "loss": 2.1654, + "step": 2042 + }, + { + "epoch": 0.41, + "learning_rate": 1.962380474001915e-05, + "loss": 2.2023, + "step": 2043 + }, + { + "epoch": 0.41, + "learning_rate": 1.9623335269888642e-05, + "loss": 2.2118, + "step": 2044 + }, + { + "epoch": 0.42, + "learning_rate": 1.9622865512627625e-05, + "loss": 2.1983, + "step": 2045 + }, + { + "epoch": 0.42, + "learning_rate": 1.962239546825011e-05, + "loss": 2.2314, + "step": 2046 + }, + { + "epoch": 0.42, + "learning_rate": 1.962192513677012e-05, + "loss": 2.1535, + "step": 2047 + }, + { + "epoch": 0.42, + "learning_rate": 1.9621454518201692e-05, + "loss": 2.1653, + "step": 2048 + }, + { + "epoch": 0.42, + "learning_rate": 1.9620983612558863e-05, + "loss": 2.0978, + "step": 2049 + }, + { + "epoch": 0.42, + "learning_rate": 1.9620512419855684e-05, + "loss": 2.1047, + "step": 2050 + }, + { + "epoch": 0.42, + "learning_rate": 1.962004094010622e-05, + "loss": 2.1698, + "step": 2051 + }, + { + "epoch": 0.42, + "learning_rate": 1.9619569173324536e-05, + "loss": 2.1649, + "step": 2052 + }, + { + "epoch": 0.42, + "learning_rate": 1.9619097119524702e-05, + "loss": 2.1787, + "step": 2053 + }, + { + "epoch": 0.42, + "learning_rate": 1.9618624778720812e-05, + "loss": 2.2184, + "step": 2054 + }, + { + "epoch": 0.42, + "learning_rate": 1.9618152150926953e-05, + "loss": 2.1298, + "step": 2055 + }, + { + "epoch": 0.42, + "learning_rate": 1.961767923615723e-05, + "loss": 2.1588, + "step": 2056 + }, + { + "epoch": 0.42, + "learning_rate": 1.961720603442575e-05, + "loss": 2.1731, + "step": 2057 + }, + { + "epoch": 0.42, + "learning_rate": 1.9616732545746634e-05, + "loss": 2.2008, + "step": 2058 + }, + { + "epoch": 0.42, + "learning_rate": 1.961625877013401e-05, + "loss": 2.1641, + "step": 2059 + }, + { + "epoch": 0.42, + "learning_rate": 1.961578470760201e-05, + "loss": 2.157, + "step": 2060 + }, + { + "epoch": 0.42, + "learning_rate": 1.9615310358164784e-05, + "loss": 2.1397, + "step": 2061 + }, + { + "epoch": 0.42, + "learning_rate": 1.961483572183648e-05, + "loss": 2.178, + "step": 2062 + }, + { + "epoch": 0.42, + "learning_rate": 1.9614360798631266e-05, + "loss": 2.2302, + "step": 2063 + }, + { + "epoch": 0.42, + "learning_rate": 1.9613885588563308e-05, + "loss": 2.1763, + "step": 2064 + }, + { + "epoch": 0.42, + "learning_rate": 1.961341009164678e-05, + "loss": 2.2045, + "step": 2065 + }, + { + "epoch": 0.42, + "learning_rate": 1.961293430789588e-05, + "loss": 2.1742, + "step": 2066 + }, + { + "epoch": 0.42, + "learning_rate": 1.9612458237324795e-05, + "loss": 2.2205, + "step": 2067 + }, + { + "epoch": 0.42, + "learning_rate": 1.9611981879947735e-05, + "loss": 2.2164, + "step": 2068 + }, + { + "epoch": 0.42, + "learning_rate": 1.9611505235778913e-05, + "loss": 2.1529, + "step": 2069 + }, + { + "epoch": 0.42, + "learning_rate": 1.9611028304832547e-05, + "loss": 2.1079, + "step": 2070 + }, + { + "epoch": 0.42, + "learning_rate": 1.961055108712287e-05, + "loss": 2.2068, + "step": 2071 + }, + { + "epoch": 0.42, + "learning_rate": 1.961007358266412e-05, + "loss": 2.1402, + "step": 2072 + }, + { + "epoch": 0.42, + "learning_rate": 1.960959579147054e-05, + "loss": 2.171, + "step": 2073 + }, + { + "epoch": 0.42, + "learning_rate": 1.9609117713556392e-05, + "loss": 2.1711, + "step": 2074 + }, + { + "epoch": 0.42, + "learning_rate": 1.9608639348935938e-05, + "loss": 2.1821, + "step": 2075 + }, + { + "epoch": 0.42, + "learning_rate": 1.960816069762345e-05, + "loss": 2.1371, + "step": 2076 + }, + { + "epoch": 0.42, + "learning_rate": 1.960768175963321e-05, + "loss": 2.1552, + "step": 2077 + }, + { + "epoch": 0.42, + "learning_rate": 1.960720253497951e-05, + "loss": 2.1872, + "step": 2078 + }, + { + "epoch": 0.42, + "learning_rate": 1.9606723023676647e-05, + "loss": 2.1581, + "step": 2079 + }, + { + "epoch": 0.42, + "learning_rate": 1.960624322573893e-05, + "loss": 2.2098, + "step": 2080 + }, + { + "epoch": 0.42, + "learning_rate": 1.960576314118067e-05, + "loss": 2.1783, + "step": 2081 + }, + { + "epoch": 0.42, + "learning_rate": 1.9605282770016193e-05, + "loss": 2.1306, + "step": 2082 + }, + { + "epoch": 0.42, + "learning_rate": 1.9604802112259832e-05, + "loss": 2.1647, + "step": 2083 + }, + { + "epoch": 0.42, + "learning_rate": 1.9604321167925934e-05, + "loss": 2.2423, + "step": 2084 + }, + { + "epoch": 0.42, + "learning_rate": 1.960383993702884e-05, + "loss": 2.1808, + "step": 2085 + }, + { + "epoch": 0.42, + "learning_rate": 1.9603358419582915e-05, + "loss": 2.1812, + "step": 2086 + }, + { + "epoch": 0.42, + "learning_rate": 1.9602876615602522e-05, + "loss": 2.2002, + "step": 2087 + }, + { + "epoch": 0.42, + "learning_rate": 1.9602394525102037e-05, + "loss": 2.1646, + "step": 2088 + }, + { + "epoch": 0.42, + "learning_rate": 1.9601912148095846e-05, + "loss": 2.1347, + "step": 2089 + }, + { + "epoch": 0.42, + "learning_rate": 1.960142948459834e-05, + "loss": 2.1491, + "step": 2090 + }, + { + "epoch": 0.42, + "learning_rate": 1.9600946534623923e-05, + "loss": 2.2045, + "step": 2091 + }, + { + "epoch": 0.42, + "learning_rate": 1.9600463298187004e-05, + "loss": 2.1737, + "step": 2092 + }, + { + "epoch": 0.42, + "learning_rate": 1.9599979775301997e-05, + "loss": 2.137, + "step": 2093 + }, + { + "epoch": 0.43, + "learning_rate": 1.959949596598333e-05, + "loss": 2.2339, + "step": 2094 + }, + { + "epoch": 0.43, + "learning_rate": 1.9599011870245445e-05, + "loss": 2.1714, + "step": 2095 + }, + { + "epoch": 0.43, + "learning_rate": 1.9598527488102776e-05, + "loss": 2.133, + "step": 2096 + }, + { + "epoch": 0.43, + "learning_rate": 1.9598042819569784e-05, + "loss": 2.1379, + "step": 2097 + }, + { + "epoch": 0.43, + "learning_rate": 1.9597557864660922e-05, + "loss": 2.1287, + "step": 2098 + }, + { + "epoch": 0.43, + "learning_rate": 1.9597072623390668e-05, + "loss": 2.0925, + "step": 2099 + }, + { + "epoch": 0.43, + "learning_rate": 1.9596587095773496e-05, + "loss": 2.1221, + "step": 2100 + }, + { + "epoch": 0.43, + "learning_rate": 1.959610128182389e-05, + "loss": 2.1551, + "step": 2101 + }, + { + "epoch": 0.43, + "learning_rate": 1.9595615181556352e-05, + "loss": 2.1013, + "step": 2102 + }, + { + "epoch": 0.43, + "learning_rate": 1.959512879498538e-05, + "loss": 2.1086, + "step": 2103 + }, + { + "epoch": 0.43, + "learning_rate": 1.9594642122125487e-05, + "loss": 2.1478, + "step": 2104 + }, + { + "epoch": 0.43, + "learning_rate": 1.9594155162991196e-05, + "loss": 2.2205, + "step": 2105 + }, + { + "epoch": 0.43, + "learning_rate": 1.9593667917597034e-05, + "loss": 2.1522, + "step": 2106 + }, + { + "epoch": 0.43, + "learning_rate": 1.959318038595754e-05, + "loss": 2.0706, + "step": 2107 + }, + { + "epoch": 0.43, + "learning_rate": 1.9592692568087265e-05, + "loss": 2.1534, + "step": 2108 + }, + { + "epoch": 0.43, + "learning_rate": 1.9592204464000755e-05, + "loss": 2.1153, + "step": 2109 + }, + { + "epoch": 0.43, + "learning_rate": 1.9591716073712576e-05, + "loss": 2.1754, + "step": 2110 + }, + { + "epoch": 0.43, + "learning_rate": 1.9591227397237307e-05, + "loss": 2.1426, + "step": 2111 + }, + { + "epoch": 0.43, + "learning_rate": 1.9590738434589518e-05, + "loss": 2.1513, + "step": 2112 + }, + { + "epoch": 0.43, + "learning_rate": 1.959024918578381e-05, + "loss": 2.1273, + "step": 2113 + }, + { + "epoch": 0.43, + "learning_rate": 1.958975965083477e-05, + "loss": 2.1838, + "step": 2114 + }, + { + "epoch": 0.43, + "learning_rate": 1.958926982975701e-05, + "loss": 2.088, + "step": 2115 + }, + { + "epoch": 0.43, + "learning_rate": 1.9588779722565142e-05, + "loss": 2.1013, + "step": 2116 + }, + { + "epoch": 0.43, + "learning_rate": 1.9588289329273792e-05, + "loss": 2.1887, + "step": 2117 + }, + { + "epoch": 0.43, + "learning_rate": 1.958779864989759e-05, + "loss": 2.1547, + "step": 2118 + }, + { + "epoch": 0.43, + "learning_rate": 1.9587307684451177e-05, + "loss": 2.2303, + "step": 2119 + }, + { + "epoch": 0.43, + "learning_rate": 1.95868164329492e-05, + "loss": 2.2013, + "step": 2120 + }, + { + "epoch": 0.43, + "learning_rate": 1.9586324895406323e-05, + "loss": 2.1737, + "step": 2121 + }, + { + "epoch": 0.43, + "learning_rate": 1.95858330718372e-05, + "loss": 2.1813, + "step": 2122 + }, + { + "epoch": 0.43, + "learning_rate": 1.958534096225652e-05, + "loss": 2.157, + "step": 2123 + }, + { + "epoch": 0.43, + "learning_rate": 1.9584848566678954e-05, + "loss": 2.2288, + "step": 2124 + }, + { + "epoch": 0.43, + "learning_rate": 1.9584355885119196e-05, + "loss": 2.1336, + "step": 2125 + }, + { + "epoch": 0.43, + "learning_rate": 1.958386291759195e-05, + "loss": 2.1514, + "step": 2126 + }, + { + "epoch": 0.43, + "learning_rate": 1.9583369664111925e-05, + "loss": 2.2031, + "step": 2127 + }, + { + "epoch": 0.43, + "learning_rate": 1.9582876124693837e-05, + "loss": 2.0901, + "step": 2128 + }, + { + "epoch": 0.43, + "learning_rate": 1.958238229935241e-05, + "loss": 2.1926, + "step": 2129 + }, + { + "epoch": 0.43, + "learning_rate": 1.9581888188102375e-05, + "loss": 2.1244, + "step": 2130 + }, + { + "epoch": 0.43, + "learning_rate": 1.958139379095848e-05, + "loss": 2.1583, + "step": 2131 + }, + { + "epoch": 0.43, + "learning_rate": 1.958089910793548e-05, + "loss": 2.144, + "step": 2132 + }, + { + "epoch": 0.43, + "learning_rate": 1.9580404139048123e-05, + "loss": 2.134, + "step": 2133 + }, + { + "epoch": 0.43, + "learning_rate": 1.9579908884311186e-05, + "loss": 2.1174, + "step": 2134 + }, + { + "epoch": 0.43, + "learning_rate": 1.9579413343739448e-05, + "loss": 2.1049, + "step": 2135 + }, + { + "epoch": 0.43, + "learning_rate": 1.9578917517347686e-05, + "loss": 2.209, + "step": 2136 + }, + { + "epoch": 0.43, + "learning_rate": 1.95784214051507e-05, + "loss": 2.2111, + "step": 2137 + }, + { + "epoch": 0.43, + "learning_rate": 1.957792500716329e-05, + "loss": 2.1489, + "step": 2138 + }, + { + "epoch": 0.43, + "learning_rate": 1.957742832340027e-05, + "loss": 2.1392, + "step": 2139 + }, + { + "epoch": 0.43, + "learning_rate": 1.9576931353876454e-05, + "loss": 2.2564, + "step": 2140 + }, + { + "epoch": 0.43, + "learning_rate": 1.9576434098606674e-05, + "loss": 2.2315, + "step": 2141 + }, + { + "epoch": 0.43, + "learning_rate": 1.9575936557605765e-05, + "loss": 2.226, + "step": 2142 + }, + { + "epoch": 0.43, + "learning_rate": 1.9575438730888575e-05, + "loss": 2.1498, + "step": 2143 + }, + { + "epoch": 0.44, + "learning_rate": 1.9574940618469954e-05, + "loss": 2.2158, + "step": 2144 + }, + { + "epoch": 0.44, + "learning_rate": 1.9574442220364768e-05, + "loss": 2.1268, + "step": 2145 + }, + { + "epoch": 0.44, + "learning_rate": 1.957394353658788e-05, + "loss": 2.1552, + "step": 2146 + }, + { + "epoch": 0.44, + "learning_rate": 1.957344456715418e-05, + "loss": 2.2502, + "step": 2147 + }, + { + "epoch": 0.44, + "learning_rate": 1.957294531207855e-05, + "loss": 2.1458, + "step": 2148 + }, + { + "epoch": 0.44, + "learning_rate": 1.9572445771375882e-05, + "loss": 2.1445, + "step": 2149 + }, + { + "epoch": 0.44, + "learning_rate": 1.9571945945061088e-05, + "loss": 2.1729, + "step": 2150 + }, + { + "epoch": 0.44, + "learning_rate": 1.9571445833149078e-05, + "loss": 2.172, + "step": 2151 + }, + { + "epoch": 0.44, + "learning_rate": 1.9570945435654773e-05, + "loss": 2.2303, + "step": 2152 + }, + { + "epoch": 0.44, + "learning_rate": 1.9570444752593106e-05, + "loss": 2.0949, + "step": 2153 + }, + { + "epoch": 0.44, + "learning_rate": 1.9569943783979016e-05, + "loss": 2.1629, + "step": 2154 + }, + { + "epoch": 0.44, + "learning_rate": 1.9569442529827444e-05, + "loss": 2.1607, + "step": 2155 + }, + { + "epoch": 0.44, + "learning_rate": 1.9568940990153354e-05, + "loss": 2.0994, + "step": 2156 + }, + { + "epoch": 0.44, + "learning_rate": 1.9568439164971705e-05, + "loss": 2.1692, + "step": 2157 + }, + { + "epoch": 0.44, + "learning_rate": 1.9567937054297476e-05, + "loss": 2.0926, + "step": 2158 + }, + { + "epoch": 0.44, + "learning_rate": 1.9567434658145638e-05, + "loss": 2.1115, + "step": 2159 + }, + { + "epoch": 0.44, + "learning_rate": 1.956693197653119e-05, + "loss": 2.2099, + "step": 2160 + }, + { + "epoch": 0.44, + "learning_rate": 1.956642900946913e-05, + "loss": 2.1763, + "step": 2161 + }, + { + "epoch": 0.44, + "learning_rate": 1.956592575697446e-05, + "loss": 2.1995, + "step": 2162 + }, + { + "epoch": 0.44, + "learning_rate": 1.95654222190622e-05, + "loss": 2.2296, + "step": 2163 + }, + { + "epoch": 0.44, + "learning_rate": 1.9564918395747368e-05, + "loss": 2.0673, + "step": 2164 + }, + { + "epoch": 0.44, + "learning_rate": 1.9564414287045002e-05, + "loss": 2.2319, + "step": 2165 + }, + { + "epoch": 0.44, + "learning_rate": 1.9563909892970145e-05, + "loss": 2.1738, + "step": 2166 + }, + { + "epoch": 0.44, + "learning_rate": 1.9563405213537843e-05, + "loss": 2.1947, + "step": 2167 + }, + { + "epoch": 0.44, + "learning_rate": 1.9562900248763153e-05, + "loss": 2.1336, + "step": 2168 + }, + { + "epoch": 0.44, + "learning_rate": 1.9562394998661143e-05, + "loss": 2.1512, + "step": 2169 + }, + { + "epoch": 0.44, + "learning_rate": 1.9561889463246886e-05, + "loss": 2.1081, + "step": 2170 + }, + { + "epoch": 0.44, + "learning_rate": 1.9561383642535472e-05, + "loss": 2.1389, + "step": 2171 + }, + { + "epoch": 0.44, + "learning_rate": 1.9560877536541983e-05, + "loss": 2.1069, + "step": 2172 + }, + { + "epoch": 0.44, + "learning_rate": 1.956037114528153e-05, + "loss": 2.164, + "step": 2173 + }, + { + "epoch": 0.44, + "learning_rate": 1.9559864468769216e-05, + "loss": 2.1538, + "step": 2174 + }, + { + "epoch": 0.44, + "learning_rate": 1.9559357507020163e-05, + "loss": 2.2007, + "step": 2175 + }, + { + "epoch": 0.44, + "learning_rate": 1.9558850260049493e-05, + "loss": 2.1373, + "step": 2176 + }, + { + "epoch": 0.44, + "learning_rate": 1.955834272787234e-05, + "loss": 2.1596, + "step": 2177 + }, + { + "epoch": 0.44, + "learning_rate": 1.9557834910503847e-05, + "loss": 2.1495, + "step": 2178 + }, + { + "epoch": 0.44, + "learning_rate": 1.9557326807959173e-05, + "loss": 2.2256, + "step": 2179 + }, + { + "epoch": 0.44, + "learning_rate": 1.955681842025347e-05, + "loss": 2.1473, + "step": 2180 + }, + { + "epoch": 0.44, + "learning_rate": 1.9556309747401912e-05, + "loss": 2.2018, + "step": 2181 + }, + { + "epoch": 0.44, + "learning_rate": 1.9555800789419674e-05, + "loss": 2.2872, + "step": 2182 + }, + { + "epoch": 0.44, + "learning_rate": 1.955529154632194e-05, + "loss": 2.1644, + "step": 2183 + }, + { + "epoch": 0.44, + "learning_rate": 1.955478201812391e-05, + "loss": 2.2217, + "step": 2184 + }, + { + "epoch": 0.44, + "learning_rate": 1.955427220484078e-05, + "loss": 2.1496, + "step": 2185 + }, + { + "epoch": 0.44, + "learning_rate": 1.955376210648776e-05, + "loss": 2.095, + "step": 2186 + }, + { + "epoch": 0.44, + "learning_rate": 1.955325172308008e-05, + "loss": 2.1889, + "step": 2187 + }, + { + "epoch": 0.44, + "learning_rate": 1.9552741054632963e-05, + "loss": 2.1383, + "step": 2188 + }, + { + "epoch": 0.44, + "learning_rate": 1.9552230101161637e-05, + "loss": 2.18, + "step": 2189 + }, + { + "epoch": 0.44, + "learning_rate": 1.9551718862681363e-05, + "loss": 2.2413, + "step": 2190 + }, + { + "epoch": 0.44, + "learning_rate": 1.9551207339207384e-05, + "loss": 2.1947, + "step": 2191 + }, + { + "epoch": 0.44, + "learning_rate": 1.9550695530754963e-05, + "loss": 2.1723, + "step": 2192 + }, + { + "epoch": 0.45, + "learning_rate": 1.9550183437339375e-05, + "loss": 2.2465, + "step": 2193 + }, + { + "epoch": 0.45, + "learning_rate": 1.95496710589759e-05, + "loss": 2.151, + "step": 2194 + }, + { + "epoch": 0.45, + "learning_rate": 1.9549158395679818e-05, + "loss": 2.2117, + "step": 2195 + }, + { + "epoch": 0.45, + "learning_rate": 1.9548645447466433e-05, + "loss": 2.1413, + "step": 2196 + }, + { + "epoch": 0.45, + "learning_rate": 1.9548132214351043e-05, + "loss": 2.1209, + "step": 2197 + }, + { + "epoch": 0.45, + "learning_rate": 1.9547618696348972e-05, + "loss": 2.1369, + "step": 2198 + }, + { + "epoch": 0.45, + "learning_rate": 1.954710489347553e-05, + "loss": 2.1991, + "step": 2199 + }, + { + "epoch": 0.45, + "learning_rate": 1.9546590805746054e-05, + "loss": 2.1339, + "step": 2200 + }, + { + "epoch": 0.45, + "learning_rate": 1.954607643317588e-05, + "loss": 2.1673, + "step": 2201 + }, + { + "epoch": 0.45, + "learning_rate": 1.954556177578036e-05, + "loss": 2.2146, + "step": 2202 + }, + { + "epoch": 0.45, + "learning_rate": 1.9545046833574843e-05, + "loss": 2.1438, + "step": 2203 + }, + { + "epoch": 0.45, + "learning_rate": 1.9544531606574697e-05, + "loss": 2.1713, + "step": 2204 + }, + { + "epoch": 0.45, + "learning_rate": 1.9544016094795294e-05, + "loss": 2.1666, + "step": 2205 + }, + { + "epoch": 0.45, + "learning_rate": 1.9543500298252017e-05, + "loss": 2.191, + "step": 2206 + }, + { + "epoch": 0.45, + "learning_rate": 1.9542984216960254e-05, + "loss": 2.1841, + "step": 2207 + }, + { + "epoch": 0.45, + "learning_rate": 1.9542467850935405e-05, + "loss": 2.128, + "step": 2208 + }, + { + "epoch": 0.45, + "learning_rate": 1.9541951200192876e-05, + "loss": 2.1664, + "step": 2209 + }, + { + "epoch": 0.45, + "learning_rate": 1.9541434264748078e-05, + "loss": 2.1844, + "step": 2210 + }, + { + "epoch": 0.45, + "learning_rate": 1.954091704461644e-05, + "loss": 2.1731, + "step": 2211 + }, + { + "epoch": 0.45, + "learning_rate": 1.9540399539813393e-05, + "loss": 2.1825, + "step": 2212 + }, + { + "epoch": 0.45, + "learning_rate": 1.9539881750354376e-05, + "loss": 2.1829, + "step": 2213 + }, + { + "epoch": 0.45, + "learning_rate": 1.9539363676254842e-05, + "loss": 2.238, + "step": 2214 + }, + { + "epoch": 0.45, + "learning_rate": 1.9538845317530243e-05, + "loss": 2.0982, + "step": 2215 + }, + { + "epoch": 0.45, + "learning_rate": 1.953832667419605e-05, + "loss": 2.1998, + "step": 2216 + }, + { + "epoch": 0.45, + "learning_rate": 1.953780774626774e-05, + "loss": 2.1636, + "step": 2217 + }, + { + "epoch": 0.45, + "learning_rate": 1.9537288533760787e-05, + "loss": 2.1486, + "step": 2218 + }, + { + "epoch": 0.45, + "learning_rate": 1.9536769036690693e-05, + "loss": 2.1045, + "step": 2219 + }, + { + "epoch": 0.45, + "learning_rate": 1.953624925507295e-05, + "loss": 2.1367, + "step": 2220 + }, + { + "epoch": 0.45, + "learning_rate": 1.953572918892307e-05, + "loss": 2.1511, + "step": 2221 + }, + { + "epoch": 0.45, + "learning_rate": 1.953520883825657e-05, + "loss": 2.2317, + "step": 2222 + }, + { + "epoch": 0.45, + "learning_rate": 1.9534688203088978e-05, + "loss": 2.144, + "step": 2223 + }, + { + "epoch": 0.45, + "learning_rate": 1.9534167283435825e-05, + "loss": 2.103, + "step": 2224 + }, + { + "epoch": 0.45, + "learning_rate": 1.9533646079312656e-05, + "loss": 2.1242, + "step": 2225 + }, + { + "epoch": 0.45, + "learning_rate": 1.953312459073502e-05, + "loss": 2.1451, + "step": 2226 + }, + { + "epoch": 0.45, + "learning_rate": 1.9532602817718474e-05, + "loss": 2.1857, + "step": 2227 + }, + { + "epoch": 0.45, + "learning_rate": 1.9532080760278594e-05, + "loss": 2.2169, + "step": 2228 + }, + { + "epoch": 0.45, + "learning_rate": 1.953155841843095e-05, + "loss": 2.1749, + "step": 2229 + }, + { + "epoch": 0.45, + "learning_rate": 1.9531035792191127e-05, + "loss": 2.1666, + "step": 2230 + }, + { + "epoch": 0.45, + "learning_rate": 1.9530512881574725e-05, + "loss": 2.1376, + "step": 2231 + }, + { + "epoch": 0.45, + "learning_rate": 1.952998968659734e-05, + "loss": 2.2377, + "step": 2232 + }, + { + "epoch": 0.45, + "learning_rate": 1.9529466207274583e-05, + "loss": 2.1216, + "step": 2233 + }, + { + "epoch": 0.45, + "learning_rate": 1.9528942443622075e-05, + "loss": 2.1874, + "step": 2234 + }, + { + "epoch": 0.45, + "learning_rate": 1.9528418395655443e-05, + "loss": 2.2051, + "step": 2235 + }, + { + "epoch": 0.45, + "learning_rate": 1.952789406339032e-05, + "loss": 2.1119, + "step": 2236 + }, + { + "epoch": 0.45, + "learning_rate": 1.9527369446842356e-05, + "loss": 2.152, + "step": 2237 + }, + { + "epoch": 0.45, + "learning_rate": 1.95268445460272e-05, + "loss": 2.1776, + "step": 2238 + }, + { + "epoch": 0.45, + "learning_rate": 1.9526319360960514e-05, + "loss": 2.1927, + "step": 2239 + }, + { + "epoch": 0.45, + "learning_rate": 1.9525793891657973e-05, + "loss": 2.141, + "step": 2240 + }, + { + "epoch": 0.45, + "learning_rate": 1.9525268138135244e-05, + "loss": 2.176, + "step": 2241 + }, + { + "epoch": 0.46, + "learning_rate": 1.9524742100408022e-05, + "loss": 2.1874, + "step": 2242 + }, + { + "epoch": 0.46, + "learning_rate": 1.9524215778492003e-05, + "loss": 2.1735, + "step": 2243 + }, + { + "epoch": 0.46, + "learning_rate": 1.9523689172402888e-05, + "loss": 2.2197, + "step": 2244 + }, + { + "epoch": 0.46, + "learning_rate": 1.952316228215639e-05, + "loss": 2.2692, + "step": 2245 + }, + { + "epoch": 0.46, + "learning_rate": 1.952263510776823e-05, + "loss": 2.192, + "step": 2246 + }, + { + "epoch": 0.46, + "learning_rate": 1.9522107649254135e-05, + "loss": 2.2148, + "step": 2247 + }, + { + "epoch": 0.46, + "learning_rate": 1.952157990662985e-05, + "loss": 2.044, + "step": 2248 + }, + { + "epoch": 0.46, + "learning_rate": 1.9521051879911108e-05, + "loss": 2.2062, + "step": 2249 + }, + { + "epoch": 0.46, + "learning_rate": 1.952052356911368e-05, + "loss": 2.1129, + "step": 2250 + }, + { + "epoch": 0.46, + "learning_rate": 1.9519994974253315e-05, + "loss": 2.1808, + "step": 2251 + }, + { + "epoch": 0.46, + "learning_rate": 1.9519466095345788e-05, + "loss": 2.2165, + "step": 2252 + }, + { + "epoch": 0.46, + "learning_rate": 1.9518936932406888e-05, + "loss": 2.1436, + "step": 2253 + }, + { + "epoch": 0.46, + "learning_rate": 1.9518407485452395e-05, + "loss": 2.2217, + "step": 2254 + }, + { + "epoch": 0.46, + "learning_rate": 1.9517877754498107e-05, + "loss": 2.1545, + "step": 2255 + }, + { + "epoch": 0.46, + "learning_rate": 1.951734773955983e-05, + "loss": 2.1491, + "step": 2256 + }, + { + "epoch": 0.46, + "learning_rate": 1.9516817440653382e-05, + "loss": 2.1278, + "step": 2257 + }, + { + "epoch": 0.46, + "learning_rate": 1.9516286857794582e-05, + "loss": 2.2592, + "step": 2258 + }, + { + "epoch": 0.46, + "learning_rate": 1.951575599099926e-05, + "loss": 2.1222, + "step": 2259 + }, + { + "epoch": 0.46, + "learning_rate": 1.9515224840283255e-05, + "loss": 2.2248, + "step": 2260 + }, + { + "epoch": 0.46, + "learning_rate": 1.951469340566242e-05, + "loss": 2.2058, + "step": 2261 + }, + { + "epoch": 0.46, + "learning_rate": 1.9514161687152602e-05, + "loss": 2.1252, + "step": 2262 + }, + { + "epoch": 0.46, + "learning_rate": 1.9513629684769676e-05, + "loss": 2.1574, + "step": 2263 + }, + { + "epoch": 0.46, + "learning_rate": 1.951309739852951e-05, + "loss": 2.1592, + "step": 2264 + }, + { + "epoch": 0.46, + "learning_rate": 1.951256482844799e-05, + "loss": 2.1623, + "step": 2265 + }, + { + "epoch": 0.46, + "learning_rate": 1.9512031974540998e-05, + "loss": 2.1069, + "step": 2266 + }, + { + "epoch": 0.46, + "learning_rate": 1.9511498836824437e-05, + "loss": 2.187, + "step": 2267 + }, + { + "epoch": 0.46, + "learning_rate": 1.9510965415314218e-05, + "loss": 2.1468, + "step": 2268 + }, + { + "epoch": 0.46, + "learning_rate": 1.9510431710026253e-05, + "loss": 2.1171, + "step": 2269 + }, + { + "epoch": 0.46, + "learning_rate": 1.9509897720976467e-05, + "loss": 2.12, + "step": 2270 + }, + { + "epoch": 0.46, + "learning_rate": 1.950936344818079e-05, + "loss": 2.2043, + "step": 2271 + }, + { + "epoch": 0.46, + "learning_rate": 1.9508828891655167e-05, + "loss": 2.1514, + "step": 2272 + }, + { + "epoch": 0.46, + "learning_rate": 1.9508294051415545e-05, + "loss": 2.1258, + "step": 2273 + }, + { + "epoch": 0.46, + "learning_rate": 1.9507758927477884e-05, + "loss": 2.1767, + "step": 2274 + }, + { + "epoch": 0.46, + "learning_rate": 1.9507223519858148e-05, + "loss": 2.1902, + "step": 2275 + }, + { + "epoch": 0.46, + "learning_rate": 1.9506687828572313e-05, + "loss": 2.0874, + "step": 2276 + }, + { + "epoch": 0.46, + "learning_rate": 1.9506151853636364e-05, + "loss": 2.1434, + "step": 2277 + }, + { + "epoch": 0.46, + "learning_rate": 1.950561559506629e-05, + "loss": 2.1593, + "step": 2278 + }, + { + "epoch": 0.46, + "learning_rate": 1.950507905287809e-05, + "loss": 2.1132, + "step": 2279 + }, + { + "epoch": 0.46, + "learning_rate": 1.950454222708778e-05, + "loss": 2.1165, + "step": 2280 + }, + { + "epoch": 0.46, + "learning_rate": 1.950400511771137e-05, + "loss": 2.1296, + "step": 2281 + }, + { + "epoch": 0.46, + "learning_rate": 1.950346772476489e-05, + "loss": 2.1597, + "step": 2282 + }, + { + "epoch": 0.46, + "learning_rate": 1.950293004826437e-05, + "loss": 2.1306, + "step": 2283 + }, + { + "epoch": 0.46, + "learning_rate": 1.9502392088225857e-05, + "loss": 2.0988, + "step": 2284 + }, + { + "epoch": 0.46, + "learning_rate": 1.95018538446654e-05, + "loss": 2.1453, + "step": 2285 + }, + { + "epoch": 0.46, + "learning_rate": 1.9501315317599056e-05, + "loss": 2.1793, + "step": 2286 + }, + { + "epoch": 0.46, + "learning_rate": 1.9500776507042896e-05, + "loss": 2.1964, + "step": 2287 + }, + { + "epoch": 0.46, + "learning_rate": 1.9500237413013e-05, + "loss": 2.0982, + "step": 2288 + }, + { + "epoch": 0.46, + "learning_rate": 1.9499698035525446e-05, + "loss": 2.21, + "step": 2289 + }, + { + "epoch": 0.46, + "learning_rate": 1.9499158374596326e-05, + "loss": 2.1523, + "step": 2290 + }, + { + "epoch": 0.47, + "learning_rate": 1.9498618430241755e-05, + "loss": 2.2003, + "step": 2291 + }, + { + "epoch": 0.47, + "learning_rate": 1.9498078202477827e-05, + "loss": 2.1963, + "step": 2292 + }, + { + "epoch": 0.47, + "learning_rate": 1.949753769132067e-05, + "loss": 2.145, + "step": 2293 + }, + { + "epoch": 0.47, + "learning_rate": 1.9496996896786408e-05, + "loss": 2.1168, + "step": 2294 + }, + { + "epoch": 0.47, + "learning_rate": 1.949645581889118e-05, + "loss": 2.1393, + "step": 2295 + }, + { + "epoch": 0.47, + "learning_rate": 1.9495914457651128e-05, + "loss": 2.1129, + "step": 2296 + }, + { + "epoch": 0.47, + "learning_rate": 1.9495372813082403e-05, + "loss": 2.1669, + "step": 2297 + }, + { + "epoch": 0.47, + "learning_rate": 1.949483088520117e-05, + "loss": 2.1611, + "step": 2298 + }, + { + "epoch": 0.47, + "learning_rate": 1.9494288674023592e-05, + "loss": 2.1132, + "step": 2299 + }, + { + "epoch": 0.47, + "learning_rate": 1.9493746179565854e-05, + "loss": 2.0717, + "step": 2300 + }, + { + "epoch": 0.47, + "learning_rate": 1.949320340184414e-05, + "loss": 2.2103, + "step": 2301 + }, + { + "epoch": 0.47, + "learning_rate": 1.9492660340874638e-05, + "loss": 2.1084, + "step": 2302 + }, + { + "epoch": 0.47, + "learning_rate": 1.9492116996673562e-05, + "loss": 2.1174, + "step": 2303 + }, + { + "epoch": 0.47, + "learning_rate": 1.949157336925712e-05, + "loss": 2.1462, + "step": 2304 + }, + { + "epoch": 0.47, + "learning_rate": 1.9491029458641527e-05, + "loss": 2.1899, + "step": 2305 + }, + { + "epoch": 0.47, + "learning_rate": 1.949048526484302e-05, + "loss": 2.1691, + "step": 2306 + }, + { + "epoch": 0.47, + "learning_rate": 1.948994078787783e-05, + "loss": 2.0482, + "step": 2307 + }, + { + "epoch": 0.47, + "learning_rate": 1.9489396027762202e-05, + "loss": 2.2499, + "step": 2308 + }, + { + "epoch": 0.47, + "learning_rate": 1.9488850984512394e-05, + "loss": 2.1895, + "step": 2309 + }, + { + "epoch": 0.47, + "learning_rate": 1.9488305658144666e-05, + "loss": 2.2019, + "step": 2310 + }, + { + "epoch": 0.47, + "learning_rate": 1.948776004867529e-05, + "loss": 2.1537, + "step": 2311 + }, + { + "epoch": 0.47, + "learning_rate": 1.9487214156120546e-05, + "loss": 2.0967, + "step": 2312 + }, + { + "epoch": 0.47, + "learning_rate": 1.9486667980496718e-05, + "loss": 2.2067, + "step": 2313 + }, + { + "epoch": 0.47, + "learning_rate": 1.9486121521820106e-05, + "loss": 2.2231, + "step": 2314 + }, + { + "epoch": 0.47, + "learning_rate": 1.9485574780107015e-05, + "loss": 2.189, + "step": 2315 + }, + { + "epoch": 0.47, + "learning_rate": 1.9485027755373755e-05, + "loss": 2.1638, + "step": 2316 + }, + { + "epoch": 0.47, + "learning_rate": 1.948448044763665e-05, + "loss": 2.1957, + "step": 2317 + }, + { + "epoch": 0.47, + "learning_rate": 1.9483932856912024e-05, + "loss": 2.1057, + "step": 2318 + }, + { + "epoch": 0.47, + "learning_rate": 1.9483384983216225e-05, + "loss": 2.1957, + "step": 2319 + }, + { + "epoch": 0.47, + "learning_rate": 1.9482836826565593e-05, + "loss": 2.1406, + "step": 2320 + }, + { + "epoch": 0.47, + "learning_rate": 1.9482288386976487e-05, + "loss": 2.1633, + "step": 2321 + }, + { + "epoch": 0.47, + "learning_rate": 1.948173966446527e-05, + "loss": 2.0714, + "step": 2322 + }, + { + "epoch": 0.47, + "learning_rate": 1.948119065904831e-05, + "loss": 2.1903, + "step": 2323 + }, + { + "epoch": 0.47, + "learning_rate": 1.9480641370741994e-05, + "loss": 2.0932, + "step": 2324 + }, + { + "epoch": 0.47, + "learning_rate": 1.9480091799562706e-05, + "loss": 2.0496, + "step": 2325 + }, + { + "epoch": 0.47, + "learning_rate": 1.9479541945526844e-05, + "loss": 2.1595, + "step": 2326 + }, + { + "epoch": 0.47, + "learning_rate": 1.947899180865082e-05, + "loss": 2.1189, + "step": 2327 + }, + { + "epoch": 0.47, + "learning_rate": 1.9478441388951043e-05, + "loss": 2.1493, + "step": 2328 + }, + { + "epoch": 0.47, + "learning_rate": 1.9477890686443938e-05, + "loss": 2.079, + "step": 2329 + }, + { + "epoch": 0.47, + "learning_rate": 1.947733970114593e-05, + "loss": 2.192, + "step": 2330 + }, + { + "epoch": 0.47, + "learning_rate": 1.9476788433073465e-05, + "loss": 2.1736, + "step": 2331 + }, + { + "epoch": 0.47, + "learning_rate": 1.9476236882242992e-05, + "loss": 2.2774, + "step": 2332 + }, + { + "epoch": 0.47, + "learning_rate": 1.9475685048670963e-05, + "loss": 2.0695, + "step": 2333 + }, + { + "epoch": 0.47, + "learning_rate": 1.9475132932373844e-05, + "loss": 2.1534, + "step": 2334 + }, + { + "epoch": 0.47, + "learning_rate": 1.9474580533368116e-05, + "loss": 2.1915, + "step": 2335 + }, + { + "epoch": 0.47, + "learning_rate": 1.9474027851670254e-05, + "loss": 2.1867, + "step": 2336 + }, + { + "epoch": 0.47, + "learning_rate": 1.9473474887296744e-05, + "loss": 2.178, + "step": 2337 + }, + { + "epoch": 0.47, + "learning_rate": 1.9472921640264092e-05, + "loss": 2.1642, + "step": 2338 + }, + { + "epoch": 0.47, + "learning_rate": 1.94723681105888e-05, + "loss": 2.1967, + "step": 2339 + }, + { + "epoch": 0.47, + "learning_rate": 1.947181429828739e-05, + "loss": 2.1416, + "step": 2340 + }, + { + "epoch": 0.48, + "learning_rate": 1.9471260203376383e-05, + "loss": 2.2426, + "step": 2341 + }, + { + "epoch": 0.48, + "learning_rate": 1.9470705825872306e-05, + "loss": 2.206, + "step": 2342 + }, + { + "epoch": 0.48, + "learning_rate": 1.947015116579171e-05, + "loss": 2.102, + "step": 2343 + }, + { + "epoch": 0.48, + "learning_rate": 1.9469596223151138e-05, + "loss": 2.1565, + "step": 2344 + }, + { + "epoch": 0.48, + "learning_rate": 1.946904099796715e-05, + "loss": 2.161, + "step": 2345 + }, + { + "epoch": 0.48, + "learning_rate": 1.946848549025631e-05, + "loss": 2.2127, + "step": 2346 + }, + { + "epoch": 0.48, + "learning_rate": 1.9467929700035194e-05, + "loss": 2.1621, + "step": 2347 + }, + { + "epoch": 0.48, + "learning_rate": 1.9467373627320385e-05, + "loss": 2.1395, + "step": 2348 + }, + { + "epoch": 0.48, + "learning_rate": 1.9466817272128472e-05, + "loss": 2.1222, + "step": 2349 + }, + { + "epoch": 0.48, + "learning_rate": 1.946626063447606e-05, + "loss": 2.0447, + "step": 2350 + }, + { + "epoch": 0.48, + "learning_rate": 1.9465703714379755e-05, + "loss": 2.1347, + "step": 2351 + }, + { + "epoch": 0.48, + "learning_rate": 1.9465146511856172e-05, + "loss": 2.1485, + "step": 2352 + }, + { + "epoch": 0.48, + "learning_rate": 1.946458902692194e-05, + "loss": 2.175, + "step": 2353 + }, + { + "epoch": 0.48, + "learning_rate": 1.946403125959369e-05, + "loss": 2.1641, + "step": 2354 + }, + { + "epoch": 0.48, + "learning_rate": 1.9463473209888063e-05, + "loss": 2.1805, + "step": 2355 + }, + { + "epoch": 0.48, + "learning_rate": 1.946291487782171e-05, + "loss": 2.2096, + "step": 2356 + }, + { + "epoch": 0.48, + "learning_rate": 1.946235626341129e-05, + "loss": 2.1254, + "step": 2357 + }, + { + "epoch": 0.48, + "learning_rate": 1.946179736667347e-05, + "loss": 2.17, + "step": 2358 + }, + { + "epoch": 0.48, + "learning_rate": 1.946123818762493e-05, + "loss": 2.1003, + "step": 2359 + }, + { + "epoch": 0.48, + "learning_rate": 1.946067872628235e-05, + "loss": 2.147, + "step": 2360 + }, + { + "epoch": 0.48, + "learning_rate": 1.9460118982662427e-05, + "loss": 2.2079, + "step": 2361 + }, + { + "epoch": 0.48, + "learning_rate": 1.9459558956781855e-05, + "loss": 2.1682, + "step": 2362 + }, + { + "epoch": 0.48, + "learning_rate": 1.9458998648657346e-05, + "loss": 2.2009, + "step": 2363 + }, + { + "epoch": 0.48, + "learning_rate": 1.945843805830562e-05, + "loss": 2.1302, + "step": 2364 + }, + { + "epoch": 0.48, + "learning_rate": 1.9457877185743405e-05, + "loss": 2.2939, + "step": 2365 + }, + { + "epoch": 0.48, + "learning_rate": 1.9457316030987424e-05, + "loss": 2.1269, + "step": 2366 + }, + { + "epoch": 0.48, + "learning_rate": 1.9456754594054435e-05, + "loss": 2.1577, + "step": 2367 + }, + { + "epoch": 0.48, + "learning_rate": 1.9456192874961185e-05, + "loss": 2.2172, + "step": 2368 + }, + { + "epoch": 0.48, + "learning_rate": 1.9455630873724432e-05, + "loss": 2.1784, + "step": 2369 + }, + { + "epoch": 0.48, + "learning_rate": 1.9455068590360943e-05, + "loss": 2.2045, + "step": 2370 + }, + { + "epoch": 0.48, + "learning_rate": 1.94545060248875e-05, + "loss": 2.1674, + "step": 2371 + }, + { + "epoch": 0.48, + "learning_rate": 1.9453943177320877e-05, + "loss": 2.1035, + "step": 2372 + }, + { + "epoch": 0.48, + "learning_rate": 1.945338004767788e-05, + "loss": 2.1788, + "step": 2373 + }, + { + "epoch": 0.48, + "learning_rate": 1.945281663597531e-05, + "loss": 2.1946, + "step": 2374 + }, + { + "epoch": 0.48, + "learning_rate": 1.945225294222997e-05, + "loss": 2.0796, + "step": 2375 + }, + { + "epoch": 0.48, + "learning_rate": 1.9451688966458683e-05, + "loss": 2.1657, + "step": 2376 + }, + { + "epoch": 0.48, + "learning_rate": 1.9451124708678274e-05, + "loss": 2.1592, + "step": 2377 + }, + { + "epoch": 0.48, + "learning_rate": 1.9450560168905587e-05, + "loss": 2.1617, + "step": 2378 + }, + { + "epoch": 0.48, + "learning_rate": 1.9449995347157454e-05, + "loss": 2.1975, + "step": 2379 + }, + { + "epoch": 0.48, + "learning_rate": 1.9449430243450736e-05, + "loss": 2.0855, + "step": 2380 + }, + { + "epoch": 0.48, + "learning_rate": 1.944886485780229e-05, + "loss": 2.125, + "step": 2381 + }, + { + "epoch": 0.48, + "learning_rate": 1.9448299190228992e-05, + "loss": 2.113, + "step": 2382 + }, + { + "epoch": 0.48, + "learning_rate": 1.944773324074771e-05, + "loss": 2.106, + "step": 2383 + }, + { + "epoch": 0.48, + "learning_rate": 1.9447167009375336e-05, + "loss": 2.1897, + "step": 2384 + }, + { + "epoch": 0.48, + "learning_rate": 1.944660049612876e-05, + "loss": 2.2343, + "step": 2385 + }, + { + "epoch": 0.48, + "learning_rate": 1.944603370102489e-05, + "loss": 2.1391, + "step": 2386 + }, + { + "epoch": 0.48, + "learning_rate": 1.9445466624080637e-05, + "loss": 2.1441, + "step": 2387 + }, + { + "epoch": 0.48, + "learning_rate": 1.9444899265312923e-05, + "loss": 2.1352, + "step": 2388 + }, + { + "epoch": 0.48, + "learning_rate": 1.9444331624738665e-05, + "loss": 2.1534, + "step": 2389 + }, + { + "epoch": 0.49, + "learning_rate": 1.9443763702374815e-05, + "loss": 2.1323, + "step": 2390 + }, + { + "epoch": 0.49, + "learning_rate": 1.9443195498238302e-05, + "loss": 2.1687, + "step": 2391 + }, + { + "epoch": 0.49, + "learning_rate": 1.9442627012346094e-05, + "loss": 2.1125, + "step": 2392 + }, + { + "epoch": 0.49, + "learning_rate": 1.9442058244715143e-05, + "loss": 2.1395, + "step": 2393 + }, + { + "epoch": 0.49, + "learning_rate": 1.9441489195362427e-05, + "loss": 2.2364, + "step": 2394 + }, + { + "epoch": 0.49, + "learning_rate": 1.944091986430492e-05, + "loss": 2.1629, + "step": 2395 + }, + { + "epoch": 0.49, + "learning_rate": 1.944035025155961e-05, + "loss": 2.2104, + "step": 2396 + }, + { + "epoch": 0.49, + "learning_rate": 1.943978035714349e-05, + "loss": 2.1834, + "step": 2397 + }, + { + "epoch": 0.49, + "learning_rate": 1.9439210181073566e-05, + "loss": 2.2373, + "step": 2398 + }, + { + "epoch": 0.49, + "learning_rate": 1.9438639723366852e-05, + "loss": 2.1337, + "step": 2399 + }, + { + "epoch": 0.49, + "learning_rate": 1.9438068984040366e-05, + "loss": 2.1016, + "step": 2400 + }, + { + "epoch": 0.49, + "learning_rate": 1.9437497963111142e-05, + "loss": 2.2227, + "step": 2401 + }, + { + "epoch": 0.49, + "learning_rate": 1.9436926660596206e-05, + "loss": 2.1872, + "step": 2402 + }, + { + "epoch": 0.49, + "learning_rate": 1.9436355076512618e-05, + "loss": 2.1378, + "step": 2403 + }, + { + "epoch": 0.49, + "learning_rate": 1.9435783210877422e-05, + "loss": 2.1128, + "step": 2404 + }, + { + "epoch": 0.49, + "learning_rate": 1.943521106370769e-05, + "loss": 2.1523, + "step": 2405 + }, + { + "epoch": 0.49, + "learning_rate": 1.943463863502048e-05, + "loss": 2.0806, + "step": 2406 + }, + { + "epoch": 0.49, + "learning_rate": 1.9434065924832885e-05, + "loss": 2.1375, + "step": 2407 + }, + { + "epoch": 0.49, + "learning_rate": 1.9433492933161986e-05, + "loss": 2.1504, + "step": 2408 + }, + { + "epoch": 0.49, + "learning_rate": 1.9432919660024878e-05, + "loss": 2.2033, + "step": 2409 + }, + { + "epoch": 0.49, + "learning_rate": 1.943234610543867e-05, + "loss": 2.1346, + "step": 2410 + }, + { + "epoch": 0.49, + "learning_rate": 1.9431772269420475e-05, + "loss": 2.2132, + "step": 2411 + }, + { + "epoch": 0.49, + "learning_rate": 1.943119815198741e-05, + "loss": 2.0995, + "step": 2412 + }, + { + "epoch": 0.49, + "learning_rate": 1.9430623753156607e-05, + "loss": 2.1493, + "step": 2413 + }, + { + "epoch": 0.49, + "learning_rate": 1.9430049072945206e-05, + "loss": 2.1639, + "step": 2414 + }, + { + "epoch": 0.49, + "learning_rate": 1.942947411137035e-05, + "loss": 2.2236, + "step": 2415 + }, + { + "epoch": 0.49, + "learning_rate": 1.94288988684492e-05, + "loss": 2.1626, + "step": 2416 + }, + { + "epoch": 0.49, + "learning_rate": 1.9428323344198917e-05, + "loss": 2.1481, + "step": 2417 + }, + { + "epoch": 0.49, + "learning_rate": 1.9427747538636666e-05, + "loss": 2.1442, + "step": 2418 + }, + { + "epoch": 0.49, + "learning_rate": 1.9427171451779642e-05, + "loss": 2.0907, + "step": 2419 + }, + { + "epoch": 0.49, + "learning_rate": 1.9426595083645016e-05, + "loss": 2.1406, + "step": 2420 + }, + { + "epoch": 0.49, + "learning_rate": 1.942601843425e-05, + "loss": 2.195, + "step": 2421 + }, + { + "epoch": 0.49, + "learning_rate": 1.942544150361179e-05, + "loss": 2.2731, + "step": 2422 + }, + { + "epoch": 0.49, + "learning_rate": 1.9424864291747606e-05, + "loss": 2.1496, + "step": 2423 + }, + { + "epoch": 0.49, + "learning_rate": 1.9424286798674665e-05, + "loss": 2.1843, + "step": 2424 + }, + { + "epoch": 0.49, + "learning_rate": 1.94237090244102e-05, + "loss": 2.1358, + "step": 2425 + }, + { + "epoch": 0.49, + "learning_rate": 1.942313096897145e-05, + "loss": 2.2, + "step": 2426 + }, + { + "epoch": 0.49, + "learning_rate": 1.942255263237566e-05, + "loss": 2.1857, + "step": 2427 + }, + { + "epoch": 0.49, + "learning_rate": 1.9421974014640094e-05, + "loss": 2.1945, + "step": 2428 + }, + { + "epoch": 0.49, + "learning_rate": 1.9421395115782004e-05, + "loss": 2.1255, + "step": 2429 + }, + { + "epoch": 0.49, + "learning_rate": 1.9420815935818673e-05, + "loss": 2.1325, + "step": 2430 + }, + { + "epoch": 0.49, + "learning_rate": 1.9420236474767377e-05, + "loss": 2.1606, + "step": 2431 + }, + { + "epoch": 0.49, + "learning_rate": 1.9419656732645406e-05, + "loss": 2.173, + "step": 2432 + }, + { + "epoch": 0.49, + "learning_rate": 1.9419076709470053e-05, + "loss": 2.0752, + "step": 2433 + }, + { + "epoch": 0.49, + "learning_rate": 1.9418496405258634e-05, + "loss": 2.1423, + "step": 2434 + }, + { + "epoch": 0.49, + "learning_rate": 1.9417915820028456e-05, + "loss": 2.1971, + "step": 2435 + }, + { + "epoch": 0.49, + "learning_rate": 1.9417334953796847e-05, + "loss": 2.2164, + "step": 2436 + }, + { + "epoch": 0.49, + "learning_rate": 1.9416753806581132e-05, + "loss": 2.104, + "step": 2437 + }, + { + "epoch": 0.49, + "learning_rate": 1.9416172378398656e-05, + "loss": 2.1436, + "step": 2438 + }, + { + "epoch": 0.5, + "learning_rate": 1.9415590669266766e-05, + "loss": 2.1454, + "step": 2439 + }, + { + "epoch": 0.5, + "learning_rate": 1.9415008679202816e-05, + "loss": 2.1438, + "step": 2440 + }, + { + "epoch": 0.5, + "learning_rate": 1.941442640822417e-05, + "loss": 2.1537, + "step": 2441 + }, + { + "epoch": 0.5, + "learning_rate": 1.941384385634821e-05, + "loss": 2.2119, + "step": 2442 + }, + { + "epoch": 0.5, + "learning_rate": 1.9413261023592306e-05, + "loss": 2.0992, + "step": 2443 + }, + { + "epoch": 0.5, + "learning_rate": 1.9412677909973853e-05, + "loss": 2.1527, + "step": 2444 + }, + { + "epoch": 0.5, + "learning_rate": 1.941209451551025e-05, + "loss": 2.1586, + "step": 2445 + }, + { + "epoch": 0.5, + "learning_rate": 1.9411510840218902e-05, + "loss": 2.2132, + "step": 2446 + }, + { + "epoch": 0.5, + "learning_rate": 1.9410926884117227e-05, + "loss": 2.1159, + "step": 2447 + }, + { + "epoch": 0.5, + "learning_rate": 1.9410342647222643e-05, + "loss": 2.1762, + "step": 2448 + }, + { + "epoch": 0.5, + "learning_rate": 1.940975812955259e-05, + "loss": 2.1034, + "step": 2449 + }, + { + "epoch": 0.5, + "learning_rate": 1.9409173331124498e-05, + "loss": 2.1486, + "step": 2450 + }, + { + "epoch": 0.5, + "learning_rate": 1.9408588251955825e-05, + "loss": 2.1591, + "step": 2451 + }, + { + "epoch": 0.5, + "learning_rate": 1.940800289206402e-05, + "loss": 2.2275, + "step": 2452 + }, + { + "epoch": 0.5, + "learning_rate": 1.9407417251466555e-05, + "loss": 2.223, + "step": 2453 + }, + { + "epoch": 0.5, + "learning_rate": 1.9406831330180904e-05, + "loss": 2.2375, + "step": 2454 + }, + { + "epoch": 0.5, + "learning_rate": 1.9406245128224543e-05, + "loss": 2.1571, + "step": 2455 + }, + { + "epoch": 0.5, + "learning_rate": 1.940565864561496e-05, + "loss": 2.1203, + "step": 2456 + }, + { + "epoch": 0.5, + "learning_rate": 1.9405071882369667e-05, + "loss": 2.158, + "step": 2457 + }, + { + "epoch": 0.5, + "learning_rate": 1.940448483850616e-05, + "loss": 2.1674, + "step": 2458 + }, + { + "epoch": 0.5, + "learning_rate": 1.9403897514041962e-05, + "loss": 2.1621, + "step": 2459 + }, + { + "epoch": 0.5, + "learning_rate": 1.940330990899459e-05, + "loss": 2.1314, + "step": 2460 + }, + { + "epoch": 0.5, + "learning_rate": 1.940272202338158e-05, + "loss": 2.1666, + "step": 2461 + }, + { + "epoch": 0.5, + "learning_rate": 1.940213385722047e-05, + "loss": 2.2179, + "step": 2462 + }, + { + "epoch": 0.5, + "learning_rate": 1.940154541052881e-05, + "loss": 2.1113, + "step": 2463 + }, + { + "epoch": 0.5, + "learning_rate": 1.940095668332416e-05, + "loss": 2.2153, + "step": 2464 + }, + { + "epoch": 0.5, + "learning_rate": 1.9400367675624085e-05, + "loss": 2.1775, + "step": 2465 + }, + { + "epoch": 0.5, + "learning_rate": 1.939977838744616e-05, + "loss": 2.2093, + "step": 2466 + }, + { + "epoch": 0.5, + "learning_rate": 1.9399188818807966e-05, + "loss": 2.1463, + "step": 2467 + }, + { + "epoch": 0.5, + "learning_rate": 1.9398598969727088e-05, + "loss": 2.1484, + "step": 2468 + }, + { + "epoch": 0.5, + "learning_rate": 1.939800884022114e-05, + "loss": 2.1515, + "step": 2469 + }, + { + "epoch": 0.5, + "learning_rate": 1.9397418430307714e-05, + "loss": 2.1784, + "step": 2470 + }, + { + "epoch": 0.5, + "learning_rate": 1.9396827740004437e-05, + "loss": 2.1776, + "step": 2471 + }, + { + "epoch": 0.5, + "learning_rate": 1.9396236769328927e-05, + "loss": 2.1909, + "step": 2472 + }, + { + "epoch": 0.5, + "learning_rate": 1.9395645518298818e-05, + "loss": 2.1135, + "step": 2473 + }, + { + "epoch": 0.5, + "learning_rate": 1.9395053986931753e-05, + "loss": 2.1692, + "step": 2474 + }, + { + "epoch": 0.5, + "learning_rate": 1.9394462175245382e-05, + "loss": 2.1955, + "step": 2475 + }, + { + "epoch": 0.5, + "learning_rate": 1.9393870083257357e-05, + "loss": 2.1385, + "step": 2476 + }, + { + "epoch": 0.5, + "learning_rate": 1.9393277710985354e-05, + "loss": 2.1664, + "step": 2477 + }, + { + "epoch": 0.5, + "learning_rate": 1.939268505844704e-05, + "loss": 2.2201, + "step": 2478 + }, + { + "epoch": 0.5, + "learning_rate": 1.9392092125660097e-05, + "loss": 2.1136, + "step": 2479 + }, + { + "epoch": 0.5, + "learning_rate": 1.9391498912642222e-05, + "loss": 2.1938, + "step": 2480 + }, + { + "epoch": 0.5, + "learning_rate": 1.939090541941111e-05, + "loss": 2.1435, + "step": 2481 + }, + { + "epoch": 0.5, + "learning_rate": 1.9390311645984473e-05, + "loss": 2.1835, + "step": 2482 + }, + { + "epoch": 0.5, + "learning_rate": 1.9389717592380025e-05, + "loss": 2.1706, + "step": 2483 + }, + { + "epoch": 0.5, + "learning_rate": 1.938912325861549e-05, + "loss": 2.202, + "step": 2484 + }, + { + "epoch": 0.5, + "learning_rate": 1.9388528644708603e-05, + "loss": 2.1095, + "step": 2485 + }, + { + "epoch": 0.5, + "learning_rate": 1.9387933750677103e-05, + "loss": 2.1213, + "step": 2486 + }, + { + "epoch": 0.5, + "learning_rate": 1.9387338576538743e-05, + "loss": 2.1633, + "step": 2487 + }, + { + "epoch": 0.51, + "learning_rate": 1.938674312231128e-05, + "loss": 2.1205, + "step": 2488 + }, + { + "epoch": 0.51, + "learning_rate": 1.9386147388012477e-05, + "loss": 2.1423, + "step": 2489 + }, + { + "epoch": 0.51, + "learning_rate": 1.9385551373660113e-05, + "loss": 2.1862, + "step": 2490 + }, + { + "epoch": 0.51, + "learning_rate": 1.938495507927197e-05, + "loss": 2.1249, + "step": 2491 + }, + { + "epoch": 0.51, + "learning_rate": 1.938435850486584e-05, + "loss": 2.1666, + "step": 2492 + }, + { + "epoch": 0.51, + "learning_rate": 1.938376165045952e-05, + "loss": 2.1928, + "step": 2493 + }, + { + "epoch": 0.51, + "learning_rate": 1.9383164516070824e-05, + "loss": 2.1544, + "step": 2494 + }, + { + "epoch": 0.51, + "learning_rate": 1.9382567101717564e-05, + "loss": 2.1575, + "step": 2495 + }, + { + "epoch": 0.51, + "learning_rate": 1.9381969407417566e-05, + "loss": 2.1837, + "step": 2496 + }, + { + "epoch": 0.51, + "learning_rate": 1.9381371433188666e-05, + "loss": 2.2225, + "step": 2497 + }, + { + "epoch": 0.51, + "learning_rate": 1.9380773179048703e-05, + "loss": 2.1269, + "step": 2498 + }, + { + "epoch": 0.51, + "learning_rate": 1.9380174645015525e-05, + "loss": 2.1086, + "step": 2499 + }, + { + "epoch": 0.51, + "learning_rate": 1.9379575831106995e-05, + "loss": 2.1784, + "step": 2500 + }, + { + "epoch": 0.51, + "learning_rate": 1.9378976737340977e-05, + "loss": 2.109, + "step": 2501 + }, + { + "epoch": 0.51, + "learning_rate": 1.937837736373535e-05, + "loss": 2.1548, + "step": 2502 + }, + { + "epoch": 0.51, + "learning_rate": 1.937777771030799e-05, + "loss": 2.13, + "step": 2503 + }, + { + "epoch": 0.51, + "learning_rate": 1.9377177777076793e-05, + "loss": 1.9998, + "step": 2504 + }, + { + "epoch": 0.51, + "learning_rate": 1.937657756405966e-05, + "loss": 2.1789, + "step": 2505 + }, + { + "epoch": 0.51, + "learning_rate": 1.9375977071274498e-05, + "loss": 2.1002, + "step": 2506 + }, + { + "epoch": 0.51, + "learning_rate": 1.937537629873923e-05, + "loss": 2.1736, + "step": 2507 + }, + { + "epoch": 0.51, + "learning_rate": 1.937477524647177e-05, + "loss": 2.1294, + "step": 2508 + }, + { + "epoch": 0.51, + "learning_rate": 1.9374173914490056e-05, + "loss": 2.1442, + "step": 2509 + }, + { + "epoch": 0.51, + "learning_rate": 1.9373572302812036e-05, + "loss": 2.1255, + "step": 2510 + }, + { + "epoch": 0.51, + "learning_rate": 1.9372970411455653e-05, + "loss": 2.1849, + "step": 2511 + }, + { + "epoch": 0.51, + "learning_rate": 1.9372368240438864e-05, + "loss": 2.1398, + "step": 2512 + }, + { + "epoch": 0.51, + "learning_rate": 1.9371765789779646e-05, + "loss": 2.1264, + "step": 2513 + }, + { + "epoch": 0.51, + "learning_rate": 1.9371163059495964e-05, + "loss": 2.2195, + "step": 2514 + }, + { + "epoch": 0.51, + "learning_rate": 1.937056004960581e-05, + "loss": 2.1647, + "step": 2515 + }, + { + "epoch": 0.51, + "learning_rate": 1.9369956760127173e-05, + "loss": 2.1654, + "step": 2516 + }, + { + "epoch": 0.51, + "learning_rate": 1.9369353191078045e-05, + "loss": 2.1222, + "step": 2517 + }, + { + "epoch": 0.51, + "learning_rate": 1.9368749342476444e-05, + "loss": 2.1952, + "step": 2518 + }, + { + "epoch": 0.51, + "learning_rate": 1.9368145214340388e-05, + "loss": 2.1606, + "step": 2519 + }, + { + "epoch": 0.51, + "learning_rate": 1.9367540806687894e-05, + "loss": 2.177, + "step": 2520 + }, + { + "epoch": 0.51, + "learning_rate": 1.9366936119537004e-05, + "loss": 2.1523, + "step": 2521 + }, + { + "epoch": 0.51, + "learning_rate": 1.9366331152905757e-05, + "loss": 2.1101, + "step": 2522 + }, + { + "epoch": 0.51, + "learning_rate": 1.9365725906812198e-05, + "loss": 2.1078, + "step": 2523 + }, + { + "epoch": 0.51, + "learning_rate": 1.9365120381274395e-05, + "loss": 2.1767, + "step": 2524 + }, + { + "epoch": 0.51, + "learning_rate": 1.9364514576310408e-05, + "loss": 2.2092, + "step": 2525 + }, + { + "epoch": 0.51, + "learning_rate": 1.9363908491938316e-05, + "loss": 2.144, + "step": 2526 + }, + { + "epoch": 0.51, + "learning_rate": 1.9363302128176204e-05, + "loss": 2.1536, + "step": 2527 + }, + { + "epoch": 0.51, + "learning_rate": 1.9362695485042158e-05, + "loss": 2.1834, + "step": 2528 + }, + { + "epoch": 0.51, + "learning_rate": 1.9362088562554282e-05, + "loss": 2.1412, + "step": 2529 + }, + { + "epoch": 0.51, + "learning_rate": 1.9361481360730687e-05, + "loss": 2.1654, + "step": 2530 + }, + { + "epoch": 0.51, + "learning_rate": 1.9360873879589484e-05, + "loss": 2.2677, + "step": 2531 + }, + { + "epoch": 0.51, + "learning_rate": 1.9360266119148806e-05, + "loss": 2.1351, + "step": 2532 + }, + { + "epoch": 0.51, + "learning_rate": 1.9359658079426783e-05, + "loss": 2.1628, + "step": 2533 + }, + { + "epoch": 0.51, + "learning_rate": 1.935904976044155e-05, + "loss": 2.0842, + "step": 2534 + }, + { + "epoch": 0.51, + "learning_rate": 1.935844116221127e-05, + "loss": 2.1311, + "step": 2535 + }, + { + "epoch": 0.51, + "learning_rate": 1.9357832284754097e-05, + "loss": 2.1287, + "step": 2536 + }, + { + "epoch": 0.51, + "learning_rate": 1.935722312808819e-05, + "loss": 2.174, + "step": 2537 + }, + { + "epoch": 0.52, + "learning_rate": 1.935661369223174e-05, + "loss": 2.2305, + "step": 2538 + }, + { + "epoch": 0.52, + "learning_rate": 1.9356003977202915e-05, + "loss": 2.1143, + "step": 2539 + }, + { + "epoch": 0.52, + "learning_rate": 1.9355393983019915e-05, + "loss": 2.1351, + "step": 2540 + }, + { + "epoch": 0.52, + "learning_rate": 1.935478370970094e-05, + "loss": 2.1502, + "step": 2541 + }, + { + "epoch": 0.52, + "learning_rate": 1.93541731572642e-05, + "loss": 2.1431, + "step": 2542 + }, + { + "epoch": 0.52, + "learning_rate": 1.9353562325727905e-05, + "loss": 2.1587, + "step": 2543 + }, + { + "epoch": 0.52, + "learning_rate": 1.935295121511029e-05, + "loss": 2.2014, + "step": 2544 + }, + { + "epoch": 0.52, + "learning_rate": 1.935233982542958e-05, + "loss": 2.1995, + "step": 2545 + }, + { + "epoch": 0.52, + "learning_rate": 1.9351728156704022e-05, + "loss": 2.2518, + "step": 2546 + }, + { + "epoch": 0.52, + "learning_rate": 1.9351116208951866e-05, + "loss": 2.1526, + "step": 2547 + }, + { + "epoch": 0.52, + "learning_rate": 1.9350503982191368e-05, + "loss": 2.1223, + "step": 2548 + }, + { + "epoch": 0.52, + "learning_rate": 1.9349891476440798e-05, + "loss": 2.1574, + "step": 2549 + }, + { + "epoch": 0.52, + "learning_rate": 1.9349278691718426e-05, + "loss": 2.2297, + "step": 2550 + }, + { + "epoch": 0.52, + "learning_rate": 1.9348665628042544e-05, + "loss": 2.1875, + "step": 2551 + }, + { + "epoch": 0.52, + "learning_rate": 1.934805228543144e-05, + "loss": 2.108, + "step": 2552 + }, + { + "epoch": 0.52, + "learning_rate": 1.9347438663903412e-05, + "loss": 2.0994, + "step": 2553 + }, + { + "epoch": 0.52, + "learning_rate": 1.9346824763476767e-05, + "loss": 2.1085, + "step": 2554 + }, + { + "epoch": 0.52, + "learning_rate": 1.934621058416983e-05, + "loss": 2.2065, + "step": 2555 + }, + { + "epoch": 0.52, + "learning_rate": 1.9345596126000917e-05, + "loss": 2.1902, + "step": 2556 + }, + { + "epoch": 0.52, + "learning_rate": 1.9344981388988365e-05, + "loss": 2.1419, + "step": 2557 + }, + { + "epoch": 0.52, + "learning_rate": 1.9344366373150523e-05, + "loss": 2.1796, + "step": 2558 + }, + { + "epoch": 0.52, + "learning_rate": 1.934375107850573e-05, + "loss": 2.2099, + "step": 2559 + }, + { + "epoch": 0.52, + "learning_rate": 1.934313550507235e-05, + "loss": 2.0866, + "step": 2560 + }, + { + "epoch": 0.52, + "learning_rate": 1.9342519652868752e-05, + "loss": 2.1662, + "step": 2561 + }, + { + "epoch": 0.52, + "learning_rate": 1.9341903521913306e-05, + "loss": 2.1088, + "step": 2562 + }, + { + "epoch": 0.52, + "learning_rate": 1.9341287112224396e-05, + "loss": 2.112, + "step": 2563 + }, + { + "epoch": 0.52, + "learning_rate": 1.9340670423820417e-05, + "loss": 2.2098, + "step": 2564 + }, + { + "epoch": 0.52, + "learning_rate": 1.9340053456719768e-05, + "loss": 2.1142, + "step": 2565 + }, + { + "epoch": 0.52, + "learning_rate": 1.933943621094086e-05, + "loss": 2.1878, + "step": 2566 + }, + { + "epoch": 0.52, + "learning_rate": 1.9338818686502102e-05, + "loss": 2.2168, + "step": 2567 + }, + { + "epoch": 0.52, + "learning_rate": 1.9338200883421924e-05, + "loss": 2.1586, + "step": 2568 + }, + { + "epoch": 0.52, + "learning_rate": 1.9337582801718758e-05, + "loss": 2.1644, + "step": 2569 + }, + { + "epoch": 0.52, + "learning_rate": 1.9336964441411047e-05, + "loss": 2.2167, + "step": 2570 + }, + { + "epoch": 0.52, + "learning_rate": 1.9336345802517246e-05, + "loss": 2.1194, + "step": 2571 + }, + { + "epoch": 0.52, + "learning_rate": 1.9335726885055802e-05, + "loss": 2.1644, + "step": 2572 + }, + { + "epoch": 0.52, + "learning_rate": 1.933510768904519e-05, + "loss": 2.2297, + "step": 2573 + }, + { + "epoch": 0.52, + "learning_rate": 1.933448821450388e-05, + "loss": 2.1486, + "step": 2574 + }, + { + "epoch": 0.52, + "learning_rate": 1.933386846145036e-05, + "loss": 2.2, + "step": 2575 + }, + { + "epoch": 0.52, + "learning_rate": 1.9333248429903117e-05, + "loss": 2.1447, + "step": 2576 + }, + { + "epoch": 0.52, + "learning_rate": 1.9332628119880653e-05, + "loss": 2.1276, + "step": 2577 + }, + { + "epoch": 0.52, + "learning_rate": 1.9332007531401478e-05, + "loss": 2.1712, + "step": 2578 + }, + { + "epoch": 0.52, + "learning_rate": 1.9331386664484104e-05, + "loss": 2.1967, + "step": 2579 + }, + { + "epoch": 0.52, + "learning_rate": 1.9330765519147058e-05, + "loss": 2.2032, + "step": 2580 + }, + { + "epoch": 0.52, + "learning_rate": 1.9330144095408877e-05, + "loss": 2.1194, + "step": 2581 + }, + { + "epoch": 0.52, + "learning_rate": 1.9329522393288096e-05, + "loss": 2.2233, + "step": 2582 + }, + { + "epoch": 0.52, + "learning_rate": 1.9328900412803266e-05, + "loss": 2.2059, + "step": 2583 + }, + { + "epoch": 0.52, + "learning_rate": 1.9328278153972947e-05, + "loss": 2.1109, + "step": 2584 + }, + { + "epoch": 0.52, + "learning_rate": 1.9327655616815706e-05, + "loss": 2.1664, + "step": 2585 + }, + { + "epoch": 0.52, + "learning_rate": 1.9327032801350113e-05, + "loss": 2.2095, + "step": 2586 + }, + { + "epoch": 0.53, + "learning_rate": 1.932640970759476e-05, + "loss": 2.1195, + "step": 2587 + }, + { + "epoch": 0.53, + "learning_rate": 1.9325786335568225e-05, + "loss": 2.1445, + "step": 2588 + }, + { + "epoch": 0.53, + "learning_rate": 1.9325162685289114e-05, + "loss": 2.1836, + "step": 2589 + }, + { + "epoch": 0.53, + "learning_rate": 1.9324538756776042e-05, + "loss": 2.1973, + "step": 2590 + }, + { + "epoch": 0.53, + "learning_rate": 1.9323914550047615e-05, + "loss": 2.2522, + "step": 2591 + }, + { + "epoch": 0.53, + "learning_rate": 1.9323290065122462e-05, + "loss": 2.2331, + "step": 2592 + }, + { + "epoch": 0.53, + "learning_rate": 1.932266530201921e-05, + "loss": 2.1611, + "step": 2593 + }, + { + "epoch": 0.53, + "learning_rate": 1.9322040260756513e-05, + "loss": 2.1725, + "step": 2594 + }, + { + "epoch": 0.53, + "learning_rate": 1.9321414941353006e-05, + "loss": 2.162, + "step": 2595 + }, + { + "epoch": 0.53, + "learning_rate": 1.932078934382735e-05, + "loss": 2.2039, + "step": 2596 + }, + { + "epoch": 0.53, + "learning_rate": 1.9320163468198218e-05, + "loss": 2.1581, + "step": 2597 + }, + { + "epoch": 0.53, + "learning_rate": 1.9319537314484277e-05, + "loss": 2.1295, + "step": 2598 + }, + { + "epoch": 0.53, + "learning_rate": 1.9318910882704215e-05, + "loss": 2.2377, + "step": 2599 + }, + { + "epoch": 0.53, + "learning_rate": 1.931828417287672e-05, + "loss": 2.1718, + "step": 2600 + }, + { + "epoch": 0.53, + "learning_rate": 1.9317657185020485e-05, + "loss": 2.1564, + "step": 2601 + }, + { + "epoch": 0.53, + "learning_rate": 1.931702991915423e-05, + "loss": 2.1692, + "step": 2602 + }, + { + "epoch": 0.53, + "learning_rate": 1.9316402375296662e-05, + "loss": 2.1303, + "step": 2603 + }, + { + "epoch": 0.53, + "learning_rate": 1.9315774553466508e-05, + "loss": 2.2091, + "step": 2604 + }, + { + "epoch": 0.53, + "learning_rate": 1.9315146453682495e-05, + "loss": 2.0384, + "step": 2605 + }, + { + "epoch": 0.53, + "learning_rate": 1.931451807596337e-05, + "loss": 2.1521, + "step": 2606 + }, + { + "epoch": 0.53, + "learning_rate": 1.931388942032788e-05, + "loss": 2.1122, + "step": 2607 + }, + { + "epoch": 0.53, + "learning_rate": 1.9313260486794782e-05, + "loss": 2.16, + "step": 2608 + }, + { + "epoch": 0.53, + "learning_rate": 1.931263127538284e-05, + "loss": 2.214, + "step": 2609 + }, + { + "epoch": 0.53, + "learning_rate": 1.931200178611083e-05, + "loss": 2.2083, + "step": 2610 + }, + { + "epoch": 0.53, + "learning_rate": 1.9311372018997532e-05, + "loss": 2.1769, + "step": 2611 + }, + { + "epoch": 0.53, + "learning_rate": 1.9310741974061736e-05, + "loss": 2.194, + "step": 2612 + }, + { + "epoch": 0.53, + "learning_rate": 1.9310111651322244e-05, + "loss": 2.1898, + "step": 2613 + }, + { + "epoch": 0.53, + "learning_rate": 1.9309481050797857e-05, + "loss": 2.1323, + "step": 2614 + }, + { + "epoch": 0.53, + "learning_rate": 1.9308850172507398e-05, + "loss": 2.1657, + "step": 2615 + }, + { + "epoch": 0.53, + "learning_rate": 1.9308219016469686e-05, + "loss": 2.1204, + "step": 2616 + }, + { + "epoch": 0.53, + "learning_rate": 1.9307587582703552e-05, + "loss": 2.2191, + "step": 2617 + }, + { + "epoch": 0.53, + "learning_rate": 1.9306955871227834e-05, + "loss": 2.154, + "step": 2618 + }, + { + "epoch": 0.53, + "learning_rate": 1.930632388206139e-05, + "loss": 2.1747, + "step": 2619 + }, + { + "epoch": 0.53, + "learning_rate": 1.9305691615223065e-05, + "loss": 2.1532, + "step": 2620 + }, + { + "epoch": 0.53, + "learning_rate": 1.9305059070731727e-05, + "loss": 2.1981, + "step": 2621 + }, + { + "epoch": 0.53, + "learning_rate": 1.9304426248606253e-05, + "loss": 2.2077, + "step": 2622 + }, + { + "epoch": 0.53, + "learning_rate": 1.9303793148865523e-05, + "loss": 2.1608, + "step": 2623 + }, + { + "epoch": 0.53, + "learning_rate": 1.930315977152843e-05, + "loss": 2.1742, + "step": 2624 + }, + { + "epoch": 0.53, + "learning_rate": 1.9302526116613863e-05, + "loss": 2.0753, + "step": 2625 + }, + { + "epoch": 0.53, + "learning_rate": 1.9301892184140735e-05, + "loss": 2.1913, + "step": 2626 + }, + { + "epoch": 0.53, + "learning_rate": 1.9301257974127957e-05, + "loss": 2.1497, + "step": 2627 + }, + { + "epoch": 0.53, + "learning_rate": 1.9300623486594457e-05, + "loss": 2.1105, + "step": 2628 + }, + { + "epoch": 0.53, + "learning_rate": 1.9299988721559164e-05, + "loss": 2.1141, + "step": 2629 + }, + { + "epoch": 0.53, + "learning_rate": 1.9299353679041013e-05, + "loss": 2.1361, + "step": 2630 + }, + { + "epoch": 0.53, + "learning_rate": 1.9298718359058955e-05, + "loss": 2.1624, + "step": 2631 + }, + { + "epoch": 0.53, + "learning_rate": 1.929808276163195e-05, + "loss": 2.172, + "step": 2632 + }, + { + "epoch": 0.53, + "learning_rate": 1.9297446886778957e-05, + "loss": 2.206, + "step": 2633 + }, + { + "epoch": 0.53, + "learning_rate": 1.929681073451895e-05, + "loss": 2.2934, + "step": 2634 + }, + { + "epoch": 0.53, + "learning_rate": 1.9296174304870907e-05, + "loss": 2.1956, + "step": 2635 + }, + { + "epoch": 0.54, + "learning_rate": 1.929553759785382e-05, + "loss": 2.2235, + "step": 2636 + }, + { + "epoch": 0.54, + "learning_rate": 1.929490061348669e-05, + "loss": 2.1898, + "step": 2637 + }, + { + "epoch": 0.54, + "learning_rate": 1.929426335178852e-05, + "loss": 2.1455, + "step": 2638 + }, + { + "epoch": 0.54, + "learning_rate": 1.9293625812778318e-05, + "loss": 2.2041, + "step": 2639 + }, + { + "epoch": 0.54, + "learning_rate": 1.9292987996475113e-05, + "loss": 2.1254, + "step": 2640 + }, + { + "epoch": 0.54, + "learning_rate": 1.929234990289793e-05, + "loss": 2.164, + "step": 2641 + }, + { + "epoch": 0.54, + "learning_rate": 1.9291711532065816e-05, + "loss": 2.1954, + "step": 2642 + }, + { + "epoch": 0.54, + "learning_rate": 1.9291072883997813e-05, + "loss": 2.2055, + "step": 2643 + }, + { + "epoch": 0.54, + "learning_rate": 1.9290433958712972e-05, + "loss": 2.2356, + "step": 2644 + }, + { + "epoch": 0.54, + "learning_rate": 1.9289794756230365e-05, + "loss": 2.1905, + "step": 2645 + }, + { + "epoch": 0.54, + "learning_rate": 1.928915527656906e-05, + "loss": 2.1981, + "step": 2646 + }, + { + "epoch": 0.54, + "learning_rate": 1.9288515519748135e-05, + "loss": 2.1099, + "step": 2647 + }, + { + "epoch": 0.54, + "learning_rate": 1.928787548578668e-05, + "loss": 2.124, + "step": 2648 + }, + { + "epoch": 0.54, + "learning_rate": 1.9287235174703794e-05, + "loss": 2.124, + "step": 2649 + }, + { + "epoch": 0.54, + "learning_rate": 1.9286594586518575e-05, + "loss": 2.1047, + "step": 2650 + }, + { + "epoch": 0.54, + "learning_rate": 1.9285953721250146e-05, + "loss": 2.1106, + "step": 2651 + }, + { + "epoch": 0.54, + "learning_rate": 1.9285312578917623e-05, + "loss": 2.1961, + "step": 2652 + }, + { + "epoch": 0.54, + "learning_rate": 1.9284671159540135e-05, + "loss": 2.1042, + "step": 2653 + }, + { + "epoch": 0.54, + "learning_rate": 1.9284029463136822e-05, + "loss": 2.0897, + "step": 2654 + }, + { + "epoch": 0.54, + "learning_rate": 1.9283387489726827e-05, + "loss": 2.1109, + "step": 2655 + }, + { + "epoch": 0.54, + "learning_rate": 1.928274523932931e-05, + "loss": 2.1777, + "step": 2656 + }, + { + "epoch": 0.54, + "learning_rate": 1.928210271196343e-05, + "loss": 2.1219, + "step": 2657 + }, + { + "epoch": 0.54, + "learning_rate": 1.9281459907648356e-05, + "loss": 2.1444, + "step": 2658 + }, + { + "epoch": 0.54, + "learning_rate": 1.9280816826403274e-05, + "loss": 2.2452, + "step": 2659 + }, + { + "epoch": 0.54, + "learning_rate": 1.9280173468247364e-05, + "loss": 2.092, + "step": 2660 + }, + { + "epoch": 0.54, + "learning_rate": 1.9279529833199825e-05, + "loss": 2.2146, + "step": 2661 + }, + { + "epoch": 0.54, + "learning_rate": 1.9278885921279862e-05, + "loss": 2.237, + "step": 2662 + }, + { + "epoch": 0.54, + "learning_rate": 1.927824173250669e-05, + "loss": 2.17, + "step": 2663 + }, + { + "epoch": 0.54, + "learning_rate": 1.927759726689952e-05, + "loss": 2.2136, + "step": 2664 + }, + { + "epoch": 0.54, + "learning_rate": 1.9276952524477594e-05, + "loss": 2.2403, + "step": 2665 + }, + { + "epoch": 0.54, + "learning_rate": 1.9276307505260137e-05, + "loss": 2.1818, + "step": 2666 + }, + { + "epoch": 0.54, + "learning_rate": 1.92756622092664e-05, + "loss": 2.1458, + "step": 2667 + }, + { + "epoch": 0.54, + "learning_rate": 1.927501663651564e-05, + "loss": 2.1864, + "step": 2668 + }, + { + "epoch": 0.54, + "learning_rate": 1.927437078702711e-05, + "loss": 2.1298, + "step": 2669 + }, + { + "epoch": 0.54, + "learning_rate": 1.9273724660820086e-05, + "loss": 2.1467, + "step": 2670 + }, + { + "epoch": 0.54, + "learning_rate": 1.927307825791385e-05, + "loss": 2.1567, + "step": 2671 + }, + { + "epoch": 0.54, + "learning_rate": 1.9272431578327683e-05, + "loss": 2.2132, + "step": 2672 + }, + { + "epoch": 0.54, + "learning_rate": 1.927178462208088e-05, + "loss": 2.2013, + "step": 2673 + }, + { + "epoch": 0.54, + "learning_rate": 1.9271137389192747e-05, + "loss": 2.1226, + "step": 2674 + }, + { + "epoch": 0.54, + "learning_rate": 1.9270489879682592e-05, + "loss": 2.0937, + "step": 2675 + }, + { + "epoch": 0.54, + "learning_rate": 1.926984209356974e-05, + "loss": 2.1512, + "step": 2676 + }, + { + "epoch": 0.54, + "learning_rate": 1.926919403087351e-05, + "loss": 2.1785, + "step": 2677 + }, + { + "epoch": 0.54, + "learning_rate": 1.9268545691613247e-05, + "loss": 2.1844, + "step": 2678 + }, + { + "epoch": 0.54, + "learning_rate": 1.926789707580829e-05, + "loss": 2.1441, + "step": 2679 + }, + { + "epoch": 0.54, + "learning_rate": 1.9267248183477994e-05, + "loss": 2.1429, + "step": 2680 + }, + { + "epoch": 0.54, + "learning_rate": 1.9266599014641724e-05, + "loss": 2.2061, + "step": 2681 + }, + { + "epoch": 0.54, + "learning_rate": 1.9265949569318842e-05, + "loss": 2.1746, + "step": 2682 + }, + { + "epoch": 0.54, + "learning_rate": 1.9265299847528726e-05, + "loss": 2.2086, + "step": 2683 + }, + { + "epoch": 0.54, + "learning_rate": 1.9264649849290767e-05, + "loss": 2.1582, + "step": 2684 + }, + { + "epoch": 0.55, + "learning_rate": 1.9263999574624357e-05, + "loss": 2.1019, + "step": 2685 + }, + { + "epoch": 0.55, + "learning_rate": 1.9263349023548895e-05, + "loss": 2.1868, + "step": 2686 + }, + { + "epoch": 0.55, + "learning_rate": 1.9262698196083793e-05, + "loss": 2.1504, + "step": 2687 + }, + { + "epoch": 0.55, + "learning_rate": 1.9262047092248473e-05, + "loss": 2.1391, + "step": 2688 + }, + { + "epoch": 0.55, + "learning_rate": 1.9261395712062355e-05, + "loss": 2.2141, + "step": 2689 + }, + { + "epoch": 0.55, + "learning_rate": 1.926074405554488e-05, + "loss": 2.2627, + "step": 2690 + }, + { + "epoch": 0.55, + "learning_rate": 1.926009212271549e-05, + "loss": 2.136, + "step": 2691 + }, + { + "epoch": 0.55, + "learning_rate": 1.9259439913593637e-05, + "loss": 2.1137, + "step": 2692 + }, + { + "epoch": 0.55, + "learning_rate": 1.925878742819878e-05, + "loss": 2.2346, + "step": 2693 + }, + { + "epoch": 0.55, + "learning_rate": 1.925813466655039e-05, + "loss": 2.1282, + "step": 2694 + }, + { + "epoch": 0.55, + "learning_rate": 1.925748162866794e-05, + "loss": 2.0665, + "step": 2695 + }, + { + "epoch": 0.55, + "learning_rate": 1.925682831457091e-05, + "loss": 2.1408, + "step": 2696 + }, + { + "epoch": 0.55, + "learning_rate": 1.9256174724278804e-05, + "loss": 2.1207, + "step": 2697 + }, + { + "epoch": 0.55, + "learning_rate": 1.9255520857811117e-05, + "loss": 2.1772, + "step": 2698 + }, + { + "epoch": 0.55, + "learning_rate": 1.9254866715187357e-05, + "loss": 2.214, + "step": 2699 + }, + { + "epoch": 0.55, + "learning_rate": 1.9254212296427043e-05, + "loss": 2.1819, + "step": 2700 + }, + { + "epoch": 0.55, + "learning_rate": 1.9253557601549704e-05, + "loss": 2.1835, + "step": 2701 + }, + { + "epoch": 0.55, + "learning_rate": 1.925290263057487e-05, + "loss": 2.0105, + "step": 2702 + }, + { + "epoch": 0.55, + "learning_rate": 1.925224738352208e-05, + "loss": 2.1939, + "step": 2703 + }, + { + "epoch": 0.55, + "learning_rate": 1.9251591860410893e-05, + "loss": 2.103, + "step": 2704 + }, + { + "epoch": 0.55, + "learning_rate": 1.9250936061260866e-05, + "loss": 2.1417, + "step": 2705 + }, + { + "epoch": 0.55, + "learning_rate": 1.925027998609156e-05, + "loss": 2.1638, + "step": 2706 + }, + { + "epoch": 0.55, + "learning_rate": 1.9249623634922557e-05, + "loss": 2.1564, + "step": 2707 + }, + { + "epoch": 0.55, + "learning_rate": 1.924896700777344e-05, + "loss": 2.1697, + "step": 2708 + }, + { + "epoch": 0.55, + "learning_rate": 1.9248310104663793e-05, + "loss": 2.123, + "step": 2709 + }, + { + "epoch": 0.55, + "learning_rate": 1.924765292561322e-05, + "loss": 2.1406, + "step": 2710 + }, + { + "epoch": 0.55, + "learning_rate": 1.9246995470641335e-05, + "loss": 2.2177, + "step": 2711 + }, + { + "epoch": 0.55, + "learning_rate": 1.9246337739767748e-05, + "loss": 2.1554, + "step": 2712 + }, + { + "epoch": 0.55, + "learning_rate": 1.9245679733012087e-05, + "loss": 2.0865, + "step": 2713 + }, + { + "epoch": 0.55, + "learning_rate": 1.9245021450393985e-05, + "loss": 2.2033, + "step": 2714 + }, + { + "epoch": 0.55, + "learning_rate": 1.9244362891933077e-05, + "loss": 2.1966, + "step": 2715 + }, + { + "epoch": 0.55, + "learning_rate": 1.924370405764902e-05, + "loss": 2.1706, + "step": 2716 + }, + { + "epoch": 0.55, + "learning_rate": 1.924304494756147e-05, + "loss": 2.2078, + "step": 2717 + }, + { + "epoch": 0.55, + "learning_rate": 1.9242385561690088e-05, + "loss": 2.1822, + "step": 2718 + }, + { + "epoch": 0.55, + "learning_rate": 1.9241725900054554e-05, + "loss": 2.218, + "step": 2719 + }, + { + "epoch": 0.55, + "learning_rate": 1.9241065962674543e-05, + "loss": 2.1717, + "step": 2720 + }, + { + "epoch": 0.55, + "learning_rate": 1.9240405749569755e-05, + "loss": 2.1736, + "step": 2721 + }, + { + "epoch": 0.55, + "learning_rate": 1.923974526075988e-05, + "loss": 2.2359, + "step": 2722 + }, + { + "epoch": 0.55, + "learning_rate": 1.9239084496264633e-05, + "loss": 2.1713, + "step": 2723 + }, + { + "epoch": 0.55, + "learning_rate": 1.923842345610372e-05, + "loss": 2.178, + "step": 2724 + }, + { + "epoch": 0.55, + "learning_rate": 1.9237762140296873e-05, + "loss": 2.1602, + "step": 2725 + }, + { + "epoch": 0.55, + "learning_rate": 1.923710054886382e-05, + "loss": 2.0854, + "step": 2726 + }, + { + "epoch": 0.55, + "learning_rate": 1.92364386818243e-05, + "loss": 2.1157, + "step": 2727 + }, + { + "epoch": 0.55, + "learning_rate": 1.9235776539198058e-05, + "loss": 2.1294, + "step": 2728 + }, + { + "epoch": 0.55, + "learning_rate": 1.923511412100486e-05, + "loss": 2.2267, + "step": 2729 + }, + { + "epoch": 0.55, + "learning_rate": 1.923445142726446e-05, + "loss": 2.1946, + "step": 2730 + }, + { + "epoch": 0.55, + "learning_rate": 1.923378845799664e-05, + "loss": 2.2061, + "step": 2731 + }, + { + "epoch": 0.55, + "learning_rate": 1.9233125213221173e-05, + "loss": 2.1993, + "step": 2732 + }, + { + "epoch": 0.55, + "learning_rate": 1.923246169295785e-05, + "loss": 2.2274, + "step": 2733 + }, + { + "epoch": 0.55, + "learning_rate": 1.9231797897226473e-05, + "loss": 2.2104, + "step": 2734 + }, + { + "epoch": 0.56, + "learning_rate": 1.9231133826046845e-05, + "loss": 2.1285, + "step": 2735 + }, + { + "epoch": 0.56, + "learning_rate": 1.9230469479438775e-05, + "loss": 2.2109, + "step": 2736 + }, + { + "epoch": 0.56, + "learning_rate": 1.9229804857422092e-05, + "loss": 2.175, + "step": 2737 + }, + { + "epoch": 0.56, + "learning_rate": 1.9229139960016624e-05, + "loss": 2.2154, + "step": 2738 + }, + { + "epoch": 0.56, + "learning_rate": 1.922847478724221e-05, + "loss": 2.1365, + "step": 2739 + }, + { + "epoch": 0.56, + "learning_rate": 1.9227809339118694e-05, + "loss": 2.1353, + "step": 2740 + }, + { + "epoch": 0.56, + "learning_rate": 1.9227143615665934e-05, + "loss": 2.1233, + "step": 2741 + }, + { + "epoch": 0.56, + "learning_rate": 1.922647761690379e-05, + "loss": 2.1716, + "step": 2742 + }, + { + "epoch": 0.56, + "learning_rate": 1.9225811342852135e-05, + "loss": 2.1964, + "step": 2743 + }, + { + "epoch": 0.56, + "learning_rate": 1.922514479353085e-05, + "loss": 2.1376, + "step": 2744 + }, + { + "epoch": 0.56, + "learning_rate": 1.922447796895982e-05, + "loss": 2.1828, + "step": 2745 + }, + { + "epoch": 0.56, + "learning_rate": 1.9223810869158943e-05, + "loss": 2.1075, + "step": 2746 + }, + { + "epoch": 0.56, + "learning_rate": 1.9223143494148125e-05, + "loss": 2.1519, + "step": 2747 + }, + { + "epoch": 0.56, + "learning_rate": 1.9222475843947273e-05, + "loss": 2.1391, + "step": 2748 + }, + { + "epoch": 0.56, + "learning_rate": 1.9221807918576313e-05, + "loss": 2.1253, + "step": 2749 + }, + { + "epoch": 0.56, + "learning_rate": 1.922113971805517e-05, + "loss": 2.1759, + "step": 2750 + }, + { + "epoch": 0.56, + "learning_rate": 1.922047124240378e-05, + "loss": 2.1473, + "step": 2751 + }, + { + "epoch": 0.56, + "learning_rate": 1.9219802491642094e-05, + "loss": 2.1791, + "step": 2752 + }, + { + "epoch": 0.56, + "learning_rate": 1.9219133465790062e-05, + "loss": 2.1411, + "step": 2753 + }, + { + "epoch": 0.56, + "learning_rate": 1.921846416486765e-05, + "loss": 2.1762, + "step": 2754 + }, + { + "epoch": 0.56, + "learning_rate": 1.9217794588894816e-05, + "loss": 2.1732, + "step": 2755 + }, + { + "epoch": 0.56, + "learning_rate": 1.9217124737891548e-05, + "loss": 2.128, + "step": 2756 + }, + { + "epoch": 0.56, + "learning_rate": 1.921645461187783e-05, + "loss": 2.1156, + "step": 2757 + }, + { + "epoch": 0.56, + "learning_rate": 1.921578421087366e-05, + "loss": 2.122, + "step": 2758 + }, + { + "epoch": 0.56, + "learning_rate": 1.9215113534899033e-05, + "loss": 2.212, + "step": 2759 + }, + { + "epoch": 0.56, + "learning_rate": 1.9214442583973965e-05, + "loss": 2.1589, + "step": 2760 + }, + { + "epoch": 0.56, + "learning_rate": 1.921377135811848e-05, + "loss": 2.2186, + "step": 2761 + }, + { + "epoch": 0.56, + "learning_rate": 1.921309985735259e-05, + "loss": 2.1469, + "step": 2762 + }, + { + "epoch": 0.56, + "learning_rate": 1.9212428081696347e-05, + "loss": 2.1804, + "step": 2763 + }, + { + "epoch": 0.56, + "learning_rate": 1.9211756031169787e-05, + "loss": 2.228, + "step": 2764 + }, + { + "epoch": 0.56, + "learning_rate": 1.9211083705792958e-05, + "loss": 2.1757, + "step": 2765 + }, + { + "epoch": 0.56, + "learning_rate": 1.9210411105585928e-05, + "loss": 2.158, + "step": 2766 + }, + { + "epoch": 0.56, + "learning_rate": 1.9209738230568762e-05, + "loss": 2.1479, + "step": 2767 + }, + { + "epoch": 0.56, + "learning_rate": 1.920906508076154e-05, + "loss": 2.2233, + "step": 2768 + }, + { + "epoch": 0.56, + "learning_rate": 1.9208391656184338e-05, + "loss": 2.1476, + "step": 2769 + }, + { + "epoch": 0.56, + "learning_rate": 1.9207717956857256e-05, + "loss": 2.1975, + "step": 2770 + }, + { + "epoch": 0.56, + "learning_rate": 1.920704398280039e-05, + "loss": 2.1979, + "step": 2771 + }, + { + "epoch": 0.56, + "learning_rate": 1.9206369734033858e-05, + "loss": 2.0896, + "step": 2772 + }, + { + "epoch": 0.56, + "learning_rate": 1.9205695210577772e-05, + "loss": 2.2627, + "step": 2773 + }, + { + "epoch": 0.56, + "learning_rate": 1.920502041245225e-05, + "loss": 2.1049, + "step": 2774 + }, + { + "epoch": 0.56, + "learning_rate": 1.9204345339677442e-05, + "loss": 2.1037, + "step": 2775 + }, + { + "epoch": 0.56, + "learning_rate": 1.920366999227348e-05, + "loss": 2.1764, + "step": 2776 + }, + { + "epoch": 0.56, + "learning_rate": 1.9202994370260516e-05, + "loss": 2.1825, + "step": 2777 + }, + { + "epoch": 0.56, + "learning_rate": 1.9202318473658707e-05, + "loss": 2.1702, + "step": 2778 + }, + { + "epoch": 0.56, + "learning_rate": 1.920164230248822e-05, + "loss": 2.1026, + "step": 2779 + }, + { + "epoch": 0.56, + "learning_rate": 1.920096585676923e-05, + "loss": 2.161, + "step": 2780 + }, + { + "epoch": 0.56, + "learning_rate": 1.9200289136521924e-05, + "loss": 2.2758, + "step": 2781 + }, + { + "epoch": 0.56, + "learning_rate": 1.9199612141766493e-05, + "loss": 2.218, + "step": 2782 + }, + { + "epoch": 0.56, + "learning_rate": 1.919893487252313e-05, + "loss": 2.1625, + "step": 2783 + }, + { + "epoch": 0.57, + "learning_rate": 1.9198257328812045e-05, + "loss": 2.1101, + "step": 2784 + }, + { + "epoch": 0.57, + "learning_rate": 1.9197579510653456e-05, + "loss": 2.1651, + "step": 2785 + }, + { + "epoch": 0.57, + "learning_rate": 1.9196901418067587e-05, + "loss": 2.1725, + "step": 2786 + }, + { + "epoch": 0.57, + "learning_rate": 1.919622305107467e-05, + "loss": 2.1525, + "step": 2787 + }, + { + "epoch": 0.57, + "learning_rate": 1.9195544409694946e-05, + "loss": 2.1641, + "step": 2788 + }, + { + "epoch": 0.57, + "learning_rate": 1.9194865493948658e-05, + "loss": 2.1217, + "step": 2789 + }, + { + "epoch": 0.57, + "learning_rate": 1.919418630385607e-05, + "loss": 2.1958, + "step": 2790 + }, + { + "epoch": 0.57, + "learning_rate": 1.9193506839437442e-05, + "loss": 2.1776, + "step": 2791 + }, + { + "epoch": 0.57, + "learning_rate": 1.919282710071305e-05, + "loss": 2.1935, + "step": 2792 + }, + { + "epoch": 0.57, + "learning_rate": 1.9192147087703174e-05, + "loss": 2.137, + "step": 2793 + }, + { + "epoch": 0.57, + "learning_rate": 1.91914668004281e-05, + "loss": 2.198, + "step": 2794 + }, + { + "epoch": 0.57, + "learning_rate": 1.9190786238908137e-05, + "loss": 2.1976, + "step": 2795 + }, + { + "epoch": 0.57, + "learning_rate": 1.919010540316358e-05, + "loss": 2.1082, + "step": 2796 + }, + { + "epoch": 0.57, + "learning_rate": 1.9189424293214748e-05, + "loss": 2.1216, + "step": 2797 + }, + { + "epoch": 0.57, + "learning_rate": 1.9188742909081956e-05, + "loss": 2.209, + "step": 2798 + }, + { + "epoch": 0.57, + "learning_rate": 1.9188061250785544e-05, + "loss": 2.1124, + "step": 2799 + }, + { + "epoch": 0.57, + "learning_rate": 1.9187379318345845e-05, + "loss": 2.1396, + "step": 2800 + }, + { + "epoch": 0.57, + "learning_rate": 1.918669711178321e-05, + "loss": 2.1402, + "step": 2801 + }, + { + "epoch": 0.57, + "learning_rate": 1.9186014631117992e-05, + "loss": 2.0936, + "step": 2802 + }, + { + "epoch": 0.57, + "learning_rate": 1.918533187637055e-05, + "loss": 2.1003, + "step": 2803 + }, + { + "epoch": 0.57, + "learning_rate": 1.9184648847561256e-05, + "loss": 2.1683, + "step": 2804 + }, + { + "epoch": 0.57, + "learning_rate": 1.9183965544710495e-05, + "loss": 2.0856, + "step": 2805 + }, + { + "epoch": 0.57, + "learning_rate": 1.918328196783865e-05, + "loss": 2.2124, + "step": 2806 + }, + { + "epoch": 0.57, + "learning_rate": 1.9182598116966122e-05, + "loss": 2.1214, + "step": 2807 + }, + { + "epoch": 0.57, + "learning_rate": 1.9181913992113312e-05, + "loss": 2.2176, + "step": 2808 + }, + { + "epoch": 0.57, + "learning_rate": 1.918122959330063e-05, + "loss": 2.1906, + "step": 2809 + }, + { + "epoch": 0.57, + "learning_rate": 1.9180544920548496e-05, + "loss": 2.182, + "step": 2810 + }, + { + "epoch": 0.57, + "learning_rate": 1.917985997387734e-05, + "loss": 2.1719, + "step": 2811 + }, + { + "epoch": 0.57, + "learning_rate": 1.9179174753307603e-05, + "loss": 2.2103, + "step": 2812 + }, + { + "epoch": 0.57, + "learning_rate": 1.9178489258859726e-05, + "loss": 2.1926, + "step": 2813 + }, + { + "epoch": 0.57, + "learning_rate": 1.917780349055416e-05, + "loss": 2.1753, + "step": 2814 + }, + { + "epoch": 0.57, + "learning_rate": 1.9177117448411368e-05, + "loss": 2.1767, + "step": 2815 + }, + { + "epoch": 0.57, + "learning_rate": 1.917643113245182e-05, + "loss": 2.1314, + "step": 2816 + }, + { + "epoch": 0.57, + "learning_rate": 1.9175744542695992e-05, + "loss": 2.1628, + "step": 2817 + }, + { + "epoch": 0.57, + "learning_rate": 1.9175057679164374e-05, + "loss": 2.2036, + "step": 2818 + }, + { + "epoch": 0.57, + "learning_rate": 1.9174370541877454e-05, + "loss": 2.1473, + "step": 2819 + }, + { + "epoch": 0.57, + "learning_rate": 1.9173683130855737e-05, + "loss": 2.1625, + "step": 2820 + }, + { + "epoch": 0.57, + "learning_rate": 1.9172995446119735e-05, + "loss": 2.1487, + "step": 2821 + }, + { + "epoch": 0.57, + "learning_rate": 1.917230748768996e-05, + "loss": 2.2045, + "step": 2822 + }, + { + "epoch": 0.57, + "learning_rate": 1.917161925558695e-05, + "loss": 2.1127, + "step": 2823 + }, + { + "epoch": 0.57, + "learning_rate": 1.917093074983123e-05, + "loss": 2.1587, + "step": 2824 + }, + { + "epoch": 0.57, + "learning_rate": 1.9170241970443344e-05, + "loss": 2.0673, + "step": 2825 + }, + { + "epoch": 0.57, + "learning_rate": 1.9169552917443844e-05, + "loss": 2.1113, + "step": 2826 + }, + { + "epoch": 0.57, + "learning_rate": 1.916886359085329e-05, + "loss": 2.1708, + "step": 2827 + }, + { + "epoch": 0.57, + "learning_rate": 1.9168173990692253e-05, + "loss": 2.1106, + "step": 2828 + }, + { + "epoch": 0.57, + "learning_rate": 1.91674841169813e-05, + "loss": 2.129, + "step": 2829 + }, + { + "epoch": 0.57, + "learning_rate": 1.9166793969741023e-05, + "loss": 2.1336, + "step": 2830 + }, + { + "epoch": 0.57, + "learning_rate": 1.916610354899201e-05, + "loss": 2.1947, + "step": 2831 + }, + { + "epoch": 0.57, + "learning_rate": 1.916541285475486e-05, + "loss": 2.1254, + "step": 2832 + }, + { + "epoch": 0.58, + "learning_rate": 1.9164721887050182e-05, + "loss": 2.0717, + "step": 2833 + }, + { + "epoch": 0.58, + "learning_rate": 1.916403064589859e-05, + "loss": 2.1392, + "step": 2834 + }, + { + "epoch": 0.58, + "learning_rate": 1.916333913132072e-05, + "loss": 2.1769, + "step": 2835 + }, + { + "epoch": 0.58, + "learning_rate": 1.916264734333719e-05, + "loss": 2.2159, + "step": 2836 + }, + { + "epoch": 0.58, + "learning_rate": 1.9161955281968646e-05, + "loss": 2.2448, + "step": 2837 + }, + { + "epoch": 0.58, + "learning_rate": 1.916126294723574e-05, + "loss": 2.2071, + "step": 2838 + }, + { + "epoch": 0.58, + "learning_rate": 1.9160570339159123e-05, + "loss": 2.2467, + "step": 2839 + }, + { + "epoch": 0.58, + "learning_rate": 1.9159877457759468e-05, + "loss": 2.2097, + "step": 2840 + }, + { + "epoch": 0.58, + "learning_rate": 1.9159184303057442e-05, + "loss": 2.1807, + "step": 2841 + }, + { + "epoch": 0.58, + "learning_rate": 1.915849087507373e-05, + "loss": 2.1608, + "step": 2842 + }, + { + "epoch": 0.58, + "learning_rate": 1.9157797173829023e-05, + "loss": 2.1409, + "step": 2843 + }, + { + "epoch": 0.58, + "learning_rate": 1.9157103199344015e-05, + "loss": 2.1562, + "step": 2844 + }, + { + "epoch": 0.58, + "learning_rate": 1.9156408951639416e-05, + "loss": 2.2209, + "step": 2845 + }, + { + "epoch": 0.58, + "learning_rate": 1.9155714430735934e-05, + "loss": 2.1768, + "step": 2846 + }, + { + "epoch": 0.58, + "learning_rate": 1.9155019636654297e-05, + "loss": 2.2404, + "step": 2847 + }, + { + "epoch": 0.58, + "learning_rate": 1.9154324569415235e-05, + "loss": 2.2391, + "step": 2848 + }, + { + "epoch": 0.58, + "learning_rate": 1.9153629229039486e-05, + "loss": 2.157, + "step": 2849 + }, + { + "epoch": 0.58, + "learning_rate": 1.91529336155478e-05, + "loss": 2.1854, + "step": 2850 + }, + { + "epoch": 0.58, + "learning_rate": 1.915223772896092e-05, + "loss": 2.1374, + "step": 2851 + }, + { + "epoch": 0.58, + "learning_rate": 1.9151541569299623e-05, + "loss": 2.1232, + "step": 2852 + }, + { + "epoch": 0.58, + "learning_rate": 1.9150845136584672e-05, + "loss": 2.1122, + "step": 2853 + }, + { + "epoch": 0.58, + "learning_rate": 1.915014843083685e-05, + "loss": 2.1449, + "step": 2854 + }, + { + "epoch": 0.58, + "learning_rate": 1.9149451452076943e-05, + "loss": 2.1823, + "step": 2855 + }, + { + "epoch": 0.58, + "learning_rate": 1.9148754200325748e-05, + "loss": 2.1569, + "step": 2856 + }, + { + "epoch": 0.58, + "learning_rate": 1.9148056675604067e-05, + "loss": 2.1275, + "step": 2857 + }, + { + "epoch": 0.58, + "learning_rate": 1.9147358877932713e-05, + "loss": 2.2363, + "step": 2858 + }, + { + "epoch": 0.58, + "learning_rate": 1.9146660807332508e-05, + "loss": 2.1405, + "step": 2859 + }, + { + "epoch": 0.58, + "learning_rate": 1.914596246382428e-05, + "loss": 2.1866, + "step": 2860 + }, + { + "epoch": 0.58, + "learning_rate": 1.914526384742886e-05, + "loss": 2.1118, + "step": 2861 + }, + { + "epoch": 0.58, + "learning_rate": 1.9144564958167097e-05, + "loss": 2.1089, + "step": 2862 + }, + { + "epoch": 0.58, + "learning_rate": 1.9143865796059845e-05, + "loss": 2.1552, + "step": 2863 + }, + { + "epoch": 0.58, + "learning_rate": 1.9143166361127964e-05, + "loss": 2.1721, + "step": 2864 + }, + { + "epoch": 0.58, + "learning_rate": 1.9142466653392317e-05, + "loss": 2.1313, + "step": 2865 + }, + { + "epoch": 0.58, + "learning_rate": 1.914176667287379e-05, + "loss": 2.1561, + "step": 2866 + }, + { + "epoch": 0.58, + "learning_rate": 1.914106641959326e-05, + "loss": 2.142, + "step": 2867 + }, + { + "epoch": 0.58, + "learning_rate": 1.9140365893571632e-05, + "loss": 2.1095, + "step": 2868 + }, + { + "epoch": 0.58, + "learning_rate": 1.9139665094829794e-05, + "loss": 2.1452, + "step": 2869 + }, + { + "epoch": 0.58, + "learning_rate": 1.9138964023388666e-05, + "loss": 2.205, + "step": 2870 + }, + { + "epoch": 0.58, + "learning_rate": 1.9138262679269157e-05, + "loss": 2.1372, + "step": 2871 + }, + { + "epoch": 0.58, + "learning_rate": 1.91375610624922e-05, + "loss": 2.1623, + "step": 2872 + }, + { + "epoch": 0.58, + "learning_rate": 1.913685917307873e-05, + "loss": 2.0826, + "step": 2873 + }, + { + "epoch": 0.58, + "learning_rate": 1.913615701104968e-05, + "loss": 2.2009, + "step": 2874 + }, + { + "epoch": 0.58, + "learning_rate": 1.913545457642601e-05, + "loss": 2.2078, + "step": 2875 + }, + { + "epoch": 0.58, + "learning_rate": 1.9134751869228678e-05, + "loss": 2.2139, + "step": 2876 + }, + { + "epoch": 0.58, + "learning_rate": 1.9134048889478637e-05, + "loss": 2.1491, + "step": 2877 + }, + { + "epoch": 0.58, + "learning_rate": 1.913334563719688e-05, + "loss": 2.1497, + "step": 2878 + }, + { + "epoch": 0.58, + "learning_rate": 1.913264211240438e-05, + "loss": 2.2009, + "step": 2879 + }, + { + "epoch": 0.58, + "learning_rate": 1.913193831512213e-05, + "loss": 2.2303, + "step": 2880 + }, + { + "epoch": 0.58, + "learning_rate": 1.9131234245371127e-05, + "loss": 2.2105, + "step": 2881 + }, + { + "epoch": 0.58, + "learning_rate": 1.9130529903172385e-05, + "loss": 2.2315, + "step": 2882 + }, + { + "epoch": 0.59, + "learning_rate": 1.9129825288546913e-05, + "loss": 2.1727, + "step": 2883 + }, + { + "epoch": 0.59, + "learning_rate": 1.9129120401515732e-05, + "loss": 2.1031, + "step": 2884 + }, + { + "epoch": 0.59, + "learning_rate": 1.912841524209988e-05, + "loss": 2.0796, + "step": 2885 + }, + { + "epoch": 0.59, + "learning_rate": 1.91277098103204e-05, + "loss": 2.1492, + "step": 2886 + }, + { + "epoch": 0.59, + "learning_rate": 1.9127004106198327e-05, + "loss": 2.2194, + "step": 2887 + }, + { + "epoch": 0.59, + "learning_rate": 1.912629812975473e-05, + "loss": 2.2011, + "step": 2888 + }, + { + "epoch": 0.59, + "learning_rate": 1.9125591881010665e-05, + "loss": 2.1477, + "step": 2889 + }, + { + "epoch": 0.59, + "learning_rate": 1.9124885359987206e-05, + "loss": 2.2086, + "step": 2890 + }, + { + "epoch": 0.59, + "learning_rate": 1.9124178566705437e-05, + "loss": 2.1294, + "step": 2891 + }, + { + "epoch": 0.59, + "learning_rate": 1.9123471501186442e-05, + "loss": 2.1601, + "step": 2892 + }, + { + "epoch": 0.59, + "learning_rate": 1.912276416345132e-05, + "loss": 2.1145, + "step": 2893 + }, + { + "epoch": 0.59, + "learning_rate": 1.9122056553521175e-05, + "loss": 2.1186, + "step": 2894 + }, + { + "epoch": 0.59, + "learning_rate": 1.912134867141712e-05, + "loss": 2.1436, + "step": 2895 + }, + { + "epoch": 0.59, + "learning_rate": 1.9120640517160276e-05, + "loss": 2.1601, + "step": 2896 + }, + { + "epoch": 0.59, + "learning_rate": 1.9119932090771775e-05, + "loss": 2.13, + "step": 2897 + }, + { + "epoch": 0.59, + "learning_rate": 1.911922339227275e-05, + "loss": 2.1273, + "step": 2898 + }, + { + "epoch": 0.59, + "learning_rate": 1.9118514421684346e-05, + "loss": 2.1091, + "step": 2899 + }, + { + "epoch": 0.59, + "learning_rate": 1.911780517902772e-05, + "loss": 2.1357, + "step": 2900 + }, + { + "epoch": 0.59, + "learning_rate": 1.9117095664324034e-05, + "loss": 2.1506, + "step": 2901 + }, + { + "epoch": 0.59, + "learning_rate": 1.9116385877594453e-05, + "loss": 2.1599, + "step": 2902 + }, + { + "epoch": 0.59, + "learning_rate": 1.9115675818860156e-05, + "loss": 2.1641, + "step": 2903 + }, + { + "epoch": 0.59, + "learning_rate": 1.9114965488142333e-05, + "loss": 2.0883, + "step": 2904 + }, + { + "epoch": 0.59, + "learning_rate": 1.9114254885462177e-05, + "loss": 2.1839, + "step": 2905 + }, + { + "epoch": 0.59, + "learning_rate": 1.9113544010840886e-05, + "loss": 2.2249, + "step": 2906 + }, + { + "epoch": 0.59, + "learning_rate": 1.9112832864299675e-05, + "loss": 2.2047, + "step": 2907 + }, + { + "epoch": 0.59, + "learning_rate": 1.9112121445859763e-05, + "loss": 2.173, + "step": 2908 + }, + { + "epoch": 0.59, + "learning_rate": 1.911140975554237e-05, + "loss": 2.1579, + "step": 2909 + }, + { + "epoch": 0.59, + "learning_rate": 1.9110697793368733e-05, + "loss": 2.1646, + "step": 2910 + }, + { + "epoch": 0.59, + "learning_rate": 1.91099855593601e-05, + "loss": 2.1813, + "step": 2911 + }, + { + "epoch": 0.59, + "learning_rate": 1.9109273053537717e-05, + "loss": 2.1196, + "step": 2912 + }, + { + "epoch": 0.59, + "learning_rate": 1.9108560275922845e-05, + "loss": 2.0697, + "step": 2913 + }, + { + "epoch": 0.59, + "learning_rate": 1.910784722653675e-05, + "loss": 2.1121, + "step": 2914 + }, + { + "epoch": 0.59, + "learning_rate": 1.910713390540071e-05, + "loss": 2.1909, + "step": 2915 + }, + { + "epoch": 0.59, + "learning_rate": 1.9106420312536e-05, + "loss": 2.1882, + "step": 2916 + }, + { + "epoch": 0.59, + "learning_rate": 1.9105706447963924e-05, + "loss": 2.1962, + "step": 2917 + }, + { + "epoch": 0.59, + "learning_rate": 1.910499231170577e-05, + "loss": 2.1605, + "step": 2918 + }, + { + "epoch": 0.59, + "learning_rate": 1.910427790378285e-05, + "loss": 2.1641, + "step": 2919 + }, + { + "epoch": 0.59, + "learning_rate": 1.9103563224216482e-05, + "loss": 2.1483, + "step": 2920 + }, + { + "epoch": 0.59, + "learning_rate": 1.9102848273027986e-05, + "loss": 2.1591, + "step": 2921 + }, + { + "epoch": 0.59, + "learning_rate": 1.9102133050238698e-05, + "loss": 2.1301, + "step": 2922 + }, + { + "epoch": 0.59, + "learning_rate": 1.9101417555869955e-05, + "loss": 2.1601, + "step": 2923 + }, + { + "epoch": 0.59, + "learning_rate": 1.9100701789943104e-05, + "loss": 2.1794, + "step": 2924 + }, + { + "epoch": 0.59, + "learning_rate": 1.9099985752479505e-05, + "loss": 2.0876, + "step": 2925 + }, + { + "epoch": 0.59, + "learning_rate": 1.9099269443500522e-05, + "loss": 2.1847, + "step": 2926 + }, + { + "epoch": 0.59, + "learning_rate": 1.9098552863027524e-05, + "loss": 2.1587, + "step": 2927 + }, + { + "epoch": 0.59, + "learning_rate": 1.9097836011081895e-05, + "loss": 2.0897, + "step": 2928 + }, + { + "epoch": 0.59, + "learning_rate": 1.909711888768502e-05, + "loss": 2.1402, + "step": 2929 + }, + { + "epoch": 0.59, + "learning_rate": 1.90964014928583e-05, + "loss": 2.1824, + "step": 2930 + }, + { + "epoch": 0.59, + "learning_rate": 1.9095683826623134e-05, + "loss": 2.1654, + "step": 2931 + }, + { + "epoch": 0.6, + "learning_rate": 1.9094965889000943e-05, + "loss": 2.1709, + "step": 2932 + }, + { + "epoch": 0.6, + "learning_rate": 1.9094247680013144e-05, + "loss": 2.1795, + "step": 2933 + }, + { + "epoch": 0.6, + "learning_rate": 1.9093529199681162e-05, + "loss": 2.234, + "step": 2934 + }, + { + "epoch": 0.6, + "learning_rate": 1.909281044802644e-05, + "loss": 2.2027, + "step": 2935 + }, + { + "epoch": 0.6, + "learning_rate": 1.9092091425070422e-05, + "loss": 2.1742, + "step": 2936 + }, + { + "epoch": 0.6, + "learning_rate": 1.9091372130834557e-05, + "loss": 2.1789, + "step": 2937 + }, + { + "epoch": 0.6, + "learning_rate": 1.9090652565340315e-05, + "loss": 2.1368, + "step": 2938 + }, + { + "epoch": 0.6, + "learning_rate": 1.9089932728609158e-05, + "loss": 2.1344, + "step": 2939 + }, + { + "epoch": 0.6, + "learning_rate": 1.908921262066257e-05, + "loss": 2.1141, + "step": 2940 + }, + { + "epoch": 0.6, + "learning_rate": 1.9088492241522033e-05, + "loss": 2.1541, + "step": 2941 + }, + { + "epoch": 0.6, + "learning_rate": 1.908777159120904e-05, + "loss": 2.1544, + "step": 2942 + }, + { + "epoch": 0.6, + "learning_rate": 1.908705066974509e-05, + "loss": 2.1792, + "step": 2943 + }, + { + "epoch": 0.6, + "learning_rate": 1.9086329477151704e-05, + "loss": 2.1139, + "step": 2944 + }, + { + "epoch": 0.6, + "learning_rate": 1.908560801345039e-05, + "loss": 2.1147, + "step": 2945 + }, + { + "epoch": 0.6, + "learning_rate": 1.908488627866268e-05, + "loss": 2.1683, + "step": 2946 + }, + { + "epoch": 0.6, + "learning_rate": 1.9084164272810102e-05, + "loss": 2.1833, + "step": 2947 + }, + { + "epoch": 0.6, + "learning_rate": 1.9083441995914204e-05, + "loss": 2.1628, + "step": 2948 + }, + { + "epoch": 0.6, + "learning_rate": 1.9082719447996537e-05, + "loss": 2.1943, + "step": 2949 + }, + { + "epoch": 0.6, + "learning_rate": 1.9081996629078658e-05, + "loss": 2.1622, + "step": 2950 + }, + { + "epoch": 0.6, + "learning_rate": 1.908127353918213e-05, + "loss": 2.1363, + "step": 2951 + }, + { + "epoch": 0.6, + "learning_rate": 1.908055017832853e-05, + "loss": 2.0916, + "step": 2952 + }, + { + "epoch": 0.6, + "learning_rate": 1.907982654653945e-05, + "loss": 2.1667, + "step": 2953 + }, + { + "epoch": 0.6, + "learning_rate": 1.9079102643836466e-05, + "loss": 2.1932, + "step": 2954 + }, + { + "epoch": 0.6, + "learning_rate": 1.9078378470241183e-05, + "loss": 2.1477, + "step": 2955 + }, + { + "epoch": 0.6, + "learning_rate": 1.9077654025775212e-05, + "loss": 2.2528, + "step": 2956 + }, + { + "epoch": 0.6, + "learning_rate": 1.9076929310460163e-05, + "loss": 2.1395, + "step": 2957 + }, + { + "epoch": 0.6, + "learning_rate": 1.9076204324317663e-05, + "loss": 2.109, + "step": 2958 + }, + { + "epoch": 0.6, + "learning_rate": 1.907547906736934e-05, + "loss": 2.193, + "step": 2959 + }, + { + "epoch": 0.6, + "learning_rate": 1.9074753539636838e-05, + "loss": 2.194, + "step": 2960 + }, + { + "epoch": 0.6, + "learning_rate": 1.90740277411418e-05, + "loss": 2.17, + "step": 2961 + }, + { + "epoch": 0.6, + "learning_rate": 1.9073301671905882e-05, + "loss": 2.2322, + "step": 2962 + }, + { + "epoch": 0.6, + "learning_rate": 1.9072575331950748e-05, + "loss": 2.1406, + "step": 2963 + }, + { + "epoch": 0.6, + "learning_rate": 1.9071848721298076e-05, + "loss": 2.1663, + "step": 2964 + }, + { + "epoch": 0.6, + "learning_rate": 1.9071121839969535e-05, + "loss": 2.1357, + "step": 2965 + }, + { + "epoch": 0.6, + "learning_rate": 1.907039468798682e-05, + "loss": 2.1726, + "step": 2966 + }, + { + "epoch": 0.6, + "learning_rate": 1.9069667265371625e-05, + "loss": 2.2166, + "step": 2967 + }, + { + "epoch": 0.6, + "learning_rate": 1.9068939572145656e-05, + "loss": 2.1695, + "step": 2968 + }, + { + "epoch": 0.6, + "learning_rate": 1.906821160833062e-05, + "loss": 2.2239, + "step": 2969 + }, + { + "epoch": 0.6, + "learning_rate": 1.9067483373948245e-05, + "loss": 2.299, + "step": 2970 + }, + { + "epoch": 0.6, + "learning_rate": 1.9066754869020248e-05, + "loss": 2.2326, + "step": 2971 + }, + { + "epoch": 0.6, + "learning_rate": 1.906602609356838e-05, + "loss": 2.1725, + "step": 2972 + }, + { + "epoch": 0.6, + "learning_rate": 1.9065297047614372e-05, + "loss": 2.138, + "step": 2973 + }, + { + "epoch": 0.6, + "learning_rate": 1.9064567731179986e-05, + "loss": 2.0962, + "step": 2974 + }, + { + "epoch": 0.6, + "learning_rate": 1.9063838144286976e-05, + "loss": 2.1686, + "step": 2975 + }, + { + "epoch": 0.6, + "learning_rate": 1.9063108286957113e-05, + "loss": 2.1487, + "step": 2976 + }, + { + "epoch": 0.6, + "learning_rate": 1.9062378159212174e-05, + "loss": 2.1746, + "step": 2977 + }, + { + "epoch": 0.6, + "learning_rate": 1.9061647761073946e-05, + "loss": 2.1058, + "step": 2978 + }, + { + "epoch": 0.6, + "learning_rate": 1.9060917092564217e-05, + "loss": 2.0748, + "step": 2979 + }, + { + "epoch": 0.6, + "learning_rate": 1.906018615370479e-05, + "loss": 2.1164, + "step": 2980 + }, + { + "epoch": 0.61, + "learning_rate": 1.9059454944517475e-05, + "loss": 2.0793, + "step": 2981 + }, + { + "epoch": 0.61, + "learning_rate": 1.9058723465024087e-05, + "loss": 2.1763, + "step": 2982 + }, + { + "epoch": 0.61, + "learning_rate": 1.9057991715246454e-05, + "loss": 2.1135, + "step": 2983 + }, + { + "epoch": 0.61, + "learning_rate": 1.9057259695206407e-05, + "loss": 2.1584, + "step": 2984 + }, + { + "epoch": 0.61, + "learning_rate": 1.9056527404925788e-05, + "loss": 2.0965, + "step": 2985 + }, + { + "epoch": 0.61, + "learning_rate": 1.905579484442645e-05, + "loss": 2.1217, + "step": 2986 + }, + { + "epoch": 0.61, + "learning_rate": 1.9055062013730237e-05, + "loss": 2.1536, + "step": 2987 + }, + { + "epoch": 0.61, + "learning_rate": 1.9054328912859033e-05, + "loss": 2.1476, + "step": 2988 + }, + { + "epoch": 0.61, + "learning_rate": 1.9053595541834697e-05, + "loss": 2.072, + "step": 2989 + }, + { + "epoch": 0.61, + "learning_rate": 1.9052861900679117e-05, + "loss": 2.1503, + "step": 2990 + }, + { + "epoch": 0.61, + "learning_rate": 1.9052127989414182e-05, + "loss": 2.1665, + "step": 2991 + }, + { + "epoch": 0.61, + "learning_rate": 1.9051393808061787e-05, + "loss": 2.1904, + "step": 2992 + }, + { + "epoch": 0.61, + "learning_rate": 1.905065935664384e-05, + "loss": 2.1602, + "step": 2993 + }, + { + "epoch": 0.61, + "learning_rate": 1.9049924635182256e-05, + "loss": 2.1762, + "step": 2994 + }, + { + "epoch": 0.61, + "learning_rate": 1.9049189643698955e-05, + "loss": 2.2645, + "step": 2995 + }, + { + "epoch": 0.61, + "learning_rate": 1.9048454382215865e-05, + "loss": 2.1725, + "step": 2996 + }, + { + "epoch": 0.61, + "learning_rate": 1.904771885075493e-05, + "loss": 2.157, + "step": 2997 + }, + { + "epoch": 0.61, + "learning_rate": 1.904698304933809e-05, + "loss": 2.1751, + "step": 2998 + }, + { + "epoch": 0.61, + "learning_rate": 1.9046246977987297e-05, + "loss": 2.1588, + "step": 2999 + }, + { + "epoch": 0.61, + "learning_rate": 1.904551063672452e-05, + "loss": 2.2012, + "step": 3000 + }, + { + "epoch": 0.61, + "learning_rate": 1.9044774025571725e-05, + "loss": 2.2574, + "step": 3001 + }, + { + "epoch": 0.61, + "learning_rate": 1.9044037144550896e-05, + "loss": 2.1739, + "step": 3002 + }, + { + "epoch": 0.61, + "learning_rate": 1.9043299993684012e-05, + "loss": 2.1142, + "step": 3003 + }, + { + "epoch": 0.61, + "learning_rate": 1.9042562572993068e-05, + "loss": 2.193, + "step": 3004 + }, + { + "epoch": 0.61, + "learning_rate": 1.904182488250007e-05, + "loss": 2.1796, + "step": 3005 + }, + { + "epoch": 0.61, + "learning_rate": 1.9041086922227028e-05, + "loss": 2.1234, + "step": 3006 + }, + { + "epoch": 0.61, + "learning_rate": 1.9040348692195957e-05, + "loss": 2.2343, + "step": 3007 + }, + { + "epoch": 0.61, + "learning_rate": 1.9039610192428884e-05, + "loss": 2.0852, + "step": 3008 + }, + { + "epoch": 0.61, + "learning_rate": 1.9038871422947853e-05, + "loss": 2.1882, + "step": 3009 + }, + { + "epoch": 0.61, + "learning_rate": 1.903813238377489e-05, + "loss": 2.0757, + "step": 3010 + }, + { + "epoch": 0.61, + "learning_rate": 1.903739307493206e-05, + "loss": 2.2297, + "step": 3011 + }, + { + "epoch": 0.61, + "learning_rate": 1.9036653496441414e-05, + "loss": 2.1005, + "step": 3012 + }, + { + "epoch": 0.61, + "learning_rate": 1.903591364832502e-05, + "loss": 2.1999, + "step": 3013 + }, + { + "epoch": 0.61, + "learning_rate": 1.9035173530604955e-05, + "loss": 2.0818, + "step": 3014 + }, + { + "epoch": 0.61, + "learning_rate": 1.90344331433033e-05, + "loss": 2.1876, + "step": 3015 + }, + { + "epoch": 0.61, + "learning_rate": 1.9033692486442147e-05, + "loss": 2.1604, + "step": 3016 + }, + { + "epoch": 0.61, + "learning_rate": 1.9032951560043594e-05, + "loss": 2.202, + "step": 3017 + }, + { + "epoch": 0.61, + "learning_rate": 1.9032210364129745e-05, + "loss": 2.0856, + "step": 3018 + }, + { + "epoch": 0.61, + "learning_rate": 1.903146889872272e-05, + "loss": 2.1593, + "step": 3019 + }, + { + "epoch": 0.61, + "learning_rate": 1.9030727163844644e-05, + "loss": 2.172, + "step": 3020 + }, + { + "epoch": 0.61, + "learning_rate": 1.902998515951764e-05, + "loss": 2.1886, + "step": 3021 + }, + { + "epoch": 0.61, + "learning_rate": 1.9029242885763853e-05, + "loss": 2.2153, + "step": 3022 + }, + { + "epoch": 0.61, + "learning_rate": 1.902850034260543e-05, + "loss": 2.1599, + "step": 3023 + }, + { + "epoch": 0.61, + "learning_rate": 1.902775753006452e-05, + "loss": 2.1726, + "step": 3024 + }, + { + "epoch": 0.61, + "learning_rate": 1.9027014448163294e-05, + "loss": 2.1805, + "step": 3025 + }, + { + "epoch": 0.61, + "learning_rate": 1.9026271096923922e-05, + "loss": 2.1466, + "step": 3026 + }, + { + "epoch": 0.61, + "learning_rate": 1.9025527476368578e-05, + "loss": 2.1577, + "step": 3027 + }, + { + "epoch": 0.61, + "learning_rate": 1.9024783586519455e-05, + "loss": 2.2215, + "step": 3028 + }, + { + "epoch": 0.61, + "learning_rate": 1.9024039427398747e-05, + "loss": 2.1424, + "step": 3029 + }, + { + "epoch": 0.62, + "learning_rate": 1.9023294999028654e-05, + "loss": 2.1843, + "step": 3030 + }, + { + "epoch": 0.62, + "learning_rate": 1.9022550301431392e-05, + "loss": 2.211, + "step": 3031 + }, + { + "epoch": 0.62, + "learning_rate": 1.9021805334629177e-05, + "loss": 2.1054, + "step": 3032 + }, + { + "epoch": 0.62, + "learning_rate": 1.902106009864424e-05, + "loss": 2.1305, + "step": 3033 + }, + { + "epoch": 0.62, + "learning_rate": 1.902031459349881e-05, + "loss": 2.179, + "step": 3034 + }, + { + "epoch": 0.62, + "learning_rate": 1.901956881921514e-05, + "loss": 2.1384, + "step": 3035 + }, + { + "epoch": 0.62, + "learning_rate": 1.9018822775815476e-05, + "loss": 2.1879, + "step": 3036 + }, + { + "epoch": 0.62, + "learning_rate": 1.9018076463322077e-05, + "loss": 2.1502, + "step": 3037 + }, + { + "epoch": 0.62, + "learning_rate": 1.9017329881757213e-05, + "loss": 2.1963, + "step": 3038 + }, + { + "epoch": 0.62, + "learning_rate": 1.901658303114316e-05, + "loss": 2.1348, + "step": 3039 + }, + { + "epoch": 0.62, + "learning_rate": 1.9015835911502198e-05, + "loss": 2.1209, + "step": 3040 + }, + { + "epoch": 0.62, + "learning_rate": 1.901508852285662e-05, + "loss": 2.1334, + "step": 3041 + }, + { + "epoch": 0.62, + "learning_rate": 1.9014340865228727e-05, + "loss": 2.2107, + "step": 3042 + }, + { + "epoch": 0.62, + "learning_rate": 1.901359293864083e-05, + "loss": 2.14, + "step": 3043 + }, + { + "epoch": 0.62, + "learning_rate": 1.901284474311524e-05, + "loss": 2.1415, + "step": 3044 + }, + { + "epoch": 0.62, + "learning_rate": 1.9012096278674283e-05, + "loss": 2.2356, + "step": 3045 + }, + { + "epoch": 0.62, + "learning_rate": 1.9011347545340287e-05, + "loss": 2.2345, + "step": 3046 + }, + { + "epoch": 0.62, + "learning_rate": 1.9010598543135596e-05, + "loss": 2.1129, + "step": 3047 + }, + { + "epoch": 0.62, + "learning_rate": 1.9009849272082557e-05, + "loss": 2.2311, + "step": 3048 + }, + { + "epoch": 0.62, + "learning_rate": 1.9009099732203528e-05, + "loss": 2.1348, + "step": 3049 + }, + { + "epoch": 0.62, + "learning_rate": 1.900834992352087e-05, + "loss": 2.1082, + "step": 3050 + }, + { + "epoch": 0.62, + "learning_rate": 1.9007599846056957e-05, + "loss": 2.1865, + "step": 3051 + }, + { + "epoch": 0.62, + "learning_rate": 1.9006849499834166e-05, + "loss": 2.1579, + "step": 3052 + }, + { + "epoch": 0.62, + "learning_rate": 1.9006098884874885e-05, + "loss": 2.1167, + "step": 3053 + }, + { + "epoch": 0.62, + "learning_rate": 1.9005348001201517e-05, + "loss": 2.1333, + "step": 3054 + }, + { + "epoch": 0.62, + "learning_rate": 1.9004596848836457e-05, + "loss": 2.1516, + "step": 3055 + }, + { + "epoch": 0.62, + "learning_rate": 1.900384542780212e-05, + "loss": 2.1901, + "step": 3056 + }, + { + "epoch": 0.62, + "learning_rate": 1.900309373812093e-05, + "loss": 2.1446, + "step": 3057 + }, + { + "epoch": 0.62, + "learning_rate": 1.900234177981531e-05, + "loss": 2.1749, + "step": 3058 + }, + { + "epoch": 0.62, + "learning_rate": 1.90015895529077e-05, + "loss": 2.1086, + "step": 3059 + }, + { + "epoch": 0.62, + "learning_rate": 1.900083705742054e-05, + "loss": 2.2117, + "step": 3060 + }, + { + "epoch": 0.62, + "learning_rate": 1.9000084293376286e-05, + "loss": 2.1658, + "step": 3061 + }, + { + "epoch": 0.62, + "learning_rate": 1.8999331260797397e-05, + "loss": 2.1752, + "step": 3062 + }, + { + "epoch": 0.62, + "learning_rate": 1.899857795970634e-05, + "loss": 2.1447, + "step": 3063 + }, + { + "epoch": 0.62, + "learning_rate": 1.899782439012559e-05, + "loss": 2.1791, + "step": 3064 + }, + { + "epoch": 0.62, + "learning_rate": 1.8997070552077636e-05, + "loss": 2.1505, + "step": 3065 + }, + { + "epoch": 0.62, + "learning_rate": 1.899631644558497e-05, + "loss": 2.1083, + "step": 3066 + }, + { + "epoch": 0.62, + "learning_rate": 1.899556207067008e-05, + "loss": 2.1027, + "step": 3067 + }, + { + "epoch": 0.62, + "learning_rate": 1.8994807427355494e-05, + "loss": 2.2479, + "step": 3068 + }, + { + "epoch": 0.62, + "learning_rate": 1.899405251566371e-05, + "loss": 2.1623, + "step": 3069 + }, + { + "epoch": 0.62, + "learning_rate": 1.8993297335617264e-05, + "loss": 2.1604, + "step": 3070 + }, + { + "epoch": 0.62, + "learning_rate": 1.8992541887238688e-05, + "loss": 2.1284, + "step": 3071 + }, + { + "epoch": 0.62, + "learning_rate": 1.8991786170550513e-05, + "loss": 2.1688, + "step": 3072 + }, + { + "epoch": 0.62, + "learning_rate": 1.8991030185575293e-05, + "loss": 2.1717, + "step": 3073 + }, + { + "epoch": 0.62, + "learning_rate": 1.8990273932335585e-05, + "loss": 2.2016, + "step": 3074 + }, + { + "epoch": 0.62, + "learning_rate": 1.8989517410853956e-05, + "loss": 2.1059, + "step": 3075 + }, + { + "epoch": 0.62, + "learning_rate": 1.898876062115297e-05, + "loss": 2.1389, + "step": 3076 + }, + { + "epoch": 0.62, + "learning_rate": 1.8988003563255212e-05, + "loss": 2.2344, + "step": 3077 + }, + { + "epoch": 0.62, + "learning_rate": 1.8987246237183275e-05, + "loss": 2.1349, + "step": 3078 + }, + { + "epoch": 0.62, + "learning_rate": 1.8986488642959745e-05, + "loss": 2.1367, + "step": 3079 + }, + { + "epoch": 0.63, + "learning_rate": 1.8985730780607237e-05, + "loss": 2.2156, + "step": 3080 + }, + { + "epoch": 0.63, + "learning_rate": 1.8984972650148354e-05, + "loss": 2.107, + "step": 3081 + }, + { + "epoch": 0.63, + "learning_rate": 1.8984214251605718e-05, + "loss": 2.2128, + "step": 3082 + }, + { + "epoch": 0.63, + "learning_rate": 1.8983455585001966e-05, + "loss": 2.1487, + "step": 3083 + }, + { + "epoch": 0.63, + "learning_rate": 1.8982696650359723e-05, + "loss": 2.124, + "step": 3084 + }, + { + "epoch": 0.63, + "learning_rate": 1.898193744770164e-05, + "loss": 2.1546, + "step": 3085 + }, + { + "epoch": 0.63, + "learning_rate": 1.8981177977050364e-05, + "loss": 2.1516, + "step": 3086 + }, + { + "epoch": 0.63, + "learning_rate": 1.8980418238428565e-05, + "loss": 2.1855, + "step": 3087 + }, + { + "epoch": 0.63, + "learning_rate": 1.89796582318589e-05, + "loss": 2.028, + "step": 3088 + }, + { + "epoch": 0.63, + "learning_rate": 1.897889795736405e-05, + "loss": 2.0952, + "step": 3089 + }, + { + "epoch": 0.63, + "learning_rate": 1.89781374149667e-05, + "loss": 2.0731, + "step": 3090 + }, + { + "epoch": 0.63, + "learning_rate": 1.897737660468954e-05, + "loss": 2.1758, + "step": 3091 + }, + { + "epoch": 0.63, + "learning_rate": 1.8976615526555272e-05, + "loss": 2.1424, + "step": 3092 + }, + { + "epoch": 0.63, + "learning_rate": 1.8975854180586604e-05, + "loss": 2.129, + "step": 3093 + }, + { + "epoch": 0.63, + "learning_rate": 1.8975092566806258e-05, + "loss": 2.1883, + "step": 3094 + }, + { + "epoch": 0.63, + "learning_rate": 1.8974330685236946e-05, + "loss": 2.1375, + "step": 3095 + }, + { + "epoch": 0.63, + "learning_rate": 1.8973568535901408e-05, + "loss": 2.1276, + "step": 3096 + }, + { + "epoch": 0.63, + "learning_rate": 1.8972806118822384e-05, + "loss": 2.1582, + "step": 3097 + }, + { + "epoch": 0.63, + "learning_rate": 1.8972043434022618e-05, + "loss": 2.1771, + "step": 3098 + }, + { + "epoch": 0.63, + "learning_rate": 1.897128048152487e-05, + "loss": 2.2177, + "step": 3099 + }, + { + "epoch": 0.63, + "learning_rate": 1.8970517261351905e-05, + "loss": 2.0894, + "step": 3100 + }, + { + "epoch": 0.63, + "learning_rate": 1.896975377352649e-05, + "loss": 2.144, + "step": 3101 + }, + { + "epoch": 0.63, + "learning_rate": 1.896899001807141e-05, + "loss": 2.2549, + "step": 3102 + }, + { + "epoch": 0.63, + "learning_rate": 1.896822599500945e-05, + "loss": 2.1262, + "step": 3103 + }, + { + "epoch": 0.63, + "learning_rate": 1.896746170436341e-05, + "loss": 2.1385, + "step": 3104 + }, + { + "epoch": 0.63, + "learning_rate": 1.8966697146156092e-05, + "loss": 2.1229, + "step": 3105 + }, + { + "epoch": 0.63, + "learning_rate": 1.896593232041031e-05, + "loss": 2.1182, + "step": 3106 + }, + { + "epoch": 0.63, + "learning_rate": 1.8965167227148872e-05, + "loss": 2.112, + "step": 3107 + }, + { + "epoch": 0.63, + "learning_rate": 1.8964401866394623e-05, + "loss": 2.1228, + "step": 3108 + }, + { + "epoch": 0.63, + "learning_rate": 1.8963636238170392e-05, + "loss": 2.1752, + "step": 3109 + }, + { + "epoch": 0.63, + "learning_rate": 1.896287034249902e-05, + "loss": 2.2159, + "step": 3110 + }, + { + "epoch": 0.63, + "learning_rate": 1.896210417940336e-05, + "loss": 2.1165, + "step": 3111 + }, + { + "epoch": 0.63, + "learning_rate": 1.896133774890628e-05, + "loss": 2.164, + "step": 3112 + }, + { + "epoch": 0.63, + "learning_rate": 1.8960571051030635e-05, + "loss": 2.0623, + "step": 3113 + }, + { + "epoch": 0.63, + "learning_rate": 1.895980408579931e-05, + "loss": 2.0736, + "step": 3114 + }, + { + "epoch": 0.63, + "learning_rate": 1.8959036853235188e-05, + "loss": 2.2346, + "step": 3115 + }, + { + "epoch": 0.63, + "learning_rate": 1.8958269353361156e-05, + "loss": 2.1663, + "step": 3116 + }, + { + "epoch": 0.63, + "learning_rate": 1.895750158620012e-05, + "loss": 2.2103, + "step": 3117 + }, + { + "epoch": 0.63, + "learning_rate": 1.8956733551774983e-05, + "loss": 2.2048, + "step": 3118 + }, + { + "epoch": 0.63, + "learning_rate": 1.895596525010866e-05, + "loss": 2.0723, + "step": 3119 + }, + { + "epoch": 0.63, + "learning_rate": 1.895519668122408e-05, + "loss": 2.1366, + "step": 3120 + }, + { + "epoch": 0.63, + "learning_rate": 1.8954427845144172e-05, + "loss": 2.2252, + "step": 3121 + }, + { + "epoch": 0.63, + "learning_rate": 1.8953658741891875e-05, + "loss": 2.1263, + "step": 3122 + }, + { + "epoch": 0.63, + "learning_rate": 1.8952889371490134e-05, + "loss": 2.1476, + "step": 3123 + }, + { + "epoch": 0.63, + "learning_rate": 1.8952119733961912e-05, + "loss": 2.2654, + "step": 3124 + }, + { + "epoch": 0.63, + "learning_rate": 1.895134982933017e-05, + "loss": 2.0978, + "step": 3125 + }, + { + "epoch": 0.63, + "learning_rate": 1.8950579657617875e-05, + "loss": 2.1751, + "step": 3126 + }, + { + "epoch": 0.63, + "learning_rate": 1.894980921884801e-05, + "loss": 2.2593, + "step": 3127 + }, + { + "epoch": 0.63, + "learning_rate": 1.894903851304356e-05, + "loss": 2.1978, + "step": 3128 + }, + { + "epoch": 0.64, + "learning_rate": 1.8948267540227527e-05, + "loss": 2.1751, + "step": 3129 + }, + { + "epoch": 0.64, + "learning_rate": 1.8947496300422905e-05, + "loss": 2.2106, + "step": 3130 + }, + { + "epoch": 0.64, + "learning_rate": 1.8946724793652716e-05, + "loss": 2.1982, + "step": 3131 + }, + { + "epoch": 0.64, + "learning_rate": 1.8945953019939968e-05, + "loss": 2.1906, + "step": 3132 + }, + { + "epoch": 0.64, + "learning_rate": 1.8945180979307697e-05, + "loss": 2.1059, + "step": 3133 + }, + { + "epoch": 0.64, + "learning_rate": 1.8944408671778937e-05, + "loss": 2.2018, + "step": 3134 + }, + { + "epoch": 0.64, + "learning_rate": 1.8943636097376728e-05, + "loss": 2.143, + "step": 3135 + }, + { + "epoch": 0.64, + "learning_rate": 1.8942863256124122e-05, + "loss": 2.0992, + "step": 3136 + }, + { + "epoch": 0.64, + "learning_rate": 1.894209014804418e-05, + "loss": 2.0738, + "step": 3137 + }, + { + "epoch": 0.64, + "learning_rate": 1.8941316773159966e-05, + "loss": 2.147, + "step": 3138 + }, + { + "epoch": 0.64, + "learning_rate": 1.894054313149456e-05, + "loss": 2.1949, + "step": 3139 + }, + { + "epoch": 0.64, + "learning_rate": 1.8939769223071046e-05, + "loss": 2.1414, + "step": 3140 + }, + { + "epoch": 0.64, + "learning_rate": 1.8938995047912506e-05, + "loss": 2.1498, + "step": 3141 + }, + { + "epoch": 0.64, + "learning_rate": 1.8938220606042047e-05, + "loss": 2.1783, + "step": 3142 + }, + { + "epoch": 0.64, + "learning_rate": 1.893744589748277e-05, + "loss": 2.1285, + "step": 3143 + }, + { + "epoch": 0.64, + "learning_rate": 1.8936670922257797e-05, + "loss": 2.2038, + "step": 3144 + }, + { + "epoch": 0.64, + "learning_rate": 1.8935895680390242e-05, + "loss": 2.1185, + "step": 3145 + }, + { + "epoch": 0.64, + "learning_rate": 1.893512017190325e-05, + "loss": 2.1344, + "step": 3146 + }, + { + "epoch": 0.64, + "learning_rate": 1.8934344396819943e-05, + "loss": 2.1249, + "step": 3147 + }, + { + "epoch": 0.64, + "learning_rate": 1.8933568355163476e-05, + "loss": 2.1516, + "step": 3148 + }, + { + "epoch": 0.64, + "learning_rate": 1.8932792046957007e-05, + "loss": 2.1016, + "step": 3149 + }, + { + "epoch": 0.64, + "learning_rate": 1.8932015472223692e-05, + "loss": 2.2529, + "step": 3150 + }, + { + "epoch": 0.64, + "learning_rate": 1.8931238630986704e-05, + "loss": 2.0848, + "step": 3151 + }, + { + "epoch": 0.64, + "learning_rate": 1.8930461523269224e-05, + "loss": 2.1554, + "step": 3152 + }, + { + "epoch": 0.64, + "learning_rate": 1.8929684149094437e-05, + "loss": 2.1384, + "step": 3153 + }, + { + "epoch": 0.64, + "learning_rate": 1.8928906508485534e-05, + "loss": 2.1548, + "step": 3154 + }, + { + "epoch": 0.64, + "learning_rate": 1.892812860146572e-05, + "loss": 2.1229, + "step": 3155 + }, + { + "epoch": 0.64, + "learning_rate": 1.8927350428058208e-05, + "loss": 2.201, + "step": 3156 + }, + { + "epoch": 0.64, + "learning_rate": 1.8926571988286215e-05, + "loss": 2.1589, + "step": 3157 + }, + { + "epoch": 0.64, + "learning_rate": 1.8925793282172962e-05, + "loss": 2.0797, + "step": 3158 + }, + { + "epoch": 0.64, + "learning_rate": 1.892501430974169e-05, + "loss": 2.2002, + "step": 3159 + }, + { + "epoch": 0.64, + "learning_rate": 1.892423507101564e-05, + "loss": 2.1366, + "step": 3160 + }, + { + "epoch": 0.64, + "learning_rate": 1.8923455566018054e-05, + "loss": 2.2174, + "step": 3161 + }, + { + "epoch": 0.64, + "learning_rate": 1.89226757947722e-05, + "loss": 2.1172, + "step": 3162 + }, + { + "epoch": 0.64, + "learning_rate": 1.8921895757301342e-05, + "loss": 2.1278, + "step": 3163 + }, + { + "epoch": 0.64, + "learning_rate": 1.892111545362875e-05, + "loss": 2.106, + "step": 3164 + }, + { + "epoch": 0.64, + "learning_rate": 1.892033488377771e-05, + "loss": 2.1181, + "step": 3165 + }, + { + "epoch": 0.64, + "learning_rate": 1.8919554047771508e-05, + "loss": 2.0918, + "step": 3166 + }, + { + "epoch": 0.64, + "learning_rate": 1.8918772945633446e-05, + "loss": 2.0617, + "step": 3167 + }, + { + "epoch": 0.64, + "learning_rate": 1.891799157738683e-05, + "loss": 2.1669, + "step": 3168 + }, + { + "epoch": 0.64, + "learning_rate": 1.8917209943054964e-05, + "loss": 2.0807, + "step": 3169 + }, + { + "epoch": 0.64, + "learning_rate": 1.8916428042661178e-05, + "loss": 2.2293, + "step": 3170 + }, + { + "epoch": 0.64, + "learning_rate": 1.8915645876228803e-05, + "loss": 2.0932, + "step": 3171 + }, + { + "epoch": 0.64, + "learning_rate": 1.8914863443781174e-05, + "loss": 2.0891, + "step": 3172 + }, + { + "epoch": 0.64, + "learning_rate": 1.8914080745341632e-05, + "loss": 2.21, + "step": 3173 + }, + { + "epoch": 0.64, + "learning_rate": 1.8913297780933534e-05, + "loss": 2.1944, + "step": 3174 + }, + { + "epoch": 0.64, + "learning_rate": 1.8912514550580242e-05, + "loss": 2.1716, + "step": 3175 + }, + { + "epoch": 0.64, + "learning_rate": 1.8911731054305127e-05, + "loss": 2.1542, + "step": 3176 + }, + { + "epoch": 0.64, + "learning_rate": 1.8910947292131564e-05, + "loss": 2.1129, + "step": 3177 + }, + { + "epoch": 0.65, + "learning_rate": 1.8910163264082934e-05, + "loss": 2.1952, + "step": 3178 + }, + { + "epoch": 0.65, + "learning_rate": 1.8909378970182636e-05, + "loss": 2.1524, + "step": 3179 + }, + { + "epoch": 0.65, + "learning_rate": 1.8908594410454068e-05, + "loss": 2.1749, + "step": 3180 + }, + { + "epoch": 0.65, + "learning_rate": 1.8907809584920638e-05, + "loss": 2.0978, + "step": 3181 + }, + { + "epoch": 0.65, + "learning_rate": 1.8907024493605766e-05, + "loss": 2.1711, + "step": 3182 + }, + { + "epoch": 0.65, + "learning_rate": 1.890623913653287e-05, + "loss": 2.1279, + "step": 3183 + }, + { + "epoch": 0.65, + "learning_rate": 1.8905453513725393e-05, + "loss": 2.2594, + "step": 3184 + }, + { + "epoch": 0.65, + "learning_rate": 1.890466762520677e-05, + "loss": 2.2482, + "step": 3185 + }, + { + "epoch": 0.65, + "learning_rate": 1.8903881471000444e-05, + "loss": 2.1218, + "step": 3186 + }, + { + "epoch": 0.65, + "learning_rate": 1.8903095051129882e-05, + "loss": 2.081, + "step": 3187 + }, + { + "epoch": 0.65, + "learning_rate": 1.890230836561854e-05, + "loss": 2.1402, + "step": 3188 + }, + { + "epoch": 0.65, + "learning_rate": 1.8901521414489897e-05, + "loss": 2.1402, + "step": 3189 + }, + { + "epoch": 0.65, + "learning_rate": 1.8900734197767424e-05, + "loss": 2.1971, + "step": 3190 + }, + { + "epoch": 0.65, + "learning_rate": 1.889994671547462e-05, + "loss": 2.1926, + "step": 3191 + }, + { + "epoch": 0.65, + "learning_rate": 1.8899158967634974e-05, + "loss": 2.1816, + "step": 3192 + }, + { + "epoch": 0.65, + "learning_rate": 1.889837095427199e-05, + "loss": 2.1955, + "step": 3193 + }, + { + "epoch": 0.65, + "learning_rate": 1.8897582675409186e-05, + "loss": 2.2423, + "step": 3194 + }, + { + "epoch": 0.65, + "learning_rate": 1.8896794131070073e-05, + "loss": 2.1169, + "step": 3195 + }, + { + "epoch": 0.65, + "learning_rate": 1.8896005321278184e-05, + "loss": 2.22, + "step": 3196 + }, + { + "epoch": 0.65, + "learning_rate": 1.8895216246057053e-05, + "loss": 2.1982, + "step": 3197 + }, + { + "epoch": 0.65, + "learning_rate": 1.889442690543023e-05, + "loss": 2.1191, + "step": 3198 + }, + { + "epoch": 0.65, + "learning_rate": 1.8893637299421254e-05, + "loss": 2.1493, + "step": 3199 + }, + { + "epoch": 0.65, + "learning_rate": 1.8892847428053692e-05, + "loss": 2.173, + "step": 3200 + }, + { + "epoch": 0.65, + "learning_rate": 1.889205729135111e-05, + "loss": 2.0506, + "step": 3201 + }, + { + "epoch": 0.65, + "learning_rate": 1.8891266889337086e-05, + "loss": 2.0739, + "step": 3202 + }, + { + "epoch": 0.65, + "learning_rate": 1.8890476222035202e-05, + "loss": 2.1443, + "step": 3203 + }, + { + "epoch": 0.65, + "learning_rate": 1.8889685289469043e-05, + "loss": 2.1702, + "step": 3204 + }, + { + "epoch": 0.65, + "learning_rate": 1.8888894091662218e-05, + "loss": 2.1268, + "step": 3205 + }, + { + "epoch": 0.65, + "learning_rate": 1.8888102628638328e-05, + "loss": 2.2258, + "step": 3206 + }, + { + "epoch": 0.65, + "learning_rate": 1.888731090042099e-05, + "loss": 2.1066, + "step": 3207 + }, + { + "epoch": 0.65, + "learning_rate": 1.8886518907033816e-05, + "loss": 2.1479, + "step": 3208 + }, + { + "epoch": 0.65, + "learning_rate": 1.8885726648500455e-05, + "loss": 2.1099, + "step": 3209 + }, + { + "epoch": 0.65, + "learning_rate": 1.8884934124844534e-05, + "loss": 2.1956, + "step": 3210 + }, + { + "epoch": 0.65, + "learning_rate": 1.8884141336089704e-05, + "loss": 2.1884, + "step": 3211 + }, + { + "epoch": 0.65, + "learning_rate": 1.8883348282259612e-05, + "loss": 2.1727, + "step": 3212 + }, + { + "epoch": 0.65, + "learning_rate": 1.888255496337793e-05, + "loss": 2.1152, + "step": 3213 + }, + { + "epoch": 0.65, + "learning_rate": 1.8881761379468324e-05, + "loss": 2.2216, + "step": 3214 + }, + { + "epoch": 0.65, + "learning_rate": 1.888096753055447e-05, + "loss": 2.2109, + "step": 3215 + }, + { + "epoch": 0.65, + "learning_rate": 1.8880173416660056e-05, + "loss": 2.0728, + "step": 3216 + }, + { + "epoch": 0.65, + "learning_rate": 1.8879379037808773e-05, + "loss": 2.0996, + "step": 3217 + }, + { + "epoch": 0.65, + "learning_rate": 1.887858439402433e-05, + "loss": 2.0316, + "step": 3218 + }, + { + "epoch": 0.65, + "learning_rate": 1.887778948533043e-05, + "loss": 2.14, + "step": 3219 + }, + { + "epoch": 0.65, + "learning_rate": 1.8876994311750794e-05, + "loss": 2.1434, + "step": 3220 + }, + { + "epoch": 0.65, + "learning_rate": 1.8876198873309148e-05, + "loss": 2.1718, + "step": 3221 + }, + { + "epoch": 0.65, + "learning_rate": 1.8875403170029217e-05, + "loss": 2.1673, + "step": 3222 + }, + { + "epoch": 0.65, + "learning_rate": 1.8874607201934755e-05, + "loss": 2.0956, + "step": 3223 + }, + { + "epoch": 0.65, + "learning_rate": 1.8873810969049503e-05, + "loss": 2.128, + "step": 3224 + }, + { + "epoch": 0.65, + "learning_rate": 1.8873014471397225e-05, + "loss": 2.2575, + "step": 3225 + }, + { + "epoch": 0.65, + "learning_rate": 1.8872217709001676e-05, + "loss": 2.1567, + "step": 3226 + }, + { + "epoch": 0.66, + "learning_rate": 1.8871420681886637e-05, + "loss": 2.1707, + "step": 3227 + }, + { + "epoch": 0.66, + "learning_rate": 1.8870623390075885e-05, + "loss": 2.1249, + "step": 3228 + }, + { + "epoch": 0.66, + "learning_rate": 1.8869825833593213e-05, + "loss": 2.1915, + "step": 3229 + }, + { + "epoch": 0.66, + "learning_rate": 1.8869028012462408e-05, + "loss": 2.1779, + "step": 3230 + }, + { + "epoch": 0.66, + "learning_rate": 1.8868229926707288e-05, + "loss": 2.2037, + "step": 3231 + }, + { + "epoch": 0.66, + "learning_rate": 1.8867431576351653e-05, + "loss": 2.214, + "step": 3232 + }, + { + "epoch": 0.66, + "learning_rate": 1.886663296141933e-05, + "loss": 2.1656, + "step": 3233 + }, + { + "epoch": 0.66, + "learning_rate": 1.8865834081934145e-05, + "loss": 2.1605, + "step": 3234 + }, + { + "epoch": 0.66, + "learning_rate": 1.8865034937919938e-05, + "loss": 2.1636, + "step": 3235 + }, + { + "epoch": 0.66, + "learning_rate": 1.8864235529400553e-05, + "loss": 2.156, + "step": 3236 + }, + { + "epoch": 0.66, + "learning_rate": 1.8863435856399833e-05, + "loss": 2.1371, + "step": 3237 + }, + { + "epoch": 0.66, + "learning_rate": 1.8862635918941644e-05, + "loss": 2.0977, + "step": 3238 + }, + { + "epoch": 0.66, + "learning_rate": 1.886183571704986e-05, + "loss": 2.1842, + "step": 3239 + }, + { + "epoch": 0.66, + "learning_rate": 1.886103525074834e-05, + "loss": 2.1546, + "step": 3240 + }, + { + "epoch": 0.66, + "learning_rate": 1.8860234520060983e-05, + "loss": 2.0674, + "step": 3241 + }, + { + "epoch": 0.66, + "learning_rate": 1.8859433525011676e-05, + "loss": 2.1643, + "step": 3242 + }, + { + "epoch": 0.66, + "learning_rate": 1.885863226562431e-05, + "loss": 2.1535, + "step": 3243 + }, + { + "epoch": 0.66, + "learning_rate": 1.8857830741922806e-05, + "loss": 2.1937, + "step": 3244 + }, + { + "epoch": 0.66, + "learning_rate": 1.885702895393107e-05, + "loss": 2.1393, + "step": 3245 + }, + { + "epoch": 0.66, + "learning_rate": 1.885622690167303e-05, + "loss": 2.1496, + "step": 3246 + }, + { + "epoch": 0.66, + "learning_rate": 1.8855424585172605e-05, + "loss": 2.1049, + "step": 3247 + }, + { + "epoch": 0.66, + "learning_rate": 1.8854622004453748e-05, + "loss": 2.0201, + "step": 3248 + }, + { + "epoch": 0.66, + "learning_rate": 1.8853819159540395e-05, + "loss": 2.1904, + "step": 3249 + }, + { + "epoch": 0.66, + "learning_rate": 1.885301605045651e-05, + "loss": 2.1646, + "step": 3250 + }, + { + "epoch": 0.66, + "learning_rate": 1.885221267722605e-05, + "loss": 2.1226, + "step": 3251 + }, + { + "epoch": 0.66, + "learning_rate": 1.8851409039872983e-05, + "loss": 2.0939, + "step": 3252 + }, + { + "epoch": 0.66, + "learning_rate": 1.8850605138421293e-05, + "loss": 2.1505, + "step": 3253 + }, + { + "epoch": 0.66, + "learning_rate": 1.884980097289496e-05, + "loss": 2.2153, + "step": 3254 + }, + { + "epoch": 0.66, + "learning_rate": 1.8848996543317982e-05, + "loss": 2.1445, + "step": 3255 + }, + { + "epoch": 0.66, + "learning_rate": 1.8848191849714358e-05, + "loss": 2.0491, + "step": 3256 + }, + { + "epoch": 0.66, + "learning_rate": 1.8847386892108097e-05, + "loss": 2.2029, + "step": 3257 + }, + { + "epoch": 0.66, + "learning_rate": 1.884658167052322e-05, + "loss": 2.113, + "step": 3258 + }, + { + "epoch": 0.66, + "learning_rate": 1.8845776184983752e-05, + "loss": 2.1245, + "step": 3259 + }, + { + "epoch": 0.66, + "learning_rate": 1.884497043551372e-05, + "loss": 2.1571, + "step": 3260 + }, + { + "epoch": 0.66, + "learning_rate": 1.8844164422137172e-05, + "loss": 2.1571, + "step": 3261 + }, + { + "epoch": 0.66, + "learning_rate": 1.8843358144878157e-05, + "loss": 2.1646, + "step": 3262 + }, + { + "epoch": 0.66, + "learning_rate": 1.8842551603760725e-05, + "loss": 2.1975, + "step": 3263 + }, + { + "epoch": 0.66, + "learning_rate": 1.884174479880895e-05, + "loss": 2.2097, + "step": 3264 + }, + { + "epoch": 0.66, + "learning_rate": 1.8840937730046895e-05, + "loss": 2.207, + "step": 3265 + }, + { + "epoch": 0.66, + "learning_rate": 1.8840130397498645e-05, + "loss": 2.2211, + "step": 3266 + }, + { + "epoch": 0.66, + "learning_rate": 1.883932280118829e-05, + "loss": 2.1835, + "step": 3267 + }, + { + "epoch": 0.66, + "learning_rate": 1.8838514941139925e-05, + "loss": 2.1714, + "step": 3268 + }, + { + "epoch": 0.66, + "learning_rate": 1.8837706817377654e-05, + "loss": 2.1213, + "step": 3269 + }, + { + "epoch": 0.66, + "learning_rate": 1.8836898429925586e-05, + "loss": 2.1576, + "step": 3270 + }, + { + "epoch": 0.66, + "learning_rate": 1.8836089778807846e-05, + "loss": 2.1804, + "step": 3271 + }, + { + "epoch": 0.66, + "learning_rate": 1.8835280864048556e-05, + "loss": 2.15, + "step": 3272 + }, + { + "epoch": 0.66, + "learning_rate": 1.8834471685671858e-05, + "loss": 2.1281, + "step": 3273 + }, + { + "epoch": 0.66, + "learning_rate": 1.883366224370189e-05, + "loss": 2.2647, + "step": 3274 + }, + { + "epoch": 0.66, + "learning_rate": 1.8832852538162806e-05, + "loss": 2.1629, + "step": 3275 + }, + { + "epoch": 0.66, + "learning_rate": 1.883204256907876e-05, + "loss": 2.1004, + "step": 3276 + }, + { + "epoch": 0.67, + "learning_rate": 1.883123233647393e-05, + "loss": 2.1342, + "step": 3277 + }, + { + "epoch": 0.67, + "learning_rate": 1.883042184037248e-05, + "loss": 2.13, + "step": 3278 + }, + { + "epoch": 0.67, + "learning_rate": 1.8829611080798595e-05, + "loss": 2.2039, + "step": 3279 + }, + { + "epoch": 0.67, + "learning_rate": 1.882880005777647e-05, + "loss": 2.1196, + "step": 3280 + }, + { + "epoch": 0.67, + "learning_rate": 1.88279887713303e-05, + "loss": 2.0627, + "step": 3281 + }, + { + "epoch": 0.67, + "learning_rate": 1.882717722148429e-05, + "loss": 2.0945, + "step": 3282 + }, + { + "epoch": 0.67, + "learning_rate": 1.8826365408262658e-05, + "loss": 2.1492, + "step": 3283 + }, + { + "epoch": 0.67, + "learning_rate": 1.8825553331689625e-05, + "loss": 2.2008, + "step": 3284 + }, + { + "epoch": 0.67, + "learning_rate": 1.8824740991789417e-05, + "loss": 2.2426, + "step": 3285 + }, + { + "epoch": 0.67, + "learning_rate": 1.8823928388586276e-05, + "loss": 2.2076, + "step": 3286 + }, + { + "epoch": 0.67, + "learning_rate": 1.8823115522104445e-05, + "loss": 2.1093, + "step": 3287 + }, + { + "epoch": 0.67, + "learning_rate": 1.882230239236818e-05, + "loss": 2.0962, + "step": 3288 + }, + { + "epoch": 0.67, + "learning_rate": 1.882148899940174e-05, + "loss": 2.1728, + "step": 3289 + }, + { + "epoch": 0.67, + "learning_rate": 1.8820675343229394e-05, + "loss": 2.1617, + "step": 3290 + }, + { + "epoch": 0.67, + "learning_rate": 1.881986142387542e-05, + "loss": 2.1857, + "step": 3291 + }, + { + "epoch": 0.67, + "learning_rate": 1.8819047241364104e-05, + "loss": 2.2509, + "step": 3292 + }, + { + "epoch": 0.67, + "learning_rate": 1.881823279571974e-05, + "loss": 2.0877, + "step": 3293 + }, + { + "epoch": 0.67, + "learning_rate": 1.881741808696662e-05, + "loss": 2.1233, + "step": 3294 + }, + { + "epoch": 0.67, + "learning_rate": 1.8816603115129064e-05, + "loss": 2.1614, + "step": 3295 + }, + { + "epoch": 0.67, + "learning_rate": 1.881578788023138e-05, + "loss": 2.1503, + "step": 3296 + }, + { + "epoch": 0.67, + "learning_rate": 1.8814972382297894e-05, + "loss": 2.0858, + "step": 3297 + }, + { + "epoch": 0.67, + "learning_rate": 1.8814156621352938e-05, + "loss": 2.1014, + "step": 3298 + }, + { + "epoch": 0.67, + "learning_rate": 1.8813340597420857e-05, + "loss": 2.1609, + "step": 3299 + }, + { + "epoch": 0.67, + "learning_rate": 1.881252431052599e-05, + "loss": 2.1392, + "step": 3300 + }, + { + "epoch": 0.67, + "learning_rate": 1.8811707760692698e-05, + "loss": 2.1769, + "step": 3301 + }, + { + "epoch": 0.67, + "learning_rate": 1.8810890947945338e-05, + "loss": 2.1192, + "step": 3302 + }, + { + "epoch": 0.67, + "learning_rate": 1.8810073872308293e-05, + "loss": 2.1637, + "step": 3303 + }, + { + "epoch": 0.67, + "learning_rate": 1.8809256533805933e-05, + "loss": 2.1306, + "step": 3304 + }, + { + "epoch": 0.67, + "learning_rate": 1.8808438932462642e-05, + "loss": 2.1383, + "step": 3305 + }, + { + "epoch": 0.67, + "learning_rate": 1.8807621068302823e-05, + "loss": 2.1194, + "step": 3306 + }, + { + "epoch": 0.67, + "learning_rate": 1.880680294135088e-05, + "loss": 2.2122, + "step": 3307 + }, + { + "epoch": 0.67, + "learning_rate": 1.8805984551631212e-05, + "loss": 2.1541, + "step": 3308 + }, + { + "epoch": 0.67, + "learning_rate": 1.8805165899168244e-05, + "loss": 2.2058, + "step": 3309 + }, + { + "epoch": 0.67, + "learning_rate": 1.8804346983986402e-05, + "loss": 2.1472, + "step": 3310 + }, + { + "epoch": 0.67, + "learning_rate": 1.8803527806110124e-05, + "loss": 2.1545, + "step": 3311 + }, + { + "epoch": 0.67, + "learning_rate": 1.8802708365563842e-05, + "loss": 2.2045, + "step": 3312 + }, + { + "epoch": 0.67, + "learning_rate": 1.8801888662372014e-05, + "loss": 2.1811, + "step": 3313 + }, + { + "epoch": 0.67, + "learning_rate": 1.8801068696559094e-05, + "loss": 2.1914, + "step": 3314 + }, + { + "epoch": 0.67, + "learning_rate": 1.8800248468149545e-05, + "loss": 2.1775, + "step": 3315 + }, + { + "epoch": 0.67, + "learning_rate": 1.8799427977167842e-05, + "loss": 2.1004, + "step": 3316 + }, + { + "epoch": 0.67, + "learning_rate": 1.879860722363847e-05, + "loss": 2.1091, + "step": 3317 + }, + { + "epoch": 0.67, + "learning_rate": 1.879778620758591e-05, + "loss": 2.1636, + "step": 3318 + }, + { + "epoch": 0.67, + "learning_rate": 1.8796964929034666e-05, + "loss": 2.1121, + "step": 3319 + }, + { + "epoch": 0.67, + "learning_rate": 1.879614338800924e-05, + "loss": 2.1712, + "step": 3320 + }, + { + "epoch": 0.67, + "learning_rate": 1.8795321584534144e-05, + "loss": 2.2048, + "step": 3321 + }, + { + "epoch": 0.67, + "learning_rate": 1.8794499518633894e-05, + "loss": 2.1963, + "step": 3322 + }, + { + "epoch": 0.67, + "learning_rate": 1.8793677190333023e-05, + "loss": 2.2077, + "step": 3323 + }, + { + "epoch": 0.67, + "learning_rate": 1.8792854599656065e-05, + "loss": 2.2015, + "step": 3324 + }, + { + "epoch": 0.67, + "learning_rate": 1.8792031746627563e-05, + "loss": 2.1859, + "step": 3325 + }, + { + "epoch": 0.68, + "learning_rate": 1.8791208631272072e-05, + "loss": 2.2153, + "step": 3326 + }, + { + "epoch": 0.68, + "learning_rate": 1.8790385253614144e-05, + "loss": 2.0949, + "step": 3327 + }, + { + "epoch": 0.68, + "learning_rate": 1.8789561613678352e-05, + "loss": 2.1811, + "step": 3328 + }, + { + "epoch": 0.68, + "learning_rate": 1.878873771148927e-05, + "loss": 2.1555, + "step": 3329 + }, + { + "epoch": 0.68, + "learning_rate": 1.8787913547071485e-05, + "loss": 2.1599, + "step": 3330 + }, + { + "epoch": 0.68, + "learning_rate": 1.8787089120449574e-05, + "loss": 2.1519, + "step": 3331 + }, + { + "epoch": 0.68, + "learning_rate": 1.878626443164815e-05, + "loss": 2.1637, + "step": 3332 + }, + { + "epoch": 0.68, + "learning_rate": 1.878543948069181e-05, + "loss": 2.1821, + "step": 3333 + }, + { + "epoch": 0.68, + "learning_rate": 1.8784614267605167e-05, + "loss": 2.161, + "step": 3334 + }, + { + "epoch": 0.68, + "learning_rate": 1.8783788792412853e-05, + "loss": 2.1871, + "step": 3335 + }, + { + "epoch": 0.68, + "learning_rate": 1.878296305513949e-05, + "loss": 2.1143, + "step": 3336 + }, + { + "epoch": 0.68, + "learning_rate": 1.8782137055809712e-05, + "loss": 2.2132, + "step": 3337 + }, + { + "epoch": 0.68, + "learning_rate": 1.8781310794448173e-05, + "loss": 2.1302, + "step": 3338 + }, + { + "epoch": 0.68, + "learning_rate": 1.8780484271079522e-05, + "loss": 2.1788, + "step": 3339 + }, + { + "epoch": 0.68, + "learning_rate": 1.8779657485728423e-05, + "loss": 2.1408, + "step": 3340 + }, + { + "epoch": 0.68, + "learning_rate": 1.8778830438419536e-05, + "loss": 2.1537, + "step": 3341 + }, + { + "epoch": 0.68, + "learning_rate": 1.8778003129177546e-05, + "loss": 2.1326, + "step": 3342 + }, + { + "epoch": 0.68, + "learning_rate": 1.8777175558027134e-05, + "loss": 2.1358, + "step": 3343 + }, + { + "epoch": 0.68, + "learning_rate": 1.8776347724992993e-05, + "loss": 2.1237, + "step": 3344 + }, + { + "epoch": 0.68, + "learning_rate": 1.8775519630099822e-05, + "loss": 2.1392, + "step": 3345 + }, + { + "epoch": 0.68, + "learning_rate": 1.877469127337233e-05, + "loss": 2.0812, + "step": 3346 + }, + { + "epoch": 0.68, + "learning_rate": 1.8773862654835235e-05, + "loss": 2.1537, + "step": 3347 + }, + { + "epoch": 0.68, + "learning_rate": 1.8773033774513254e-05, + "loss": 2.1519, + "step": 3348 + }, + { + "epoch": 0.68, + "learning_rate": 1.877220463243112e-05, + "loss": 2.1363, + "step": 3349 + }, + { + "epoch": 0.68, + "learning_rate": 1.877137522861358e-05, + "loss": 2.2003, + "step": 3350 + }, + { + "epoch": 0.68, + "learning_rate": 1.877054556308537e-05, + "loss": 2.1347, + "step": 3351 + }, + { + "epoch": 0.68, + "learning_rate": 1.8769715635871247e-05, + "loss": 2.0888, + "step": 3352 + }, + { + "epoch": 0.68, + "learning_rate": 1.876888544699598e-05, + "loss": 2.1743, + "step": 3353 + }, + { + "epoch": 0.68, + "learning_rate": 1.8768054996484337e-05, + "loss": 2.1633, + "step": 3354 + }, + { + "epoch": 0.68, + "learning_rate": 1.876722428436109e-05, + "loss": 2.147, + "step": 3355 + }, + { + "epoch": 0.68, + "learning_rate": 1.876639331065103e-05, + "loss": 2.192, + "step": 3356 + }, + { + "epoch": 0.68, + "learning_rate": 1.876556207537895e-05, + "loss": 2.1566, + "step": 3357 + }, + { + "epoch": 0.68, + "learning_rate": 1.8764730578569648e-05, + "loss": 2.2099, + "step": 3358 + }, + { + "epoch": 0.68, + "learning_rate": 1.876389882024794e-05, + "loss": 2.1108, + "step": 3359 + }, + { + "epoch": 0.68, + "learning_rate": 1.8763066800438638e-05, + "loss": 2.0865, + "step": 3360 + }, + { + "epoch": 0.68, + "learning_rate": 1.876223451916657e-05, + "loss": 2.1716, + "step": 3361 + }, + { + "epoch": 0.68, + "learning_rate": 1.8761401976456565e-05, + "loss": 2.1648, + "step": 3362 + }, + { + "epoch": 0.68, + "learning_rate": 1.8760569172333464e-05, + "loss": 2.1775, + "step": 3363 + }, + { + "epoch": 0.68, + "learning_rate": 1.875973610682212e-05, + "loss": 2.1203, + "step": 3364 + }, + { + "epoch": 0.68, + "learning_rate": 1.8758902779947385e-05, + "loss": 2.1546, + "step": 3365 + }, + { + "epoch": 0.68, + "learning_rate": 1.8758069191734125e-05, + "loss": 2.129, + "step": 3366 + }, + { + "epoch": 0.68, + "learning_rate": 1.875723534220721e-05, + "loss": 2.1219, + "step": 3367 + }, + { + "epoch": 0.68, + "learning_rate": 1.875640123139152e-05, + "loss": 2.1463, + "step": 3368 + }, + { + "epoch": 0.68, + "learning_rate": 1.875556685931194e-05, + "loss": 2.2054, + "step": 3369 + }, + { + "epoch": 0.68, + "learning_rate": 1.8754732225993367e-05, + "loss": 2.1523, + "step": 3370 + }, + { + "epoch": 0.68, + "learning_rate": 1.8753897331460708e-05, + "loss": 2.1383, + "step": 3371 + }, + { + "epoch": 0.68, + "learning_rate": 1.875306217573887e-05, + "loss": 2.15, + "step": 3372 + }, + { + "epoch": 0.68, + "learning_rate": 1.875222675885277e-05, + "loss": 2.0601, + "step": 3373 + }, + { + "epoch": 0.68, + "learning_rate": 1.8751391080827333e-05, + "loss": 2.1216, + "step": 3374 + }, + { + "epoch": 0.69, + "learning_rate": 1.87505551416875e-05, + "loss": 2.1706, + "step": 3375 + }, + { + "epoch": 0.69, + "learning_rate": 1.8749718941458205e-05, + "loss": 2.1525, + "step": 3376 + }, + { + "epoch": 0.69, + "learning_rate": 1.8748882480164404e-05, + "loss": 2.1634, + "step": 3377 + }, + { + "epoch": 0.69, + "learning_rate": 1.874804575783105e-05, + "loss": 2.1544, + "step": 3378 + }, + { + "epoch": 0.69, + "learning_rate": 1.874720877448311e-05, + "loss": 2.2026, + "step": 3379 + }, + { + "epoch": 0.69, + "learning_rate": 1.8746371530145556e-05, + "loss": 2.082, + "step": 3380 + }, + { + "epoch": 0.69, + "learning_rate": 1.8745534024843373e-05, + "loss": 2.2024, + "step": 3381 + }, + { + "epoch": 0.69, + "learning_rate": 1.8744696258601543e-05, + "loss": 2.2088, + "step": 3382 + }, + { + "epoch": 0.69, + "learning_rate": 1.8743858231445065e-05, + "loss": 2.0763, + "step": 3383 + }, + { + "epoch": 0.69, + "learning_rate": 1.8743019943398942e-05, + "loss": 2.1735, + "step": 3384 + }, + { + "epoch": 0.69, + "learning_rate": 1.8742181394488193e-05, + "loss": 2.0921, + "step": 3385 + }, + { + "epoch": 0.69, + "learning_rate": 1.8741342584737826e-05, + "loss": 2.1197, + "step": 3386 + }, + { + "epoch": 0.69, + "learning_rate": 1.8740503514172878e-05, + "loss": 2.0763, + "step": 3387 + }, + { + "epoch": 0.69, + "learning_rate": 1.873966418281838e-05, + "loss": 2.1196, + "step": 3388 + }, + { + "epoch": 0.69, + "learning_rate": 1.8738824590699378e-05, + "loss": 2.1617, + "step": 3389 + }, + { + "epoch": 0.69, + "learning_rate": 1.873798473784092e-05, + "loss": 2.2227, + "step": 3390 + }, + { + "epoch": 0.69, + "learning_rate": 1.8737144624268064e-05, + "loss": 2.1545, + "step": 3391 + }, + { + "epoch": 0.69, + "learning_rate": 1.873630425000588e-05, + "loss": 2.1317, + "step": 3392 + }, + { + "epoch": 0.69, + "learning_rate": 1.873546361507944e-05, + "loss": 2.233, + "step": 3393 + }, + { + "epoch": 0.69, + "learning_rate": 1.8734622719513823e-05, + "loss": 2.2, + "step": 3394 + }, + { + "epoch": 0.69, + "learning_rate": 1.8733781563334123e-05, + "loss": 2.1114, + "step": 3395 + }, + { + "epoch": 0.69, + "learning_rate": 1.8732940146565436e-05, + "loss": 2.2039, + "step": 3396 + }, + { + "epoch": 0.69, + "learning_rate": 1.8732098469232865e-05, + "loss": 2.1435, + "step": 3397 + }, + { + "epoch": 0.69, + "learning_rate": 1.873125653136153e-05, + "loss": 2.2062, + "step": 3398 + }, + { + "epoch": 0.69, + "learning_rate": 1.8730414332976547e-05, + "loss": 2.1575, + "step": 3399 + }, + { + "epoch": 0.69, + "learning_rate": 1.872957187410304e-05, + "loss": 2.1542, + "step": 3400 + }, + { + "epoch": 0.69, + "learning_rate": 1.8728729154766155e-05, + "loss": 2.0903, + "step": 3401 + }, + { + "epoch": 0.69, + "learning_rate": 1.872788617499103e-05, + "loss": 2.0938, + "step": 3402 + }, + { + "epoch": 0.69, + "learning_rate": 1.8727042934802817e-05, + "loss": 2.1964, + "step": 3403 + }, + { + "epoch": 0.69, + "learning_rate": 1.872619943422668e-05, + "loss": 2.1108, + "step": 3404 + }, + { + "epoch": 0.69, + "learning_rate": 1.872535567328778e-05, + "loss": 2.1635, + "step": 3405 + }, + { + "epoch": 0.69, + "learning_rate": 1.8724511652011296e-05, + "loss": 2.1406, + "step": 3406 + }, + { + "epoch": 0.69, + "learning_rate": 1.872366737042241e-05, + "loss": 2.288, + "step": 3407 + }, + { + "epoch": 0.69, + "learning_rate": 1.8722822828546318e-05, + "loss": 2.1907, + "step": 3408 + }, + { + "epoch": 0.69, + "learning_rate": 1.8721978026408208e-05, + "loss": 2.2028, + "step": 3409 + }, + { + "epoch": 0.69, + "learning_rate": 1.8721132964033293e-05, + "loss": 2.1229, + "step": 3410 + }, + { + "epoch": 0.69, + "learning_rate": 1.872028764144679e-05, + "loss": 2.1724, + "step": 3411 + }, + { + "epoch": 0.69, + "learning_rate": 1.8719442058673912e-05, + "loss": 2.1422, + "step": 3412 + }, + { + "epoch": 0.69, + "learning_rate": 1.8718596215739898e-05, + "loss": 2.1848, + "step": 3413 + }, + { + "epoch": 0.69, + "learning_rate": 1.871775011266998e-05, + "loss": 2.1496, + "step": 3414 + }, + { + "epoch": 0.69, + "learning_rate": 1.87169037494894e-05, + "loss": 2.1446, + "step": 3415 + }, + { + "epoch": 0.69, + "learning_rate": 1.8716057126223418e-05, + "loss": 2.181, + "step": 3416 + }, + { + "epoch": 0.69, + "learning_rate": 1.8715210242897296e-05, + "loss": 2.1027, + "step": 3417 + }, + { + "epoch": 0.69, + "learning_rate": 1.871436309953629e-05, + "loss": 2.2202, + "step": 3418 + }, + { + "epoch": 0.69, + "learning_rate": 1.8713515696165687e-05, + "loss": 2.0781, + "step": 3419 + }, + { + "epoch": 0.69, + "learning_rate": 1.8712668032810767e-05, + "loss": 2.1267, + "step": 3420 + }, + { + "epoch": 0.69, + "learning_rate": 1.8711820109496824e-05, + "loss": 2.147, + "step": 3421 + }, + { + "epoch": 0.69, + "learning_rate": 1.8710971926249154e-05, + "loss": 2.1874, + "step": 3422 + }, + { + "epoch": 0.69, + "learning_rate": 1.8710123483093066e-05, + "loss": 2.1742, + "step": 3423 + }, + { + "epoch": 0.7, + "learning_rate": 1.8709274780053877e-05, + "loss": 2.1959, + "step": 3424 + }, + { + "epoch": 0.7, + "learning_rate": 1.870842581715691e-05, + "loss": 2.1621, + "step": 3425 + }, + { + "epoch": 0.7, + "learning_rate": 1.8707576594427486e-05, + "loss": 2.1891, + "step": 3426 + }, + { + "epoch": 0.7, + "learning_rate": 1.8706727111890957e-05, + "loss": 2.1373, + "step": 3427 + }, + { + "epoch": 0.7, + "learning_rate": 1.870587736957266e-05, + "loss": 2.2426, + "step": 3428 + }, + { + "epoch": 0.7, + "learning_rate": 1.8705027367497948e-05, + "loss": 2.0774, + "step": 3429 + }, + { + "epoch": 0.7, + "learning_rate": 1.870417710569219e-05, + "loss": 2.1881, + "step": 3430 + }, + { + "epoch": 0.7, + "learning_rate": 1.8703326584180747e-05, + "loss": 2.1228, + "step": 3431 + }, + { + "epoch": 0.7, + "learning_rate": 1.8702475802988997e-05, + "loss": 2.173, + "step": 3432 + }, + { + "epoch": 0.7, + "learning_rate": 1.8701624762142333e-05, + "loss": 2.1082, + "step": 3433 + }, + { + "epoch": 0.7, + "learning_rate": 1.8700773461666138e-05, + "loss": 2.222, + "step": 3434 + }, + { + "epoch": 0.7, + "learning_rate": 1.8699921901585814e-05, + "loss": 2.2187, + "step": 3435 + }, + { + "epoch": 0.7, + "learning_rate": 1.8699070081926775e-05, + "loss": 2.2008, + "step": 3436 + }, + { + "epoch": 0.7, + "learning_rate": 1.869821800271443e-05, + "loss": 2.1244, + "step": 3437 + }, + { + "epoch": 0.7, + "learning_rate": 1.8697365663974202e-05, + "loss": 2.2029, + "step": 3438 + }, + { + "epoch": 0.7, + "learning_rate": 1.869651306573153e-05, + "loss": 2.1395, + "step": 3439 + }, + { + "epoch": 0.7, + "learning_rate": 1.8695660208011842e-05, + "loss": 2.1967, + "step": 3440 + }, + { + "epoch": 0.7, + "learning_rate": 1.8694807090840596e-05, + "loss": 2.1077, + "step": 3441 + }, + { + "epoch": 0.7, + "learning_rate": 1.8693953714243235e-05, + "loss": 2.1576, + "step": 3442 + }, + { + "epoch": 0.7, + "learning_rate": 1.869310007824523e-05, + "loss": 2.1329, + "step": 3443 + }, + { + "epoch": 0.7, + "learning_rate": 1.8692246182872048e-05, + "loss": 2.1145, + "step": 3444 + }, + { + "epoch": 0.7, + "learning_rate": 1.8691392028149165e-05, + "loss": 2.1813, + "step": 3445 + }, + { + "epoch": 0.7, + "learning_rate": 1.8690537614102066e-05, + "loss": 2.1324, + "step": 3446 + }, + { + "epoch": 0.7, + "learning_rate": 1.868968294075625e-05, + "loss": 2.1248, + "step": 3447 + }, + { + "epoch": 0.7, + "learning_rate": 1.868882800813721e-05, + "loss": 2.1097, + "step": 3448 + }, + { + "epoch": 0.7, + "learning_rate": 1.868797281627046e-05, + "loss": 2.139, + "step": 3449 + }, + { + "epoch": 0.7, + "learning_rate": 1.8687117365181514e-05, + "loss": 2.1471, + "step": 3450 + }, + { + "epoch": 0.7, + "learning_rate": 1.8686261654895894e-05, + "loss": 2.0919, + "step": 3451 + }, + { + "epoch": 0.7, + "learning_rate": 1.8685405685439135e-05, + "loss": 2.1108, + "step": 3452 + }, + { + "epoch": 0.7, + "learning_rate": 1.8684549456836776e-05, + "loss": 2.1045, + "step": 3453 + }, + { + "epoch": 0.7, + "learning_rate": 1.8683692969114363e-05, + "loss": 2.0259, + "step": 3454 + }, + { + "epoch": 0.7, + "learning_rate": 1.8682836222297453e-05, + "loss": 2.2196, + "step": 3455 + }, + { + "epoch": 0.7, + "learning_rate": 1.8681979216411606e-05, + "loss": 2.157, + "step": 3456 + }, + { + "epoch": 0.7, + "learning_rate": 1.8681121951482397e-05, + "loss": 2.1766, + "step": 3457 + }, + { + "epoch": 0.7, + "learning_rate": 1.8680264427535394e-05, + "loss": 2.0885, + "step": 3458 + }, + { + "epoch": 0.7, + "learning_rate": 1.8679406644596195e-05, + "loss": 2.1551, + "step": 3459 + }, + { + "epoch": 0.7, + "learning_rate": 1.867854860269039e-05, + "loss": 2.1555, + "step": 3460 + }, + { + "epoch": 0.7, + "learning_rate": 1.867769030184357e-05, + "loss": 2.2212, + "step": 3461 + }, + { + "epoch": 0.7, + "learning_rate": 1.867683174208136e-05, + "loss": 2.2178, + "step": 3462 + }, + { + "epoch": 0.7, + "learning_rate": 1.8675972923429365e-05, + "loss": 2.0676, + "step": 3463 + }, + { + "epoch": 0.7, + "learning_rate": 1.8675113845913216e-05, + "loss": 2.1209, + "step": 3464 + }, + { + "epoch": 0.7, + "learning_rate": 1.8674254509558544e-05, + "loss": 2.1212, + "step": 3465 + }, + { + "epoch": 0.7, + "learning_rate": 1.8673394914390986e-05, + "loss": 2.0908, + "step": 3466 + }, + { + "epoch": 0.7, + "learning_rate": 1.8672535060436194e-05, + "loss": 2.1774, + "step": 3467 + }, + { + "epoch": 0.7, + "learning_rate": 1.867167494771982e-05, + "loss": 2.1942, + "step": 3468 + }, + { + "epoch": 0.7, + "learning_rate": 1.8670814576267528e-05, + "loss": 2.1806, + "step": 3469 + }, + { + "epoch": 0.7, + "learning_rate": 1.866995394610499e-05, + "loss": 2.1007, + "step": 3470 + }, + { + "epoch": 0.7, + "learning_rate": 1.8669093057257884e-05, + "loss": 2.239, + "step": 3471 + }, + { + "epoch": 0.7, + "learning_rate": 1.866823190975189e-05, + "loss": 2.0954, + "step": 3472 + }, + { + "epoch": 0.7, + "learning_rate": 1.8667370503612717e-05, + "loss": 2.1786, + "step": 3473 + }, + { + "epoch": 0.71, + "learning_rate": 1.8666508838866052e-05, + "loss": 2.137, + "step": 3474 + }, + { + "epoch": 0.71, + "learning_rate": 1.866564691553761e-05, + "loss": 2.1351, + "step": 3475 + }, + { + "epoch": 0.71, + "learning_rate": 1.866478473365311e-05, + "loss": 2.1387, + "step": 3476 + }, + { + "epoch": 0.71, + "learning_rate": 1.866392229323827e-05, + "loss": 2.1474, + "step": 3477 + }, + { + "epoch": 0.71, + "learning_rate": 1.866305959431883e-05, + "loss": 2.1364, + "step": 3478 + }, + { + "epoch": 0.71, + "learning_rate": 1.8662196636920527e-05, + "loss": 2.219, + "step": 3479 + }, + { + "epoch": 0.71, + "learning_rate": 1.866133342106911e-05, + "loss": 2.0949, + "step": 3480 + }, + { + "epoch": 0.71, + "learning_rate": 1.866046994679034e-05, + "loss": 2.1242, + "step": 3481 + }, + { + "epoch": 0.71, + "learning_rate": 1.865960621410997e-05, + "loss": 2.2307, + "step": 3482 + }, + { + "epoch": 0.71, + "learning_rate": 1.8658742223053773e-05, + "loss": 2.2238, + "step": 3483 + }, + { + "epoch": 0.71, + "learning_rate": 1.865787797364753e-05, + "loss": 2.1722, + "step": 3484 + }, + { + "epoch": 0.71, + "learning_rate": 1.8657013465917032e-05, + "loss": 2.0981, + "step": 3485 + }, + { + "epoch": 0.71, + "learning_rate": 1.8656148699888068e-05, + "loss": 2.1121, + "step": 3486 + }, + { + "epoch": 0.71, + "learning_rate": 1.8655283675586443e-05, + "loss": 2.1646, + "step": 3487 + }, + { + "epoch": 0.71, + "learning_rate": 1.8654418393037966e-05, + "loss": 2.1635, + "step": 3488 + }, + { + "epoch": 0.71, + "learning_rate": 1.8653552852268447e-05, + "loss": 2.1383, + "step": 3489 + }, + { + "epoch": 0.71, + "learning_rate": 1.8652687053303725e-05, + "loss": 2.1895, + "step": 3490 + }, + { + "epoch": 0.71, + "learning_rate": 1.865182099616962e-05, + "loss": 2.0921, + "step": 3491 + }, + { + "epoch": 0.71, + "learning_rate": 1.865095468089198e-05, + "loss": 2.2, + "step": 3492 + }, + { + "epoch": 0.71, + "learning_rate": 1.865008810749665e-05, + "loss": 2.1552, + "step": 3493 + }, + { + "epoch": 0.71, + "learning_rate": 1.8649221276009482e-05, + "loss": 2.1334, + "step": 3494 + }, + { + "epoch": 0.71, + "learning_rate": 1.864835418645635e-05, + "loss": 2.0932, + "step": 3495 + }, + { + "epoch": 0.71, + "learning_rate": 1.8647486838863117e-05, + "loss": 2.226, + "step": 3496 + }, + { + "epoch": 0.71, + "learning_rate": 1.8646619233255666e-05, + "loss": 2.1848, + "step": 3497 + }, + { + "epoch": 0.71, + "learning_rate": 1.864575136965988e-05, + "loss": 2.1407, + "step": 3498 + }, + { + "epoch": 0.71, + "learning_rate": 1.8644883248101658e-05, + "loss": 2.1113, + "step": 3499 + }, + { + "epoch": 0.71, + "learning_rate": 1.8644014868606898e-05, + "loss": 2.2033, + "step": 3500 + }, + { + "epoch": 0.71, + "learning_rate": 1.864314623120151e-05, + "loss": 2.0705, + "step": 3501 + }, + { + "epoch": 0.71, + "learning_rate": 1.8642277335911413e-05, + "loss": 2.1623, + "step": 3502 + }, + { + "epoch": 0.71, + "learning_rate": 1.8641408182762537e-05, + "loss": 2.1649, + "step": 3503 + }, + { + "epoch": 0.71, + "learning_rate": 1.86405387717808e-05, + "loss": 2.1306, + "step": 3504 + }, + { + "epoch": 0.71, + "learning_rate": 1.8639669102992158e-05, + "loss": 2.2115, + "step": 3505 + }, + { + "epoch": 0.71, + "learning_rate": 1.8638799176422556e-05, + "loss": 2.0181, + "step": 3506 + }, + { + "epoch": 0.71, + "learning_rate": 1.863792899209794e-05, + "loss": 2.182, + "step": 3507 + }, + { + "epoch": 0.71, + "learning_rate": 1.8637058550044287e-05, + "loss": 2.1874, + "step": 3508 + }, + { + "epoch": 0.71, + "learning_rate": 1.863618785028756e-05, + "loss": 2.134, + "step": 3509 + }, + { + "epoch": 0.71, + "learning_rate": 1.863531689285374e-05, + "loss": 2.2045, + "step": 3510 + }, + { + "epoch": 0.71, + "learning_rate": 1.8634445677768814e-05, + "loss": 2.1427, + "step": 3511 + }, + { + "epoch": 0.71, + "learning_rate": 1.8633574205058778e-05, + "loss": 2.1405, + "step": 3512 + }, + { + "epoch": 0.71, + "learning_rate": 1.863270247474963e-05, + "loss": 2.1338, + "step": 3513 + }, + { + "epoch": 0.71, + "learning_rate": 1.8631830486867385e-05, + "loss": 2.1756, + "step": 3514 + }, + { + "epoch": 0.71, + "learning_rate": 1.8630958241438055e-05, + "loss": 2.122, + "step": 3515 + }, + { + "epoch": 0.71, + "learning_rate": 1.8630085738487666e-05, + "loss": 2.127, + "step": 3516 + }, + { + "epoch": 0.71, + "learning_rate": 1.8629212978042257e-05, + "loss": 2.2069, + "step": 3517 + }, + { + "epoch": 0.71, + "learning_rate": 1.862833996012786e-05, + "loss": 2.1371, + "step": 3518 + }, + { + "epoch": 0.71, + "learning_rate": 1.8627466684770526e-05, + "loss": 2.2079, + "step": 3519 + }, + { + "epoch": 0.71, + "learning_rate": 1.8626593151996315e-05, + "loss": 2.1987, + "step": 3520 + }, + { + "epoch": 0.71, + "learning_rate": 1.8625719361831285e-05, + "loss": 2.1386, + "step": 3521 + }, + { + "epoch": 0.71, + "learning_rate": 1.862484531430151e-05, + "loss": 2.0713, + "step": 3522 + }, + { + "epoch": 0.72, + "learning_rate": 1.862397100943307e-05, + "loss": 2.1418, + "step": 3523 + }, + { + "epoch": 0.72, + "learning_rate": 1.8623096447252048e-05, + "loss": 2.1302, + "step": 3524 + }, + { + "epoch": 0.72, + "learning_rate": 1.862222162778454e-05, + "loss": 2.1889, + "step": 3525 + }, + { + "epoch": 0.72, + "learning_rate": 1.862134655105665e-05, + "loss": 2.1848, + "step": 3526 + }, + { + "epoch": 0.72, + "learning_rate": 1.8620471217094483e-05, + "loss": 2.2646, + "step": 3527 + }, + { + "epoch": 0.72, + "learning_rate": 1.861959562592416e-05, + "loss": 2.1446, + "step": 3528 + }, + { + "epoch": 0.72, + "learning_rate": 1.8618719777571804e-05, + "loss": 2.1488, + "step": 3529 + }, + { + "epoch": 0.72, + "learning_rate": 1.8617843672063554e-05, + "loss": 2.1349, + "step": 3530 + }, + { + "epoch": 0.72, + "learning_rate": 1.8616967309425538e-05, + "loss": 2.1291, + "step": 3531 + }, + { + "epoch": 0.72, + "learning_rate": 1.861609068968391e-05, + "loss": 2.1719, + "step": 3532 + }, + { + "epoch": 0.72, + "learning_rate": 1.861521381286483e-05, + "loss": 2.1545, + "step": 3533 + }, + { + "epoch": 0.72, + "learning_rate": 1.861433667899446e-05, + "loss": 2.1439, + "step": 3534 + }, + { + "epoch": 0.72, + "learning_rate": 1.8613459288098963e-05, + "loss": 2.1768, + "step": 3535 + }, + { + "epoch": 0.72, + "learning_rate": 1.8612581640204524e-05, + "loss": 2.2124, + "step": 3536 + }, + { + "epoch": 0.72, + "learning_rate": 1.861170373533733e-05, + "loss": 2.0368, + "step": 3537 + }, + { + "epoch": 0.72, + "learning_rate": 1.8610825573523572e-05, + "loss": 2.1873, + "step": 3538 + }, + { + "epoch": 0.72, + "learning_rate": 1.8609947154789455e-05, + "loss": 2.1076, + "step": 3539 + }, + { + "epoch": 0.72, + "learning_rate": 1.8609068479161182e-05, + "loss": 2.1759, + "step": 3540 + }, + { + "epoch": 0.72, + "learning_rate": 1.860818954666498e-05, + "loss": 2.1536, + "step": 3541 + }, + { + "epoch": 0.72, + "learning_rate": 1.8607310357327064e-05, + "loss": 2.173, + "step": 3542 + }, + { + "epoch": 0.72, + "learning_rate": 1.860643091117367e-05, + "loss": 2.1571, + "step": 3543 + }, + { + "epoch": 0.72, + "learning_rate": 1.8605551208231042e-05, + "loss": 2.203, + "step": 3544 + }, + { + "epoch": 0.72, + "learning_rate": 1.860467124852542e-05, + "loss": 2.1038, + "step": 3545 + }, + { + "epoch": 0.72, + "learning_rate": 1.8603791032083062e-05, + "loss": 2.161, + "step": 3546 + }, + { + "epoch": 0.72, + "learning_rate": 1.8602910558930233e-05, + "loss": 2.1585, + "step": 3547 + }, + { + "epoch": 0.72, + "learning_rate": 1.8602029829093205e-05, + "loss": 2.1915, + "step": 3548 + }, + { + "epoch": 0.72, + "learning_rate": 1.8601148842598254e-05, + "loss": 2.2174, + "step": 3549 + }, + { + "epoch": 0.72, + "learning_rate": 1.8600267599471663e-05, + "loss": 2.2051, + "step": 3550 + }, + { + "epoch": 0.72, + "learning_rate": 1.8599386099739727e-05, + "loss": 2.1313, + "step": 3551 + }, + { + "epoch": 0.72, + "learning_rate": 1.859850434342875e-05, + "loss": 2.1565, + "step": 3552 + }, + { + "epoch": 0.72, + "learning_rate": 1.859762233056504e-05, + "loss": 2.1906, + "step": 3553 + }, + { + "epoch": 0.72, + "learning_rate": 1.8596740061174912e-05, + "loss": 2.1325, + "step": 3554 + }, + { + "epoch": 0.72, + "learning_rate": 1.8595857535284692e-05, + "loss": 2.1266, + "step": 3555 + }, + { + "epoch": 0.72, + "learning_rate": 1.859497475292071e-05, + "loss": 2.1965, + "step": 3556 + }, + { + "epoch": 0.72, + "learning_rate": 1.8594091714109308e-05, + "loss": 2.1748, + "step": 3557 + }, + { + "epoch": 0.72, + "learning_rate": 1.859320841887683e-05, + "loss": 2.185, + "step": 3558 + }, + { + "epoch": 0.72, + "learning_rate": 1.8592324867249632e-05, + "loss": 2.2151, + "step": 3559 + }, + { + "epoch": 0.72, + "learning_rate": 1.8591441059254076e-05, + "loss": 2.0587, + "step": 3560 + }, + { + "epoch": 0.72, + "learning_rate": 1.8590556994916532e-05, + "loss": 2.2042, + "step": 3561 + }, + { + "epoch": 0.72, + "learning_rate": 1.8589672674263382e-05, + "loss": 2.1689, + "step": 3562 + }, + { + "epoch": 0.72, + "learning_rate": 1.858878809732101e-05, + "loss": 2.1768, + "step": 3563 + }, + { + "epoch": 0.72, + "learning_rate": 1.85879032641158e-05, + "loss": 2.118, + "step": 3564 + }, + { + "epoch": 0.72, + "learning_rate": 1.8587018174674165e-05, + "loss": 2.1457, + "step": 3565 + }, + { + "epoch": 0.72, + "learning_rate": 1.8586132829022505e-05, + "loss": 2.0738, + "step": 3566 + }, + { + "epoch": 0.72, + "learning_rate": 1.858524722718724e-05, + "loss": 2.1919, + "step": 3567 + }, + { + "epoch": 0.72, + "learning_rate": 1.858436136919479e-05, + "loss": 2.1536, + "step": 3568 + }, + { + "epoch": 0.72, + "learning_rate": 1.8583475255071596e-05, + "loss": 2.1958, + "step": 3569 + }, + { + "epoch": 0.72, + "learning_rate": 1.8582588884844086e-05, + "loss": 2.1634, + "step": 3570 + }, + { + "epoch": 0.72, + "learning_rate": 1.858170225853871e-05, + "loss": 2.1947, + "step": 3571 + }, + { + "epoch": 0.73, + "learning_rate": 1.858081537618192e-05, + "loss": 2.2236, + "step": 3572 + }, + { + "epoch": 0.73, + "learning_rate": 1.857992823780018e-05, + "loss": 2.1796, + "step": 3573 + }, + { + "epoch": 0.73, + "learning_rate": 1.8579040843419964e-05, + "loss": 2.103, + "step": 3574 + }, + { + "epoch": 0.73, + "learning_rate": 1.8578153193067746e-05, + "loss": 2.1632, + "step": 3575 + }, + { + "epoch": 0.73, + "learning_rate": 1.8577265286770004e-05, + "loss": 2.0635, + "step": 3576 + }, + { + "epoch": 0.73, + "learning_rate": 1.857637712455324e-05, + "loss": 2.1369, + "step": 3577 + }, + { + "epoch": 0.73, + "learning_rate": 1.857548870644395e-05, + "loss": 2.0997, + "step": 3578 + }, + { + "epoch": 0.73, + "learning_rate": 1.857460003246864e-05, + "loss": 2.107, + "step": 3579 + }, + { + "epoch": 0.73, + "learning_rate": 1.8573711102653825e-05, + "loss": 2.1523, + "step": 3580 + }, + { + "epoch": 0.73, + "learning_rate": 1.8572821917026034e-05, + "loss": 2.061, + "step": 3581 + }, + { + "epoch": 0.73, + "learning_rate": 1.857193247561179e-05, + "loss": 2.2028, + "step": 3582 + }, + { + "epoch": 0.73, + "learning_rate": 1.8571042778437635e-05, + "loss": 2.1403, + "step": 3583 + }, + { + "epoch": 0.73, + "learning_rate": 1.8570152825530114e-05, + "loss": 2.15, + "step": 3584 + }, + { + "epoch": 0.73, + "learning_rate": 1.8569262616915784e-05, + "loss": 2.1114, + "step": 3585 + }, + { + "epoch": 0.73, + "learning_rate": 1.85683721526212e-05, + "loss": 2.0565, + "step": 3586 + }, + { + "epoch": 0.73, + "learning_rate": 1.8567481432672933e-05, + "loss": 2.1534, + "step": 3587 + }, + { + "epoch": 0.73, + "learning_rate": 1.856659045709756e-05, + "loss": 2.1742, + "step": 3588 + }, + { + "epoch": 0.73, + "learning_rate": 1.856569922592167e-05, + "loss": 2.1381, + "step": 3589 + }, + { + "epoch": 0.73, + "learning_rate": 1.8564807739171844e-05, + "loss": 2.1268, + "step": 3590 + }, + { + "epoch": 0.73, + "learning_rate": 1.856391599687469e-05, + "loss": 2.1925, + "step": 3591 + }, + { + "epoch": 0.73, + "learning_rate": 1.856302399905681e-05, + "loss": 2.1094, + "step": 3592 + }, + { + "epoch": 0.73, + "learning_rate": 1.8562131745744816e-05, + "loss": 2.1675, + "step": 3593 + }, + { + "epoch": 0.73, + "learning_rate": 1.856123923696534e-05, + "loss": 2.137, + "step": 3594 + }, + { + "epoch": 0.73, + "learning_rate": 1.8560346472745e-05, + "loss": 2.1514, + "step": 3595 + }, + { + "epoch": 0.73, + "learning_rate": 1.855945345311044e-05, + "loss": 2.2069, + "step": 3596 + }, + { + "epoch": 0.73, + "learning_rate": 1.855856017808831e-05, + "loss": 2.1582, + "step": 3597 + }, + { + "epoch": 0.73, + "learning_rate": 1.855766664770525e-05, + "loss": 2.1562, + "step": 3598 + }, + { + "epoch": 0.73, + "learning_rate": 1.8556772861987932e-05, + "loss": 2.1544, + "step": 3599 + }, + { + "epoch": 0.73, + "learning_rate": 1.8555878820963014e-05, + "loss": 2.1204, + "step": 3600 + }, + { + "epoch": 0.73, + "learning_rate": 1.855498452465718e-05, + "loss": 2.1083, + "step": 3601 + }, + { + "epoch": 0.73, + "learning_rate": 1.8554089973097104e-05, + "loss": 2.2427, + "step": 3602 + }, + { + "epoch": 0.73, + "learning_rate": 1.8553195166309485e-05, + "loss": 2.1488, + "step": 3603 + }, + { + "epoch": 0.73, + "learning_rate": 1.8552300104321013e-05, + "loss": 2.13, + "step": 3604 + }, + { + "epoch": 0.73, + "learning_rate": 1.8551404787158405e-05, + "loss": 2.2065, + "step": 3605 + }, + { + "epoch": 0.73, + "learning_rate": 1.8550509214848365e-05, + "loss": 2.1065, + "step": 3606 + }, + { + "epoch": 0.73, + "learning_rate": 1.854961338741762e-05, + "loss": 2.156, + "step": 3607 + }, + { + "epoch": 0.73, + "learning_rate": 1.8548717304892888e-05, + "loss": 2.1147, + "step": 3608 + }, + { + "epoch": 0.73, + "learning_rate": 1.8547820967300923e-05, + "loss": 2.231, + "step": 3609 + }, + { + "epoch": 0.73, + "learning_rate": 1.8546924374668452e-05, + "loss": 2.1248, + "step": 3610 + }, + { + "epoch": 0.73, + "learning_rate": 1.854602752702224e-05, + "loss": 2.2193, + "step": 3611 + }, + { + "epoch": 0.73, + "learning_rate": 1.8545130424389035e-05, + "loss": 2.1058, + "step": 3612 + }, + { + "epoch": 0.73, + "learning_rate": 1.854423306679561e-05, + "loss": 2.1376, + "step": 3613 + }, + { + "epoch": 0.73, + "learning_rate": 1.854333545426874e-05, + "loss": 2.1179, + "step": 3614 + }, + { + "epoch": 0.73, + "learning_rate": 1.8542437586835202e-05, + "loss": 2.1392, + "step": 3615 + }, + { + "epoch": 0.73, + "learning_rate": 1.854153946452179e-05, + "loss": 2.1635, + "step": 3616 + }, + { + "epoch": 0.73, + "learning_rate": 1.8540641087355303e-05, + "loss": 2.0462, + "step": 3617 + }, + { + "epoch": 0.73, + "learning_rate": 1.8539742455362537e-05, + "loss": 2.1349, + "step": 3618 + }, + { + "epoch": 0.73, + "learning_rate": 1.8538843568570314e-05, + "loss": 2.0981, + "step": 3619 + }, + { + "epoch": 0.73, + "learning_rate": 1.853794442700545e-05, + "loss": 2.1811, + "step": 3620 + }, + { + "epoch": 0.74, + "learning_rate": 1.853704503069477e-05, + "loss": 2.1513, + "step": 3621 + }, + { + "epoch": 0.74, + "learning_rate": 1.8536145379665114e-05, + "loss": 2.1759, + "step": 3622 + }, + { + "epoch": 0.74, + "learning_rate": 1.853524547394332e-05, + "loss": 2.1769, + "step": 3623 + }, + { + "epoch": 0.74, + "learning_rate": 1.8534345313556245e-05, + "loss": 2.169, + "step": 3624 + }, + { + "epoch": 0.74, + "learning_rate": 1.8533444898530743e-05, + "loss": 2.1905, + "step": 3625 + }, + { + "epoch": 0.74, + "learning_rate": 1.8532544228893676e-05, + "loss": 2.2241, + "step": 3626 + }, + { + "epoch": 0.74, + "learning_rate": 1.8531643304671922e-05, + "loss": 2.262, + "step": 3627 + }, + { + "epoch": 0.74, + "learning_rate": 1.853074212589236e-05, + "loss": 2.1935, + "step": 3628 + }, + { + "epoch": 0.74, + "learning_rate": 1.852984069258188e-05, + "loss": 2.1806, + "step": 3629 + }, + { + "epoch": 0.74, + "learning_rate": 1.8528939004767377e-05, + "loss": 2.1359, + "step": 3630 + }, + { + "epoch": 0.74, + "learning_rate": 1.8528037062475755e-05, + "loss": 2.1882, + "step": 3631 + }, + { + "epoch": 0.74, + "learning_rate": 1.8527134865733924e-05, + "loss": 2.1573, + "step": 3632 + }, + { + "epoch": 0.74, + "learning_rate": 1.8526232414568804e-05, + "loss": 2.1826, + "step": 3633 + }, + { + "epoch": 0.74, + "learning_rate": 1.852532970900732e-05, + "loss": 2.211, + "step": 3634 + }, + { + "epoch": 0.74, + "learning_rate": 1.852442674907641e-05, + "loss": 2.1355, + "step": 3635 + }, + { + "epoch": 0.74, + "learning_rate": 1.8523523534803006e-05, + "loss": 2.1937, + "step": 3636 + }, + { + "epoch": 0.74, + "learning_rate": 1.852262006621407e-05, + "loss": 2.0739, + "step": 3637 + }, + { + "epoch": 0.74, + "learning_rate": 1.8521716343336548e-05, + "loss": 2.1852, + "step": 3638 + }, + { + "epoch": 0.74, + "learning_rate": 1.852081236619741e-05, + "loss": 2.187, + "step": 3639 + }, + { + "epoch": 0.74, + "learning_rate": 1.8519908134823624e-05, + "loss": 2.2268, + "step": 3640 + }, + { + "epoch": 0.74, + "learning_rate": 1.851900364924217e-05, + "loss": 2.1476, + "step": 3641 + }, + { + "epoch": 0.74, + "learning_rate": 1.851809890948004e-05, + "loss": 2.1953, + "step": 3642 + }, + { + "epoch": 0.74, + "learning_rate": 1.8517193915564225e-05, + "loss": 2.1631, + "step": 3643 + }, + { + "epoch": 0.74, + "learning_rate": 1.851628866752173e-05, + "loss": 2.0904, + "step": 3644 + }, + { + "epoch": 0.74, + "learning_rate": 1.851538316537956e-05, + "loss": 2.1765, + "step": 3645 + }, + { + "epoch": 0.74, + "learning_rate": 1.8514477409164734e-05, + "loss": 2.1409, + "step": 3646 + }, + { + "epoch": 0.74, + "learning_rate": 1.851357139890428e-05, + "loss": 2.0479, + "step": 3647 + }, + { + "epoch": 0.74, + "learning_rate": 1.851266513462523e-05, + "loss": 2.2394, + "step": 3648 + }, + { + "epoch": 0.74, + "learning_rate": 1.8511758616354616e-05, + "loss": 2.1677, + "step": 3649 + }, + { + "epoch": 0.74, + "learning_rate": 1.8510851844119495e-05, + "loss": 2.1486, + "step": 3650 + }, + { + "epoch": 0.74, + "learning_rate": 1.8509944817946917e-05, + "loss": 2.1082, + "step": 3651 + }, + { + "epoch": 0.74, + "learning_rate": 1.8509037537863953e-05, + "loss": 2.0732, + "step": 3652 + }, + { + "epoch": 0.74, + "learning_rate": 1.8508130003897665e-05, + "loss": 2.1568, + "step": 3653 + }, + { + "epoch": 0.74, + "learning_rate": 1.8507222216075135e-05, + "loss": 2.121, + "step": 3654 + }, + { + "epoch": 0.74, + "learning_rate": 1.8506314174423445e-05, + "loss": 2.1219, + "step": 3655 + }, + { + "epoch": 0.74, + "learning_rate": 1.8505405878969692e-05, + "loss": 2.2124, + "step": 3656 + }, + { + "epoch": 0.74, + "learning_rate": 1.8504497329740972e-05, + "loss": 2.1156, + "step": 3657 + }, + { + "epoch": 0.74, + "learning_rate": 1.8503588526764397e-05, + "loss": 2.1473, + "step": 3658 + }, + { + "epoch": 0.74, + "learning_rate": 1.8502679470067084e-05, + "loss": 2.1683, + "step": 3659 + }, + { + "epoch": 0.74, + "learning_rate": 1.8501770159676157e-05, + "loss": 2.135, + "step": 3660 + }, + { + "epoch": 0.74, + "learning_rate": 1.850086059561874e-05, + "loss": 2.2219, + "step": 3661 + }, + { + "epoch": 0.74, + "learning_rate": 1.849995077792198e-05, + "loss": 2.134, + "step": 3662 + }, + { + "epoch": 0.74, + "learning_rate": 1.8499040706613018e-05, + "loss": 2.1497, + "step": 3663 + }, + { + "epoch": 0.74, + "learning_rate": 1.849813038171901e-05, + "loss": 2.2486, + "step": 3664 + }, + { + "epoch": 0.74, + "learning_rate": 1.8497219803267112e-05, + "loss": 2.0707, + "step": 3665 + }, + { + "epoch": 0.74, + "learning_rate": 1.84963089712845e-05, + "loss": 2.1262, + "step": 3666 + }, + { + "epoch": 0.74, + "learning_rate": 1.849539788579835e-05, + "loss": 2.1728, + "step": 3667 + }, + { + "epoch": 0.74, + "learning_rate": 1.849448654683584e-05, + "loss": 2.1241, + "step": 3668 + }, + { + "epoch": 0.74, + "learning_rate": 1.849357495442417e-05, + "loss": 2.0514, + "step": 3669 + }, + { + "epoch": 0.74, + "learning_rate": 1.849266310859053e-05, + "loss": 2.06, + "step": 3670 + }, + { + "epoch": 0.75, + "learning_rate": 1.849175100936213e-05, + "loss": 2.2237, + "step": 3671 + }, + { + "epoch": 0.75, + "learning_rate": 1.849083865676619e-05, + "loss": 2.1935, + "step": 3672 + }, + { + "epoch": 0.75, + "learning_rate": 1.8489926050829924e-05, + "loss": 2.2224, + "step": 3673 + }, + { + "epoch": 0.75, + "learning_rate": 1.848901319158056e-05, + "loss": 2.1148, + "step": 3674 + }, + { + "epoch": 0.75, + "learning_rate": 1.848810007904535e-05, + "loss": 2.1508, + "step": 3675 + }, + { + "epoch": 0.75, + "learning_rate": 1.8487186713251516e-05, + "loss": 2.1885, + "step": 3676 + }, + { + "epoch": 0.75, + "learning_rate": 1.848627309422633e-05, + "loss": 2.1955, + "step": 3677 + }, + { + "epoch": 0.75, + "learning_rate": 1.8485359221997038e-05, + "loss": 2.1498, + "step": 3678 + }, + { + "epoch": 0.75, + "learning_rate": 1.8484445096590913e-05, + "loss": 2.182, + "step": 3679 + }, + { + "epoch": 0.75, + "learning_rate": 1.848353071803523e-05, + "loss": 2.1668, + "step": 3680 + }, + { + "epoch": 0.75, + "learning_rate": 1.848261608635727e-05, + "loss": 2.1532, + "step": 3681 + }, + { + "epoch": 0.75, + "learning_rate": 1.8481701201584323e-05, + "loss": 2.0871, + "step": 3682 + }, + { + "epoch": 0.75, + "learning_rate": 1.8480786063743684e-05, + "loss": 2.1765, + "step": 3683 + }, + { + "epoch": 0.75, + "learning_rate": 1.8479870672862662e-05, + "loss": 2.1406, + "step": 3684 + }, + { + "epoch": 0.75, + "learning_rate": 1.847895502896857e-05, + "loss": 2.1295, + "step": 3685 + }, + { + "epoch": 0.75, + "learning_rate": 1.847803913208872e-05, + "loss": 2.2026, + "step": 3686 + }, + { + "epoch": 0.75, + "learning_rate": 1.847712298225045e-05, + "loss": 2.0916, + "step": 3687 + }, + { + "epoch": 0.75, + "learning_rate": 1.847620657948109e-05, + "loss": 2.1411, + "step": 3688 + }, + { + "epoch": 0.75, + "learning_rate": 1.8475289923807982e-05, + "loss": 2.165, + "step": 3689 + }, + { + "epoch": 0.75, + "learning_rate": 1.8474373015258472e-05, + "loss": 2.1051, + "step": 3690 + }, + { + "epoch": 0.75, + "learning_rate": 1.847345585385993e-05, + "loss": 2.1132, + "step": 3691 + }, + { + "epoch": 0.75, + "learning_rate": 1.847253843963971e-05, + "loss": 2.1219, + "step": 3692 + }, + { + "epoch": 0.75, + "learning_rate": 1.8471620772625188e-05, + "loss": 2.2007, + "step": 3693 + }, + { + "epoch": 0.75, + "learning_rate": 1.8470702852843747e-05, + "loss": 2.1736, + "step": 3694 + }, + { + "epoch": 0.75, + "learning_rate": 1.8469784680322772e-05, + "loss": 2.1948, + "step": 3695 + }, + { + "epoch": 0.75, + "learning_rate": 1.846886625508966e-05, + "loss": 2.1419, + "step": 3696 + }, + { + "epoch": 0.75, + "learning_rate": 1.8467947577171813e-05, + "loss": 2.1569, + "step": 3697 + }, + { + "epoch": 0.75, + "learning_rate": 1.8467028646596642e-05, + "loss": 2.1268, + "step": 3698 + }, + { + "epoch": 0.75, + "learning_rate": 1.8466109463391563e-05, + "loss": 2.1503, + "step": 3699 + }, + { + "epoch": 0.75, + "learning_rate": 1.8465190027584007e-05, + "loss": 2.2009, + "step": 3700 + }, + { + "epoch": 0.75, + "learning_rate": 1.84642703392014e-05, + "loss": 2.0975, + "step": 3701 + }, + { + "epoch": 0.75, + "learning_rate": 1.8463350398271185e-05, + "loss": 2.2627, + "step": 3702 + }, + { + "epoch": 0.75, + "learning_rate": 1.8462430204820813e-05, + "loss": 2.1963, + "step": 3703 + }, + { + "epoch": 0.75, + "learning_rate": 1.846150975887774e-05, + "loss": 2.1289, + "step": 3704 + }, + { + "epoch": 0.75, + "learning_rate": 1.846058906046943e-05, + "loss": 2.1991, + "step": 3705 + }, + { + "epoch": 0.75, + "learning_rate": 1.8459668109623344e-05, + "loss": 2.0943, + "step": 3706 + }, + { + "epoch": 0.75, + "learning_rate": 1.845874690636697e-05, + "loss": 2.1042, + "step": 3707 + }, + { + "epoch": 0.75, + "learning_rate": 1.8457825450727793e-05, + "loss": 2.1809, + "step": 3708 + }, + { + "epoch": 0.75, + "learning_rate": 1.8456903742733306e-05, + "loss": 2.1719, + "step": 3709 + }, + { + "epoch": 0.75, + "learning_rate": 1.8455981782411007e-05, + "loss": 2.1578, + "step": 3710 + }, + { + "epoch": 0.75, + "learning_rate": 1.84550595697884e-05, + "loss": 2.1824, + "step": 3711 + }, + { + "epoch": 0.75, + "learning_rate": 1.8454137104893014e-05, + "loss": 2.0707, + "step": 3712 + }, + { + "epoch": 0.75, + "learning_rate": 1.8453214387752364e-05, + "loss": 2.1721, + "step": 3713 + }, + { + "epoch": 0.75, + "learning_rate": 1.8452291418393982e-05, + "loss": 2.1269, + "step": 3714 + }, + { + "epoch": 0.75, + "learning_rate": 1.845136819684541e-05, + "loss": 2.0686, + "step": 3715 + }, + { + "epoch": 0.75, + "learning_rate": 1.8450444723134188e-05, + "loss": 2.1937, + "step": 3716 + }, + { + "epoch": 0.75, + "learning_rate": 1.8449520997287873e-05, + "loss": 2.1772, + "step": 3717 + }, + { + "epoch": 0.75, + "learning_rate": 1.844859701933403e-05, + "loss": 2.1911, + "step": 3718 + }, + { + "epoch": 0.75, + "learning_rate": 1.8447672789300215e-05, + "loss": 2.1782, + "step": 3719 + }, + { + "epoch": 0.76, + "learning_rate": 1.844674830721402e-05, + "loss": 2.1603, + "step": 3720 + }, + { + "epoch": 0.76, + "learning_rate": 1.8445823573103016e-05, + "loss": 2.1356, + "step": 3721 + }, + { + "epoch": 0.76, + "learning_rate": 1.8444898586994803e-05, + "loss": 2.1283, + "step": 3722 + }, + { + "epoch": 0.76, + "learning_rate": 1.8443973348916975e-05, + "loss": 2.1457, + "step": 3723 + }, + { + "epoch": 0.76, + "learning_rate": 1.844304785889714e-05, + "loss": 2.1836, + "step": 3724 + }, + { + "epoch": 0.76, + "learning_rate": 1.844212211696291e-05, + "loss": 2.1827, + "step": 3725 + }, + { + "epoch": 0.76, + "learning_rate": 1.8441196123141912e-05, + "loss": 2.1215, + "step": 3726 + }, + { + "epoch": 0.76, + "learning_rate": 1.8440269877461764e-05, + "loss": 2.1816, + "step": 3727 + }, + { + "epoch": 0.76, + "learning_rate": 1.8439343379950112e-05, + "loss": 2.1152, + "step": 3728 + }, + { + "epoch": 0.76, + "learning_rate": 1.8438416630634596e-05, + "loss": 2.1498, + "step": 3729 + }, + { + "epoch": 0.76, + "learning_rate": 1.843748962954287e-05, + "loss": 2.1153, + "step": 3730 + }, + { + "epoch": 0.76, + "learning_rate": 1.8436562376702587e-05, + "loss": 2.1418, + "step": 3731 + }, + { + "epoch": 0.76, + "learning_rate": 1.843563487214142e-05, + "loss": 2.0879, + "step": 3732 + }, + { + "epoch": 0.76, + "learning_rate": 1.8434707115887037e-05, + "loss": 2.2022, + "step": 3733 + }, + { + "epoch": 0.76, + "learning_rate": 1.843377910796712e-05, + "loss": 2.2489, + "step": 3734 + }, + { + "epoch": 0.76, + "learning_rate": 1.8432850848409367e-05, + "loss": 2.1758, + "step": 3735 + }, + { + "epoch": 0.76, + "learning_rate": 1.843192233724146e-05, + "loss": 2.2118, + "step": 3736 + }, + { + "epoch": 0.76, + "learning_rate": 1.843099357449112e-05, + "loss": 2.0962, + "step": 3737 + }, + { + "epoch": 0.76, + "learning_rate": 1.843006456018604e-05, + "loss": 2.1753, + "step": 3738 + }, + { + "epoch": 0.76, + "learning_rate": 1.842913529435395e-05, + "loss": 2.1479, + "step": 3739 + }, + { + "epoch": 0.76, + "learning_rate": 1.8428205777022575e-05, + "loss": 2.0527, + "step": 3740 + }, + { + "epoch": 0.76, + "learning_rate": 1.8427276008219648e-05, + "loss": 2.1186, + "step": 3741 + }, + { + "epoch": 0.76, + "learning_rate": 1.842634598797291e-05, + "loss": 2.2471, + "step": 3742 + }, + { + "epoch": 0.76, + "learning_rate": 1.842541571631011e-05, + "loss": 2.0954, + "step": 3743 + }, + { + "epoch": 0.76, + "learning_rate": 1.842448519325901e-05, + "loss": 2.1208, + "step": 3744 + }, + { + "epoch": 0.76, + "learning_rate": 1.842355441884736e-05, + "loss": 2.1615, + "step": 3745 + }, + { + "epoch": 0.76, + "learning_rate": 1.8422623393102944e-05, + "loss": 2.1536, + "step": 3746 + }, + { + "epoch": 0.76, + "learning_rate": 1.8421692116053534e-05, + "loss": 2.1394, + "step": 3747 + }, + { + "epoch": 0.76, + "learning_rate": 1.8420760587726925e-05, + "loss": 2.165, + "step": 3748 + }, + { + "epoch": 0.76, + "learning_rate": 1.8419828808150902e-05, + "loss": 2.1369, + "step": 3749 + }, + { + "epoch": 0.76, + "learning_rate": 1.8418896777353272e-05, + "loss": 2.1557, + "step": 3750 + }, + { + "epoch": 0.76, + "learning_rate": 1.841796449536184e-05, + "loss": 2.115, + "step": 3751 + }, + { + "epoch": 0.76, + "learning_rate": 1.8417031962204425e-05, + "loss": 2.1009, + "step": 3752 + }, + { + "epoch": 0.76, + "learning_rate": 1.8416099177908847e-05, + "loss": 2.1153, + "step": 3753 + }, + { + "epoch": 0.76, + "learning_rate": 1.8415166142502946e-05, + "loss": 2.1447, + "step": 3754 + }, + { + "epoch": 0.76, + "learning_rate": 1.841423285601455e-05, + "loss": 2.1787, + "step": 3755 + }, + { + "epoch": 0.76, + "learning_rate": 1.8413299318471513e-05, + "loss": 2.0994, + "step": 3756 + }, + { + "epoch": 0.76, + "learning_rate": 1.841236552990169e-05, + "loss": 2.1748, + "step": 3757 + }, + { + "epoch": 0.76, + "learning_rate": 1.8411431490332932e-05, + "loss": 2.1902, + "step": 3758 + }, + { + "epoch": 0.76, + "learning_rate": 1.841049719979312e-05, + "loss": 2.1906, + "step": 3759 + }, + { + "epoch": 0.76, + "learning_rate": 1.840956265831012e-05, + "loss": 2.1263, + "step": 3760 + }, + { + "epoch": 0.76, + "learning_rate": 1.8408627865911827e-05, + "loss": 2.0902, + "step": 3761 + }, + { + "epoch": 0.76, + "learning_rate": 1.840769282262612e-05, + "loss": 2.1935, + "step": 3762 + }, + { + "epoch": 0.76, + "learning_rate": 1.840675752848091e-05, + "loss": 2.0779, + "step": 3763 + }, + { + "epoch": 0.76, + "learning_rate": 1.8405821983504094e-05, + "loss": 2.1249, + "step": 3764 + }, + { + "epoch": 0.76, + "learning_rate": 1.840488618772359e-05, + "loss": 2.1053, + "step": 3765 + }, + { + "epoch": 0.76, + "learning_rate": 1.8403950141167316e-05, + "loss": 2.1409, + "step": 3766 + }, + { + "epoch": 0.76, + "learning_rate": 1.8403013843863203e-05, + "loss": 2.157, + "step": 3767 + }, + { + "epoch": 0.76, + "learning_rate": 1.840207729583919e-05, + "loss": 2.1306, + "step": 3768 + }, + { + "epoch": 0.77, + "learning_rate": 1.840114049712322e-05, + "loss": 2.1073, + "step": 3769 + }, + { + "epoch": 0.77, + "learning_rate": 1.8400203447743237e-05, + "loss": 2.1349, + "step": 3770 + }, + { + "epoch": 0.77, + "learning_rate": 1.8399266147727207e-05, + "loss": 2.2099, + "step": 3771 + }, + { + "epoch": 0.77, + "learning_rate": 1.8398328597103092e-05, + "loss": 2.1714, + "step": 3772 + }, + { + "epoch": 0.77, + "learning_rate": 1.839739079589887e-05, + "loss": 2.1701, + "step": 3773 + }, + { + "epoch": 0.77, + "learning_rate": 1.8396452744142518e-05, + "loss": 2.0864, + "step": 3774 + }, + { + "epoch": 0.77, + "learning_rate": 1.8395514441862027e-05, + "loss": 2.1622, + "step": 3775 + }, + { + "epoch": 0.77, + "learning_rate": 1.8394575889085392e-05, + "loss": 2.0887, + "step": 3776 + }, + { + "epoch": 0.77, + "learning_rate": 1.839363708584062e-05, + "loss": 2.0866, + "step": 3777 + }, + { + "epoch": 0.77, + "learning_rate": 1.8392698032155714e-05, + "loss": 2.1431, + "step": 3778 + }, + { + "epoch": 0.77, + "learning_rate": 1.83917587280587e-05, + "loss": 2.1333, + "step": 3779 + }, + { + "epoch": 0.77, + "learning_rate": 1.83908191735776e-05, + "loss": 2.1204, + "step": 3780 + }, + { + "epoch": 0.77, + "learning_rate": 1.8389879368740446e-05, + "loss": 2.1025, + "step": 3781 + }, + { + "epoch": 0.77, + "learning_rate": 1.8388939313575283e-05, + "loss": 2.1612, + "step": 3782 + }, + { + "epoch": 0.77, + "learning_rate": 1.838799900811016e-05, + "loss": 2.2025, + "step": 3783 + }, + { + "epoch": 0.77, + "learning_rate": 1.838705845237313e-05, + "loss": 2.1775, + "step": 3784 + }, + { + "epoch": 0.77, + "learning_rate": 1.838611764639225e-05, + "loss": 2.1905, + "step": 3785 + }, + { + "epoch": 0.77, + "learning_rate": 1.8385176590195603e-05, + "loss": 2.1179, + "step": 3786 + }, + { + "epoch": 0.77, + "learning_rate": 1.8384235283811262e-05, + "loss": 2.1977, + "step": 3787 + }, + { + "epoch": 0.77, + "learning_rate": 1.838329372726731e-05, + "loss": 2.2088, + "step": 3788 + }, + { + "epoch": 0.77, + "learning_rate": 1.8382351920591848e-05, + "loss": 2.2169, + "step": 3789 + }, + { + "epoch": 0.77, + "learning_rate": 1.8381409863812966e-05, + "loss": 2.2209, + "step": 3790 + }, + { + "epoch": 0.77, + "learning_rate": 1.8380467556958777e-05, + "loss": 2.0963, + "step": 3791 + }, + { + "epoch": 0.77, + "learning_rate": 1.8379525000057396e-05, + "loss": 2.1109, + "step": 3792 + }, + { + "epoch": 0.77, + "learning_rate": 1.837858219313695e-05, + "loss": 2.2223, + "step": 3793 + }, + { + "epoch": 0.77, + "learning_rate": 1.8377639136225563e-05, + "loss": 2.0296, + "step": 3794 + }, + { + "epoch": 0.77, + "learning_rate": 1.8376695829351378e-05, + "loss": 2.1276, + "step": 3795 + }, + { + "epoch": 0.77, + "learning_rate": 1.8375752272542537e-05, + "loss": 2.1833, + "step": 3796 + }, + { + "epoch": 0.77, + "learning_rate": 1.8374808465827193e-05, + "loss": 2.1706, + "step": 3797 + }, + { + "epoch": 0.77, + "learning_rate": 1.837386440923351e-05, + "loss": 2.1549, + "step": 3798 + }, + { + "epoch": 0.77, + "learning_rate": 1.837292010278965e-05, + "loss": 2.1418, + "step": 3799 + }, + { + "epoch": 0.77, + "learning_rate": 1.8371975546523795e-05, + "loss": 2.1615, + "step": 3800 + }, + { + "epoch": 0.77, + "learning_rate": 1.8371030740464123e-05, + "loss": 2.0767, + "step": 3801 + }, + { + "epoch": 0.77, + "learning_rate": 1.8370085684638823e-05, + "loss": 2.1403, + "step": 3802 + }, + { + "epoch": 0.77, + "learning_rate": 1.8369140379076097e-05, + "loss": 2.1566, + "step": 3803 + }, + { + "epoch": 0.77, + "learning_rate": 1.8368194823804144e-05, + "loss": 2.1406, + "step": 3804 + }, + { + "epoch": 0.77, + "learning_rate": 1.8367249018851182e-05, + "loss": 2.1484, + "step": 3805 + }, + { + "epoch": 0.77, + "learning_rate": 1.8366302964245427e-05, + "loss": 2.1771, + "step": 3806 + }, + { + "epoch": 0.77, + "learning_rate": 1.836535666001511e-05, + "loss": 2.1494, + "step": 3807 + }, + { + "epoch": 0.77, + "learning_rate": 1.8364410106188466e-05, + "loss": 2.1737, + "step": 3808 + }, + { + "epoch": 0.77, + "learning_rate": 1.8363463302793733e-05, + "loss": 2.1796, + "step": 3809 + }, + { + "epoch": 0.77, + "learning_rate": 1.8362516249859164e-05, + "loss": 2.1662, + "step": 3810 + }, + { + "epoch": 0.77, + "learning_rate": 1.8361568947413012e-05, + "loss": 2.182, + "step": 3811 + }, + { + "epoch": 0.77, + "learning_rate": 1.8360621395483548e-05, + "loss": 2.1438, + "step": 3812 + }, + { + "epoch": 0.77, + "learning_rate": 1.835967359409904e-05, + "loss": 2.1087, + "step": 3813 + }, + { + "epoch": 0.77, + "learning_rate": 1.8358725543287765e-05, + "loss": 2.1431, + "step": 3814 + }, + { + "epoch": 0.77, + "learning_rate": 1.8357777243078015e-05, + "loss": 2.1109, + "step": 3815 + }, + { + "epoch": 0.77, + "learning_rate": 1.835682869349808e-05, + "loss": 2.1829, + "step": 3816 + }, + { + "epoch": 0.77, + "learning_rate": 1.835587989457627e-05, + "loss": 2.1725, + "step": 3817 + }, + { + "epoch": 0.77, + "learning_rate": 1.8354930846340882e-05, + "loss": 2.1463, + "step": 3818 + }, + { + "epoch": 0.78, + "learning_rate": 1.835398154882024e-05, + "loss": 2.1605, + "step": 3819 + }, + { + "epoch": 0.78, + "learning_rate": 1.8353032002042674e-05, + "loss": 2.138, + "step": 3820 + }, + { + "epoch": 0.78, + "learning_rate": 1.83520822060365e-05, + "loss": 2.1674, + "step": 3821 + }, + { + "epoch": 0.78, + "learning_rate": 1.835113216083007e-05, + "loss": 2.2076, + "step": 3822 + }, + { + "epoch": 0.78, + "learning_rate": 1.835018186645172e-05, + "loss": 2.1012, + "step": 3823 + }, + { + "epoch": 0.78, + "learning_rate": 1.834923132292982e-05, + "loss": 2.1462, + "step": 3824 + }, + { + "epoch": 0.78, + "learning_rate": 1.8348280530292712e-05, + "loss": 2.1683, + "step": 3825 + }, + { + "epoch": 0.78, + "learning_rate": 1.834732948856878e-05, + "loss": 2.0971, + "step": 3826 + }, + { + "epoch": 0.78, + "learning_rate": 1.834637819778639e-05, + "loss": 2.2096, + "step": 3827 + }, + { + "epoch": 0.78, + "learning_rate": 1.834542665797393e-05, + "loss": 2.0668, + "step": 3828 + }, + { + "epoch": 0.78, + "learning_rate": 1.834447486915979e-05, + "loss": 2.1701, + "step": 3829 + }, + { + "epoch": 0.78, + "learning_rate": 1.834352283137237e-05, + "loss": 2.2077, + "step": 3830 + }, + { + "epoch": 0.78, + "learning_rate": 1.834257054464008e-05, + "loss": 2.0968, + "step": 3831 + }, + { + "epoch": 0.78, + "learning_rate": 1.834161800899132e-05, + "loss": 2.0847, + "step": 3832 + }, + { + "epoch": 0.78, + "learning_rate": 1.834066522445452e-05, + "loss": 2.2522, + "step": 3833 + }, + { + "epoch": 0.78, + "learning_rate": 1.833971219105811e-05, + "loss": 2.173, + "step": 3834 + }, + { + "epoch": 0.78, + "learning_rate": 1.833875890883052e-05, + "loss": 2.1876, + "step": 3835 + }, + { + "epoch": 0.78, + "learning_rate": 1.8337805377800197e-05, + "loss": 2.156, + "step": 3836 + }, + { + "epoch": 0.78, + "learning_rate": 1.8336851597995587e-05, + "loss": 2.1472, + "step": 3837 + }, + { + "epoch": 0.78, + "learning_rate": 1.8335897569445157e-05, + "loss": 2.1055, + "step": 3838 + }, + { + "epoch": 0.78, + "learning_rate": 1.8334943292177358e-05, + "loss": 2.1809, + "step": 3839 + }, + { + "epoch": 0.78, + "learning_rate": 1.8333988766220676e-05, + "loss": 2.1349, + "step": 3840 + }, + { + "epoch": 0.78, + "learning_rate": 1.8333033991603584e-05, + "loss": 2.0582, + "step": 3841 + }, + { + "epoch": 0.78, + "learning_rate": 1.833207896835457e-05, + "loss": 2.1711, + "step": 3842 + }, + { + "epoch": 0.78, + "learning_rate": 1.8331123696502132e-05, + "loss": 2.0745, + "step": 3843 + }, + { + "epoch": 0.78, + "learning_rate": 1.833016817607477e-05, + "loss": 2.1791, + "step": 3844 + }, + { + "epoch": 0.78, + "learning_rate": 1.8329212407100996e-05, + "loss": 2.2015, + "step": 3845 + }, + { + "epoch": 0.78, + "learning_rate": 1.8328256389609326e-05, + "loss": 2.1398, + "step": 3846 + }, + { + "epoch": 0.78, + "learning_rate": 1.832730012362828e-05, + "loss": 2.1605, + "step": 3847 + }, + { + "epoch": 0.78, + "learning_rate": 1.8326343609186396e-05, + "loss": 2.1878, + "step": 3848 + }, + { + "epoch": 0.78, + "learning_rate": 1.832538684631221e-05, + "loss": 2.2023, + "step": 3849 + }, + { + "epoch": 0.78, + "learning_rate": 1.8324429835034278e-05, + "loss": 2.0682, + "step": 3850 + }, + { + "epoch": 0.78, + "learning_rate": 1.8323472575381138e-05, + "loss": 2.1538, + "step": 3851 + }, + { + "epoch": 0.78, + "learning_rate": 1.8322515067381363e-05, + "loss": 2.1355, + "step": 3852 + }, + { + "epoch": 0.78, + "learning_rate": 1.832155731106352e-05, + "loss": 2.1797, + "step": 3853 + }, + { + "epoch": 0.78, + "learning_rate": 1.8320599306456184e-05, + "loss": 2.1531, + "step": 3854 + }, + { + "epoch": 0.78, + "learning_rate": 1.831964105358794e-05, + "loss": 2.0962, + "step": 3855 + }, + { + "epoch": 0.78, + "learning_rate": 1.831868255248738e-05, + "loss": 2.1873, + "step": 3856 + }, + { + "epoch": 0.78, + "learning_rate": 1.8317723803183097e-05, + "loss": 2.1902, + "step": 3857 + }, + { + "epoch": 0.78, + "learning_rate": 1.8316764805703708e-05, + "loss": 2.1647, + "step": 3858 + }, + { + "epoch": 0.78, + "learning_rate": 1.831580556007782e-05, + "loss": 2.1994, + "step": 3859 + }, + { + "epoch": 0.78, + "learning_rate": 1.8314846066334052e-05, + "loss": 2.1127, + "step": 3860 + }, + { + "epoch": 0.78, + "learning_rate": 1.8313886324501032e-05, + "loss": 2.1018, + "step": 3861 + }, + { + "epoch": 0.78, + "learning_rate": 1.8312926334607403e-05, + "loss": 2.1202, + "step": 3862 + }, + { + "epoch": 0.78, + "learning_rate": 1.8311966096681805e-05, + "loss": 2.1474, + "step": 3863 + }, + { + "epoch": 0.78, + "learning_rate": 1.8311005610752878e-05, + "loss": 2.1394, + "step": 3864 + }, + { + "epoch": 0.78, + "learning_rate": 1.8310044876849296e-05, + "loss": 2.1292, + "step": 3865 + }, + { + "epoch": 0.78, + "learning_rate": 1.8309083894999716e-05, + "loss": 2.1507, + "step": 3866 + }, + { + "epoch": 0.78, + "learning_rate": 1.8308122665232814e-05, + "loss": 2.0618, + "step": 3867 + }, + { + "epoch": 0.79, + "learning_rate": 1.8307161187577265e-05, + "loss": 2.1038, + "step": 3868 + }, + { + "epoch": 0.79, + "learning_rate": 1.830619946206176e-05, + "loss": 2.1808, + "step": 3869 + }, + { + "epoch": 0.79, + "learning_rate": 1.8305237488714995e-05, + "loss": 2.1591, + "step": 3870 + }, + { + "epoch": 0.79, + "learning_rate": 1.830427526756567e-05, + "loss": 2.1222, + "step": 3871 + }, + { + "epoch": 0.79, + "learning_rate": 1.8303312798642495e-05, + "loss": 2.165, + "step": 3872 + }, + { + "epoch": 0.79, + "learning_rate": 1.8302350081974184e-05, + "loss": 2.1645, + "step": 3873 + }, + { + "epoch": 0.79, + "learning_rate": 1.8301387117589473e-05, + "loss": 2.2035, + "step": 3874 + }, + { + "epoch": 0.79, + "learning_rate": 1.830042390551708e-05, + "loss": 2.0891, + "step": 3875 + }, + { + "epoch": 0.79, + "learning_rate": 1.8299460445785753e-05, + "loss": 2.1422, + "step": 3876 + }, + { + "epoch": 0.79, + "learning_rate": 1.829849673842423e-05, + "loss": 2.116, + "step": 3877 + }, + { + "epoch": 0.79, + "learning_rate": 1.8297532783461278e-05, + "loss": 2.1376, + "step": 3878 + }, + { + "epoch": 0.79, + "learning_rate": 1.829656858092565e-05, + "loss": 2.1633, + "step": 3879 + }, + { + "epoch": 0.79, + "learning_rate": 1.8295604130846115e-05, + "loss": 2.1706, + "step": 3880 + }, + { + "epoch": 0.79, + "learning_rate": 1.829463943325145e-05, + "loss": 2.1151, + "step": 3881 + }, + { + "epoch": 0.79, + "learning_rate": 1.829367448817044e-05, + "loss": 2.1593, + "step": 3882 + }, + { + "epoch": 0.79, + "learning_rate": 1.8292709295631873e-05, + "loss": 2.1493, + "step": 3883 + }, + { + "epoch": 0.79, + "learning_rate": 1.8291743855664556e-05, + "loss": 2.1891, + "step": 3884 + }, + { + "epoch": 0.79, + "learning_rate": 1.829077816829728e-05, + "loss": 2.1931, + "step": 3885 + }, + { + "epoch": 0.79, + "learning_rate": 1.8289812233558868e-05, + "loss": 2.1319, + "step": 3886 + }, + { + "epoch": 0.79, + "learning_rate": 1.828884605147814e-05, + "loss": 2.1297, + "step": 3887 + }, + { + "epoch": 0.79, + "learning_rate": 1.828787962208392e-05, + "loss": 2.1271, + "step": 3888 + }, + { + "epoch": 0.79, + "learning_rate": 1.828691294540505e-05, + "loss": 2.2075, + "step": 3889 + }, + { + "epoch": 0.79, + "learning_rate": 1.8285946021470364e-05, + "loss": 2.2115, + "step": 3890 + }, + { + "epoch": 0.79, + "learning_rate": 1.828497885030872e-05, + "loss": 2.0157, + "step": 3891 + }, + { + "epoch": 0.79, + "learning_rate": 1.8284011431948968e-05, + "loss": 2.2382, + "step": 3892 + }, + { + "epoch": 0.79, + "learning_rate": 1.828304376641998e-05, + "loss": 2.1399, + "step": 3893 + }, + { + "epoch": 0.79, + "learning_rate": 1.8282075853750624e-05, + "loss": 2.194, + "step": 3894 + }, + { + "epoch": 0.79, + "learning_rate": 1.828110769396978e-05, + "loss": 2.1487, + "step": 3895 + }, + { + "epoch": 0.79, + "learning_rate": 1.8280139287106332e-05, + "loss": 2.1845, + "step": 3896 + }, + { + "epoch": 0.79, + "learning_rate": 1.8279170633189184e-05, + "loss": 2.1734, + "step": 3897 + }, + { + "epoch": 0.79, + "learning_rate": 1.8278201732247227e-05, + "loss": 2.129, + "step": 3898 + }, + { + "epoch": 0.79, + "learning_rate": 1.8277232584309374e-05, + "loss": 2.2235, + "step": 3899 + }, + { + "epoch": 0.79, + "learning_rate": 1.827626318940454e-05, + "loss": 2.1248, + "step": 3900 + }, + { + "epoch": 0.79, + "learning_rate": 1.8275293547561652e-05, + "loss": 2.1549, + "step": 3901 + }, + { + "epoch": 0.79, + "learning_rate": 1.8274323658809638e-05, + "loss": 2.1111, + "step": 3902 + }, + { + "epoch": 0.79, + "learning_rate": 1.827335352317744e-05, + "loss": 2.1923, + "step": 3903 + }, + { + "epoch": 0.79, + "learning_rate": 1.8272383140694e-05, + "loss": 2.1334, + "step": 3904 + }, + { + "epoch": 0.79, + "learning_rate": 1.8271412511388273e-05, + "loss": 2.2071, + "step": 3905 + }, + { + "epoch": 0.79, + "learning_rate": 1.827044163528922e-05, + "loss": 2.1705, + "step": 3906 + }, + { + "epoch": 0.79, + "learning_rate": 1.826947051242581e-05, + "loss": 2.1594, + "step": 3907 + }, + { + "epoch": 0.79, + "learning_rate": 1.8268499142827014e-05, + "loss": 2.1417, + "step": 3908 + }, + { + "epoch": 0.79, + "learning_rate": 1.8267527526521814e-05, + "loss": 2.1327, + "step": 3909 + }, + { + "epoch": 0.79, + "learning_rate": 1.826655566353921e-05, + "loss": 2.0906, + "step": 3910 + }, + { + "epoch": 0.79, + "learning_rate": 1.8265583553908187e-05, + "loss": 2.1209, + "step": 3911 + }, + { + "epoch": 0.79, + "learning_rate": 1.826461119765776e-05, + "loss": 2.1745, + "step": 3912 + }, + { + "epoch": 0.79, + "learning_rate": 1.8263638594816934e-05, + "loss": 2.1541, + "step": 3913 + }, + { + "epoch": 0.79, + "learning_rate": 1.8262665745414728e-05, + "loss": 2.1188, + "step": 3914 + }, + { + "epoch": 0.79, + "learning_rate": 1.8261692649480174e-05, + "loss": 2.0805, + "step": 3915 + }, + { + "epoch": 0.79, + "learning_rate": 1.826071930704231e-05, + "loss": 2.1303, + "step": 3916 + }, + { + "epoch": 0.8, + "learning_rate": 1.8259745718130163e-05, + "loss": 2.104, + "step": 3917 + }, + { + "epoch": 0.8, + "learning_rate": 1.8258771882772794e-05, + "loss": 2.1479, + "step": 3918 + }, + { + "epoch": 0.8, + "learning_rate": 1.8257797800999256e-05, + "loss": 2.1627, + "step": 3919 + }, + { + "epoch": 0.8, + "learning_rate": 1.825682347283861e-05, + "loss": 2.1828, + "step": 3920 + }, + { + "epoch": 0.8, + "learning_rate": 1.8255848898319932e-05, + "loss": 2.1671, + "step": 3921 + }, + { + "epoch": 0.8, + "learning_rate": 1.8254874077472292e-05, + "loss": 2.1117, + "step": 3922 + }, + { + "epoch": 0.8, + "learning_rate": 1.8253899010324785e-05, + "loss": 2.0898, + "step": 3923 + }, + { + "epoch": 0.8, + "learning_rate": 1.82529236969065e-05, + "loss": 2.1533, + "step": 3924 + }, + { + "epoch": 0.8, + "learning_rate": 1.825194813724654e-05, + "loss": 2.1773, + "step": 3925 + }, + { + "epoch": 0.8, + "learning_rate": 1.8250972331374006e-05, + "loss": 2.1299, + "step": 3926 + }, + { + "epoch": 0.8, + "learning_rate": 1.8249996279318017e-05, + "loss": 2.1771, + "step": 3927 + }, + { + "epoch": 0.8, + "learning_rate": 1.8249019981107695e-05, + "loss": 2.136, + "step": 3928 + }, + { + "epoch": 0.8, + "learning_rate": 1.8248043436772173e-05, + "loss": 2.2648, + "step": 3929 + }, + { + "epoch": 0.8, + "learning_rate": 1.824706664634058e-05, + "loss": 2.2047, + "step": 3930 + }, + { + "epoch": 0.8, + "learning_rate": 1.824608960984207e-05, + "loss": 2.2402, + "step": 3931 + }, + { + "epoch": 0.8, + "learning_rate": 1.824511232730579e-05, + "loss": 2.1321, + "step": 3932 + }, + { + "epoch": 0.8, + "learning_rate": 1.82441347987609e-05, + "loss": 2.206, + "step": 3933 + }, + { + "epoch": 0.8, + "learning_rate": 1.824315702423656e-05, + "loss": 2.1544, + "step": 3934 + }, + { + "epoch": 0.8, + "learning_rate": 1.8242179003761957e-05, + "loss": 2.1749, + "step": 3935 + }, + { + "epoch": 0.8, + "learning_rate": 1.824120073736626e-05, + "loss": 2.1733, + "step": 3936 + }, + { + "epoch": 0.8, + "learning_rate": 1.8240222225078662e-05, + "loss": 2.1535, + "step": 3937 + }, + { + "epoch": 0.8, + "learning_rate": 1.8239243466928363e-05, + "loss": 2.124, + "step": 3938 + }, + { + "epoch": 0.8, + "learning_rate": 1.8238264462944556e-05, + "loss": 2.0928, + "step": 3939 + }, + { + "epoch": 0.8, + "learning_rate": 1.8237285213156463e-05, + "loss": 2.1181, + "step": 3940 + }, + { + "epoch": 0.8, + "learning_rate": 1.8236305717593293e-05, + "loss": 2.1986, + "step": 3941 + }, + { + "epoch": 0.8, + "learning_rate": 1.8235325976284276e-05, + "loss": 2.0914, + "step": 3942 + }, + { + "epoch": 0.8, + "learning_rate": 1.823434598925864e-05, + "loss": 2.1673, + "step": 3943 + }, + { + "epoch": 0.8, + "learning_rate": 1.8233365756545632e-05, + "loss": 2.1809, + "step": 3944 + }, + { + "epoch": 0.8, + "learning_rate": 1.823238527817449e-05, + "loss": 2.1656, + "step": 3945 + }, + { + "epoch": 0.8, + "learning_rate": 1.8231404554174478e-05, + "loss": 2.1828, + "step": 3946 + }, + { + "epoch": 0.8, + "learning_rate": 1.823042358457485e-05, + "loss": 2.1822, + "step": 3947 + }, + { + "epoch": 0.8, + "learning_rate": 1.822944236940488e-05, + "loss": 2.0542, + "step": 3948 + }, + { + "epoch": 0.8, + "learning_rate": 1.8228460908693836e-05, + "loss": 2.2153, + "step": 3949 + }, + { + "epoch": 0.8, + "learning_rate": 1.8227479202471016e-05, + "loss": 2.1405, + "step": 3950 + }, + { + "epoch": 0.8, + "learning_rate": 1.8226497250765697e-05, + "loss": 2.119, + "step": 3951 + }, + { + "epoch": 0.8, + "learning_rate": 1.8225515053607185e-05, + "loss": 2.1422, + "step": 3952 + }, + { + "epoch": 0.8, + "learning_rate": 1.8224532611024786e-05, + "loss": 2.2122, + "step": 3953 + }, + { + "epoch": 0.8, + "learning_rate": 1.822354992304781e-05, + "loss": 2.1043, + "step": 3954 + }, + { + "epoch": 0.8, + "learning_rate": 1.8222566989705582e-05, + "loss": 2.158, + "step": 3955 + }, + { + "epoch": 0.8, + "learning_rate": 1.8221583811027422e-05, + "loss": 2.1567, + "step": 3956 + }, + { + "epoch": 0.8, + "learning_rate": 1.8220600387042672e-05, + "loss": 2.1072, + "step": 3957 + }, + { + "epoch": 0.8, + "learning_rate": 1.821961671778067e-05, + "loss": 2.1555, + "step": 3958 + }, + { + "epoch": 0.8, + "learning_rate": 1.821863280327077e-05, + "loss": 2.1753, + "step": 3959 + }, + { + "epoch": 0.8, + "learning_rate": 1.8217648643542326e-05, + "loss": 2.1368, + "step": 3960 + }, + { + "epoch": 0.8, + "learning_rate": 1.82166642386247e-05, + "loss": 2.1365, + "step": 3961 + }, + { + "epoch": 0.8, + "learning_rate": 1.821567958854727e-05, + "loss": 2.1873, + "step": 3962 + }, + { + "epoch": 0.8, + "learning_rate": 1.821469469333941e-05, + "loss": 2.1596, + "step": 3963 + }, + { + "epoch": 0.8, + "learning_rate": 1.82137095530305e-05, + "loss": 2.1787, + "step": 3964 + }, + { + "epoch": 0.8, + "learning_rate": 1.821272416764995e-05, + "loss": 2.236, + "step": 3965 + }, + { + "epoch": 0.81, + "learning_rate": 1.8211738537227148e-05, + "loss": 2.1525, + "step": 3966 + }, + { + "epoch": 0.81, + "learning_rate": 1.8210752661791504e-05, + "loss": 2.0817, + "step": 3967 + }, + { + "epoch": 0.81, + "learning_rate": 1.820976654137244e-05, + "loss": 2.1759, + "step": 3968 + }, + { + "epoch": 0.81, + "learning_rate": 1.820878017599937e-05, + "loss": 2.1811, + "step": 3969 + }, + { + "epoch": 0.81, + "learning_rate": 1.820779356570173e-05, + "loss": 2.02, + "step": 3970 + }, + { + "epoch": 0.81, + "learning_rate": 1.820680671050896e-05, + "loss": 2.1718, + "step": 3971 + }, + { + "epoch": 0.81, + "learning_rate": 1.8205819610450492e-05, + "loss": 2.1712, + "step": 3972 + }, + { + "epoch": 0.81, + "learning_rate": 1.8204832265555793e-05, + "loss": 2.0955, + "step": 3973 + }, + { + "epoch": 0.81, + "learning_rate": 1.820384467585431e-05, + "loss": 2.219, + "step": 3974 + }, + { + "epoch": 0.81, + "learning_rate": 1.8202856841375517e-05, + "loss": 2.0866, + "step": 3975 + }, + { + "epoch": 0.81, + "learning_rate": 1.8201868762148887e-05, + "loss": 2.1735, + "step": 3976 + }, + { + "epoch": 0.81, + "learning_rate": 1.8200880438203902e-05, + "loss": 2.1507, + "step": 3977 + }, + { + "epoch": 0.81, + "learning_rate": 1.819989186957005e-05, + "loss": 2.2394, + "step": 3978 + }, + { + "epoch": 0.81, + "learning_rate": 1.8198903056276823e-05, + "loss": 2.0694, + "step": 3979 + }, + { + "epoch": 0.81, + "learning_rate": 1.8197913998353726e-05, + "loss": 2.1283, + "step": 3980 + }, + { + "epoch": 0.81, + "learning_rate": 1.8196924695830272e-05, + "loss": 2.0544, + "step": 3981 + }, + { + "epoch": 0.81, + "learning_rate": 1.8195935148735976e-05, + "loss": 2.1623, + "step": 3982 + }, + { + "epoch": 0.81, + "learning_rate": 1.8194945357100367e-05, + "loss": 2.0487, + "step": 3983 + }, + { + "epoch": 0.81, + "learning_rate": 1.819395532095297e-05, + "loss": 2.0525, + "step": 3984 + }, + { + "epoch": 0.81, + "learning_rate": 1.819296504032333e-05, + "loss": 2.1772, + "step": 3985 + }, + { + "epoch": 0.81, + "learning_rate": 1.8191974515240994e-05, + "loss": 2.087, + "step": 3986 + }, + { + "epoch": 0.81, + "learning_rate": 1.8190983745735514e-05, + "loss": 2.1341, + "step": 3987 + }, + { + "epoch": 0.81, + "learning_rate": 1.8189992731836455e-05, + "loss": 2.1449, + "step": 3988 + }, + { + "epoch": 0.81, + "learning_rate": 1.818900147357338e-05, + "loss": 2.159, + "step": 3989 + }, + { + "epoch": 0.81, + "learning_rate": 1.8188009970975874e-05, + "loss": 2.2389, + "step": 3990 + }, + { + "epoch": 0.81, + "learning_rate": 1.8187018224073506e-05, + "loss": 2.2367, + "step": 3991 + }, + { + "epoch": 0.81, + "learning_rate": 1.8186026232895883e-05, + "loss": 2.1984, + "step": 3992 + }, + { + "epoch": 0.81, + "learning_rate": 1.8185033997472592e-05, + "loss": 2.1965, + "step": 3993 + }, + { + "epoch": 0.81, + "learning_rate": 1.8184041517833243e-05, + "loss": 2.1065, + "step": 3994 + }, + { + "epoch": 0.81, + "learning_rate": 1.8183048794007445e-05, + "loss": 2.1382, + "step": 3995 + }, + { + "epoch": 0.81, + "learning_rate": 1.8182055826024823e-05, + "loss": 2.178, + "step": 3996 + }, + { + "epoch": 0.81, + "learning_rate": 1.8181062613915e-05, + "loss": 2.2202, + "step": 3997 + }, + { + "epoch": 0.81, + "learning_rate": 1.818006915770761e-05, + "loss": 2.105, + "step": 3998 + }, + { + "epoch": 0.81, + "learning_rate": 1.8179075457432295e-05, + "loss": 2.16, + "step": 3999 + }, + { + "epoch": 0.81, + "learning_rate": 1.8178081513118705e-05, + "loss": 2.1494, + "step": 4000 + }, + { + "epoch": 0.81, + "learning_rate": 1.81770873247965e-05, + "loss": 2.2093, + "step": 4001 + }, + { + "epoch": 0.81, + "learning_rate": 1.817609289249534e-05, + "loss": 2.2426, + "step": 4002 + }, + { + "epoch": 0.81, + "learning_rate": 1.8175098216244892e-05, + "loss": 2.1513, + "step": 4003 + }, + { + "epoch": 0.81, + "learning_rate": 1.8174103296074838e-05, + "loss": 2.1187, + "step": 4004 + }, + { + "epoch": 0.81, + "learning_rate": 1.817310813201486e-05, + "loss": 2.1708, + "step": 4005 + }, + { + "epoch": 0.81, + "learning_rate": 1.817211272409466e-05, + "loss": 2.1659, + "step": 4006 + }, + { + "epoch": 0.81, + "learning_rate": 1.8171117072343928e-05, + "loss": 2.075, + "step": 4007 + }, + { + "epoch": 0.81, + "learning_rate": 1.8170121176792376e-05, + "loss": 2.0979, + "step": 4008 + }, + { + "epoch": 0.81, + "learning_rate": 1.8169125037469716e-05, + "loss": 2.1784, + "step": 4009 + }, + { + "epoch": 0.81, + "learning_rate": 1.816812865440567e-05, + "loss": 2.1232, + "step": 4010 + }, + { + "epoch": 0.81, + "learning_rate": 1.816713202762997e-05, + "loss": 2.2495, + "step": 4011 + }, + { + "epoch": 0.81, + "learning_rate": 1.8166135157172345e-05, + "loss": 2.0627, + "step": 4012 + }, + { + "epoch": 0.81, + "learning_rate": 1.816513804306255e-05, + "loss": 2.1987, + "step": 4013 + }, + { + "epoch": 0.81, + "learning_rate": 1.8164140685330326e-05, + "loss": 2.1434, + "step": 4014 + }, + { + "epoch": 0.81, + "learning_rate": 1.816314308400543e-05, + "loss": 2.1848, + "step": 4015 + }, + { + "epoch": 0.82, + "learning_rate": 1.8162145239117638e-05, + "loss": 2.2142, + "step": 4016 + }, + { + "epoch": 0.82, + "learning_rate": 1.8161147150696716e-05, + "loss": 2.0984, + "step": 4017 + }, + { + "epoch": 0.82, + "learning_rate": 1.8160148818772442e-05, + "loss": 2.1439, + "step": 4018 + }, + { + "epoch": 0.82, + "learning_rate": 1.8159150243374606e-05, + "loss": 2.25, + "step": 4019 + }, + { + "epoch": 0.82, + "learning_rate": 1.8158151424533002e-05, + "loss": 2.171, + "step": 4020 + }, + { + "epoch": 0.82, + "learning_rate": 1.815715236227743e-05, + "loss": 2.2001, + "step": 4021 + }, + { + "epoch": 0.82, + "learning_rate": 1.81561530566377e-05, + "loss": 2.1416, + "step": 4022 + }, + { + "epoch": 0.82, + "learning_rate": 1.815515350764363e-05, + "loss": 2.1602, + "step": 4023 + }, + { + "epoch": 0.82, + "learning_rate": 1.815415371532504e-05, + "loss": 2.1468, + "step": 4024 + }, + { + "epoch": 0.82, + "learning_rate": 1.8153153679711762e-05, + "loss": 2.1251, + "step": 4025 + }, + { + "epoch": 0.82, + "learning_rate": 1.815215340083364e-05, + "loss": 2.0808, + "step": 4026 + }, + { + "epoch": 0.82, + "learning_rate": 1.815115287872051e-05, + "loss": 2.2095, + "step": 4027 + }, + { + "epoch": 0.82, + "learning_rate": 1.8150152113402228e-05, + "loss": 2.1501, + "step": 4028 + }, + { + "epoch": 0.82, + "learning_rate": 1.8149151104908654e-05, + "loss": 2.1077, + "step": 4029 + }, + { + "epoch": 0.82, + "learning_rate": 1.8148149853269655e-05, + "loss": 2.1812, + "step": 4030 + }, + { + "epoch": 0.82, + "learning_rate": 1.8147148358515103e-05, + "loss": 2.0975, + "step": 4031 + }, + { + "epoch": 0.82, + "learning_rate": 1.8146146620674886e-05, + "loss": 2.0825, + "step": 4032 + }, + { + "epoch": 0.82, + "learning_rate": 1.8145144639778887e-05, + "loss": 2.2103, + "step": 4033 + }, + { + "epoch": 0.82, + "learning_rate": 1.8144142415857e-05, + "loss": 2.0771, + "step": 4034 + }, + { + "epoch": 0.82, + "learning_rate": 1.8143139948939138e-05, + "loss": 2.1684, + "step": 4035 + }, + { + "epoch": 0.82, + "learning_rate": 1.8142137239055203e-05, + "loss": 2.1048, + "step": 4036 + }, + { + "epoch": 0.82, + "learning_rate": 1.8141134286235114e-05, + "loss": 2.1076, + "step": 4037 + }, + { + "epoch": 0.82, + "learning_rate": 1.81401310905088e-05, + "loss": 2.1599, + "step": 4038 + }, + { + "epoch": 0.82, + "learning_rate": 1.8139127651906183e-05, + "loss": 2.1816, + "step": 4039 + }, + { + "epoch": 0.82, + "learning_rate": 1.8138123970457213e-05, + "loss": 2.192, + "step": 4040 + }, + { + "epoch": 0.82, + "learning_rate": 1.8137120046191835e-05, + "loss": 2.174, + "step": 4041 + }, + { + "epoch": 0.82, + "learning_rate": 1.813611587914e-05, + "loss": 2.1372, + "step": 4042 + }, + { + "epoch": 0.82, + "learning_rate": 1.813511146933167e-05, + "loss": 2.163, + "step": 4043 + }, + { + "epoch": 0.82, + "learning_rate": 1.8134106816796818e-05, + "loss": 2.1262, + "step": 4044 + }, + { + "epoch": 0.82, + "learning_rate": 1.8133101921565416e-05, + "loss": 2.1676, + "step": 4045 + }, + { + "epoch": 0.82, + "learning_rate": 1.8132096783667444e-05, + "loss": 2.2338, + "step": 4046 + }, + { + "epoch": 0.82, + "learning_rate": 1.8131091403132895e-05, + "loss": 2.1447, + "step": 4047 + }, + { + "epoch": 0.82, + "learning_rate": 1.8130085779991767e-05, + "loss": 2.1725, + "step": 4048 + }, + { + "epoch": 0.82, + "learning_rate": 1.8129079914274066e-05, + "loss": 2.1805, + "step": 4049 + }, + { + "epoch": 0.82, + "learning_rate": 1.81280738060098e-05, + "loss": 2.2063, + "step": 4050 + }, + { + "epoch": 0.82, + "learning_rate": 1.8127067455228992e-05, + "loss": 2.1623, + "step": 4051 + }, + { + "epoch": 0.82, + "learning_rate": 1.8126060861961666e-05, + "loss": 2.1567, + "step": 4052 + }, + { + "epoch": 0.82, + "learning_rate": 1.8125054026237853e-05, + "loss": 2.1202, + "step": 4053 + }, + { + "epoch": 0.82, + "learning_rate": 1.8124046948087604e-05, + "loss": 2.1577, + "step": 4054 + }, + { + "epoch": 0.82, + "learning_rate": 1.8123039627540953e-05, + "loss": 2.1506, + "step": 4055 + }, + { + "epoch": 0.82, + "learning_rate": 1.8122032064627965e-05, + "loss": 2.2426, + "step": 4056 + }, + { + "epoch": 0.82, + "learning_rate": 1.8121024259378704e-05, + "loss": 2.149, + "step": 4057 + }, + { + "epoch": 0.82, + "learning_rate": 1.812001621182323e-05, + "loss": 2.1282, + "step": 4058 + }, + { + "epoch": 0.82, + "learning_rate": 1.8119007921991632e-05, + "loss": 2.1843, + "step": 4059 + }, + { + "epoch": 0.82, + "learning_rate": 1.8117999389913984e-05, + "loss": 2.1507, + "step": 4060 + }, + { + "epoch": 0.82, + "learning_rate": 1.8116990615620384e-05, + "loss": 2.1354, + "step": 4061 + }, + { + "epoch": 0.82, + "learning_rate": 1.8115981599140927e-05, + "loss": 2.157, + "step": 4062 + }, + { + "epoch": 0.82, + "learning_rate": 1.8114972340505722e-05, + "loss": 2.2475, + "step": 4063 + }, + { + "epoch": 0.82, + "learning_rate": 1.8113962839744875e-05, + "loss": 2.2129, + "step": 4064 + }, + { + "epoch": 0.83, + "learning_rate": 1.8112953096888517e-05, + "loss": 2.1722, + "step": 4065 + }, + { + "epoch": 0.83, + "learning_rate": 1.811194311196677e-05, + "loss": 2.1277, + "step": 4066 + }, + { + "epoch": 0.83, + "learning_rate": 1.8110932885009765e-05, + "loss": 2.1725, + "step": 4067 + }, + { + "epoch": 0.83, + "learning_rate": 1.8109922416047653e-05, + "loss": 2.2096, + "step": 4068 + }, + { + "epoch": 0.83, + "learning_rate": 1.8108911705110577e-05, + "loss": 2.1705, + "step": 4069 + }, + { + "epoch": 0.83, + "learning_rate": 1.8107900752228696e-05, + "loss": 2.2019, + "step": 4070 + }, + { + "epoch": 0.83, + "learning_rate": 1.810688955743217e-05, + "loss": 2.1278, + "step": 4071 + }, + { + "epoch": 0.83, + "learning_rate": 1.8105878120751176e-05, + "loss": 2.224, + "step": 4072 + }, + { + "epoch": 0.83, + "learning_rate": 1.810486644221589e-05, + "loss": 2.17, + "step": 4073 + }, + { + "epoch": 0.83, + "learning_rate": 1.8103854521856486e-05, + "loss": 2.2141, + "step": 4074 + }, + { + "epoch": 0.83, + "learning_rate": 1.8102842359703177e-05, + "loss": 2.1417, + "step": 4075 + }, + { + "epoch": 0.83, + "learning_rate": 1.810182995578615e-05, + "loss": 2.1259, + "step": 4076 + }, + { + "epoch": 0.83, + "learning_rate": 1.8100817310135612e-05, + "loss": 2.1649, + "step": 4077 + }, + { + "epoch": 0.83, + "learning_rate": 1.809980442278178e-05, + "loss": 2.1282, + "step": 4078 + }, + { + "epoch": 0.83, + "learning_rate": 1.8098791293754877e-05, + "loss": 2.137, + "step": 4079 + }, + { + "epoch": 0.83, + "learning_rate": 1.809777792308513e-05, + "loss": 2.2261, + "step": 4080 + }, + { + "epoch": 0.83, + "learning_rate": 1.8096764310802776e-05, + "loss": 2.1366, + "step": 4081 + }, + { + "epoch": 0.83, + "learning_rate": 1.8095750456938056e-05, + "loss": 2.1467, + "step": 4082 + }, + { + "epoch": 0.83, + "learning_rate": 1.809473636152122e-05, + "loss": 2.2094, + "step": 4083 + }, + { + "epoch": 0.83, + "learning_rate": 1.8093722024582522e-05, + "loss": 2.1035, + "step": 4084 + }, + { + "epoch": 0.83, + "learning_rate": 1.8092707446152236e-05, + "loss": 2.1757, + "step": 4085 + }, + { + "epoch": 0.83, + "learning_rate": 1.8091692626260626e-05, + "loss": 2.1371, + "step": 4086 + }, + { + "epoch": 0.83, + "learning_rate": 1.809067756493798e-05, + "loss": 2.1107, + "step": 4087 + }, + { + "epoch": 0.83, + "learning_rate": 1.808966226221457e-05, + "loss": 2.1389, + "step": 4088 + }, + { + "epoch": 0.83, + "learning_rate": 1.8088646718120703e-05, + "loss": 2.1738, + "step": 4089 + }, + { + "epoch": 0.83, + "learning_rate": 1.8087630932686675e-05, + "loss": 2.1677, + "step": 4090 + }, + { + "epoch": 0.83, + "learning_rate": 1.808661490594279e-05, + "loss": 2.1793, + "step": 4091 + }, + { + "epoch": 0.83, + "learning_rate": 1.8085598637919367e-05, + "loss": 2.1691, + "step": 4092 + }, + { + "epoch": 0.83, + "learning_rate": 1.808458212864673e-05, + "loss": 2.1721, + "step": 4093 + }, + { + "epoch": 0.83, + "learning_rate": 1.8083565378155205e-05, + "loss": 2.1758, + "step": 4094 + }, + { + "epoch": 0.83, + "learning_rate": 1.8082548386475126e-05, + "loss": 2.2466, + "step": 4095 + }, + { + "epoch": 0.83, + "learning_rate": 1.808153115363685e-05, + "loss": 2.1541, + "step": 4096 + }, + { + "epoch": 0.83, + "learning_rate": 1.808051367967071e-05, + "loss": 2.1441, + "step": 4097 + }, + { + "epoch": 0.83, + "learning_rate": 1.8079495964607078e-05, + "loss": 2.1809, + "step": 4098 + }, + { + "epoch": 0.83, + "learning_rate": 1.8078478008476313e-05, + "loss": 2.1558, + "step": 4099 + }, + { + "epoch": 0.83, + "learning_rate": 1.807745981130879e-05, + "loss": 2.1311, + "step": 4100 + }, + { + "epoch": 0.83, + "learning_rate": 1.8076441373134887e-05, + "loss": 2.129, + "step": 4101 + }, + { + "epoch": 0.83, + "learning_rate": 1.807542269398499e-05, + "loss": 2.1419, + "step": 4102 + }, + { + "epoch": 0.83, + "learning_rate": 1.80744037738895e-05, + "loss": 2.0925, + "step": 4103 + }, + { + "epoch": 0.83, + "learning_rate": 1.8073384612878812e-05, + "loss": 2.1491, + "step": 4104 + }, + { + "epoch": 0.83, + "learning_rate": 1.8072365210983335e-05, + "loss": 2.1162, + "step": 4105 + }, + { + "epoch": 0.83, + "learning_rate": 1.8071345568233487e-05, + "loss": 2.2019, + "step": 4106 + }, + { + "epoch": 0.83, + "learning_rate": 1.807032568465969e-05, + "loss": 2.087, + "step": 4107 + }, + { + "epoch": 0.83, + "learning_rate": 1.8069305560292373e-05, + "loss": 2.1291, + "step": 4108 + }, + { + "epoch": 0.83, + "learning_rate": 1.8068285195161975e-05, + "loss": 2.2239, + "step": 4109 + }, + { + "epoch": 0.83, + "learning_rate": 1.8067264589298945e-05, + "loss": 2.1477, + "step": 4110 + }, + { + "epoch": 0.83, + "learning_rate": 1.8066243742733726e-05, + "loss": 2.1808, + "step": 4111 + }, + { + "epoch": 0.83, + "learning_rate": 1.806522265549678e-05, + "loss": 2.2056, + "step": 4112 + }, + { + "epoch": 0.83, + "learning_rate": 1.8064201327618576e-05, + "loss": 2.0979, + "step": 4113 + }, + { + "epoch": 0.84, + "learning_rate": 1.8063179759129585e-05, + "loss": 2.1913, + "step": 4114 + }, + { + "epoch": 0.84, + "learning_rate": 1.8062157950060286e-05, + "loss": 2.0784, + "step": 4115 + }, + { + "epoch": 0.84, + "learning_rate": 1.806113590044117e-05, + "loss": 2.0945, + "step": 4116 + }, + { + "epoch": 0.84, + "learning_rate": 1.8060113610302728e-05, + "loss": 2.1119, + "step": 4117 + }, + { + "epoch": 0.84, + "learning_rate": 1.805909107967547e-05, + "loss": 2.1641, + "step": 4118 + }, + { + "epoch": 0.84, + "learning_rate": 1.805806830858989e-05, + "loss": 2.1489, + "step": 4119 + }, + { + "epoch": 0.84, + "learning_rate": 1.805704529707652e-05, + "loss": 2.1353, + "step": 4120 + }, + { + "epoch": 0.84, + "learning_rate": 1.8056022045165872e-05, + "loss": 2.1438, + "step": 4121 + }, + { + "epoch": 0.84, + "learning_rate": 1.8054998552888485e-05, + "loss": 2.1093, + "step": 4122 + }, + { + "epoch": 0.84, + "learning_rate": 1.8053974820274894e-05, + "loss": 2.1069, + "step": 4123 + }, + { + "epoch": 0.84, + "learning_rate": 1.805295084735564e-05, + "loss": 2.1827, + "step": 4124 + }, + { + "epoch": 0.84, + "learning_rate": 1.8051926634161282e-05, + "loss": 2.196, + "step": 4125 + }, + { + "epoch": 0.84, + "learning_rate": 1.8050902180722376e-05, + "loss": 2.2213, + "step": 4126 + }, + { + "epoch": 0.84, + "learning_rate": 1.8049877487069486e-05, + "loss": 2.173, + "step": 4127 + }, + { + "epoch": 0.84, + "learning_rate": 1.804885255323319e-05, + "loss": 2.2045, + "step": 4128 + }, + { + "epoch": 0.84, + "learning_rate": 1.8047827379244068e-05, + "loss": 2.1443, + "step": 4129 + }, + { + "epoch": 0.84, + "learning_rate": 1.804680196513271e-05, + "loss": 2.0782, + "step": 4130 + }, + { + "epoch": 0.84, + "learning_rate": 1.80457763109297e-05, + "loss": 2.1307, + "step": 4131 + }, + { + "epoch": 0.84, + "learning_rate": 1.8044750416665655e-05, + "loss": 2.2043, + "step": 4132 + }, + { + "epoch": 0.84, + "learning_rate": 1.8043724282371176e-05, + "loss": 2.1537, + "step": 4133 + }, + { + "epoch": 0.84, + "learning_rate": 1.804269790807688e-05, + "loss": 2.0951, + "step": 4134 + }, + { + "epoch": 0.84, + "learning_rate": 1.8041671293813393e-05, + "loss": 2.1457, + "step": 4135 + }, + { + "epoch": 0.84, + "learning_rate": 1.8040644439611348e-05, + "loss": 2.1425, + "step": 4136 + }, + { + "epoch": 0.84, + "learning_rate": 1.803961734550138e-05, + "loss": 2.1282, + "step": 4137 + }, + { + "epoch": 0.84, + "learning_rate": 1.8038590011514135e-05, + "loss": 2.1251, + "step": 4138 + }, + { + "epoch": 0.84, + "learning_rate": 1.803756243768026e-05, + "loss": 2.186, + "step": 4139 + }, + { + "epoch": 0.84, + "learning_rate": 1.8036534624030428e-05, + "loss": 2.1258, + "step": 4140 + }, + { + "epoch": 0.84, + "learning_rate": 1.8035506570595295e-05, + "loss": 2.0265, + "step": 4141 + }, + { + "epoch": 0.84, + "learning_rate": 1.803447827740554e-05, + "loss": 2.1956, + "step": 4142 + }, + { + "epoch": 0.84, + "learning_rate": 1.803344974449184e-05, + "loss": 2.1738, + "step": 4143 + }, + { + "epoch": 0.84, + "learning_rate": 1.8032420971884884e-05, + "loss": 2.0901, + "step": 4144 + }, + { + "epoch": 0.84, + "learning_rate": 1.8031391959615373e-05, + "loss": 2.1161, + "step": 4145 + }, + { + "epoch": 0.84, + "learning_rate": 1.8030362707713997e-05, + "loss": 2.1908, + "step": 4146 + }, + { + "epoch": 0.84, + "learning_rate": 1.802933321621148e-05, + "loss": 2.1273, + "step": 4147 + }, + { + "epoch": 0.84, + "learning_rate": 1.8028303485138534e-05, + "loss": 2.154, + "step": 4148 + }, + { + "epoch": 0.84, + "learning_rate": 1.8027273514525877e-05, + "loss": 2.1907, + "step": 4149 + }, + { + "epoch": 0.84, + "learning_rate": 1.8026243304404245e-05, + "loss": 2.1287, + "step": 4150 + }, + { + "epoch": 0.84, + "learning_rate": 1.8025212854804377e-05, + "loss": 2.1623, + "step": 4151 + }, + { + "epoch": 0.84, + "learning_rate": 1.802418216575702e-05, + "loss": 2.1233, + "step": 4152 + }, + { + "epoch": 0.84, + "learning_rate": 1.802315123729292e-05, + "loss": 2.0644, + "step": 4153 + }, + { + "epoch": 0.84, + "learning_rate": 1.8022120069442844e-05, + "loss": 2.124, + "step": 4154 + }, + { + "epoch": 0.84, + "learning_rate": 1.802108866223755e-05, + "loss": 2.2022, + "step": 4155 + }, + { + "epoch": 0.84, + "learning_rate": 1.8020057015707822e-05, + "loss": 2.2015, + "step": 4156 + }, + { + "epoch": 0.84, + "learning_rate": 1.801902512988444e-05, + "loss": 2.2146, + "step": 4157 + }, + { + "epoch": 0.84, + "learning_rate": 1.8017993004798184e-05, + "loss": 2.1404, + "step": 4158 + }, + { + "epoch": 0.84, + "learning_rate": 1.8016960640479854e-05, + "loss": 2.0551, + "step": 4159 + }, + { + "epoch": 0.84, + "learning_rate": 1.8015928036960254e-05, + "loss": 2.2461, + "step": 4160 + }, + { + "epoch": 0.84, + "learning_rate": 1.8014895194270194e-05, + "loss": 2.1424, + "step": 4161 + }, + { + "epoch": 0.84, + "learning_rate": 1.801386211244049e-05, + "loss": 2.1647, + "step": 4162 + }, + { + "epoch": 0.85, + "learning_rate": 1.8012828791501963e-05, + "loss": 2.2075, + "step": 4163 + }, + { + "epoch": 0.85, + "learning_rate": 1.8011795231485447e-05, + "loss": 2.1406, + "step": 4164 + }, + { + "epoch": 0.85, + "learning_rate": 1.8010761432421782e-05, + "loss": 2.1361, + "step": 4165 + }, + { + "epoch": 0.85, + "learning_rate": 1.8009727394341808e-05, + "loss": 2.1888, + "step": 4166 + }, + { + "epoch": 0.85, + "learning_rate": 1.800869311727638e-05, + "loss": 2.1213, + "step": 4167 + }, + { + "epoch": 0.85, + "learning_rate": 1.8007658601256365e-05, + "loss": 2.1674, + "step": 4168 + }, + { + "epoch": 0.85, + "learning_rate": 1.8006623846312618e-05, + "loss": 2.1483, + "step": 4169 + }, + { + "epoch": 0.85, + "learning_rate": 1.8005588852476018e-05, + "loss": 2.1617, + "step": 4170 + }, + { + "epoch": 0.85, + "learning_rate": 1.800455361977744e-05, + "loss": 2.1168, + "step": 4171 + }, + { + "epoch": 0.85, + "learning_rate": 1.800351814824779e-05, + "loss": 2.1631, + "step": 4172 + }, + { + "epoch": 0.85, + "learning_rate": 1.8002482437917945e-05, + "loss": 2.1163, + "step": 4173 + }, + { + "epoch": 0.85, + "learning_rate": 1.8001446488818813e-05, + "loss": 2.1197, + "step": 4174 + }, + { + "epoch": 0.85, + "learning_rate": 1.8000410300981305e-05, + "loss": 2.1602, + "step": 4175 + }, + { + "epoch": 0.85, + "learning_rate": 1.7999373874436335e-05, + "loss": 2.1297, + "step": 4176 + }, + { + "epoch": 0.85, + "learning_rate": 1.7998337209214832e-05, + "loss": 2.1582, + "step": 4177 + }, + { + "epoch": 0.85, + "learning_rate": 1.799730030534772e-05, + "loss": 2.1587, + "step": 4178 + }, + { + "epoch": 0.85, + "learning_rate": 1.7996263162865945e-05, + "loss": 2.1232, + "step": 4179 + }, + { + "epoch": 0.85, + "learning_rate": 1.799522578180044e-05, + "loss": 2.1417, + "step": 4180 + }, + { + "epoch": 0.85, + "learning_rate": 1.7994188162182173e-05, + "loss": 2.165, + "step": 4181 + }, + { + "epoch": 0.85, + "learning_rate": 1.799315030404209e-05, + "loss": 2.1917, + "step": 4182 + }, + { + "epoch": 0.85, + "learning_rate": 1.799211220741116e-05, + "loss": 2.1089, + "step": 4183 + }, + { + "epoch": 0.85, + "learning_rate": 1.7991073872320368e-05, + "loss": 2.141, + "step": 4184 + }, + { + "epoch": 0.85, + "learning_rate": 1.7990035298800682e-05, + "loss": 2.1617, + "step": 4185 + }, + { + "epoch": 0.85, + "learning_rate": 1.7988996486883092e-05, + "loss": 2.1229, + "step": 4186 + }, + { + "epoch": 0.85, + "learning_rate": 1.7987957436598596e-05, + "loss": 2.1962, + "step": 4187 + }, + { + "epoch": 0.85, + "learning_rate": 1.7986918147978196e-05, + "loss": 2.1488, + "step": 4188 + }, + { + "epoch": 0.85, + "learning_rate": 1.79858786210529e-05, + "loss": 2.2012, + "step": 4189 + }, + { + "epoch": 0.85, + "learning_rate": 1.798483885585372e-05, + "loss": 2.2323, + "step": 4190 + }, + { + "epoch": 0.85, + "learning_rate": 1.7983798852411688e-05, + "loss": 2.1869, + "step": 4191 + }, + { + "epoch": 0.85, + "learning_rate": 1.7982758610757828e-05, + "loss": 2.1892, + "step": 4192 + }, + { + "epoch": 0.85, + "learning_rate": 1.7981718130923176e-05, + "loss": 2.2127, + "step": 4193 + }, + { + "epoch": 0.85, + "learning_rate": 1.7980677412938786e-05, + "loss": 2.0956, + "step": 4194 + }, + { + "epoch": 0.85, + "learning_rate": 1.7979636456835702e-05, + "loss": 2.2154, + "step": 4195 + }, + { + "epoch": 0.85, + "learning_rate": 1.7978595262644982e-05, + "loss": 2.185, + "step": 4196 + }, + { + "epoch": 0.85, + "learning_rate": 1.7977553830397698e-05, + "loss": 2.1595, + "step": 4197 + }, + { + "epoch": 0.85, + "learning_rate": 1.797651216012492e-05, + "loss": 2.1774, + "step": 4198 + }, + { + "epoch": 0.85, + "learning_rate": 1.797547025185773e-05, + "loss": 2.1524, + "step": 4199 + }, + { + "epoch": 0.85, + "learning_rate": 1.797442810562721e-05, + "loss": 2.1568, + "step": 4200 + }, + { + "epoch": 0.85, + "learning_rate": 1.797338572146446e-05, + "loss": 2.1096, + "step": 4201 + }, + { + "epoch": 0.85, + "learning_rate": 1.797234309940058e-05, + "loss": 2.126, + "step": 4202 + }, + { + "epoch": 0.85, + "learning_rate": 1.7971300239466675e-05, + "loss": 2.1712, + "step": 4203 + }, + { + "epoch": 0.85, + "learning_rate": 1.7970257141693866e-05, + "loss": 2.1506, + "step": 4204 + }, + { + "epoch": 0.85, + "learning_rate": 1.7969213806113276e-05, + "loss": 2.1476, + "step": 4205 + }, + { + "epoch": 0.85, + "learning_rate": 1.7968170232756028e-05, + "loss": 2.1073, + "step": 4206 + }, + { + "epoch": 0.85, + "learning_rate": 1.7967126421653266e-05, + "loss": 2.1309, + "step": 4207 + }, + { + "epoch": 0.85, + "learning_rate": 1.7966082372836132e-05, + "loss": 2.0885, + "step": 4208 + }, + { + "epoch": 0.85, + "learning_rate": 1.7965038086335777e-05, + "loss": 2.1121, + "step": 4209 + }, + { + "epoch": 0.85, + "learning_rate": 1.796399356218336e-05, + "loss": 2.1389, + "step": 4210 + }, + { + "epoch": 0.85, + "learning_rate": 1.7962948800410043e-05, + "loss": 2.1832, + "step": 4211 + }, + { + "epoch": 0.85, + "learning_rate": 1.7961903801047003e-05, + "loss": 2.2442, + "step": 4212 + }, + { + "epoch": 0.86, + "learning_rate": 1.7960858564125418e-05, + "loss": 2.1748, + "step": 4213 + }, + { + "epoch": 0.86, + "learning_rate": 1.795981308967647e-05, + "loss": 2.1467, + "step": 4214 + }, + { + "epoch": 0.86, + "learning_rate": 1.795876737773136e-05, + "loss": 2.143, + "step": 4215 + }, + { + "epoch": 0.86, + "learning_rate": 1.7957721428321285e-05, + "loss": 2.0822, + "step": 4216 + }, + { + "epoch": 0.86, + "learning_rate": 1.7956675241477454e-05, + "loss": 2.1575, + "step": 4217 + }, + { + "epoch": 0.86, + "learning_rate": 1.795562881723108e-05, + "loss": 2.1581, + "step": 4218 + }, + { + "epoch": 0.86, + "learning_rate": 1.7954582155613386e-05, + "loss": 2.162, + "step": 4219 + }, + { + "epoch": 0.86, + "learning_rate": 1.7953535256655607e-05, + "loss": 2.1006, + "step": 4220 + }, + { + "epoch": 0.86, + "learning_rate": 1.7952488120388967e-05, + "loss": 2.1216, + "step": 4221 + }, + { + "epoch": 0.86, + "learning_rate": 1.7951440746844723e-05, + "loss": 2.1567, + "step": 4222 + }, + { + "epoch": 0.86, + "learning_rate": 1.7950393136054112e-05, + "loss": 2.1738, + "step": 4223 + }, + { + "epoch": 0.86, + "learning_rate": 1.79493452880484e-05, + "loss": 2.1484, + "step": 4224 + }, + { + "epoch": 0.86, + "learning_rate": 1.7948297202858852e-05, + "loss": 2.1443, + "step": 4225 + }, + { + "epoch": 0.86, + "learning_rate": 1.7947248880516737e-05, + "loss": 2.0444, + "step": 4226 + }, + { + "epoch": 0.86, + "learning_rate": 1.7946200321053332e-05, + "loss": 2.1094, + "step": 4227 + }, + { + "epoch": 0.86, + "learning_rate": 1.7945151524499923e-05, + "loss": 2.1384, + "step": 4228 + }, + { + "epoch": 0.86, + "learning_rate": 1.794410249088781e-05, + "loss": 2.2222, + "step": 4229 + }, + { + "epoch": 0.86, + "learning_rate": 1.7943053220248284e-05, + "loss": 2.1905, + "step": 4230 + }, + { + "epoch": 0.86, + "learning_rate": 1.7942003712612654e-05, + "loss": 2.18, + "step": 4231 + }, + { + "epoch": 0.86, + "learning_rate": 1.7940953968012242e-05, + "loss": 2.2076, + "step": 4232 + }, + { + "epoch": 0.86, + "learning_rate": 1.7939903986478354e-05, + "loss": 2.1084, + "step": 4233 + }, + { + "epoch": 0.86, + "learning_rate": 1.793885376804233e-05, + "loss": 2.1286, + "step": 4234 + }, + { + "epoch": 0.86, + "learning_rate": 1.7937803312735506e-05, + "loss": 2.203, + "step": 4235 + }, + { + "epoch": 0.86, + "learning_rate": 1.7936752620589215e-05, + "loss": 2.1781, + "step": 4236 + }, + { + "epoch": 0.86, + "learning_rate": 1.7935701691634813e-05, + "loss": 2.1637, + "step": 4237 + }, + { + "epoch": 0.86, + "learning_rate": 1.7934650525903658e-05, + "loss": 2.1781, + "step": 4238 + }, + { + "epoch": 0.86, + "learning_rate": 1.793359912342711e-05, + "loss": 2.1227, + "step": 4239 + }, + { + "epoch": 0.86, + "learning_rate": 1.793254748423654e-05, + "loss": 2.1639, + "step": 4240 + }, + { + "epoch": 0.86, + "learning_rate": 1.7931495608363324e-05, + "loss": 2.1165, + "step": 4241 + }, + { + "epoch": 0.86, + "learning_rate": 1.7930443495838848e-05, + "loss": 2.1236, + "step": 4242 + }, + { + "epoch": 0.86, + "learning_rate": 1.792939114669451e-05, + "loss": 2.1086, + "step": 4243 + }, + { + "epoch": 0.86, + "learning_rate": 1.7928338560961698e-05, + "loss": 2.214, + "step": 4244 + }, + { + "epoch": 0.86, + "learning_rate": 1.7927285738671825e-05, + "loss": 2.1264, + "step": 4245 + }, + { + "epoch": 0.86, + "learning_rate": 1.7926232679856303e-05, + "loss": 2.2263, + "step": 4246 + }, + { + "epoch": 0.86, + "learning_rate": 1.792517938454655e-05, + "loss": 2.1472, + "step": 4247 + }, + { + "epoch": 0.86, + "learning_rate": 1.7924125852773996e-05, + "loss": 2.1428, + "step": 4248 + }, + { + "epoch": 0.86, + "learning_rate": 1.792307208457007e-05, + "loss": 2.1232, + "step": 4249 + }, + { + "epoch": 0.86, + "learning_rate": 1.792201807996622e-05, + "loss": 2.1209, + "step": 4250 + }, + { + "epoch": 0.86, + "learning_rate": 1.792096383899389e-05, + "loss": 2.1225, + "step": 4251 + }, + { + "epoch": 0.86, + "learning_rate": 1.7919909361684534e-05, + "loss": 2.0844, + "step": 4252 + }, + { + "epoch": 0.86, + "learning_rate": 1.791885464806962e-05, + "loss": 2.1615, + "step": 4253 + }, + { + "epoch": 0.86, + "learning_rate": 1.7917799698180612e-05, + "loss": 2.1751, + "step": 4254 + }, + { + "epoch": 0.86, + "learning_rate": 1.791674451204899e-05, + "loss": 2.169, + "step": 4255 + }, + { + "epoch": 0.86, + "learning_rate": 1.7915689089706232e-05, + "loss": 2.1319, + "step": 4256 + }, + { + "epoch": 0.86, + "learning_rate": 1.7914633431183836e-05, + "loss": 2.173, + "step": 4257 + }, + { + "epoch": 0.86, + "learning_rate": 1.7913577536513296e-05, + "loss": 2.1685, + "step": 4258 + }, + { + "epoch": 0.86, + "learning_rate": 1.7912521405726114e-05, + "loss": 2.0543, + "step": 4259 + }, + { + "epoch": 0.86, + "learning_rate": 1.7911465038853805e-05, + "loss": 2.1026, + "step": 4260 + }, + { + "epoch": 0.86, + "learning_rate": 1.7910408435927887e-05, + "loss": 2.1242, + "step": 4261 + }, + { + "epoch": 0.87, + "learning_rate": 1.7909351596979884e-05, + "loss": 2.1833, + "step": 4262 + }, + { + "epoch": 0.87, + "learning_rate": 1.7908294522041333e-05, + "loss": 2.1977, + "step": 4263 + }, + { + "epoch": 0.87, + "learning_rate": 1.790723721114377e-05, + "loss": 2.1665, + "step": 4264 + }, + { + "epoch": 0.87, + "learning_rate": 1.7906179664318742e-05, + "loss": 2.1928, + "step": 4265 + }, + { + "epoch": 0.87, + "learning_rate": 1.790512188159781e-05, + "loss": 2.1868, + "step": 4266 + }, + { + "epoch": 0.87, + "learning_rate": 1.7904063863012524e-05, + "loss": 2.1087, + "step": 4267 + }, + { + "epoch": 0.87, + "learning_rate": 1.7903005608594458e-05, + "loss": 2.1477, + "step": 4268 + }, + { + "epoch": 0.87, + "learning_rate": 1.7901947118375186e-05, + "loss": 2.1821, + "step": 4269 + }, + { + "epoch": 0.87, + "learning_rate": 1.790088839238629e-05, + "loss": 2.2004, + "step": 4270 + }, + { + "epoch": 0.87, + "learning_rate": 1.789982943065936e-05, + "loss": 2.2368, + "step": 4271 + }, + { + "epoch": 0.87, + "learning_rate": 1.789877023322599e-05, + "loss": 2.1051, + "step": 4272 + }, + { + "epoch": 0.87, + "learning_rate": 1.7897710800117785e-05, + "loss": 2.1693, + "step": 4273 + }, + { + "epoch": 0.87, + "learning_rate": 1.7896651131366357e-05, + "loss": 2.1758, + "step": 4274 + }, + { + "epoch": 0.87, + "learning_rate": 1.7895591227003316e-05, + "loss": 2.1867, + "step": 4275 + }, + { + "epoch": 0.87, + "learning_rate": 1.7894531087060297e-05, + "loss": 2.0315, + "step": 4276 + }, + { + "epoch": 0.87, + "learning_rate": 1.7893470711568918e-05, + "loss": 2.1364, + "step": 4277 + }, + { + "epoch": 0.87, + "learning_rate": 1.7892410100560835e-05, + "loss": 2.1915, + "step": 4278 + }, + { + "epoch": 0.87, + "learning_rate": 1.7891349254067675e-05, + "loss": 2.1594, + "step": 4279 + }, + { + "epoch": 0.87, + "learning_rate": 1.78902881721211e-05, + "loss": 2.1464, + "step": 4280 + }, + { + "epoch": 0.87, + "learning_rate": 1.788922685475277e-05, + "loss": 2.162, + "step": 4281 + }, + { + "epoch": 0.87, + "learning_rate": 1.7888165301994348e-05, + "loss": 2.1273, + "step": 4282 + }, + { + "epoch": 0.87, + "learning_rate": 1.788710351387751e-05, + "loss": 2.1802, + "step": 4283 + }, + { + "epoch": 0.87, + "learning_rate": 1.788604149043394e-05, + "loss": 2.1913, + "step": 4284 + }, + { + "epoch": 0.87, + "learning_rate": 1.788497923169531e-05, + "loss": 2.2527, + "step": 4285 + }, + { + "epoch": 0.87, + "learning_rate": 1.7883916737693337e-05, + "loss": 2.1724, + "step": 4286 + }, + { + "epoch": 0.87, + "learning_rate": 1.7882854008459703e-05, + "loss": 2.1949, + "step": 4287 + }, + { + "epoch": 0.87, + "learning_rate": 1.7881791044026127e-05, + "loss": 2.1662, + "step": 4288 + }, + { + "epoch": 0.87, + "learning_rate": 1.788072784442432e-05, + "loss": 2.1214, + "step": 4289 + }, + { + "epoch": 0.87, + "learning_rate": 1.7879664409686007e-05, + "loss": 2.0448, + "step": 4290 + }, + { + "epoch": 0.87, + "learning_rate": 1.787860073984292e-05, + "loss": 2.1868, + "step": 4291 + }, + { + "epoch": 0.87, + "learning_rate": 1.787753683492679e-05, + "loss": 2.1683, + "step": 4292 + }, + { + "epoch": 0.87, + "learning_rate": 1.787647269496937e-05, + "loss": 2.1835, + "step": 4293 + }, + { + "epoch": 0.87, + "learning_rate": 1.7875408320002393e-05, + "loss": 2.2167, + "step": 4294 + }, + { + "epoch": 0.87, + "learning_rate": 1.7874343710057633e-05, + "loss": 2.1555, + "step": 4295 + }, + { + "epoch": 0.87, + "learning_rate": 1.7873278865166852e-05, + "loss": 2.1318, + "step": 4296 + }, + { + "epoch": 0.87, + "learning_rate": 1.7872213785361814e-05, + "loss": 2.0833, + "step": 4297 + }, + { + "epoch": 0.87, + "learning_rate": 1.7871148470674308e-05, + "loss": 2.1291, + "step": 4298 + }, + { + "epoch": 0.87, + "learning_rate": 1.787008292113611e-05, + "loss": 2.1759, + "step": 4299 + }, + { + "epoch": 0.87, + "learning_rate": 1.786901713677902e-05, + "loss": 2.1878, + "step": 4300 + }, + { + "epoch": 0.87, + "learning_rate": 1.7867951117634833e-05, + "loss": 2.1413, + "step": 4301 + }, + { + "epoch": 0.87, + "learning_rate": 1.7866884863735356e-05, + "loss": 2.1576, + "step": 4302 + }, + { + "epoch": 0.87, + "learning_rate": 1.786581837511241e-05, + "loss": 2.1114, + "step": 4303 + }, + { + "epoch": 0.87, + "learning_rate": 1.7864751651797804e-05, + "loss": 2.0968, + "step": 4304 + }, + { + "epoch": 0.87, + "learning_rate": 1.7863684693823375e-05, + "loss": 2.1118, + "step": 4305 + }, + { + "epoch": 0.87, + "learning_rate": 1.7862617501220953e-05, + "loss": 2.125, + "step": 4306 + }, + { + "epoch": 0.87, + "learning_rate": 1.786155007402238e-05, + "loss": 2.1567, + "step": 4307 + }, + { + "epoch": 0.87, + "learning_rate": 1.7860482412259508e-05, + "loss": 2.1085, + "step": 4308 + }, + { + "epoch": 0.87, + "learning_rate": 1.785941451596419e-05, + "loss": 2.1574, + "step": 4309 + }, + { + "epoch": 0.87, + "learning_rate": 1.7858346385168287e-05, + "loss": 2.1041, + "step": 4310 + }, + { + "epoch": 0.88, + "learning_rate": 1.7857278019903674e-05, + "loss": 2.1173, + "step": 4311 + }, + { + "epoch": 0.88, + "learning_rate": 1.785620942020222e-05, + "loss": 2.1633, + "step": 4312 + }, + { + "epoch": 0.88, + "learning_rate": 1.785514058609582e-05, + "loss": 2.1373, + "step": 4313 + }, + { + "epoch": 0.88, + "learning_rate": 1.7854071517616355e-05, + "loss": 2.0909, + "step": 4314 + }, + { + "epoch": 0.88, + "learning_rate": 1.7853002214795726e-05, + "loss": 2.1746, + "step": 4315 + }, + { + "epoch": 0.88, + "learning_rate": 1.7851932677665838e-05, + "loss": 2.098, + "step": 4316 + }, + { + "epoch": 0.88, + "learning_rate": 1.78508629062586e-05, + "loss": 2.117, + "step": 4317 + }, + { + "epoch": 0.88, + "learning_rate": 1.7849792900605936e-05, + "loss": 2.1132, + "step": 4318 + }, + { + "epoch": 0.88, + "learning_rate": 1.7848722660739764e-05, + "loss": 2.0583, + "step": 4319 + }, + { + "epoch": 0.88, + "learning_rate": 1.7847652186692025e-05, + "loss": 2.2538, + "step": 4320 + }, + { + "epoch": 0.88, + "learning_rate": 1.7846581478494656e-05, + "loss": 2.0673, + "step": 4321 + }, + { + "epoch": 0.88, + "learning_rate": 1.78455105361796e-05, + "loss": 2.1847, + "step": 4322 + }, + { + "epoch": 0.88, + "learning_rate": 1.784443935977881e-05, + "loss": 2.1262, + "step": 4323 + }, + { + "epoch": 0.88, + "learning_rate": 1.7843367949324253e-05, + "loss": 2.1797, + "step": 4324 + }, + { + "epoch": 0.88, + "learning_rate": 1.7842296304847892e-05, + "loss": 2.1339, + "step": 4325 + }, + { + "epoch": 0.88, + "learning_rate": 1.78412244263817e-05, + "loss": 2.151, + "step": 4326 + }, + { + "epoch": 0.88, + "learning_rate": 1.784015231395767e-05, + "loss": 2.1634, + "step": 4327 + }, + { + "epoch": 0.88, + "learning_rate": 1.783907996760777e-05, + "loss": 2.2154, + "step": 4328 + }, + { + "epoch": 0.88, + "learning_rate": 1.7838007387364017e-05, + "loss": 2.1409, + "step": 4329 + }, + { + "epoch": 0.88, + "learning_rate": 1.78369345732584e-05, + "loss": 2.1052, + "step": 4330 + }, + { + "epoch": 0.88, + "learning_rate": 1.7835861525322932e-05, + "loss": 2.1828, + "step": 4331 + }, + { + "epoch": 0.88, + "learning_rate": 1.7834788243589628e-05, + "loss": 2.1404, + "step": 4332 + }, + { + "epoch": 0.88, + "learning_rate": 1.7833714728090514e-05, + "loss": 2.1726, + "step": 4333 + }, + { + "epoch": 0.88, + "learning_rate": 1.7832640978857622e-05, + "loss": 2.197, + "step": 4334 + }, + { + "epoch": 0.88, + "learning_rate": 1.7831566995922983e-05, + "loss": 2.1697, + "step": 4335 + }, + { + "epoch": 0.88, + "learning_rate": 1.7830492779318646e-05, + "loss": 2.1616, + "step": 4336 + }, + { + "epoch": 0.88, + "learning_rate": 1.7829418329076662e-05, + "loss": 2.1659, + "step": 4337 + }, + { + "epoch": 0.88, + "learning_rate": 1.7828343645229087e-05, + "loss": 2.0925, + "step": 4338 + }, + { + "epoch": 0.88, + "learning_rate": 1.7827268727807988e-05, + "loss": 2.1311, + "step": 4339 + }, + { + "epoch": 0.88, + "learning_rate": 1.782619357684544e-05, + "loss": 2.1536, + "step": 4340 + }, + { + "epoch": 0.88, + "learning_rate": 1.7825118192373518e-05, + "loss": 2.1653, + "step": 4341 + }, + { + "epoch": 0.88, + "learning_rate": 1.7824042574424306e-05, + "loss": 2.1385, + "step": 4342 + }, + { + "epoch": 0.88, + "learning_rate": 1.7822966723029904e-05, + "loss": 2.1217, + "step": 4343 + }, + { + "epoch": 0.88, + "learning_rate": 1.7821890638222408e-05, + "loss": 2.1489, + "step": 4344 + }, + { + "epoch": 0.88, + "learning_rate": 1.7820814320033925e-05, + "loss": 2.1021, + "step": 4345 + }, + { + "epoch": 0.88, + "learning_rate": 1.781973776849657e-05, + "loss": 2.1286, + "step": 4346 + }, + { + "epoch": 0.88, + "learning_rate": 1.781866098364246e-05, + "loss": 2.1688, + "step": 4347 + }, + { + "epoch": 0.88, + "learning_rate": 1.781758396550373e-05, + "loss": 2.1437, + "step": 4348 + }, + { + "epoch": 0.88, + "learning_rate": 1.7816506714112513e-05, + "loss": 2.1503, + "step": 4349 + }, + { + "epoch": 0.88, + "learning_rate": 1.7815429229500946e-05, + "loss": 2.1499, + "step": 4350 + }, + { + "epoch": 0.88, + "learning_rate": 1.781435151170118e-05, + "loss": 2.1426, + "step": 4351 + }, + { + "epoch": 0.88, + "learning_rate": 1.7813273560745373e-05, + "loss": 2.1864, + "step": 4352 + }, + { + "epoch": 0.88, + "learning_rate": 1.7812195376665687e-05, + "loss": 2.1587, + "step": 4353 + }, + { + "epoch": 0.88, + "learning_rate": 1.781111695949429e-05, + "loss": 2.1444, + "step": 4354 + }, + { + "epoch": 0.88, + "learning_rate": 1.7810038309263362e-05, + "loss": 2.0806, + "step": 4355 + }, + { + "epoch": 0.88, + "learning_rate": 1.7808959426005083e-05, + "loss": 2.0925, + "step": 4356 + }, + { + "epoch": 0.88, + "learning_rate": 1.7807880309751646e-05, + "loss": 2.1779, + "step": 4357 + }, + { + "epoch": 0.88, + "learning_rate": 1.7806800960535247e-05, + "loss": 2.1607, + "step": 4358 + }, + { + "epoch": 0.88, + "learning_rate": 1.780572137838809e-05, + "loss": 2.1272, + "step": 4359 + }, + { + "epoch": 0.89, + "learning_rate": 1.7804641563342383e-05, + "loss": 2.1522, + "step": 4360 + }, + { + "epoch": 0.89, + "learning_rate": 1.7803561515430353e-05, + "loss": 2.2142, + "step": 4361 + }, + { + "epoch": 0.89, + "learning_rate": 1.7802481234684223e-05, + "loss": 2.1492, + "step": 4362 + }, + { + "epoch": 0.89, + "learning_rate": 1.780140072113622e-05, + "loss": 2.1834, + "step": 4363 + }, + { + "epoch": 0.89, + "learning_rate": 1.7800319974818586e-05, + "loss": 2.1673, + "step": 4364 + }, + { + "epoch": 0.89, + "learning_rate": 1.779923899576357e-05, + "loss": 2.1766, + "step": 4365 + }, + { + "epoch": 0.89, + "learning_rate": 1.7798157784003417e-05, + "loss": 2.1063, + "step": 4366 + }, + { + "epoch": 0.89, + "learning_rate": 1.7797076339570396e-05, + "loss": 2.1309, + "step": 4367 + }, + { + "epoch": 0.89, + "learning_rate": 1.7795994662496768e-05, + "loss": 2.15, + "step": 4368 + }, + { + "epoch": 0.89, + "learning_rate": 1.7794912752814814e-05, + "loss": 2.0168, + "step": 4369 + }, + { + "epoch": 0.89, + "learning_rate": 1.779383061055681e-05, + "loss": 2.1661, + "step": 4370 + }, + { + "epoch": 0.89, + "learning_rate": 1.7792748235755037e-05, + "loss": 2.1788, + "step": 4371 + }, + { + "epoch": 0.89, + "learning_rate": 1.77916656284418e-05, + "loss": 2.1448, + "step": 4372 + }, + { + "epoch": 0.89, + "learning_rate": 1.77905827886494e-05, + "loss": 2.2088, + "step": 4373 + }, + { + "epoch": 0.89, + "learning_rate": 1.778949971641014e-05, + "loss": 2.2544, + "step": 4374 + }, + { + "epoch": 0.89, + "learning_rate": 1.778841641175634e-05, + "loss": 2.1031, + "step": 4375 + }, + { + "epoch": 0.89, + "learning_rate": 1.778733287472032e-05, + "loss": 2.2276, + "step": 4376 + }, + { + "epoch": 0.89, + "learning_rate": 1.7786249105334405e-05, + "loss": 2.1918, + "step": 4377 + }, + { + "epoch": 0.89, + "learning_rate": 1.7785165103630945e-05, + "loss": 2.0993, + "step": 4378 + }, + { + "epoch": 0.89, + "learning_rate": 1.778408086964227e-05, + "loss": 2.163, + "step": 4379 + }, + { + "epoch": 0.89, + "learning_rate": 1.7782996403400737e-05, + "loss": 2.1206, + "step": 4380 + }, + { + "epoch": 0.89, + "learning_rate": 1.7781911704938698e-05, + "loss": 2.0833, + "step": 4381 + }, + { + "epoch": 0.89, + "learning_rate": 1.7780826774288523e-05, + "loss": 2.1244, + "step": 4382 + }, + { + "epoch": 0.89, + "learning_rate": 1.7779741611482575e-05, + "loss": 2.1726, + "step": 4383 + }, + { + "epoch": 0.89, + "learning_rate": 1.7778656216553242e-05, + "loss": 2.2068, + "step": 4384 + }, + { + "epoch": 0.89, + "learning_rate": 1.77775705895329e-05, + "loss": 2.1742, + "step": 4385 + }, + { + "epoch": 0.89, + "learning_rate": 1.777648473045395e-05, + "loss": 2.2498, + "step": 4386 + }, + { + "epoch": 0.89, + "learning_rate": 1.7775398639348784e-05, + "loss": 2.1521, + "step": 4387 + }, + { + "epoch": 0.89, + "learning_rate": 1.7774312316249807e-05, + "loss": 2.1117, + "step": 4388 + }, + { + "epoch": 0.89, + "learning_rate": 1.777322576118943e-05, + "loss": 2.1884, + "step": 4389 + }, + { + "epoch": 0.89, + "learning_rate": 1.777213897420008e-05, + "loss": 2.088, + "step": 4390 + }, + { + "epoch": 0.89, + "learning_rate": 1.777105195531418e-05, + "loss": 2.0322, + "step": 4391 + }, + { + "epoch": 0.89, + "learning_rate": 1.7769964704564156e-05, + "loss": 2.2066, + "step": 4392 + }, + { + "epoch": 0.89, + "learning_rate": 1.7768877221982462e-05, + "loss": 2.146, + "step": 4393 + }, + { + "epoch": 0.89, + "learning_rate": 1.7767789507601535e-05, + "loss": 2.1956, + "step": 4394 + }, + { + "epoch": 0.89, + "learning_rate": 1.776670156145383e-05, + "loss": 2.1518, + "step": 4395 + }, + { + "epoch": 0.89, + "learning_rate": 1.7765613383571813e-05, + "loss": 2.2137, + "step": 4396 + }, + { + "epoch": 0.89, + "learning_rate": 1.7764524973987944e-05, + "loss": 2.162, + "step": 4397 + }, + { + "epoch": 0.89, + "learning_rate": 1.7763436332734706e-05, + "loss": 2.1955, + "step": 4398 + }, + { + "epoch": 0.89, + "learning_rate": 1.7762347459844574e-05, + "loss": 2.1836, + "step": 4399 + }, + { + "epoch": 0.89, + "learning_rate": 1.776125835535004e-05, + "loss": 2.1883, + "step": 4400 + }, + { + "epoch": 0.89, + "learning_rate": 1.77601690192836e-05, + "loss": 2.1327, + "step": 4401 + }, + { + "epoch": 0.89, + "learning_rate": 1.7759079451677758e-05, + "loss": 2.1663, + "step": 4402 + }, + { + "epoch": 0.89, + "learning_rate": 1.7757989652565016e-05, + "loss": 2.2197, + "step": 4403 + }, + { + "epoch": 0.89, + "learning_rate": 1.77568996219779e-05, + "loss": 2.1194, + "step": 4404 + }, + { + "epoch": 0.89, + "learning_rate": 1.7755809359948923e-05, + "loss": 2.0754, + "step": 4405 + }, + { + "epoch": 0.89, + "learning_rate": 1.7754718866510623e-05, + "loss": 2.1883, + "step": 4406 + }, + { + "epoch": 0.89, + "learning_rate": 1.7753628141695533e-05, + "loss": 2.2114, + "step": 4407 + }, + { + "epoch": 0.89, + "learning_rate": 1.77525371855362e-05, + "loss": 2.197, + "step": 4408 + }, + { + "epoch": 0.89, + "learning_rate": 1.775144599806517e-05, + "loss": 2.1855, + "step": 4409 + }, + { + "epoch": 0.9, + "learning_rate": 1.7750354579315004e-05, + "loss": 2.2406, + "step": 4410 + }, + { + "epoch": 0.9, + "learning_rate": 1.7749262929318267e-05, + "loss": 2.1594, + "step": 4411 + }, + { + "epoch": 0.9, + "learning_rate": 1.774817104810753e-05, + "loss": 2.2019, + "step": 4412 + }, + { + "epoch": 0.9, + "learning_rate": 1.7747078935715367e-05, + "loss": 2.1903, + "step": 4413 + }, + { + "epoch": 0.9, + "learning_rate": 1.774598659217437e-05, + "loss": 2.1287, + "step": 4414 + }, + { + "epoch": 0.9, + "learning_rate": 1.774489401751713e-05, + "loss": 2.1325, + "step": 4415 + }, + { + "epoch": 0.9, + "learning_rate": 1.7743801211776242e-05, + "loss": 2.1562, + "step": 4416 + }, + { + "epoch": 0.9, + "learning_rate": 1.7742708174984312e-05, + "loss": 2.1232, + "step": 4417 + }, + { + "epoch": 0.9, + "learning_rate": 1.7741614907173955e-05, + "loss": 2.1831, + "step": 4418 + }, + { + "epoch": 0.9, + "learning_rate": 1.7740521408377794e-05, + "loss": 2.1225, + "step": 4419 + }, + { + "epoch": 0.9, + "learning_rate": 1.7739427678628454e-05, + "loss": 2.2584, + "step": 4420 + }, + { + "epoch": 0.9, + "learning_rate": 1.7738333717958563e-05, + "loss": 2.1596, + "step": 4421 + }, + { + "epoch": 0.9, + "learning_rate": 1.773723952640077e-05, + "loss": 2.1652, + "step": 4422 + }, + { + "epoch": 0.9, + "learning_rate": 1.773614510398771e-05, + "loss": 2.1313, + "step": 4423 + }, + { + "epoch": 0.9, + "learning_rate": 1.7735050450752048e-05, + "loss": 2.1052, + "step": 4424 + }, + { + "epoch": 0.9, + "learning_rate": 1.773395556672644e-05, + "loss": 2.0763, + "step": 4425 + }, + { + "epoch": 0.9, + "learning_rate": 1.7732860451943558e-05, + "loss": 2.1946, + "step": 4426 + }, + { + "epoch": 0.9, + "learning_rate": 1.7731765106436073e-05, + "loss": 2.114, + "step": 4427 + }, + { + "epoch": 0.9, + "learning_rate": 1.773066953023667e-05, + "loss": 2.2144, + "step": 4428 + }, + { + "epoch": 0.9, + "learning_rate": 1.7729573723378036e-05, + "loss": 2.0502, + "step": 4429 + }, + { + "epoch": 0.9, + "learning_rate": 1.7728477685892866e-05, + "loss": 2.1415, + "step": 4430 + }, + { + "epoch": 0.9, + "learning_rate": 1.7727381417813862e-05, + "loss": 2.1801, + "step": 4431 + }, + { + "epoch": 0.9, + "learning_rate": 1.7726284919173733e-05, + "loss": 2.1396, + "step": 4432 + }, + { + "epoch": 0.9, + "learning_rate": 1.7725188190005196e-05, + "loss": 2.1363, + "step": 4433 + }, + { + "epoch": 0.9, + "learning_rate": 1.7724091230340975e-05, + "loss": 2.1686, + "step": 4434 + }, + { + "epoch": 0.9, + "learning_rate": 1.77229940402138e-05, + "loss": 2.1524, + "step": 4435 + }, + { + "epoch": 0.9, + "learning_rate": 1.77218966196564e-05, + "loss": 2.1093, + "step": 4436 + }, + { + "epoch": 0.9, + "learning_rate": 1.7720798968701535e-05, + "loss": 2.1255, + "step": 4437 + }, + { + "epoch": 0.9, + "learning_rate": 1.7719701087381943e-05, + "loss": 2.1246, + "step": 4438 + }, + { + "epoch": 0.9, + "learning_rate": 1.771860297573038e-05, + "loss": 2.0794, + "step": 4439 + }, + { + "epoch": 0.9, + "learning_rate": 1.7717504633779618e-05, + "loss": 2.1719, + "step": 4440 + }, + { + "epoch": 0.9, + "learning_rate": 1.7716406061562424e-05, + "loss": 2.1164, + "step": 4441 + }, + { + "epoch": 0.9, + "learning_rate": 1.7715307259111575e-05, + "loss": 2.2739, + "step": 4442 + }, + { + "epoch": 0.9, + "learning_rate": 1.7714208226459858e-05, + "loss": 2.1272, + "step": 4443 + }, + { + "epoch": 0.9, + "learning_rate": 1.7713108963640067e-05, + "loss": 2.2279, + "step": 4444 + }, + { + "epoch": 0.9, + "learning_rate": 1.7712009470684995e-05, + "loss": 2.112, + "step": 4445 + }, + { + "epoch": 0.9, + "learning_rate": 1.771090974762745e-05, + "loss": 2.1687, + "step": 4446 + }, + { + "epoch": 0.9, + "learning_rate": 1.7709809794500244e-05, + "loss": 2.0754, + "step": 4447 + }, + { + "epoch": 0.9, + "learning_rate": 1.7708709611336197e-05, + "loss": 2.1528, + "step": 4448 + }, + { + "epoch": 0.9, + "learning_rate": 1.7707609198168136e-05, + "loss": 2.0846, + "step": 4449 + }, + { + "epoch": 0.9, + "learning_rate": 1.7706508555028895e-05, + "loss": 2.1743, + "step": 4450 + }, + { + "epoch": 0.9, + "learning_rate": 1.770540768195131e-05, + "loss": 2.1563, + "step": 4451 + }, + { + "epoch": 0.9, + "learning_rate": 1.7704306578968228e-05, + "loss": 2.1647, + "step": 4452 + }, + { + "epoch": 0.9, + "learning_rate": 1.77032052461125e-05, + "loss": 2.039, + "step": 4453 + }, + { + "epoch": 0.9, + "learning_rate": 1.7702103683416996e-05, + "loss": 2.0135, + "step": 4454 + }, + { + "epoch": 0.9, + "learning_rate": 1.770100189091457e-05, + "loss": 2.259, + "step": 4455 + }, + { + "epoch": 0.9, + "learning_rate": 1.7699899868638112e-05, + "loss": 2.1693, + "step": 4456 + }, + { + "epoch": 0.9, + "learning_rate": 1.769879761662049e-05, + "loss": 2.115, + "step": 4457 + }, + { + "epoch": 0.9, + "learning_rate": 1.7697695134894596e-05, + "loss": 2.0927, + "step": 4458 + }, + { + "epoch": 0.91, + "learning_rate": 1.7696592423493323e-05, + "loss": 2.1909, + "step": 4459 + }, + { + "epoch": 0.91, + "learning_rate": 1.7695489482449577e-05, + "loss": 2.0738, + "step": 4460 + }, + { + "epoch": 0.91, + "learning_rate": 1.769438631179626e-05, + "loss": 2.0846, + "step": 4461 + }, + { + "epoch": 0.91, + "learning_rate": 1.7693282911566293e-05, + "loss": 2.2055, + "step": 4462 + }, + { + "epoch": 0.91, + "learning_rate": 1.7692179281792597e-05, + "loss": 2.0597, + "step": 4463 + }, + { + "epoch": 0.91, + "learning_rate": 1.7691075422508097e-05, + "loss": 2.1549, + "step": 4464 + }, + { + "epoch": 0.91, + "learning_rate": 1.7689971333745736e-05, + "loss": 2.2512, + "step": 4465 + }, + { + "epoch": 0.91, + "learning_rate": 1.7688867015538446e-05, + "loss": 2.2042, + "step": 4466 + }, + { + "epoch": 0.91, + "learning_rate": 1.7687762467919184e-05, + "loss": 2.1536, + "step": 4467 + }, + { + "epoch": 0.91, + "learning_rate": 1.7686657690920906e-05, + "loss": 2.101, + "step": 4468 + }, + { + "epoch": 0.91, + "learning_rate": 1.7685552684576572e-05, + "loss": 2.2062, + "step": 4469 + }, + { + "epoch": 0.91, + "learning_rate": 1.7684447448919156e-05, + "loss": 2.1086, + "step": 4470 + }, + { + "epoch": 0.91, + "learning_rate": 1.7683341983981633e-05, + "loss": 2.1553, + "step": 4471 + }, + { + "epoch": 0.91, + "learning_rate": 1.7682236289796985e-05, + "loss": 2.127, + "step": 4472 + }, + { + "epoch": 0.91, + "learning_rate": 1.7681130366398204e-05, + "loss": 2.1909, + "step": 4473 + }, + { + "epoch": 0.91, + "learning_rate": 1.7680024213818284e-05, + "loss": 2.093, + "step": 4474 + }, + { + "epoch": 0.91, + "learning_rate": 1.7678917832090233e-05, + "loss": 2.1194, + "step": 4475 + }, + { + "epoch": 0.91, + "learning_rate": 1.7677811221247065e-05, + "loss": 2.0608, + "step": 4476 + }, + { + "epoch": 0.91, + "learning_rate": 1.767670438132179e-05, + "loss": 2.1063, + "step": 4477 + }, + { + "epoch": 0.91, + "learning_rate": 1.767559731234744e-05, + "loss": 2.2127, + "step": 4478 + }, + { + "epoch": 0.91, + "learning_rate": 1.7674490014357043e-05, + "loss": 2.1201, + "step": 4479 + }, + { + "epoch": 0.91, + "learning_rate": 1.7673382487383635e-05, + "loss": 2.1627, + "step": 4480 + }, + { + "epoch": 0.91, + "learning_rate": 1.7672274731460264e-05, + "loss": 2.1489, + "step": 4481 + }, + { + "epoch": 0.91, + "learning_rate": 1.767116674661998e-05, + "loss": 2.1022, + "step": 4482 + }, + { + "epoch": 0.91, + "learning_rate": 1.7670058532895848e-05, + "loss": 2.0746, + "step": 4483 + }, + { + "epoch": 0.91, + "learning_rate": 1.766895009032093e-05, + "loss": 2.0927, + "step": 4484 + }, + { + "epoch": 0.91, + "learning_rate": 1.7667841418928292e-05, + "loss": 2.1712, + "step": 4485 + }, + { + "epoch": 0.91, + "learning_rate": 1.7666732518751023e-05, + "loss": 2.051, + "step": 4486 + }, + { + "epoch": 0.91, + "learning_rate": 1.7665623389822203e-05, + "loss": 2.1797, + "step": 4487 + }, + { + "epoch": 0.91, + "learning_rate": 1.7664514032174928e-05, + "loss": 2.0941, + "step": 4488 + }, + { + "epoch": 0.91, + "learning_rate": 1.76634044458423e-05, + "loss": 2.1899, + "step": 4489 + }, + { + "epoch": 0.91, + "learning_rate": 1.766229463085742e-05, + "loss": 2.12, + "step": 4490 + }, + { + "epoch": 0.91, + "learning_rate": 1.7661184587253403e-05, + "loss": 2.1625, + "step": 4491 + }, + { + "epoch": 0.91, + "learning_rate": 1.766007431506337e-05, + "loss": 2.2193, + "step": 4492 + }, + { + "epoch": 0.91, + "learning_rate": 1.765896381432045e-05, + "loss": 2.152, + "step": 4493 + }, + { + "epoch": 0.91, + "learning_rate": 1.7657853085057775e-05, + "loss": 2.1633, + "step": 4494 + }, + { + "epoch": 0.91, + "learning_rate": 1.7656742127308484e-05, + "loss": 2.1388, + "step": 4495 + }, + { + "epoch": 0.91, + "learning_rate": 1.765563094110573e-05, + "loss": 2.0684, + "step": 4496 + }, + { + "epoch": 0.91, + "learning_rate": 1.765451952648266e-05, + "loss": 2.0927, + "step": 4497 + }, + { + "epoch": 0.91, + "learning_rate": 1.7653407883472442e-05, + "loss": 2.1091, + "step": 4498 + }, + { + "epoch": 0.91, + "learning_rate": 1.765229601210824e-05, + "loss": 2.0604, + "step": 4499 + }, + { + "epoch": 0.91, + "learning_rate": 1.7651183912423228e-05, + "loss": 2.1045, + "step": 4500 + }, + { + "epoch": 0.91, + "learning_rate": 1.7650071584450592e-05, + "loss": 2.1019, + "step": 4501 + }, + { + "epoch": 0.91, + "learning_rate": 1.764895902822352e-05, + "loss": 2.1589, + "step": 4502 + }, + { + "epoch": 0.91, + "learning_rate": 1.76478462437752e-05, + "loss": 2.1223, + "step": 4503 + }, + { + "epoch": 0.91, + "learning_rate": 1.764673323113884e-05, + "loss": 2.1532, + "step": 4504 + }, + { + "epoch": 0.91, + "learning_rate": 1.764561999034765e-05, + "loss": 2.1544, + "step": 4505 + }, + { + "epoch": 0.91, + "learning_rate": 1.7644506521434844e-05, + "loss": 2.1785, + "step": 4506 + }, + { + "epoch": 0.91, + "learning_rate": 1.7643392824433643e-05, + "loss": 2.1671, + "step": 4507 + }, + { + "epoch": 0.92, + "learning_rate": 1.7642278899377278e-05, + "loss": 2.1401, + "step": 4508 + }, + { + "epoch": 0.92, + "learning_rate": 1.7641164746298983e-05, + "loss": 2.2292, + "step": 4509 + }, + { + "epoch": 0.92, + "learning_rate": 1.7640050365232e-05, + "loss": 2.127, + "step": 4510 + }, + { + "epoch": 0.92, + "learning_rate": 1.7638935756209586e-05, + "loss": 2.0956, + "step": 4511 + }, + { + "epoch": 0.92, + "learning_rate": 1.763782091926499e-05, + "loss": 2.174, + "step": 4512 + }, + { + "epoch": 0.92, + "learning_rate": 1.7636705854431482e-05, + "loss": 2.1441, + "step": 4513 + }, + { + "epoch": 0.92, + "learning_rate": 1.763559056174232e-05, + "loss": 2.1897, + "step": 4514 + }, + { + "epoch": 0.92, + "learning_rate": 1.7634475041230796e-05, + "loss": 2.1247, + "step": 4515 + }, + { + "epoch": 0.92, + "learning_rate": 1.7633359292930184e-05, + "loss": 2.1525, + "step": 4516 + }, + { + "epoch": 0.92, + "learning_rate": 1.7632243316873773e-05, + "loss": 2.1255, + "step": 4517 + }, + { + "epoch": 0.92, + "learning_rate": 1.763112711309487e-05, + "loss": 2.1125, + "step": 4518 + }, + { + "epoch": 0.92, + "learning_rate": 1.763001068162677e-05, + "loss": 2.1339, + "step": 4519 + }, + { + "epoch": 0.92, + "learning_rate": 1.7628894022502786e-05, + "loss": 2.0714, + "step": 4520 + }, + { + "epoch": 0.92, + "learning_rate": 1.762777713575624e-05, + "loss": 2.1606, + "step": 4521 + }, + { + "epoch": 0.92, + "learning_rate": 1.7626660021420446e-05, + "loss": 2.1132, + "step": 4522 + }, + { + "epoch": 0.92, + "learning_rate": 1.762554267952875e-05, + "loss": 2.1801, + "step": 4523 + }, + { + "epoch": 0.92, + "learning_rate": 1.762442511011448e-05, + "loss": 2.1833, + "step": 4524 + }, + { + "epoch": 0.92, + "learning_rate": 1.7623307313210985e-05, + "loss": 2.1878, + "step": 4525 + }, + { + "epoch": 0.92, + "learning_rate": 1.7622189288851613e-05, + "loss": 2.1172, + "step": 4526 + }, + { + "epoch": 0.92, + "learning_rate": 1.7621071037069723e-05, + "loss": 2.1233, + "step": 4527 + }, + { + "epoch": 0.92, + "learning_rate": 1.7619952557898684e-05, + "loss": 2.231, + "step": 4528 + }, + { + "epoch": 0.92, + "learning_rate": 1.7618833851371862e-05, + "loss": 2.2031, + "step": 4529 + }, + { + "epoch": 0.92, + "learning_rate": 1.761771491752264e-05, + "loss": 2.0763, + "step": 4530 + }, + { + "epoch": 0.92, + "learning_rate": 1.7616595756384406e-05, + "loss": 2.0856, + "step": 4531 + }, + { + "epoch": 0.92, + "learning_rate": 1.7615476367990542e-05, + "loss": 2.0635, + "step": 4532 + }, + { + "epoch": 0.92, + "learning_rate": 1.761435675237446e-05, + "loss": 2.2057, + "step": 4533 + }, + { + "epoch": 0.92, + "learning_rate": 1.7613236909569557e-05, + "loss": 2.1771, + "step": 4534 + }, + { + "epoch": 0.92, + "learning_rate": 1.761211683960925e-05, + "loss": 2.2185, + "step": 4535 + }, + { + "epoch": 0.92, + "learning_rate": 1.7610996542526954e-05, + "loss": 2.1965, + "step": 4536 + }, + { + "epoch": 0.92, + "learning_rate": 1.7609876018356102e-05, + "loss": 2.1581, + "step": 4537 + }, + { + "epoch": 0.92, + "learning_rate": 1.760875526713012e-05, + "loss": 2.1077, + "step": 4538 + }, + { + "epoch": 0.92, + "learning_rate": 1.7607634288882454e-05, + "loss": 2.0683, + "step": 4539 + }, + { + "epoch": 0.92, + "learning_rate": 1.7606513083646544e-05, + "loss": 2.1019, + "step": 4540 + }, + { + "epoch": 0.92, + "learning_rate": 1.760539165145585e-05, + "loss": 2.1867, + "step": 4541 + }, + { + "epoch": 0.92, + "learning_rate": 1.7604269992343823e-05, + "loss": 2.1219, + "step": 4542 + }, + { + "epoch": 0.92, + "learning_rate": 1.7603148106343942e-05, + "loss": 2.1872, + "step": 4543 + }, + { + "epoch": 0.92, + "learning_rate": 1.7602025993489667e-05, + "loss": 2.1063, + "step": 4544 + }, + { + "epoch": 0.92, + "learning_rate": 1.760090365381449e-05, + "loss": 2.1782, + "step": 4545 + }, + { + "epoch": 0.92, + "learning_rate": 1.7599781087351894e-05, + "loss": 2.1409, + "step": 4546 + }, + { + "epoch": 0.92, + "learning_rate": 1.7598658294135372e-05, + "loss": 2.209, + "step": 4547 + }, + { + "epoch": 0.92, + "learning_rate": 1.7597535274198424e-05, + "loss": 2.13, + "step": 4548 + }, + { + "epoch": 0.92, + "learning_rate": 1.759641202757456e-05, + "loss": 2.0841, + "step": 4549 + }, + { + "epoch": 0.92, + "learning_rate": 1.7595288554297295e-05, + "loss": 2.1068, + "step": 4550 + }, + { + "epoch": 0.92, + "learning_rate": 1.7594164854400145e-05, + "loss": 2.1017, + "step": 4551 + }, + { + "epoch": 0.92, + "learning_rate": 1.7593040927916637e-05, + "loss": 2.1247, + "step": 4552 + }, + { + "epoch": 0.92, + "learning_rate": 1.7591916774880313e-05, + "loss": 2.1216, + "step": 4553 + }, + { + "epoch": 0.92, + "learning_rate": 1.759079239532471e-05, + "loss": 2.128, + "step": 4554 + }, + { + "epoch": 0.92, + "learning_rate": 1.7589667789283374e-05, + "loss": 2.1328, + "step": 4555 + }, + { + "epoch": 0.92, + "learning_rate": 1.7588542956789868e-05, + "loss": 2.1602, + "step": 4556 + }, + { + "epoch": 0.92, + "learning_rate": 1.758741789787774e-05, + "loss": 2.1305, + "step": 4557 + }, + { + "epoch": 0.93, + "learning_rate": 1.758629261258057e-05, + "loss": 2.1546, + "step": 4558 + }, + { + "epoch": 0.93, + "learning_rate": 1.7585167100931926e-05, + "loss": 2.1744, + "step": 4559 + }, + { + "epoch": 0.93, + "learning_rate": 1.7584041362965397e-05, + "loss": 2.1623, + "step": 4560 + }, + { + "epoch": 0.93, + "learning_rate": 1.7582915398714565e-05, + "loss": 2.1456, + "step": 4561 + }, + { + "epoch": 0.93, + "learning_rate": 1.7581789208213026e-05, + "loss": 2.1063, + "step": 4562 + }, + { + "epoch": 0.93, + "learning_rate": 1.758066279149438e-05, + "loss": 2.1769, + "step": 4563 + }, + { + "epoch": 0.93, + "learning_rate": 1.7579536148592247e-05, + "loss": 2.1757, + "step": 4564 + }, + { + "epoch": 0.93, + "learning_rate": 1.7578409279540228e-05, + "loss": 2.1412, + "step": 4565 + }, + { + "epoch": 0.93, + "learning_rate": 1.7577282184371957e-05, + "loss": 2.1144, + "step": 4566 + }, + { + "epoch": 0.93, + "learning_rate": 1.7576154863121058e-05, + "loss": 2.1685, + "step": 4567 + }, + { + "epoch": 0.93, + "learning_rate": 1.7575027315821165e-05, + "loss": 2.0887, + "step": 4568 + }, + { + "epoch": 0.93, + "learning_rate": 1.7573899542505922e-05, + "loss": 2.1614, + "step": 4569 + }, + { + "epoch": 0.93, + "learning_rate": 1.7572771543208978e-05, + "loss": 2.1091, + "step": 4570 + }, + { + "epoch": 0.93, + "learning_rate": 1.7571643317963992e-05, + "loss": 2.0964, + "step": 4571 + }, + { + "epoch": 0.93, + "learning_rate": 1.7570514866804623e-05, + "loss": 2.118, + "step": 4572 + }, + { + "epoch": 0.93, + "learning_rate": 1.7569386189764542e-05, + "loss": 2.182, + "step": 4573 + }, + { + "epoch": 0.93, + "learning_rate": 1.7568257286877423e-05, + "loss": 2.1544, + "step": 4574 + }, + { + "epoch": 0.93, + "learning_rate": 1.7567128158176955e-05, + "loss": 2.2205, + "step": 4575 + }, + { + "epoch": 0.93, + "learning_rate": 1.756599880369682e-05, + "loss": 2.2122, + "step": 4576 + }, + { + "epoch": 0.93, + "learning_rate": 1.756486922347072e-05, + "loss": 2.144, + "step": 4577 + }, + { + "epoch": 0.93, + "learning_rate": 1.7563739417532358e-05, + "loss": 2.1153, + "step": 4578 + }, + { + "epoch": 0.93, + "learning_rate": 1.756260938591544e-05, + "loss": 2.1524, + "step": 4579 + }, + { + "epoch": 0.93, + "learning_rate": 1.7561479128653687e-05, + "loss": 2.1423, + "step": 4580 + }, + { + "epoch": 0.93, + "learning_rate": 1.756034864578082e-05, + "loss": 2.2294, + "step": 4581 + }, + { + "epoch": 0.93, + "learning_rate": 1.7559217937330565e-05, + "loss": 2.0893, + "step": 4582 + }, + { + "epoch": 0.93, + "learning_rate": 1.7558087003336668e-05, + "loss": 2.0733, + "step": 4583 + }, + { + "epoch": 0.93, + "learning_rate": 1.7556955843832862e-05, + "loss": 2.2577, + "step": 4584 + }, + { + "epoch": 0.93, + "learning_rate": 1.755582445885291e-05, + "loss": 2.1648, + "step": 4585 + }, + { + "epoch": 0.93, + "learning_rate": 1.755469284843056e-05, + "loss": 2.1578, + "step": 4586 + }, + { + "epoch": 0.93, + "learning_rate": 1.755356101259958e-05, + "loss": 2.1128, + "step": 4587 + }, + { + "epoch": 0.93, + "learning_rate": 1.7552428951393733e-05, + "loss": 2.0782, + "step": 4588 + }, + { + "epoch": 0.93, + "learning_rate": 1.75512966648468e-05, + "loss": 2.1996, + "step": 4589 + }, + { + "epoch": 0.93, + "learning_rate": 1.7550164152992573e-05, + "loss": 2.1827, + "step": 4590 + }, + { + "epoch": 0.93, + "learning_rate": 1.7549031415864834e-05, + "loss": 2.121, + "step": 4591 + }, + { + "epoch": 0.93, + "learning_rate": 1.7547898453497384e-05, + "loss": 2.1067, + "step": 4592 + }, + { + "epoch": 0.93, + "learning_rate": 1.7546765265924022e-05, + "loss": 2.1367, + "step": 4593 + }, + { + "epoch": 0.93, + "learning_rate": 1.7545631853178563e-05, + "loss": 2.0883, + "step": 4594 + }, + { + "epoch": 0.93, + "learning_rate": 1.754449821529483e-05, + "loss": 2.1484, + "step": 4595 + }, + { + "epoch": 0.93, + "learning_rate": 1.754336435230663e-05, + "loss": 2.158, + "step": 4596 + }, + { + "epoch": 0.93, + "learning_rate": 1.7542230264247816e-05, + "loss": 2.1426, + "step": 4597 + }, + { + "epoch": 0.93, + "learning_rate": 1.754109595115221e-05, + "loss": 2.1221, + "step": 4598 + }, + { + "epoch": 0.93, + "learning_rate": 1.753996141305366e-05, + "loss": 2.1091, + "step": 4599 + }, + { + "epoch": 0.93, + "learning_rate": 1.753882664998602e-05, + "loss": 2.1269, + "step": 4600 + }, + { + "epoch": 0.93, + "learning_rate": 1.7537691661983145e-05, + "loss": 2.2031, + "step": 4601 + }, + { + "epoch": 0.93, + "learning_rate": 1.7536556449078902e-05, + "loss": 2.1488, + "step": 4602 + }, + { + "epoch": 0.93, + "learning_rate": 1.753542101130716e-05, + "loss": 2.117, + "step": 4603 + }, + { + "epoch": 0.93, + "learning_rate": 1.75342853487018e-05, + "loss": 2.1886, + "step": 4604 + }, + { + "epoch": 0.93, + "learning_rate": 1.75331494612967e-05, + "loss": 2.1584, + "step": 4605 + }, + { + "epoch": 0.93, + "learning_rate": 1.7532013349125757e-05, + "loss": 2.1502, + "step": 4606 + }, + { + "epoch": 0.94, + "learning_rate": 1.7530877012222867e-05, + "loss": 2.1154, + "step": 4607 + }, + { + "epoch": 0.94, + "learning_rate": 1.752974045062194e-05, + "loss": 2.1904, + "step": 4608 + }, + { + "epoch": 0.94, + "learning_rate": 1.752860366435688e-05, + "loss": 2.1171, + "step": 4609 + }, + { + "epoch": 0.94, + "learning_rate": 1.752746665346161e-05, + "loss": 2.0684, + "step": 4610 + }, + { + "epoch": 0.94, + "learning_rate": 1.7526329417970052e-05, + "loss": 2.1877, + "step": 4611 + }, + { + "epoch": 0.94, + "learning_rate": 1.7525191957916138e-05, + "loss": 2.0902, + "step": 4612 + }, + { + "epoch": 0.94, + "learning_rate": 1.7524054273333806e-05, + "loss": 2.0956, + "step": 4613 + }, + { + "epoch": 0.94, + "learning_rate": 1.7522916364257007e-05, + "loss": 2.1718, + "step": 4614 + }, + { + "epoch": 0.94, + "learning_rate": 1.7521778230719685e-05, + "loss": 2.0063, + "step": 4615 + }, + { + "epoch": 0.94, + "learning_rate": 1.75206398727558e-05, + "loss": 2.1545, + "step": 4616 + }, + { + "epoch": 0.94, + "learning_rate": 1.751950129039932e-05, + "loss": 2.2452, + "step": 4617 + }, + { + "epoch": 0.94, + "learning_rate": 1.751836248368421e-05, + "loss": 2.1449, + "step": 4618 + }, + { + "epoch": 0.94, + "learning_rate": 1.751722345264446e-05, + "loss": 2.245, + "step": 4619 + }, + { + "epoch": 0.94, + "learning_rate": 1.7516084197314044e-05, + "loss": 2.0619, + "step": 4620 + }, + { + "epoch": 0.94, + "learning_rate": 1.7514944717726962e-05, + "loss": 2.1855, + "step": 4621 + }, + { + "epoch": 0.94, + "learning_rate": 1.7513805013917207e-05, + "loss": 2.1707, + "step": 4622 + }, + { + "epoch": 0.94, + "learning_rate": 1.7512665085918787e-05, + "loss": 2.1566, + "step": 4623 + }, + { + "epoch": 0.94, + "learning_rate": 1.7511524933765715e-05, + "loss": 2.1142, + "step": 4624 + }, + { + "epoch": 0.94, + "learning_rate": 1.7510384557492e-05, + "loss": 2.0924, + "step": 4625 + }, + { + "epoch": 0.94, + "learning_rate": 1.750924395713168e-05, + "loss": 2.1394, + "step": 4626 + }, + { + "epoch": 0.94, + "learning_rate": 1.7508103132718785e-05, + "loss": 2.1693, + "step": 4627 + }, + { + "epoch": 0.94, + "learning_rate": 1.750696208428735e-05, + "loss": 2.1276, + "step": 4628 + }, + { + "epoch": 0.94, + "learning_rate": 1.7505820811871416e-05, + "loss": 2.1651, + "step": 4629 + }, + { + "epoch": 0.94, + "learning_rate": 1.7504679315505046e-05, + "loss": 2.2158, + "step": 4630 + }, + { + "epoch": 0.94, + "learning_rate": 1.7503537595222287e-05, + "loss": 2.2669, + "step": 4631 + }, + { + "epoch": 0.94, + "learning_rate": 1.7502395651057214e-05, + "loss": 2.1454, + "step": 4632 + }, + { + "epoch": 0.94, + "learning_rate": 1.7501253483043895e-05, + "loss": 2.1201, + "step": 4633 + }, + { + "epoch": 0.94, + "learning_rate": 1.750011109121641e-05, + "loss": 2.2083, + "step": 4634 + }, + { + "epoch": 0.94, + "learning_rate": 1.749896847560884e-05, + "loss": 2.0656, + "step": 4635 + }, + { + "epoch": 0.94, + "learning_rate": 1.7497825636255282e-05, + "loss": 2.1883, + "step": 4636 + }, + { + "epoch": 0.94, + "learning_rate": 1.7496682573189834e-05, + "loss": 2.1162, + "step": 4637 + }, + { + "epoch": 0.94, + "learning_rate": 1.74955392864466e-05, + "loss": 2.116, + "step": 4638 + }, + { + "epoch": 0.94, + "learning_rate": 1.749439577605969e-05, + "loss": 2.1231, + "step": 4639 + }, + { + "epoch": 0.94, + "learning_rate": 1.7493252042063232e-05, + "loss": 2.0958, + "step": 4640 + }, + { + "epoch": 0.94, + "learning_rate": 1.749210808449134e-05, + "loss": 2.1475, + "step": 4641 + }, + { + "epoch": 0.94, + "learning_rate": 1.7490963903378156e-05, + "loss": 2.0922, + "step": 4642 + }, + { + "epoch": 0.94, + "learning_rate": 1.748981949875781e-05, + "loss": 2.1752, + "step": 4643 + }, + { + "epoch": 0.94, + "learning_rate": 1.748867487066445e-05, + "loss": 2.1288, + "step": 4644 + }, + { + "epoch": 0.94, + "learning_rate": 1.748753001913224e-05, + "loss": 2.1162, + "step": 4645 + }, + { + "epoch": 0.94, + "learning_rate": 1.748638494419532e-05, + "loss": 2.1507, + "step": 4646 + }, + { + "epoch": 0.94, + "learning_rate": 1.7485239645887864e-05, + "loss": 2.2091, + "step": 4647 + }, + { + "epoch": 0.94, + "learning_rate": 1.7484094124244047e-05, + "loss": 2.1565, + "step": 4648 + }, + { + "epoch": 0.94, + "learning_rate": 1.7482948379298043e-05, + "loss": 2.1182, + "step": 4649 + }, + { + "epoch": 0.94, + "learning_rate": 1.748180241108404e-05, + "loss": 2.1777, + "step": 4650 + }, + { + "epoch": 0.94, + "learning_rate": 1.7480656219636233e-05, + "loss": 2.0802, + "step": 4651 + }, + { + "epoch": 0.94, + "learning_rate": 1.7479509804988816e-05, + "loss": 2.1657, + "step": 4652 + }, + { + "epoch": 0.94, + "learning_rate": 1.7478363167175993e-05, + "loss": 2.1461, + "step": 4653 + }, + { + "epoch": 0.94, + "learning_rate": 1.7477216306231982e-05, + "loss": 2.0752, + "step": 4654 + }, + { + "epoch": 0.94, + "learning_rate": 1.7476069222190998e-05, + "loss": 2.1394, + "step": 4655 + }, + { + "epoch": 0.95, + "learning_rate": 1.7474921915087266e-05, + "loss": 2.1754, + "step": 4656 + }, + { + "epoch": 0.95, + "learning_rate": 1.7473774384955022e-05, + "loss": 2.2375, + "step": 4657 + }, + { + "epoch": 0.95, + "learning_rate": 1.74726266318285e-05, + "loss": 2.2272, + "step": 4658 + }, + { + "epoch": 0.95, + "learning_rate": 1.7471478655741947e-05, + "loss": 2.107, + "step": 4659 + }, + { + "epoch": 0.95, + "learning_rate": 1.7470330456729618e-05, + "loss": 2.2046, + "step": 4660 + }, + { + "epoch": 0.95, + "learning_rate": 1.746918203482577e-05, + "loss": 2.1825, + "step": 4661 + }, + { + "epoch": 0.95, + "learning_rate": 1.7468033390064666e-05, + "loss": 2.1856, + "step": 4662 + }, + { + "epoch": 0.95, + "learning_rate": 1.7466884522480578e-05, + "loss": 2.1379, + "step": 4663 + }, + { + "epoch": 0.95, + "learning_rate": 1.746573543210779e-05, + "loss": 2.2385, + "step": 4664 + }, + { + "epoch": 0.95, + "learning_rate": 1.746458611898058e-05, + "loss": 2.0806, + "step": 4665 + }, + { + "epoch": 0.95, + "learning_rate": 1.7463436583133244e-05, + "loss": 2.1462, + "step": 4666 + }, + { + "epoch": 0.95, + "learning_rate": 1.746228682460008e-05, + "loss": 2.1718, + "step": 4667 + }, + { + "epoch": 0.95, + "learning_rate": 1.7461136843415398e-05, + "loss": 2.0831, + "step": 4668 + }, + { + "epoch": 0.95, + "learning_rate": 1.7459986639613502e-05, + "loss": 2.1306, + "step": 4669 + }, + { + "epoch": 0.95, + "learning_rate": 1.745883621322871e-05, + "loss": 2.0902, + "step": 4670 + }, + { + "epoch": 0.95, + "learning_rate": 1.7457685564295358e-05, + "loss": 2.2179, + "step": 4671 + }, + { + "epoch": 0.95, + "learning_rate": 1.7456534692847768e-05, + "loss": 2.1292, + "step": 4672 + }, + { + "epoch": 0.95, + "learning_rate": 1.7455383598920276e-05, + "loss": 2.0939, + "step": 4673 + }, + { + "epoch": 0.95, + "learning_rate": 1.7454232282547237e-05, + "loss": 2.211, + "step": 4674 + }, + { + "epoch": 0.95, + "learning_rate": 1.7453080743763e-05, + "loss": 2.1008, + "step": 4675 + }, + { + "epoch": 0.95, + "learning_rate": 1.745192898260192e-05, + "loss": 2.1525, + "step": 4676 + }, + { + "epoch": 0.95, + "learning_rate": 1.7450776999098363e-05, + "loss": 2.0997, + "step": 4677 + }, + { + "epoch": 0.95, + "learning_rate": 1.74496247932867e-05, + "loss": 2.1038, + "step": 4678 + }, + { + "epoch": 0.95, + "learning_rate": 1.744847236520131e-05, + "loss": 2.1292, + "step": 4679 + }, + { + "epoch": 0.95, + "learning_rate": 1.744731971487658e-05, + "loss": 2.0847, + "step": 4680 + }, + { + "epoch": 0.95, + "learning_rate": 1.7446166842346893e-05, + "loss": 2.1119, + "step": 4681 + }, + { + "epoch": 0.95, + "learning_rate": 1.744501374764666e-05, + "loss": 2.1921, + "step": 4682 + }, + { + "epoch": 0.95, + "learning_rate": 1.744386043081028e-05, + "loss": 2.15, + "step": 4683 + }, + { + "epoch": 0.95, + "learning_rate": 1.7442706891872162e-05, + "loss": 2.1129, + "step": 4684 + }, + { + "epoch": 0.95, + "learning_rate": 1.7441553130866723e-05, + "loss": 2.1155, + "step": 4685 + }, + { + "epoch": 0.95, + "learning_rate": 1.7440399147828392e-05, + "loss": 2.1482, + "step": 4686 + }, + { + "epoch": 0.95, + "learning_rate": 1.7439244942791602e-05, + "loss": 2.1572, + "step": 4687 + }, + { + "epoch": 0.95, + "learning_rate": 1.7438090515790787e-05, + "loss": 2.1196, + "step": 4688 + }, + { + "epoch": 0.95, + "learning_rate": 1.743693586686039e-05, + "loss": 2.1592, + "step": 4689 + }, + { + "epoch": 0.95, + "learning_rate": 1.7435780996034867e-05, + "loss": 2.1366, + "step": 4690 + }, + { + "epoch": 0.95, + "learning_rate": 1.743462590334867e-05, + "loss": 2.1232, + "step": 4691 + }, + { + "epoch": 0.95, + "learning_rate": 1.743347058883627e-05, + "loss": 2.0543, + "step": 4692 + }, + { + "epoch": 0.95, + "learning_rate": 1.7432315052532133e-05, + "loss": 2.099, + "step": 4693 + }, + { + "epoch": 0.95, + "learning_rate": 1.7431159294470738e-05, + "loss": 2.1129, + "step": 4694 + }, + { + "epoch": 0.95, + "learning_rate": 1.743000331468657e-05, + "loss": 2.1613, + "step": 4695 + }, + { + "epoch": 0.95, + "learning_rate": 1.742884711321412e-05, + "loss": 2.1145, + "step": 4696 + }, + { + "epoch": 0.95, + "learning_rate": 1.742769069008788e-05, + "loss": 2.2573, + "step": 4697 + }, + { + "epoch": 0.95, + "learning_rate": 1.7426534045342364e-05, + "loss": 2.0635, + "step": 4698 + }, + { + "epoch": 0.95, + "learning_rate": 1.7425377179012077e-05, + "loss": 2.123, + "step": 4699 + }, + { + "epoch": 0.95, + "learning_rate": 1.7424220091131536e-05, + "loss": 2.1722, + "step": 4700 + }, + { + "epoch": 0.95, + "learning_rate": 1.7423062781735267e-05, + "loss": 2.1334, + "step": 4701 + }, + { + "epoch": 0.95, + "learning_rate": 1.7421905250857796e-05, + "loss": 2.1361, + "step": 4702 + }, + { + "epoch": 0.95, + "learning_rate": 1.7420747498533665e-05, + "loss": 2.1963, + "step": 4703 + }, + { + "epoch": 0.95, + "learning_rate": 1.7419589524797418e-05, + "loss": 2.1598, + "step": 4704 + }, + { + "epoch": 0.96, + "learning_rate": 1.7418431329683604e-05, + "loss": 2.2115, + "step": 4705 + }, + { + "epoch": 0.96, + "learning_rate": 1.7417272913226777e-05, + "loss": 2.145, + "step": 4706 + }, + { + "epoch": 0.96, + "learning_rate": 1.7416114275461504e-05, + "loss": 2.0671, + "step": 4707 + }, + { + "epoch": 0.96, + "learning_rate": 1.741495541642235e-05, + "loss": 2.1066, + "step": 4708 + }, + { + "epoch": 0.96, + "learning_rate": 1.7413796336143902e-05, + "loss": 2.1088, + "step": 4709 + }, + { + "epoch": 0.96, + "learning_rate": 1.7412637034660735e-05, + "loss": 2.1895, + "step": 4710 + }, + { + "epoch": 0.96, + "learning_rate": 1.7411477512007444e-05, + "loss": 2.1803, + "step": 4711 + }, + { + "epoch": 0.96, + "learning_rate": 1.741031776821862e-05, + "loss": 2.1021, + "step": 4712 + }, + { + "epoch": 0.96, + "learning_rate": 1.7409157803328868e-05, + "loss": 2.126, + "step": 4713 + }, + { + "epoch": 0.96, + "learning_rate": 1.74079976173728e-05, + "loss": 2.1059, + "step": 4714 + }, + { + "epoch": 0.96, + "learning_rate": 1.740683721038503e-05, + "loss": 2.2015, + "step": 4715 + }, + { + "epoch": 0.96, + "learning_rate": 1.7405676582400186e-05, + "loss": 2.1323, + "step": 4716 + }, + { + "epoch": 0.96, + "learning_rate": 1.740451573345289e-05, + "loss": 2.1463, + "step": 4717 + }, + { + "epoch": 0.96, + "learning_rate": 1.7403354663577782e-05, + "loss": 2.1786, + "step": 4718 + }, + { + "epoch": 0.96, + "learning_rate": 1.7402193372809505e-05, + "loss": 2.1379, + "step": 4719 + }, + { + "epoch": 0.96, + "learning_rate": 1.740103186118271e-05, + "loss": 2.2035, + "step": 4720 + }, + { + "epoch": 0.96, + "learning_rate": 1.7399870128732047e-05, + "loss": 2.1244, + "step": 4721 + }, + { + "epoch": 0.96, + "learning_rate": 1.7398708175492184e-05, + "loss": 2.1103, + "step": 4722 + }, + { + "epoch": 0.96, + "learning_rate": 1.7397546001497788e-05, + "loss": 2.0825, + "step": 4723 + }, + { + "epoch": 0.96, + "learning_rate": 1.739638360678354e-05, + "loss": 2.1318, + "step": 4724 + }, + { + "epoch": 0.96, + "learning_rate": 1.739522099138411e-05, + "loss": 2.1344, + "step": 4725 + }, + { + "epoch": 0.96, + "learning_rate": 1.7394058155334196e-05, + "loss": 2.1023, + "step": 4726 + }, + { + "epoch": 0.96, + "learning_rate": 1.7392895098668493e-05, + "loss": 2.2053, + "step": 4727 + }, + { + "epoch": 0.96, + "learning_rate": 1.73917318214217e-05, + "loss": 2.1059, + "step": 4728 + }, + { + "epoch": 0.96, + "learning_rate": 1.7390568323628526e-05, + "loss": 2.1673, + "step": 4729 + }, + { + "epoch": 0.96, + "learning_rate": 1.738940460532369e-05, + "loss": 2.1664, + "step": 4730 + }, + { + "epoch": 0.96, + "learning_rate": 1.7388240666541912e-05, + "loss": 2.0927, + "step": 4731 + }, + { + "epoch": 0.96, + "learning_rate": 1.7387076507317915e-05, + "loss": 2.1673, + "step": 4732 + }, + { + "epoch": 0.96, + "learning_rate": 1.738591212768644e-05, + "loss": 2.1183, + "step": 4733 + }, + { + "epoch": 0.96, + "learning_rate": 1.7384747527682232e-05, + "loss": 2.1748, + "step": 4734 + }, + { + "epoch": 0.96, + "learning_rate": 1.7383582707340028e-05, + "loss": 2.1374, + "step": 4735 + }, + { + "epoch": 0.96, + "learning_rate": 1.738241766669459e-05, + "loss": 2.1748, + "step": 4736 + }, + { + "epoch": 0.96, + "learning_rate": 1.7381252405780676e-05, + "loss": 2.1629, + "step": 4737 + }, + { + "epoch": 0.96, + "learning_rate": 1.7380086924633056e-05, + "loss": 2.1786, + "step": 4738 + }, + { + "epoch": 0.96, + "learning_rate": 1.7378921223286506e-05, + "loss": 2.1034, + "step": 4739 + }, + { + "epoch": 0.96, + "learning_rate": 1.73777553017758e-05, + "loss": 2.1492, + "step": 4740 + }, + { + "epoch": 0.96, + "learning_rate": 1.7376589160135735e-05, + "loss": 2.1182, + "step": 4741 + }, + { + "epoch": 0.96, + "learning_rate": 1.73754227984011e-05, + "loss": 2.1362, + "step": 4742 + }, + { + "epoch": 0.96, + "learning_rate": 1.7374256216606694e-05, + "loss": 2.1066, + "step": 4743 + }, + { + "epoch": 0.96, + "learning_rate": 1.7373089414787326e-05, + "loss": 2.1141, + "step": 4744 + }, + { + "epoch": 0.96, + "learning_rate": 1.7371922392977808e-05, + "loss": 2.1665, + "step": 4745 + }, + { + "epoch": 0.96, + "learning_rate": 1.7370755151212962e-05, + "loss": 2.1646, + "step": 4746 + }, + { + "epoch": 0.96, + "learning_rate": 1.736958768952762e-05, + "loss": 2.1946, + "step": 4747 + }, + { + "epoch": 0.96, + "learning_rate": 1.7368420007956607e-05, + "loss": 2.1712, + "step": 4748 + }, + { + "epoch": 0.96, + "learning_rate": 1.7367252106534767e-05, + "loss": 2.2095, + "step": 4749 + }, + { + "epoch": 0.96, + "learning_rate": 1.7366083985296947e-05, + "loss": 2.0691, + "step": 4750 + }, + { + "epoch": 0.96, + "learning_rate": 1.7364915644277998e-05, + "loss": 2.1988, + "step": 4751 + }, + { + "epoch": 0.96, + "learning_rate": 1.736374708351278e-05, + "loss": 2.1213, + "step": 4752 + }, + { + "epoch": 0.96, + "learning_rate": 1.736257830303616e-05, + "loss": 2.1613, + "step": 4753 + }, + { + "epoch": 0.96, + "learning_rate": 1.736140930288301e-05, + "loss": 2.1254, + "step": 4754 + }, + { + "epoch": 0.97, + "learning_rate": 1.7360240083088213e-05, + "loss": 2.1555, + "step": 4755 + }, + { + "epoch": 0.97, + "learning_rate": 1.735907064368665e-05, + "loss": 2.1592, + "step": 4756 + }, + { + "epoch": 0.97, + "learning_rate": 1.7357900984713215e-05, + "loss": 2.1167, + "step": 4757 + }, + { + "epoch": 0.97, + "learning_rate": 1.735673110620281e-05, + "loss": 2.085, + "step": 4758 + }, + { + "epoch": 0.97, + "learning_rate": 1.7355561008190335e-05, + "loss": 2.1982, + "step": 4759 + }, + { + "epoch": 0.97, + "learning_rate": 1.7354390690710707e-05, + "loss": 2.0999, + "step": 4760 + }, + { + "epoch": 0.97, + "learning_rate": 1.7353220153798845e-05, + "loss": 2.1522, + "step": 4761 + }, + { + "epoch": 0.97, + "learning_rate": 1.735204939748967e-05, + "loss": 2.1307, + "step": 4762 + }, + { + "epoch": 0.97, + "learning_rate": 1.7350878421818115e-05, + "loss": 2.1932, + "step": 4763 + }, + { + "epoch": 0.97, + "learning_rate": 1.734970722681912e-05, + "loss": 2.1855, + "step": 4764 + }, + { + "epoch": 0.97, + "learning_rate": 1.7348535812527628e-05, + "loss": 2.1347, + "step": 4765 + }, + { + "epoch": 0.97, + "learning_rate": 1.734736417897859e-05, + "loss": 2.1538, + "step": 4766 + }, + { + "epoch": 0.97, + "learning_rate": 1.7346192326206965e-05, + "loss": 2.1061, + "step": 4767 + }, + { + "epoch": 0.97, + "learning_rate": 1.734502025424772e-05, + "loss": 2.1438, + "step": 4768 + }, + { + "epoch": 0.97, + "learning_rate": 1.734384796313582e-05, + "loss": 2.1627, + "step": 4769 + }, + { + "epoch": 0.97, + "learning_rate": 1.734267545290625e-05, + "loss": 2.1192, + "step": 4770 + }, + { + "epoch": 0.97, + "learning_rate": 1.7341502723593988e-05, + "loss": 2.0929, + "step": 4771 + }, + { + "epoch": 0.97, + "learning_rate": 1.7340329775234027e-05, + "loss": 2.0474, + "step": 4772 + }, + { + "epoch": 0.97, + "learning_rate": 1.7339156607861368e-05, + "loss": 2.1665, + "step": 4773 + }, + { + "epoch": 0.97, + "learning_rate": 1.7337983221511004e-05, + "loss": 2.158, + "step": 4774 + }, + { + "epoch": 0.97, + "learning_rate": 1.7336809616217954e-05, + "loss": 2.1664, + "step": 4775 + }, + { + "epoch": 0.97, + "learning_rate": 1.7335635792017235e-05, + "loss": 2.1294, + "step": 4776 + }, + { + "epoch": 0.97, + "learning_rate": 1.7334461748943864e-05, + "loss": 2.1136, + "step": 4777 + }, + { + "epoch": 0.97, + "learning_rate": 1.733328748703288e-05, + "loss": 2.1368, + "step": 4778 + }, + { + "epoch": 0.97, + "learning_rate": 1.7332113006319308e-05, + "loss": 2.118, + "step": 4779 + }, + { + "epoch": 0.97, + "learning_rate": 1.7330938306838205e-05, + "loss": 2.0661, + "step": 4780 + }, + { + "epoch": 0.97, + "learning_rate": 1.73297633886246e-05, + "loss": 2.171, + "step": 4781 + }, + { + "epoch": 0.97, + "learning_rate": 1.7328588251713573e-05, + "loss": 2.1631, + "step": 4782 + }, + { + "epoch": 0.97, + "learning_rate": 1.732741289614017e-05, + "loss": 2.1301, + "step": 4783 + }, + { + "epoch": 0.97, + "learning_rate": 1.7326237321939464e-05, + "loss": 2.1405, + "step": 4784 + }, + { + "epoch": 0.97, + "learning_rate": 1.7325061529146528e-05, + "loss": 2.1204, + "step": 4785 + }, + { + "epoch": 0.97, + "learning_rate": 1.7323885517796456e-05, + "loss": 2.219, + "step": 4786 + }, + { + "epoch": 0.97, + "learning_rate": 1.732270928792432e-05, + "loss": 2.1662, + "step": 4787 + }, + { + "epoch": 0.97, + "learning_rate": 1.732153283956522e-05, + "loss": 2.0862, + "step": 4788 + }, + { + "epoch": 0.97, + "learning_rate": 1.7320356172754268e-05, + "loss": 2.0821, + "step": 4789 + }, + { + "epoch": 0.97, + "learning_rate": 1.7319179287526565e-05, + "loss": 2.1596, + "step": 4790 + }, + { + "epoch": 0.97, + "learning_rate": 1.731800218391722e-05, + "loss": 2.1982, + "step": 4791 + }, + { + "epoch": 0.97, + "learning_rate": 1.731682486196136e-05, + "loss": 2.1571, + "step": 4792 + }, + { + "epoch": 0.97, + "learning_rate": 1.731564732169411e-05, + "loss": 2.1028, + "step": 4793 + }, + { + "epoch": 0.97, + "learning_rate": 1.731446956315061e-05, + "loss": 2.1239, + "step": 4794 + }, + { + "epoch": 0.97, + "learning_rate": 1.7313291586365996e-05, + "loss": 2.1127, + "step": 4795 + }, + { + "epoch": 0.97, + "learning_rate": 1.7312113391375414e-05, + "loss": 2.1056, + "step": 4796 + }, + { + "epoch": 0.97, + "learning_rate": 1.731093497821402e-05, + "loss": 2.0994, + "step": 4797 + }, + { + "epoch": 0.97, + "learning_rate": 1.730975634691697e-05, + "loss": 2.311, + "step": 4798 + }, + { + "epoch": 0.97, + "learning_rate": 1.730857749751944e-05, + "loss": 2.1756, + "step": 4799 + }, + { + "epoch": 0.97, + "learning_rate": 1.7307398430056595e-05, + "loss": 2.1163, + "step": 4800 + }, + { + "epoch": 0.97, + "learning_rate": 1.7306219144563617e-05, + "loss": 2.1016, + "step": 4801 + }, + { + "epoch": 0.97, + "learning_rate": 1.7305039641075695e-05, + "loss": 2.1958, + "step": 4802 + }, + { + "epoch": 0.97, + "learning_rate": 1.7303859919628018e-05, + "loss": 2.1081, + "step": 4803 + }, + { + "epoch": 0.98, + "learning_rate": 1.7302679980255782e-05, + "loss": 2.0862, + "step": 4804 + }, + { + "epoch": 0.98, + "learning_rate": 1.7301499822994204e-05, + "loss": 2.1867, + "step": 4805 + }, + { + "epoch": 0.98, + "learning_rate": 1.7300319447878483e-05, + "loss": 2.1638, + "step": 4806 + }, + { + "epoch": 0.98, + "learning_rate": 1.729913885494385e-05, + "loss": 2.0841, + "step": 4807 + }, + { + "epoch": 0.98, + "learning_rate": 1.7297958044225523e-05, + "loss": 2.1475, + "step": 4808 + }, + { + "epoch": 0.98, + "learning_rate": 1.7296777015758733e-05, + "loss": 2.0632, + "step": 4809 + }, + { + "epoch": 0.98, + "learning_rate": 1.729559576957872e-05, + "loss": 2.1112, + "step": 4810 + }, + { + "epoch": 0.98, + "learning_rate": 1.7294414305720733e-05, + "loss": 2.1565, + "step": 4811 + }, + { + "epoch": 0.98, + "learning_rate": 1.7293232624220016e-05, + "loss": 2.1373, + "step": 4812 + }, + { + "epoch": 0.98, + "learning_rate": 1.729205072511183e-05, + "loss": 2.1568, + "step": 4813 + }, + { + "epoch": 0.98, + "learning_rate": 1.729086860843144e-05, + "loss": 2.0914, + "step": 4814 + }, + { + "epoch": 0.98, + "learning_rate": 1.7289686274214116e-05, + "loss": 2.227, + "step": 4815 + }, + { + "epoch": 0.98, + "learning_rate": 1.7288503722495137e-05, + "loss": 2.1628, + "step": 4816 + }, + { + "epoch": 0.98, + "learning_rate": 1.7287320953309783e-05, + "loss": 2.2083, + "step": 4817 + }, + { + "epoch": 0.98, + "learning_rate": 1.7286137966693344e-05, + "loss": 2.1813, + "step": 4818 + }, + { + "epoch": 0.98, + "learning_rate": 1.7284954762681123e-05, + "loss": 2.1398, + "step": 4819 + }, + { + "epoch": 0.98, + "learning_rate": 1.7283771341308412e-05, + "loss": 2.1304, + "step": 4820 + }, + { + "epoch": 0.98, + "learning_rate": 1.7282587702610534e-05, + "loss": 2.1596, + "step": 4821 + }, + { + "epoch": 0.98, + "learning_rate": 1.72814038466228e-05, + "loss": 2.1524, + "step": 4822 + }, + { + "epoch": 0.98, + "learning_rate": 1.7280219773380525e-05, + "loss": 2.0928, + "step": 4823 + }, + { + "epoch": 0.98, + "learning_rate": 1.7279035482919044e-05, + "loss": 2.2324, + "step": 4824 + }, + { + "epoch": 0.98, + "learning_rate": 1.7277850975273697e-05, + "loss": 2.1422, + "step": 4825 + }, + { + "epoch": 0.98, + "learning_rate": 1.727666625047982e-05, + "loss": 2.0467, + "step": 4826 + }, + { + "epoch": 0.98, + "learning_rate": 1.7275481308572763e-05, + "loss": 2.1557, + "step": 4827 + }, + { + "epoch": 0.98, + "learning_rate": 1.727429614958788e-05, + "loss": 2.1379, + "step": 4828 + }, + { + "epoch": 0.98, + "learning_rate": 1.7273110773560537e-05, + "loss": 2.144, + "step": 4829 + }, + { + "epoch": 0.98, + "learning_rate": 1.7271925180526094e-05, + "loss": 2.0931, + "step": 4830 + }, + { + "epoch": 0.98, + "learning_rate": 1.7270739370519933e-05, + "loss": 2.1201, + "step": 4831 + }, + { + "epoch": 0.98, + "learning_rate": 1.7269553343577434e-05, + "loss": 2.0725, + "step": 4832 + }, + { + "epoch": 0.98, + "learning_rate": 1.726836709973398e-05, + "loss": 2.1476, + "step": 4833 + }, + { + "epoch": 0.98, + "learning_rate": 1.7267180639024967e-05, + "loss": 2.1467, + "step": 4834 + }, + { + "epoch": 0.98, + "learning_rate": 1.72659939614858e-05, + "loss": 2.0825, + "step": 4835 + }, + { + "epoch": 0.98, + "learning_rate": 1.7264807067151874e-05, + "loss": 2.1779, + "step": 4836 + }, + { + "epoch": 0.98, + "learning_rate": 1.7263619956058614e-05, + "loss": 2.1532, + "step": 4837 + }, + { + "epoch": 0.98, + "learning_rate": 1.7262432628241433e-05, + "loss": 2.136, + "step": 4838 + }, + { + "epoch": 0.98, + "learning_rate": 1.7261245083735763e-05, + "loss": 2.1406, + "step": 4839 + }, + { + "epoch": 0.98, + "learning_rate": 1.7260057322577035e-05, + "loss": 2.0938, + "step": 4840 + }, + { + "epoch": 0.98, + "learning_rate": 1.725886934480068e-05, + "loss": 2.1634, + "step": 4841 + }, + { + "epoch": 0.98, + "learning_rate": 1.7257681150442152e-05, + "loss": 2.1626, + "step": 4842 + }, + { + "epoch": 0.98, + "learning_rate": 1.7256492739536902e-05, + "loss": 2.1293, + "step": 4843 + }, + { + "epoch": 0.98, + "learning_rate": 1.7255304112120384e-05, + "loss": 2.1539, + "step": 4844 + }, + { + "epoch": 0.98, + "learning_rate": 1.7254115268228073e-05, + "loss": 2.1333, + "step": 4845 + }, + { + "epoch": 0.98, + "learning_rate": 1.7252926207895428e-05, + "loss": 2.2026, + "step": 4846 + }, + { + "epoch": 0.98, + "learning_rate": 1.7251736931157933e-05, + "loss": 2.1306, + "step": 4847 + }, + { + "epoch": 0.98, + "learning_rate": 1.7250547438051073e-05, + "loss": 2.1631, + "step": 4848 + }, + { + "epoch": 0.98, + "learning_rate": 1.724935772861034e-05, + "loss": 2.1745, + "step": 4849 + }, + { + "epoch": 0.98, + "learning_rate": 1.7248167802871226e-05, + "loss": 2.0974, + "step": 4850 + }, + { + "epoch": 0.98, + "learning_rate": 1.7246977660869237e-05, + "loss": 2.1482, + "step": 4851 + }, + { + "epoch": 0.98, + "learning_rate": 1.7245787302639886e-05, + "loss": 2.1406, + "step": 4852 + }, + { + "epoch": 0.99, + "learning_rate": 1.7244596728218687e-05, + "loss": 2.0936, + "step": 4853 + }, + { + "epoch": 0.99, + "learning_rate": 1.7243405937641165e-05, + "loss": 2.0435, + "step": 4854 + }, + { + "epoch": 0.99, + "learning_rate": 1.7242214930942847e-05, + "loss": 2.1779, + "step": 4855 + }, + { + "epoch": 0.99, + "learning_rate": 1.724102370815927e-05, + "loss": 2.1235, + "step": 4856 + }, + { + "epoch": 0.99, + "learning_rate": 1.7239832269325972e-05, + "loss": 2.0972, + "step": 4857 + }, + { + "epoch": 0.99, + "learning_rate": 1.723864061447851e-05, + "loss": 2.0788, + "step": 4858 + }, + { + "epoch": 0.99, + "learning_rate": 1.7237448743652437e-05, + "loss": 2.1582, + "step": 4859 + }, + { + "epoch": 0.99, + "learning_rate": 1.7236256656883313e-05, + "loss": 2.1954, + "step": 4860 + }, + { + "epoch": 0.99, + "learning_rate": 1.7235064354206704e-05, + "loss": 2.1002, + "step": 4861 + }, + { + "epoch": 0.99, + "learning_rate": 1.7233871835658188e-05, + "loss": 2.1781, + "step": 4862 + }, + { + "epoch": 0.99, + "learning_rate": 1.7232679101273344e-05, + "loss": 2.1129, + "step": 4863 + }, + { + "epoch": 0.99, + "learning_rate": 1.723148615108776e-05, + "loss": 2.1576, + "step": 4864 + }, + { + "epoch": 0.99, + "learning_rate": 1.7230292985137036e-05, + "loss": 2.1249, + "step": 4865 + }, + { + "epoch": 0.99, + "learning_rate": 1.722909960345676e-05, + "loss": 2.1805, + "step": 4866 + }, + { + "epoch": 0.99, + "learning_rate": 1.722790600608255e-05, + "loss": 2.1429, + "step": 4867 + }, + { + "epoch": 0.99, + "learning_rate": 1.7226712193050015e-05, + "loss": 2.1247, + "step": 4868 + }, + { + "epoch": 0.99, + "learning_rate": 1.7225518164394772e-05, + "loss": 2.0683, + "step": 4869 + }, + { + "epoch": 0.99, + "learning_rate": 1.7224323920152453e-05, + "loss": 2.0908, + "step": 4870 + }, + { + "epoch": 0.99, + "learning_rate": 1.7223129460358684e-05, + "loss": 2.1852, + "step": 4871 + }, + { + "epoch": 0.99, + "learning_rate": 1.7221934785049107e-05, + "loss": 2.1685, + "step": 4872 + }, + { + "epoch": 0.99, + "learning_rate": 1.722073989425937e-05, + "loss": 2.1608, + "step": 4873 + }, + { + "epoch": 0.99, + "learning_rate": 1.7219544788025122e-05, + "loss": 2.1225, + "step": 4874 + }, + { + "epoch": 0.99, + "learning_rate": 1.7218349466382024e-05, + "loss": 2.1491, + "step": 4875 + }, + { + "epoch": 0.99, + "learning_rate": 1.7217153929365732e-05, + "loss": 2.2005, + "step": 4876 + }, + { + "epoch": 0.99, + "learning_rate": 1.721595817701193e-05, + "loss": 2.1161, + "step": 4877 + }, + { + "epoch": 0.99, + "learning_rate": 1.7214762209356284e-05, + "loss": 2.1858, + "step": 4878 + }, + { + "epoch": 0.99, + "learning_rate": 1.7213566026434486e-05, + "loss": 2.1198, + "step": 4879 + }, + { + "epoch": 0.99, + "learning_rate": 1.721236962828223e-05, + "loss": 2.168, + "step": 4880 + }, + { + "epoch": 0.99, + "learning_rate": 1.7211173014935195e-05, + "loss": 2.1242, + "step": 4881 + }, + { + "epoch": 0.99, + "learning_rate": 1.72099761864291e-05, + "loss": 2.1559, + "step": 4882 + }, + { + "epoch": 0.99, + "learning_rate": 1.720877914279965e-05, + "loss": 2.152, + "step": 4883 + }, + { + "epoch": 0.99, + "learning_rate": 1.7207581884082564e-05, + "loss": 2.1541, + "step": 4884 + }, + { + "epoch": 0.99, + "learning_rate": 1.720638441031356e-05, + "loss": 2.1107, + "step": 4885 + }, + { + "epoch": 0.99, + "learning_rate": 1.7205186721528368e-05, + "loss": 2.1396, + "step": 4886 + }, + { + "epoch": 0.99, + "learning_rate": 1.7203988817762725e-05, + "loss": 2.1051, + "step": 4887 + }, + { + "epoch": 0.99, + "learning_rate": 1.720279069905237e-05, + "loss": 2.1357, + "step": 4888 + }, + { + "epoch": 0.99, + "learning_rate": 1.7201592365433052e-05, + "loss": 2.1567, + "step": 4889 + }, + { + "epoch": 0.99, + "learning_rate": 1.720039381694053e-05, + "loss": 2.1404, + "step": 4890 + }, + { + "epoch": 0.99, + "learning_rate": 1.719919505361056e-05, + "loss": 2.1305, + "step": 4891 + }, + { + "epoch": 0.99, + "learning_rate": 1.7197996075478912e-05, + "loss": 2.0875, + "step": 4892 + }, + { + "epoch": 0.99, + "learning_rate": 1.7196796882581358e-05, + "loss": 2.1321, + "step": 4893 + }, + { + "epoch": 0.99, + "learning_rate": 1.7195597474953678e-05, + "loss": 2.1731, + "step": 4894 + }, + { + "epoch": 0.99, + "learning_rate": 1.7194397852631657e-05, + "loss": 2.0406, + "step": 4895 + }, + { + "epoch": 0.99, + "learning_rate": 1.7193198015651097e-05, + "loss": 2.1246, + "step": 4896 + }, + { + "epoch": 0.99, + "learning_rate": 1.7191997964047783e-05, + "loss": 2.1564, + "step": 4897 + }, + { + "epoch": 0.99, + "learning_rate": 1.7190797697857533e-05, + "loss": 2.1586, + "step": 4898 + }, + { + "epoch": 0.99, + "learning_rate": 1.7189597217116155e-05, + "loss": 2.136, + "step": 4899 + }, + { + "epoch": 0.99, + "learning_rate": 1.7188396521859467e-05, + "loss": 2.0774, + "step": 4900 + }, + { + "epoch": 0.99, + "learning_rate": 1.7187195612123294e-05, + "loss": 2.1697, + "step": 4901 + }, + { + "epoch": 1.0, + "learning_rate": 1.7185994487943468e-05, + "loss": 2.0564, + "step": 4902 + }, + { + "epoch": 1.0, + "learning_rate": 1.7184793149355828e-05, + "loss": 2.1245, + "step": 4903 + }, + { + "epoch": 1.0, + "learning_rate": 1.7183591596396218e-05, + "loss": 2.1279, + "step": 4904 + }, + { + "epoch": 1.0, + "learning_rate": 1.7182389829100484e-05, + "loss": 2.1642, + "step": 4905 + }, + { + "epoch": 1.0, + "learning_rate": 1.7181187847504492e-05, + "loss": 2.1541, + "step": 4906 + }, + { + "epoch": 1.0, + "learning_rate": 1.7179985651644097e-05, + "loss": 2.0786, + "step": 4907 + }, + { + "epoch": 1.0, + "learning_rate": 1.717878324155517e-05, + "loss": 2.0606, + "step": 4908 + }, + { + "epoch": 1.0, + "learning_rate": 1.717758061727359e-05, + "loss": 2.1169, + "step": 4909 + }, + { + "epoch": 1.0, + "learning_rate": 1.717637777883524e-05, + "loss": 2.2114, + "step": 4910 + }, + { + "epoch": 1.0, + "learning_rate": 1.7175174726276007e-05, + "loss": 2.1624, + "step": 4911 + }, + { + "epoch": 1.0, + "learning_rate": 1.717397145963179e-05, + "loss": 2.1298, + "step": 4912 + }, + { + "epoch": 1.0, + "learning_rate": 1.717276797893848e-05, + "loss": 2.1704, + "step": 4913 + }, + { + "epoch": 1.0, + "learning_rate": 1.7171564284232e-05, + "loss": 2.1132, + "step": 4914 + }, + { + "epoch": 1.0, + "learning_rate": 1.7170360375548254e-05, + "loss": 2.1721, + "step": 4915 + }, + { + "epoch": 1.0, + "learning_rate": 1.716915625292317e-05, + "loss": 2.0873, + "step": 4916 + }, + { + "epoch": 1.0, + "learning_rate": 1.7167951916392665e-05, + "loss": 2.1747, + "step": 4917 + }, + { + "epoch": 1.0, + "learning_rate": 1.7166747365992685e-05, + "loss": 2.0712, + "step": 4918 + }, + { + "epoch": 1.0, + "learning_rate": 1.7165542601759164e-05, + "loss": 2.0899, + "step": 4919 + }, + { + "epoch": 1.0, + "learning_rate": 1.7164337623728044e-05, + "loss": 2.1993, + "step": 4920 + }, + { + "epoch": 1.0, + "learning_rate": 1.716313243193529e-05, + "loss": 2.1281, + "step": 4921 + }, + { + "epoch": 1.0, + "learning_rate": 1.716192702641685e-05, + "loss": 2.0287, + "step": 4922 + }, + { + "epoch": 1.0, + "learning_rate": 1.7160721407208688e-05, + "loss": 2.136, + "step": 4923 + }, + { + "epoch": 1.0, + "learning_rate": 1.715951557434679e-05, + "loss": 2.1442, + "step": 4924 + }, + { + "epoch": 1.0, + "learning_rate": 1.7158309527867117e-05, + "loss": 2.1751, + "step": 4925 + }, + { + "epoch": 1.0, + "learning_rate": 1.7157103267805672e-05, + "loss": 2.1077, + "step": 4926 + }, + { + "epoch": 1.0, + "learning_rate": 1.715589679419843e-05, + "loss": 2.138, + "step": 4927 + }, + { + "epoch": 1.0, + "learning_rate": 1.7154690107081397e-05, + "loss": 2.2173, + "step": 4928 + }, + { + "epoch": 1.0, + "learning_rate": 1.715348320649057e-05, + "loss": 2.0554, + "step": 4929 + }, + { + "epoch": 1.0, + "learning_rate": 1.7152276092461967e-05, + "loss": 2.1941, + "step": 4930 + }, + { + "epoch": 1.0, + "learning_rate": 1.7151068765031605e-05, + "loss": 2.0852, + "step": 4931 + }, + { + "epoch": 1.0, + "learning_rate": 1.7149861224235496e-05, + "loss": 2.1236, + "step": 4932 + }, + { + "epoch": 1.0, + "learning_rate": 1.714865347010968e-05, + "loss": 2.1161, + "step": 4933 + }, + { + "epoch": 1.0, + "learning_rate": 1.7147445502690193e-05, + "loss": 2.0671, + "step": 4934 + }, + { + "epoch": 1.0, + "learning_rate": 1.714623732201307e-05, + "loss": 2.1468, + "step": 4935 + }, + { + "epoch": 1.0, + "learning_rate": 1.714502892811436e-05, + "loss": 2.1133, + "step": 4936 + }, + { + "epoch": 1.0, + "learning_rate": 1.714382032103012e-05, + "loss": 2.0339, + "step": 4937 + }, + { + "epoch": 1.0, + "learning_rate": 1.7142611500796417e-05, + "loss": 2.1479, + "step": 4938 + }, + { + "epoch": 1.0, + "learning_rate": 1.7141402467449308e-05, + "loss": 2.1538, + "step": 4939 + }, + { + "epoch": 1.0, + "learning_rate": 1.7140193221024872e-05, + "loss": 2.1313, + "step": 4940 + }, + { + "epoch": 1.0, + "learning_rate": 1.7138983761559188e-05, + "loss": 2.1264, + "step": 4941 + }, + { + "epoch": 1.0, + "learning_rate": 1.713777408908835e-05, + "loss": 2.2068, + "step": 4942 + }, + { + "epoch": 1.0, + "learning_rate": 1.7136564203648434e-05, + "loss": 2.1212, + "step": 4943 + }, + { + "epoch": 1.0, + "learning_rate": 1.7135354105275556e-05, + "loss": 2.1121, + "step": 4944 + }, + { + "epoch": 1.0, + "learning_rate": 1.7134143794005812e-05, + "loss": 2.0655, + "step": 4945 + }, + { + "epoch": 1.0, + "learning_rate": 1.713293326987532e-05, + "loss": 2.1531, + "step": 4946 + }, + { + "epoch": 1.0, + "learning_rate": 1.7131722532920192e-05, + "loss": 2.1139, + "step": 4947 + }, + { + "epoch": 1.0, + "learning_rate": 1.7130511583176558e-05, + "loss": 2.1471, + "step": 4948 + }, + { + "epoch": 1.0, + "learning_rate": 1.7129300420680545e-05, + "loss": 2.1535, + "step": 4949 + }, + { + "epoch": 1.0, + "learning_rate": 1.7128089045468294e-05, + "loss": 2.1737, + "step": 4950 + }, + { + "epoch": 1.0, + "learning_rate": 1.7126877457575947e-05, + "loss": 2.1792, + "step": 4951 + }, + { + "epoch": 1.01, + "learning_rate": 1.712566565703965e-05, + "loss": 2.0948, + "step": 4952 + }, + { + "epoch": 1.01, + "learning_rate": 1.7124453643895568e-05, + "loss": 2.1865, + "step": 4953 + }, + { + "epoch": 1.01, + "learning_rate": 1.7123241418179855e-05, + "loss": 2.1411, + "step": 4954 + }, + { + "epoch": 1.01, + "learning_rate": 1.7122028979928686e-05, + "loss": 2.184, + "step": 4955 + }, + { + "epoch": 1.01, + "learning_rate": 1.7120816329178234e-05, + "loss": 2.011, + "step": 4956 + }, + { + "epoch": 1.01, + "learning_rate": 1.7119603465964686e-05, + "loss": 2.1305, + "step": 4957 + }, + { + "epoch": 1.01, + "learning_rate": 1.711839039032422e-05, + "loss": 2.2091, + "step": 4958 + }, + { + "epoch": 1.01, + "learning_rate": 1.7117177102293038e-05, + "loss": 2.1261, + "step": 4959 + }, + { + "epoch": 1.01, + "learning_rate": 1.7115963601907334e-05, + "loss": 2.1671, + "step": 4960 + }, + { + "epoch": 1.01, + "learning_rate": 1.7114749889203324e-05, + "loss": 2.132, + "step": 4961 + }, + { + "epoch": 1.01, + "learning_rate": 1.7113535964217215e-05, + "loss": 2.1803, + "step": 4962 + }, + { + "epoch": 1.01, + "learning_rate": 1.711232182698523e-05, + "loss": 2.1949, + "step": 4963 + }, + { + "epoch": 1.01, + "learning_rate": 1.711110747754359e-05, + "loss": 2.1432, + "step": 4964 + }, + { + "epoch": 1.01, + "learning_rate": 1.7109892915928535e-05, + "loss": 2.1607, + "step": 4965 + }, + { + "epoch": 1.01, + "learning_rate": 1.7108678142176304e-05, + "loss": 2.1334, + "step": 4966 + }, + { + "epoch": 1.01, + "learning_rate": 1.710746315632313e-05, + "loss": 2.1095, + "step": 4967 + }, + { + "epoch": 1.01, + "learning_rate": 1.7106247958405275e-05, + "loss": 2.133, + "step": 4968 + }, + { + "epoch": 1.01, + "learning_rate": 1.7105032548458994e-05, + "loss": 2.1679, + "step": 4969 + }, + { + "epoch": 1.01, + "learning_rate": 1.7103816926520553e-05, + "loss": 2.0922, + "step": 4970 + }, + { + "epoch": 1.01, + "learning_rate": 1.710260109262622e-05, + "loss": 2.0347, + "step": 4971 + }, + { + "epoch": 1.01, + "learning_rate": 1.7101385046812273e-05, + "loss": 2.1448, + "step": 4972 + }, + { + "epoch": 1.01, + "learning_rate": 1.710016878911499e-05, + "loss": 2.1668, + "step": 4973 + }, + { + "epoch": 1.01, + "learning_rate": 1.7098952319570673e-05, + "loss": 2.173, + "step": 4974 + }, + { + "epoch": 1.01, + "learning_rate": 1.7097735638215605e-05, + "loss": 2.1659, + "step": 4975 + }, + { + "epoch": 1.01, + "learning_rate": 1.7096518745086092e-05, + "loss": 2.1546, + "step": 4976 + }, + { + "epoch": 1.01, + "learning_rate": 1.7095301640218444e-05, + "loss": 2.1627, + "step": 4977 + }, + { + "epoch": 1.01, + "learning_rate": 1.709408432364897e-05, + "loss": 2.1201, + "step": 4978 + }, + { + "epoch": 1.01, + "learning_rate": 1.7092866795414e-05, + "loss": 2.1466, + "step": 4979 + }, + { + "epoch": 1.01, + "learning_rate": 1.709164905554986e-05, + "loss": 2.1946, + "step": 4980 + }, + { + "epoch": 1.01, + "learning_rate": 1.7090431104092875e-05, + "loss": 2.118, + "step": 4981 + }, + { + "epoch": 1.01, + "learning_rate": 1.7089212941079393e-05, + "loss": 2.186, + "step": 4982 + }, + { + "epoch": 1.01, + "learning_rate": 1.7087994566545752e-05, + "loss": 2.165, + "step": 4983 + }, + { + "epoch": 1.01, + "learning_rate": 1.7086775980528313e-05, + "loss": 2.1846, + "step": 4984 + }, + { + "epoch": 1.01, + "learning_rate": 1.7085557183063434e-05, + "loss": 2.1494, + "step": 4985 + }, + { + "epoch": 1.01, + "learning_rate": 1.7084338174187476e-05, + "loss": 2.181, + "step": 4986 + }, + { + "epoch": 1.01, + "learning_rate": 1.708311895393681e-05, + "loss": 2.1466, + "step": 4987 + }, + { + "epoch": 1.01, + "learning_rate": 1.708189952234782e-05, + "loss": 2.1784, + "step": 4988 + }, + { + "epoch": 1.01, + "learning_rate": 1.7080679879456884e-05, + "loss": 2.1171, + "step": 4989 + }, + { + "epoch": 1.01, + "learning_rate": 1.7079460025300395e-05, + "loss": 2.094, + "step": 4990 + }, + { + "epoch": 1.01, + "learning_rate": 1.707823995991475e-05, + "loss": 2.0735, + "step": 4991 + }, + { + "epoch": 1.01, + "learning_rate": 1.707701968333635e-05, + "loss": 2.1284, + "step": 4992 + }, + { + "epoch": 1.01, + "learning_rate": 1.7075799195601606e-05, + "loss": 2.2006, + "step": 4993 + }, + { + "epoch": 1.01, + "learning_rate": 1.707457849674693e-05, + "loss": 2.1911, + "step": 4994 + }, + { + "epoch": 1.01, + "learning_rate": 1.7073357586808753e-05, + "loss": 2.1058, + "step": 4995 + }, + { + "epoch": 1.01, + "learning_rate": 1.7072136465823493e-05, + "loss": 2.1989, + "step": 4996 + }, + { + "epoch": 1.01, + "learning_rate": 1.7070915133827588e-05, + "loss": 2.1157, + "step": 4997 + }, + { + "epoch": 1.01, + "learning_rate": 1.7069693590857483e-05, + "loss": 2.1362, + "step": 4998 + }, + { + "epoch": 1.01, + "learning_rate": 1.7068471836949618e-05, + "loss": 2.1178, + "step": 4999 + }, + { + "epoch": 1.01, + "learning_rate": 1.706724987214045e-05, + "loss": 2.1472, + "step": 5000 + }, + { + "epoch": 1.02, + "learning_rate": 1.706602769646644e-05, + "loss": 2.1109, + "step": 5001 + }, + { + "epoch": 1.02, + "learning_rate": 1.706480530996405e-05, + "loss": 2.0548, + "step": 5002 + }, + { + "epoch": 1.02, + "learning_rate": 1.7063582712669755e-05, + "loss": 2.1181, + "step": 5003 + }, + { + "epoch": 1.02, + "learning_rate": 1.7062359904620033e-05, + "loss": 2.1025, + "step": 5004 + }, + { + "epoch": 1.02, + "learning_rate": 1.706113688585137e-05, + "loss": 2.1678, + "step": 5005 + }, + { + "epoch": 1.02, + "learning_rate": 1.7059913656400256e-05, + "loss": 2.13, + "step": 5006 + }, + { + "epoch": 1.02, + "learning_rate": 1.7058690216303184e-05, + "loss": 2.1328, + "step": 5007 + }, + { + "epoch": 1.02, + "learning_rate": 1.7057466565596666e-05, + "loss": 2.1379, + "step": 5008 + }, + { + "epoch": 1.02, + "learning_rate": 1.705624270431721e-05, + "loss": 2.2064, + "step": 5009 + }, + { + "epoch": 1.02, + "learning_rate": 1.7055018632501326e-05, + "loss": 2.0994, + "step": 5010 + }, + { + "epoch": 1.02, + "learning_rate": 1.705379435018554e-05, + "loss": 2.1359, + "step": 5011 + }, + { + "epoch": 1.02, + "learning_rate": 1.7052569857406385e-05, + "loss": 2.1459, + "step": 5012 + }, + { + "epoch": 1.02, + "learning_rate": 1.705134515420039e-05, + "loss": 2.0692, + "step": 5013 + }, + { + "epoch": 1.02, + "learning_rate": 1.70501202406041e-05, + "loss": 2.1303, + "step": 5014 + }, + { + "epoch": 1.02, + "learning_rate": 1.704889511665406e-05, + "loss": 2.1058, + "step": 5015 + }, + { + "epoch": 1.02, + "learning_rate": 1.7047669782386828e-05, + "loss": 2.1384, + "step": 5016 + }, + { + "epoch": 1.02, + "learning_rate": 1.7046444237838958e-05, + "loss": 2.1447, + "step": 5017 + }, + { + "epoch": 1.02, + "learning_rate": 1.7045218483047023e-05, + "loss": 2.0509, + "step": 5018 + }, + { + "epoch": 1.02, + "learning_rate": 1.704399251804759e-05, + "loss": 2.1827, + "step": 5019 + }, + { + "epoch": 1.02, + "learning_rate": 1.7042766342877242e-05, + "loss": 2.1084, + "step": 5020 + }, + { + "epoch": 1.02, + "learning_rate": 1.7041539957572562e-05, + "loss": 2.1224, + "step": 5021 + }, + { + "epoch": 1.02, + "learning_rate": 1.7040313362170145e-05, + "loss": 2.153, + "step": 5022 + }, + { + "epoch": 1.02, + "learning_rate": 1.7039086556706586e-05, + "loss": 2.0406, + "step": 5023 + }, + { + "epoch": 1.02, + "learning_rate": 1.7037859541218488e-05, + "loss": 2.1783, + "step": 5024 + }, + { + "epoch": 1.02, + "learning_rate": 1.7036632315742464e-05, + "loss": 2.1303, + "step": 5025 + }, + { + "epoch": 1.02, + "learning_rate": 1.703540488031513e-05, + "loss": 2.0978, + "step": 5026 + }, + { + "epoch": 1.02, + "learning_rate": 1.7034177234973107e-05, + "loss": 2.1543, + "step": 5027 + }, + { + "epoch": 1.02, + "learning_rate": 1.7032949379753024e-05, + "loss": 2.1222, + "step": 5028 + }, + { + "epoch": 1.02, + "learning_rate": 1.7031721314691516e-05, + "loss": 2.1633, + "step": 5029 + }, + { + "epoch": 1.02, + "learning_rate": 1.7030493039825233e-05, + "loss": 2.1141, + "step": 5030 + }, + { + "epoch": 1.02, + "learning_rate": 1.702926455519081e-05, + "loss": 2.1547, + "step": 5031 + }, + { + "epoch": 1.02, + "learning_rate": 1.702803586082491e-05, + "loss": 2.0895, + "step": 5032 + }, + { + "epoch": 1.02, + "learning_rate": 1.7026806956764193e-05, + "loss": 2.1368, + "step": 5033 + }, + { + "epoch": 1.02, + "learning_rate": 1.702557784304532e-05, + "loss": 2.1509, + "step": 5034 + }, + { + "epoch": 1.02, + "learning_rate": 1.7024348519704973e-05, + "loss": 2.2233, + "step": 5035 + }, + { + "epoch": 1.02, + "learning_rate": 1.702311898677982e-05, + "loss": 2.1799, + "step": 5036 + }, + { + "epoch": 1.02, + "learning_rate": 1.7021889244306552e-05, + "loss": 2.1272, + "step": 5037 + }, + { + "epoch": 1.02, + "learning_rate": 1.7020659292321867e-05, + "loss": 2.1038, + "step": 5038 + }, + { + "epoch": 1.02, + "learning_rate": 1.701942913086245e-05, + "loss": 2.1293, + "step": 5039 + }, + { + "epoch": 1.02, + "learning_rate": 1.7018198759965018e-05, + "loss": 2.0972, + "step": 5040 + }, + { + "epoch": 1.02, + "learning_rate": 1.7016968179666268e-05, + "loss": 2.1251, + "step": 5041 + }, + { + "epoch": 1.02, + "learning_rate": 1.7015737390002932e-05, + "loss": 2.0893, + "step": 5042 + }, + { + "epoch": 1.02, + "learning_rate": 1.701450639101172e-05, + "loss": 2.0579, + "step": 5043 + }, + { + "epoch": 1.02, + "learning_rate": 1.701327518272937e-05, + "loss": 2.0581, + "step": 5044 + }, + { + "epoch": 1.02, + "learning_rate": 1.701204376519261e-05, + "loss": 2.1639, + "step": 5045 + }, + { + "epoch": 1.02, + "learning_rate": 1.701081213843819e-05, + "loss": 2.1944, + "step": 5046 + }, + { + "epoch": 1.02, + "learning_rate": 1.700958030250285e-05, + "loss": 2.1203, + "step": 5047 + }, + { + "epoch": 1.02, + "learning_rate": 1.700834825742335e-05, + "loss": 2.0676, + "step": 5048 + }, + { + "epoch": 1.02, + "learning_rate": 1.7007116003236447e-05, + "loss": 2.2014, + "step": 5049 + }, + { + "epoch": 1.03, + "learning_rate": 1.700588353997891e-05, + "loss": 2.184, + "step": 5050 + }, + { + "epoch": 1.03, + "learning_rate": 1.700465086768751e-05, + "loss": 2.1272, + "step": 5051 + }, + { + "epoch": 1.03, + "learning_rate": 1.7003417986399022e-05, + "loss": 2.1349, + "step": 5052 + }, + { + "epoch": 1.03, + "learning_rate": 1.700218489615024e-05, + "loss": 2.1307, + "step": 5053 + }, + { + "epoch": 1.03, + "learning_rate": 1.7000951596977953e-05, + "loss": 2.1711, + "step": 5054 + }, + { + "epoch": 1.03, + "learning_rate": 1.6999718088918956e-05, + "loss": 2.1576, + "step": 5055 + }, + { + "epoch": 1.03, + "learning_rate": 1.6998484372010056e-05, + "loss": 2.1812, + "step": 5056 + }, + { + "epoch": 1.03, + "learning_rate": 1.699725044628806e-05, + "loss": 2.1156, + "step": 5057 + }, + { + "epoch": 1.03, + "learning_rate": 1.6996016311789784e-05, + "loss": 2.1234, + "step": 5058 + }, + { + "epoch": 1.03, + "learning_rate": 1.6994781968552058e-05, + "loss": 2.1588, + "step": 5059 + }, + { + "epoch": 1.03, + "learning_rate": 1.6993547416611703e-05, + "loss": 2.112, + "step": 5060 + }, + { + "epoch": 1.03, + "learning_rate": 1.699231265600556e-05, + "loss": 2.1345, + "step": 5061 + }, + { + "epoch": 1.03, + "learning_rate": 1.6991077686770462e-05, + "loss": 2.1685, + "step": 5062 + }, + { + "epoch": 1.03, + "learning_rate": 1.698984250894327e-05, + "loss": 2.2145, + "step": 5063 + }, + { + "epoch": 1.03, + "learning_rate": 1.6988607122560824e-05, + "loss": 2.1428, + "step": 5064 + }, + { + "epoch": 1.03, + "learning_rate": 1.6987371527659994e-05, + "loss": 2.1884, + "step": 5065 + }, + { + "epoch": 1.03, + "learning_rate": 1.6986135724277638e-05, + "loss": 2.1563, + "step": 5066 + }, + { + "epoch": 1.03, + "learning_rate": 1.698489971245064e-05, + "loss": 2.1747, + "step": 5067 + }, + { + "epoch": 1.03, + "learning_rate": 1.698366349221587e-05, + "loss": 2.1717, + "step": 5068 + }, + { + "epoch": 1.03, + "learning_rate": 1.698242706361021e-05, + "loss": 2.2212, + "step": 5069 + }, + { + "epoch": 1.03, + "learning_rate": 1.6981190426670562e-05, + "loss": 2.1425, + "step": 5070 + }, + { + "epoch": 1.03, + "learning_rate": 1.6979953581433815e-05, + "loss": 2.2214, + "step": 5071 + }, + { + "epoch": 1.03, + "learning_rate": 1.6978716527936874e-05, + "loss": 2.1666, + "step": 5072 + }, + { + "epoch": 1.03, + "learning_rate": 1.6977479266216655e-05, + "loss": 2.1496, + "step": 5073 + }, + { + "epoch": 1.03, + "learning_rate": 1.6976241796310064e-05, + "loss": 2.1266, + "step": 5074 + }, + { + "epoch": 1.03, + "learning_rate": 1.697500411825403e-05, + "loss": 2.134, + "step": 5075 + }, + { + "epoch": 1.03, + "learning_rate": 1.697376623208548e-05, + "loss": 2.1048, + "step": 5076 + }, + { + "epoch": 1.03, + "learning_rate": 1.6972528137841348e-05, + "loss": 2.1313, + "step": 5077 + }, + { + "epoch": 1.03, + "learning_rate": 1.6971289835558576e-05, + "loss": 2.1135, + "step": 5078 + }, + { + "epoch": 1.03, + "learning_rate": 1.697005132527411e-05, + "loss": 2.1266, + "step": 5079 + }, + { + "epoch": 1.03, + "learning_rate": 1.6968812607024903e-05, + "loss": 2.1727, + "step": 5080 + }, + { + "epoch": 1.03, + "learning_rate": 1.696757368084792e-05, + "loss": 2.1072, + "step": 5081 + }, + { + "epoch": 1.03, + "learning_rate": 1.6966334546780117e-05, + "loss": 2.1933, + "step": 5082 + }, + { + "epoch": 1.03, + "learning_rate": 1.6965095204858473e-05, + "loss": 2.039, + "step": 5083 + }, + { + "epoch": 1.03, + "learning_rate": 1.6963855655119963e-05, + "loss": 2.0768, + "step": 5084 + }, + { + "epoch": 1.03, + "learning_rate": 1.6962615897601577e-05, + "loss": 2.1548, + "step": 5085 + }, + { + "epoch": 1.03, + "learning_rate": 1.6961375932340293e-05, + "loss": 2.1069, + "step": 5086 + }, + { + "epoch": 1.03, + "learning_rate": 1.696013575937312e-05, + "loss": 2.1494, + "step": 5087 + }, + { + "epoch": 1.03, + "learning_rate": 1.6958895378737058e-05, + "loss": 2.0923, + "step": 5088 + }, + { + "epoch": 1.03, + "learning_rate": 1.6957654790469113e-05, + "loss": 2.1649, + "step": 5089 + }, + { + "epoch": 1.03, + "learning_rate": 1.69564139946063e-05, + "loss": 2.1941, + "step": 5090 + }, + { + "epoch": 1.03, + "learning_rate": 1.6955172991185646e-05, + "loss": 2.1147, + "step": 5091 + }, + { + "epoch": 1.03, + "learning_rate": 1.6953931780244174e-05, + "loss": 2.1594, + "step": 5092 + }, + { + "epoch": 1.03, + "learning_rate": 1.695269036181892e-05, + "loss": 2.1418, + "step": 5093 + }, + { + "epoch": 1.03, + "learning_rate": 1.6951448735946924e-05, + "loss": 2.0711, + "step": 5094 + }, + { + "epoch": 1.03, + "learning_rate": 1.695020690266523e-05, + "loss": 2.133, + "step": 5095 + }, + { + "epoch": 1.03, + "learning_rate": 1.694896486201089e-05, + "loss": 2.1709, + "step": 5096 + }, + { + "epoch": 1.03, + "learning_rate": 1.694772261402097e-05, + "loss": 2.1497, + "step": 5097 + }, + { + "epoch": 1.03, + "learning_rate": 1.6946480158732524e-05, + "loss": 2.1684, + "step": 5098 + }, + { + "epoch": 1.04, + "learning_rate": 1.694523749618263e-05, + "loss": 2.1609, + "step": 5099 + }, + { + "epoch": 1.04, + "learning_rate": 1.6943994626408365e-05, + "loss": 2.1541, + "step": 5100 + }, + { + "epoch": 1.04, + "learning_rate": 1.694275154944681e-05, + "loss": 2.1915, + "step": 5101 + }, + { + "epoch": 1.04, + "learning_rate": 1.6941508265335055e-05, + "loss": 2.1456, + "step": 5102 + }, + { + "epoch": 1.04, + "learning_rate": 1.6940264774110196e-05, + "loss": 2.058, + "step": 5103 + }, + { + "epoch": 1.04, + "learning_rate": 1.6939021075809336e-05, + "loss": 2.1198, + "step": 5104 + }, + { + "epoch": 1.04, + "learning_rate": 1.6937777170469585e-05, + "loss": 2.1328, + "step": 5105 + }, + { + "epoch": 1.04, + "learning_rate": 1.693653305812805e-05, + "loss": 2.1573, + "step": 5106 + }, + { + "epoch": 1.04, + "learning_rate": 1.693528873882186e-05, + "loss": 2.134, + "step": 5107 + }, + { + "epoch": 1.04, + "learning_rate": 1.6934044212588134e-05, + "loss": 2.1162, + "step": 5108 + }, + { + "epoch": 1.04, + "learning_rate": 1.693279947946401e-05, + "loss": 2.1336, + "step": 5109 + }, + { + "epoch": 1.04, + "learning_rate": 1.6931554539486627e-05, + "loss": 2.1643, + "step": 5110 + }, + { + "epoch": 1.04, + "learning_rate": 1.693030939269313e-05, + "loss": 2.1056, + "step": 5111 + }, + { + "epoch": 1.04, + "learning_rate": 1.692906403912067e-05, + "loss": 2.0684, + "step": 5112 + }, + { + "epoch": 1.04, + "learning_rate": 1.6927818478806397e-05, + "loss": 2.1736, + "step": 5113 + }, + { + "epoch": 1.04, + "learning_rate": 1.6926572711787486e-05, + "loss": 2.1311, + "step": 5114 + }, + { + "epoch": 1.04, + "learning_rate": 1.69253267381011e-05, + "loss": 2.0664, + "step": 5115 + }, + { + "epoch": 1.04, + "learning_rate": 1.6924080557784418e-05, + "loss": 2.1489, + "step": 5116 + }, + { + "epoch": 1.04, + "learning_rate": 1.692283417087462e-05, + "loss": 2.1302, + "step": 5117 + }, + { + "epoch": 1.04, + "learning_rate": 1.6921587577408894e-05, + "loss": 2.1012, + "step": 5118 + }, + { + "epoch": 1.04, + "learning_rate": 1.692034077742444e-05, + "loss": 2.0693, + "step": 5119 + }, + { + "epoch": 1.04, + "learning_rate": 1.691909377095845e-05, + "loss": 2.1255, + "step": 5120 + }, + { + "epoch": 1.04, + "learning_rate": 1.6917846558048143e-05, + "loss": 2.1573, + "step": 5121 + }, + { + "epoch": 1.04, + "learning_rate": 1.691659913873072e-05, + "loss": 2.0878, + "step": 5122 + }, + { + "epoch": 1.04, + "learning_rate": 1.6915351513043404e-05, + "loss": 2.1601, + "step": 5123 + }, + { + "epoch": 1.04, + "learning_rate": 1.691410368102342e-05, + "loss": 2.1169, + "step": 5124 + }, + { + "epoch": 1.04, + "learning_rate": 1.6912855642708003e-05, + "loss": 2.1458, + "step": 5125 + }, + { + "epoch": 1.04, + "learning_rate": 1.691160739813438e-05, + "loss": 2.1314, + "step": 5126 + }, + { + "epoch": 1.04, + "learning_rate": 1.691035894733981e-05, + "loss": 2.1864, + "step": 5127 + }, + { + "epoch": 1.04, + "learning_rate": 1.6909110290361533e-05, + "loss": 1.9995, + "step": 5128 + }, + { + "epoch": 1.04, + "learning_rate": 1.6907861427236813e-05, + "loss": 2.1103, + "step": 5129 + }, + { + "epoch": 1.04, + "learning_rate": 1.69066123580029e-05, + "loss": 2.1942, + "step": 5130 + }, + { + "epoch": 1.04, + "learning_rate": 1.690536308269707e-05, + "loss": 2.0541, + "step": 5131 + }, + { + "epoch": 1.04, + "learning_rate": 1.6904113601356603e-05, + "loss": 2.1813, + "step": 5132 + }, + { + "epoch": 1.04, + "learning_rate": 1.6902863914018768e-05, + "loss": 2.0796, + "step": 5133 + }, + { + "epoch": 1.04, + "learning_rate": 1.6901614020720854e-05, + "loss": 2.0825, + "step": 5134 + }, + { + "epoch": 1.04, + "learning_rate": 1.690036392150016e-05, + "loss": 2.1676, + "step": 5135 + }, + { + "epoch": 1.04, + "learning_rate": 1.6899113616393985e-05, + "loss": 2.1108, + "step": 5136 + }, + { + "epoch": 1.04, + "learning_rate": 1.689786310543963e-05, + "loss": 2.1485, + "step": 5137 + }, + { + "epoch": 1.04, + "learning_rate": 1.6896612388674407e-05, + "loss": 2.1749, + "step": 5138 + }, + { + "epoch": 1.04, + "learning_rate": 1.6895361466135632e-05, + "loss": 2.1558, + "step": 5139 + }, + { + "epoch": 1.04, + "learning_rate": 1.689411033786063e-05, + "loss": 2.142, + "step": 5140 + }, + { + "epoch": 1.04, + "learning_rate": 1.689285900388674e-05, + "loss": 2.1131, + "step": 5141 + }, + { + "epoch": 1.04, + "learning_rate": 1.6891607464251283e-05, + "loss": 2.1795, + "step": 5142 + }, + { + "epoch": 1.04, + "learning_rate": 1.6890355718991605e-05, + "loss": 2.1366, + "step": 5143 + }, + { + "epoch": 1.04, + "learning_rate": 1.6889103768145064e-05, + "loss": 2.1699, + "step": 5144 + }, + { + "epoch": 1.04, + "learning_rate": 1.6887851611749005e-05, + "loss": 2.0706, + "step": 5145 + }, + { + "epoch": 1.04, + "learning_rate": 1.6886599249840786e-05, + "loss": 2.0703, + "step": 5146 + }, + { + "epoch": 1.04, + "learning_rate": 1.6885346682457785e-05, + "loss": 2.2145, + "step": 5147 + }, + { + "epoch": 1.04, + "learning_rate": 1.6884093909637363e-05, + "loss": 2.0977, + "step": 5148 + }, + { + "epoch": 1.05, + "learning_rate": 1.6882840931416907e-05, + "loss": 2.2147, + "step": 5149 + }, + { + "epoch": 1.05, + "learning_rate": 1.6881587747833794e-05, + "loss": 2.1509, + "step": 5150 + }, + { + "epoch": 1.05, + "learning_rate": 1.6880334358925425e-05, + "loss": 2.1192, + "step": 5151 + }, + { + "epoch": 1.05, + "learning_rate": 1.6879080764729193e-05, + "loss": 2.1511, + "step": 5152 + }, + { + "epoch": 1.05, + "learning_rate": 1.6877826965282495e-05, + "loss": 2.0825, + "step": 5153 + }, + { + "epoch": 1.05, + "learning_rate": 1.687657296062275e-05, + "loss": 2.0899, + "step": 5154 + }, + { + "epoch": 1.05, + "learning_rate": 1.6875318750787372e-05, + "loss": 2.157, + "step": 5155 + }, + { + "epoch": 1.05, + "learning_rate": 1.6874064335813774e-05, + "loss": 2.133, + "step": 5156 + }, + { + "epoch": 1.05, + "learning_rate": 1.6872809715739395e-05, + "loss": 2.1261, + "step": 5157 + }, + { + "epoch": 1.05, + "learning_rate": 1.687155489060166e-05, + "loss": 2.1676, + "step": 5158 + }, + { + "epoch": 1.05, + "learning_rate": 1.6870299860438017e-05, + "loss": 2.1273, + "step": 5159 + }, + { + "epoch": 1.05, + "learning_rate": 1.686904462528591e-05, + "loss": 2.0411, + "step": 5160 + }, + { + "epoch": 1.05, + "learning_rate": 1.686778918518279e-05, + "loss": 2.1287, + "step": 5161 + }, + { + "epoch": 1.05, + "learning_rate": 1.686653354016611e-05, + "loss": 2.14, + "step": 5162 + }, + { + "epoch": 1.05, + "learning_rate": 1.6865277690273342e-05, + "loss": 2.0756, + "step": 5163 + }, + { + "epoch": 1.05, + "learning_rate": 1.6864021635541957e-05, + "loss": 2.1582, + "step": 5164 + }, + { + "epoch": 1.05, + "learning_rate": 1.6862765376009426e-05, + "loss": 2.1535, + "step": 5165 + }, + { + "epoch": 1.05, + "learning_rate": 1.6861508911713237e-05, + "loss": 2.1165, + "step": 5166 + }, + { + "epoch": 1.05, + "learning_rate": 1.6860252242690878e-05, + "loss": 2.1462, + "step": 5167 + }, + { + "epoch": 1.05, + "learning_rate": 1.6858995368979846e-05, + "loss": 2.1653, + "step": 5168 + }, + { + "epoch": 1.05, + "learning_rate": 1.6857738290617635e-05, + "loss": 2.1362, + "step": 5169 + }, + { + "epoch": 1.05, + "learning_rate": 1.6856481007641753e-05, + "loss": 2.1288, + "step": 5170 + }, + { + "epoch": 1.05, + "learning_rate": 1.6855223520089726e-05, + "loss": 2.1766, + "step": 5171 + }, + { + "epoch": 1.05, + "learning_rate": 1.685396582799906e-05, + "loss": 2.1179, + "step": 5172 + }, + { + "epoch": 1.05, + "learning_rate": 1.685270793140729e-05, + "loss": 2.1586, + "step": 5173 + }, + { + "epoch": 1.05, + "learning_rate": 1.6851449830351938e-05, + "loss": 2.1357, + "step": 5174 + }, + { + "epoch": 1.05, + "learning_rate": 1.6850191524870548e-05, + "loss": 2.1241, + "step": 5175 + }, + { + "epoch": 1.05, + "learning_rate": 1.6848933015000666e-05, + "loss": 2.0728, + "step": 5176 + }, + { + "epoch": 1.05, + "learning_rate": 1.6847674300779834e-05, + "loss": 2.0754, + "step": 5177 + }, + { + "epoch": 1.05, + "learning_rate": 1.6846415382245616e-05, + "loss": 2.137, + "step": 5178 + }, + { + "epoch": 1.05, + "learning_rate": 1.6845156259435572e-05, + "loss": 2.1982, + "step": 5179 + }, + { + "epoch": 1.05, + "learning_rate": 1.684389693238727e-05, + "loss": 2.1565, + "step": 5180 + }, + { + "epoch": 1.05, + "learning_rate": 1.6842637401138285e-05, + "loss": 2.1741, + "step": 5181 + }, + { + "epoch": 1.05, + "learning_rate": 1.6841377665726194e-05, + "loss": 2.0418, + "step": 5182 + }, + { + "epoch": 1.05, + "learning_rate": 1.6840117726188588e-05, + "loss": 2.0268, + "step": 5183 + }, + { + "epoch": 1.05, + "learning_rate": 1.683885758256306e-05, + "loss": 2.1896, + "step": 5184 + }, + { + "epoch": 1.05, + "learning_rate": 1.68375972348872e-05, + "loss": 2.174, + "step": 5185 + }, + { + "epoch": 1.05, + "learning_rate": 1.6836336683198624e-05, + "loss": 2.1889, + "step": 5186 + }, + { + "epoch": 1.05, + "learning_rate": 1.6835075927534937e-05, + "loss": 2.1433, + "step": 5187 + }, + { + "epoch": 1.05, + "learning_rate": 1.683381496793376e-05, + "loss": 2.1421, + "step": 5188 + }, + { + "epoch": 1.05, + "learning_rate": 1.6832553804432713e-05, + "loss": 2.158, + "step": 5189 + }, + { + "epoch": 1.05, + "learning_rate": 1.6831292437069425e-05, + "loss": 2.1158, + "step": 5190 + }, + { + "epoch": 1.05, + "learning_rate": 1.6830030865881533e-05, + "loss": 2.152, + "step": 5191 + }, + { + "epoch": 1.05, + "learning_rate": 1.6828769090906675e-05, + "loss": 2.1774, + "step": 5192 + }, + { + "epoch": 1.05, + "learning_rate": 1.6827507112182507e-05, + "loss": 2.1523, + "step": 5193 + }, + { + "epoch": 1.05, + "learning_rate": 1.6826244929746672e-05, + "loss": 2.1134, + "step": 5194 + }, + { + "epoch": 1.05, + "learning_rate": 1.6824982543636835e-05, + "loss": 2.1229, + "step": 5195 + }, + { + "epoch": 1.05, + "learning_rate": 1.682371995389066e-05, + "loss": 2.1814, + "step": 5196 + }, + { + "epoch": 1.05, + "learning_rate": 1.682245716054582e-05, + "loss": 2.1778, + "step": 5197 + }, + { + "epoch": 1.06, + "learning_rate": 1.6821194163639997e-05, + "loss": 2.138, + "step": 5198 + }, + { + "epoch": 1.06, + "learning_rate": 1.6819930963210865e-05, + "loss": 2.1861, + "step": 5199 + }, + { + "epoch": 1.06, + "learning_rate": 1.681866755929612e-05, + "loss": 2.1166, + "step": 5200 + }, + { + "epoch": 1.06, + "learning_rate": 1.681740395193346e-05, + "loss": 2.1364, + "step": 5201 + }, + { + "epoch": 1.06, + "learning_rate": 1.6816140141160584e-05, + "loss": 2.12, + "step": 5202 + }, + { + "epoch": 1.06, + "learning_rate": 1.6814876127015198e-05, + "loss": 2.0338, + "step": 5203 + }, + { + "epoch": 1.06, + "learning_rate": 1.6813611909535022e-05, + "loss": 2.0733, + "step": 5204 + }, + { + "epoch": 1.06, + "learning_rate": 1.681234748875777e-05, + "loss": 2.0583, + "step": 5205 + }, + { + "epoch": 1.06, + "learning_rate": 1.6811082864721177e-05, + "loss": 2.1736, + "step": 5206 + }, + { + "epoch": 1.06, + "learning_rate": 1.680981803746297e-05, + "loss": 2.0957, + "step": 5207 + }, + { + "epoch": 1.06, + "learning_rate": 1.6808553007020885e-05, + "loss": 2.1013, + "step": 5208 + }, + { + "epoch": 1.06, + "learning_rate": 1.6807287773432667e-05, + "loss": 2.1452, + "step": 5209 + }, + { + "epoch": 1.06, + "learning_rate": 1.6806022336736073e-05, + "loss": 2.141, + "step": 5210 + }, + { + "epoch": 1.06, + "learning_rate": 1.6804756696968855e-05, + "loss": 2.1005, + "step": 5211 + }, + { + "epoch": 1.06, + "learning_rate": 1.6803490854168775e-05, + "loss": 2.1385, + "step": 5212 + }, + { + "epoch": 1.06, + "learning_rate": 1.6802224808373603e-05, + "loss": 2.1065, + "step": 5213 + }, + { + "epoch": 1.06, + "learning_rate": 1.6800958559621118e-05, + "loss": 2.1336, + "step": 5214 + }, + { + "epoch": 1.06, + "learning_rate": 1.6799692107949093e-05, + "loss": 2.1385, + "step": 5215 + }, + { + "epoch": 1.06, + "learning_rate": 1.679842545339532e-05, + "loss": 2.1416, + "step": 5216 + }, + { + "epoch": 1.06, + "learning_rate": 1.6797158595997594e-05, + "loss": 2.1121, + "step": 5217 + }, + { + "epoch": 1.06, + "learning_rate": 1.679589153579371e-05, + "loss": 2.2023, + "step": 5218 + }, + { + "epoch": 1.06, + "learning_rate": 1.6794624272821475e-05, + "loss": 2.1059, + "step": 5219 + }, + { + "epoch": 1.06, + "learning_rate": 1.6793356807118695e-05, + "loss": 2.1334, + "step": 5220 + }, + { + "epoch": 1.06, + "learning_rate": 1.67920891387232e-05, + "loss": 2.1514, + "step": 5221 + }, + { + "epoch": 1.06, + "learning_rate": 1.6790821267672802e-05, + "loss": 2.1246, + "step": 5222 + }, + { + "epoch": 1.06, + "learning_rate": 1.678955319400533e-05, + "loss": 2.074, + "step": 5223 + }, + { + "epoch": 1.06, + "learning_rate": 1.6788284917758626e-05, + "loss": 2.1075, + "step": 5224 + }, + { + "epoch": 1.06, + "learning_rate": 1.6787016438970526e-05, + "loss": 2.0559, + "step": 5225 + }, + { + "epoch": 1.06, + "learning_rate": 1.6785747757678882e-05, + "loss": 2.1284, + "step": 5226 + }, + { + "epoch": 1.06, + "learning_rate": 1.6784478873921545e-05, + "loss": 2.1608, + "step": 5227 + }, + { + "epoch": 1.06, + "learning_rate": 1.6783209787736375e-05, + "loss": 2.1486, + "step": 5228 + }, + { + "epoch": 1.06, + "learning_rate": 1.6781940499161238e-05, + "loss": 2.2156, + "step": 5229 + }, + { + "epoch": 1.06, + "learning_rate": 1.6780671008234005e-05, + "loss": 2.1052, + "step": 5230 + }, + { + "epoch": 1.06, + "learning_rate": 1.6779401314992552e-05, + "loss": 2.1122, + "step": 5231 + }, + { + "epoch": 1.06, + "learning_rate": 1.6778131419474767e-05, + "loss": 2.1476, + "step": 5232 + }, + { + "epoch": 1.06, + "learning_rate": 1.6776861321718535e-05, + "loss": 2.1288, + "step": 5233 + }, + { + "epoch": 1.06, + "learning_rate": 1.6775591021761758e-05, + "loss": 2.0986, + "step": 5234 + }, + { + "epoch": 1.06, + "learning_rate": 1.677432051964233e-05, + "loss": 2.1697, + "step": 5235 + }, + { + "epoch": 1.06, + "learning_rate": 1.6773049815398165e-05, + "loss": 2.1421, + "step": 5236 + }, + { + "epoch": 1.06, + "learning_rate": 1.6771778909067175e-05, + "loss": 2.1587, + "step": 5237 + }, + { + "epoch": 1.06, + "learning_rate": 1.677050780068728e-05, + "loss": 2.1092, + "step": 5238 + }, + { + "epoch": 1.06, + "learning_rate": 1.6769236490296403e-05, + "loss": 2.1788, + "step": 5239 + }, + { + "epoch": 1.06, + "learning_rate": 1.6767964977932478e-05, + "loss": 2.0282, + "step": 5240 + }, + { + "epoch": 1.06, + "learning_rate": 1.6766693263633446e-05, + "loss": 2.0807, + "step": 5241 + }, + { + "epoch": 1.06, + "learning_rate": 1.6765421347437246e-05, + "loss": 2.1453, + "step": 5242 + }, + { + "epoch": 1.06, + "learning_rate": 1.6764149229381833e-05, + "loss": 2.1267, + "step": 5243 + }, + { + "epoch": 1.06, + "learning_rate": 1.676287690950516e-05, + "loss": 2.2106, + "step": 5244 + }, + { + "epoch": 1.06, + "learning_rate": 1.676160438784519e-05, + "loss": 2.0789, + "step": 5245 + }, + { + "epoch": 1.06, + "learning_rate": 1.676033166443989e-05, + "loss": 2.1651, + "step": 5246 + }, + { + "epoch": 1.07, + "learning_rate": 1.675905873932723e-05, + "loss": 2.1229, + "step": 5247 + }, + { + "epoch": 1.07, + "learning_rate": 1.67577856125452e-05, + "loss": 2.0893, + "step": 5248 + }, + { + "epoch": 1.07, + "learning_rate": 1.6756512284131782e-05, + "loss": 2.0728, + "step": 5249 + }, + { + "epoch": 1.07, + "learning_rate": 1.6755238754124965e-05, + "loss": 2.1943, + "step": 5250 + }, + { + "epoch": 1.07, + "learning_rate": 1.675396502256275e-05, + "loss": 2.1414, + "step": 5251 + }, + { + "epoch": 1.07, + "learning_rate": 1.675269108948314e-05, + "loss": 2.1601, + "step": 5252 + }, + { + "epoch": 1.07, + "learning_rate": 1.675141695492415e-05, + "loss": 2.1612, + "step": 5253 + }, + { + "epoch": 1.07, + "learning_rate": 1.675014261892379e-05, + "loss": 2.0369, + "step": 5254 + }, + { + "epoch": 1.07, + "learning_rate": 1.6748868081520085e-05, + "loss": 2.0614, + "step": 5255 + }, + { + "epoch": 1.07, + "learning_rate": 1.6747593342751062e-05, + "loss": 2.0972, + "step": 5256 + }, + { + "epoch": 1.07, + "learning_rate": 1.6746318402654752e-05, + "loss": 2.0548, + "step": 5257 + }, + { + "epoch": 1.07, + "learning_rate": 1.6745043261269204e-05, + "loss": 2.1363, + "step": 5258 + }, + { + "epoch": 1.07, + "learning_rate": 1.674376791863246e-05, + "loss": 2.1701, + "step": 5259 + }, + { + "epoch": 1.07, + "learning_rate": 1.6742492374782568e-05, + "loss": 2.1375, + "step": 5260 + }, + { + "epoch": 1.07, + "learning_rate": 1.6741216629757594e-05, + "loss": 2.1183, + "step": 5261 + }, + { + "epoch": 1.07, + "learning_rate": 1.6739940683595594e-05, + "loss": 2.1561, + "step": 5262 + }, + { + "epoch": 1.07, + "learning_rate": 1.6738664536334646e-05, + "loss": 2.1154, + "step": 5263 + }, + { + "epoch": 1.07, + "learning_rate": 1.673738818801282e-05, + "loss": 2.1418, + "step": 5264 + }, + { + "epoch": 1.07, + "learning_rate": 1.6736111638668203e-05, + "loss": 2.1811, + "step": 5265 + }, + { + "epoch": 1.07, + "learning_rate": 1.6734834888338882e-05, + "loss": 2.1117, + "step": 5266 + }, + { + "epoch": 1.07, + "learning_rate": 1.673355793706295e-05, + "loss": 2.131, + "step": 5267 + }, + { + "epoch": 1.07, + "learning_rate": 1.673228078487851e-05, + "loss": 2.1366, + "step": 5268 + }, + { + "epoch": 1.07, + "learning_rate": 1.6731003431823666e-05, + "loss": 2.1457, + "step": 5269 + }, + { + "epoch": 1.07, + "learning_rate": 1.6729725877936526e-05, + "loss": 2.1078, + "step": 5270 + }, + { + "epoch": 1.07, + "learning_rate": 1.6728448123255215e-05, + "loss": 2.1562, + "step": 5271 + }, + { + "epoch": 1.07, + "learning_rate": 1.6727170167817856e-05, + "loss": 2.1491, + "step": 5272 + }, + { + "epoch": 1.07, + "learning_rate": 1.672589201166258e-05, + "loss": 2.09, + "step": 5273 + }, + { + "epoch": 1.07, + "learning_rate": 1.672461365482752e-05, + "loss": 2.1855, + "step": 5274 + }, + { + "epoch": 1.07, + "learning_rate": 1.6723335097350813e-05, + "loss": 2.1701, + "step": 5275 + }, + { + "epoch": 1.07, + "learning_rate": 1.672205633927062e-05, + "loss": 2.1478, + "step": 5276 + }, + { + "epoch": 1.07, + "learning_rate": 1.672077738062509e-05, + "loss": 2.0495, + "step": 5277 + }, + { + "epoch": 1.07, + "learning_rate": 1.6719498221452382e-05, + "loss": 2.1382, + "step": 5278 + }, + { + "epoch": 1.07, + "learning_rate": 1.6718218861790658e-05, + "loss": 2.1428, + "step": 5279 + }, + { + "epoch": 1.07, + "learning_rate": 1.6716939301678098e-05, + "loss": 2.1703, + "step": 5280 + }, + { + "epoch": 1.07, + "learning_rate": 1.6715659541152875e-05, + "loss": 2.2302, + "step": 5281 + }, + { + "epoch": 1.07, + "learning_rate": 1.6714379580253178e-05, + "loss": 2.1658, + "step": 5282 + }, + { + "epoch": 1.07, + "learning_rate": 1.6713099419017192e-05, + "loss": 1.9827, + "step": 5283 + }, + { + "epoch": 1.07, + "learning_rate": 1.6711819057483115e-05, + "loss": 2.0448, + "step": 5284 + }, + { + "epoch": 1.07, + "learning_rate": 1.6710538495689145e-05, + "loss": 2.1277, + "step": 5285 + }, + { + "epoch": 1.07, + "learning_rate": 1.67092577336735e-05, + "loss": 2.116, + "step": 5286 + }, + { + "epoch": 1.07, + "learning_rate": 1.670797677147438e-05, + "loss": 2.1154, + "step": 5287 + }, + { + "epoch": 1.07, + "learning_rate": 1.6706695609130022e-05, + "loss": 2.0863, + "step": 5288 + }, + { + "epoch": 1.07, + "learning_rate": 1.6705414246678636e-05, + "loss": 2.1719, + "step": 5289 + }, + { + "epoch": 1.07, + "learning_rate": 1.670413268415846e-05, + "loss": 2.2122, + "step": 5290 + }, + { + "epoch": 1.07, + "learning_rate": 1.6702850921607736e-05, + "loss": 2.1469, + "step": 5291 + }, + { + "epoch": 1.07, + "learning_rate": 1.6701568959064705e-05, + "loss": 2.0869, + "step": 5292 + }, + { + "epoch": 1.07, + "learning_rate": 1.670028679656761e-05, + "loss": 2.1142, + "step": 5293 + }, + { + "epoch": 1.07, + "learning_rate": 1.6699004434154718e-05, + "loss": 2.1448, + "step": 5294 + }, + { + "epoch": 1.07, + "learning_rate": 1.6697721871864286e-05, + "loss": 2.0761, + "step": 5295 + }, + { + "epoch": 1.08, + "learning_rate": 1.669643910973458e-05, + "loss": 2.0854, + "step": 5296 + }, + { + "epoch": 1.08, + "learning_rate": 1.6695156147803873e-05, + "loss": 2.1549, + "step": 5297 + }, + { + "epoch": 1.08, + "learning_rate": 1.6693872986110444e-05, + "loss": 2.1122, + "step": 5298 + }, + { + "epoch": 1.08, + "learning_rate": 1.6692589624692586e-05, + "loss": 2.2073, + "step": 5299 + }, + { + "epoch": 1.08, + "learning_rate": 1.6691306063588583e-05, + "loss": 2.1783, + "step": 5300 + }, + { + "epoch": 1.08, + "learning_rate": 1.6690022302836733e-05, + "loss": 2.1885, + "step": 5301 + }, + { + "epoch": 1.08, + "learning_rate": 1.668873834247535e-05, + "loss": 2.1952, + "step": 5302 + }, + { + "epoch": 1.08, + "learning_rate": 1.6687454182542725e-05, + "loss": 2.1358, + "step": 5303 + }, + { + "epoch": 1.08, + "learning_rate": 1.668616982307719e-05, + "loss": 2.2274, + "step": 5304 + }, + { + "epoch": 1.08, + "learning_rate": 1.6684885264117056e-05, + "loss": 2.1166, + "step": 5305 + }, + { + "epoch": 1.08, + "learning_rate": 1.6683600505700656e-05, + "loss": 2.1621, + "step": 5306 + }, + { + "epoch": 1.08, + "learning_rate": 1.668231554786632e-05, + "loss": 2.1664, + "step": 5307 + }, + { + "epoch": 1.08, + "learning_rate": 1.6681030390652385e-05, + "loss": 2.104, + "step": 5308 + }, + { + "epoch": 1.08, + "learning_rate": 1.66797450340972e-05, + "loss": 2.0179, + "step": 5309 + }, + { + "epoch": 1.08, + "learning_rate": 1.6678459478239116e-05, + "loss": 2.0149, + "step": 5310 + }, + { + "epoch": 1.08, + "learning_rate": 1.6677173723116492e-05, + "loss": 2.0957, + "step": 5311 + }, + { + "epoch": 1.08, + "learning_rate": 1.6675887768767688e-05, + "loss": 2.1443, + "step": 5312 + }, + { + "epoch": 1.08, + "learning_rate": 1.667460161523107e-05, + "loss": 2.0604, + "step": 5313 + }, + { + "epoch": 1.08, + "learning_rate": 1.6673315262545017e-05, + "loss": 2.0648, + "step": 5314 + }, + { + "epoch": 1.08, + "learning_rate": 1.6672028710747913e-05, + "loss": 2.1015, + "step": 5315 + }, + { + "epoch": 1.08, + "learning_rate": 1.6670741959878134e-05, + "loss": 2.0833, + "step": 5316 + }, + { + "epoch": 1.08, + "learning_rate": 1.666945500997408e-05, + "loss": 2.0528, + "step": 5317 + }, + { + "epoch": 1.08, + "learning_rate": 1.6668167861074152e-05, + "loss": 2.1126, + "step": 5318 + }, + { + "epoch": 1.08, + "learning_rate": 1.666688051321675e-05, + "loss": 2.2437, + "step": 5319 + }, + { + "epoch": 1.08, + "learning_rate": 1.6665592966440286e-05, + "loss": 2.1134, + "step": 5320 + }, + { + "epoch": 1.08, + "learning_rate": 1.6664305220783176e-05, + "loss": 2.1732, + "step": 5321 + }, + { + "epoch": 1.08, + "learning_rate": 1.666301727628384e-05, + "loss": 2.21, + "step": 5322 + }, + { + "epoch": 1.08, + "learning_rate": 1.6661729132980713e-05, + "loss": 2.1902, + "step": 5323 + }, + { + "epoch": 1.08, + "learning_rate": 1.6660440790912224e-05, + "loss": 2.2127, + "step": 5324 + }, + { + "epoch": 1.08, + "learning_rate": 1.6659152250116815e-05, + "loss": 2.146, + "step": 5325 + }, + { + "epoch": 1.08, + "learning_rate": 1.6657863510632925e-05, + "loss": 2.1073, + "step": 5326 + }, + { + "epoch": 1.08, + "learning_rate": 1.665657457249902e-05, + "loss": 2.0346, + "step": 5327 + }, + { + "epoch": 1.08, + "learning_rate": 1.6655285435753544e-05, + "loss": 2.0687, + "step": 5328 + }, + { + "epoch": 1.08, + "learning_rate": 1.665399610043497e-05, + "loss": 1.9825, + "step": 5329 + }, + { + "epoch": 1.08, + "learning_rate": 1.6652706566581765e-05, + "loss": 2.1233, + "step": 5330 + }, + { + "epoch": 1.08, + "learning_rate": 1.6651416834232404e-05, + "loss": 2.1076, + "step": 5331 + }, + { + "epoch": 1.08, + "learning_rate": 1.6650126903425368e-05, + "loss": 2.0591, + "step": 5332 + }, + { + "epoch": 1.08, + "learning_rate": 1.6648836774199146e-05, + "loss": 2.1627, + "step": 5333 + }, + { + "epoch": 1.08, + "learning_rate": 1.664754644659223e-05, + "loss": 2.1054, + "step": 5334 + }, + { + "epoch": 1.08, + "learning_rate": 1.6646255920643123e-05, + "loss": 2.0749, + "step": 5335 + }, + { + "epoch": 1.08, + "learning_rate": 1.6644965196390327e-05, + "loss": 2.139, + "step": 5336 + }, + { + "epoch": 1.08, + "learning_rate": 1.6643674273872352e-05, + "loss": 2.1342, + "step": 5337 + }, + { + "epoch": 1.08, + "learning_rate": 1.6642383153127716e-05, + "loss": 2.2092, + "step": 5338 + }, + { + "epoch": 1.08, + "learning_rate": 1.6641091834194948e-05, + "loss": 2.1909, + "step": 5339 + }, + { + "epoch": 1.08, + "learning_rate": 1.663980031711257e-05, + "loss": 2.159, + "step": 5340 + }, + { + "epoch": 1.08, + "learning_rate": 1.6638508601919117e-05, + "loss": 2.1694, + "step": 5341 + }, + { + "epoch": 1.08, + "learning_rate": 1.6637216688653132e-05, + "loss": 2.2117, + "step": 5342 + }, + { + "epoch": 1.08, + "learning_rate": 1.6635924577353164e-05, + "loss": 2.0323, + "step": 5343 + }, + { + "epoch": 1.08, + "learning_rate": 1.663463226805776e-05, + "loss": 2.156, + "step": 5344 + }, + { + "epoch": 1.08, + "learning_rate": 1.6633339760805483e-05, + "loss": 2.1235, + "step": 5345 + }, + { + "epoch": 1.09, + "learning_rate": 1.6632047055634894e-05, + "loss": 2.1782, + "step": 5346 + }, + { + "epoch": 1.09, + "learning_rate": 1.663075415258457e-05, + "loss": 2.1709, + "step": 5347 + }, + { + "epoch": 1.09, + "learning_rate": 1.662946105169308e-05, + "loss": 2.1245, + "step": 5348 + }, + { + "epoch": 1.09, + "learning_rate": 1.6628167752999006e-05, + "loss": 2.0602, + "step": 5349 + }, + { + "epoch": 1.09, + "learning_rate": 1.662687425654094e-05, + "loss": 2.1666, + "step": 5350 + }, + { + "epoch": 1.09, + "learning_rate": 1.6625580562357473e-05, + "loss": 2.1508, + "step": 5351 + }, + { + "epoch": 1.09, + "learning_rate": 1.662428667048721e-05, + "loss": 2.1443, + "step": 5352 + }, + { + "epoch": 1.09, + "learning_rate": 1.662299258096875e-05, + "loss": 2.1651, + "step": 5353 + }, + { + "epoch": 1.09, + "learning_rate": 1.662169829384071e-05, + "loss": 2.1188, + "step": 5354 + }, + { + "epoch": 1.09, + "learning_rate": 1.6620403809141707e-05, + "loss": 2.1173, + "step": 5355 + }, + { + "epoch": 1.09, + "learning_rate": 1.6619109126910356e-05, + "loss": 2.1002, + "step": 5356 + }, + { + "epoch": 1.09, + "learning_rate": 1.66178142471853e-05, + "loss": 2.143, + "step": 5357 + }, + { + "epoch": 1.09, + "learning_rate": 1.6616519170005162e-05, + "loss": 2.1541, + "step": 5358 + }, + { + "epoch": 1.09, + "learning_rate": 1.661522389540859e-05, + "loss": 2.102, + "step": 5359 + }, + { + "epoch": 1.09, + "learning_rate": 1.661392842343423e-05, + "loss": 2.0955, + "step": 5360 + }, + { + "epoch": 1.09, + "learning_rate": 1.6612632754120733e-05, + "loss": 2.1577, + "step": 5361 + }, + { + "epoch": 1.09, + "learning_rate": 1.661133688750676e-05, + "loss": 2.1666, + "step": 5362 + }, + { + "epoch": 1.09, + "learning_rate": 1.6610040823630973e-05, + "loss": 2.0897, + "step": 5363 + }, + { + "epoch": 1.09, + "learning_rate": 1.6608744562532046e-05, + "loss": 2.0644, + "step": 5364 + }, + { + "epoch": 1.09, + "learning_rate": 1.660744810424865e-05, + "loss": 2.1438, + "step": 5365 + }, + { + "epoch": 1.09, + "learning_rate": 1.6606151448819475e-05, + "loss": 2.2284, + "step": 5366 + }, + { + "epoch": 1.09, + "learning_rate": 1.66048545962832e-05, + "loss": 2.1387, + "step": 5367 + }, + { + "epoch": 1.09, + "learning_rate": 1.6603557546678527e-05, + "loss": 2.0748, + "step": 5368 + }, + { + "epoch": 1.09, + "learning_rate": 1.660226030004415e-05, + "loss": 2.1057, + "step": 5369 + }, + { + "epoch": 1.09, + "learning_rate": 1.6600962856418782e-05, + "loss": 2.1681, + "step": 5370 + }, + { + "epoch": 1.09, + "learning_rate": 1.659966521584113e-05, + "loss": 2.1367, + "step": 5371 + }, + { + "epoch": 1.09, + "learning_rate": 1.6598367378349912e-05, + "loss": 2.1354, + "step": 5372 + }, + { + "epoch": 1.09, + "learning_rate": 1.659706934398385e-05, + "loss": 2.1836, + "step": 5373 + }, + { + "epoch": 1.09, + "learning_rate": 1.6595771112781673e-05, + "loss": 2.1368, + "step": 5374 + }, + { + "epoch": 1.09, + "learning_rate": 1.6594472684782118e-05, + "loss": 2.0806, + "step": 5375 + }, + { + "epoch": 1.09, + "learning_rate": 1.659317406002393e-05, + "loss": 2.0512, + "step": 5376 + }, + { + "epoch": 1.09, + "learning_rate": 1.6591875238545848e-05, + "loss": 2.108, + "step": 5377 + }, + { + "epoch": 1.09, + "learning_rate": 1.659057622038663e-05, + "loss": 2.0711, + "step": 5378 + }, + { + "epoch": 1.09, + "learning_rate": 1.6589277005585035e-05, + "loss": 2.0981, + "step": 5379 + }, + { + "epoch": 1.09, + "learning_rate": 1.6587977594179828e-05, + "loss": 2.1433, + "step": 5380 + }, + { + "epoch": 1.09, + "learning_rate": 1.6586677986209773e-05, + "loss": 2.1281, + "step": 5381 + }, + { + "epoch": 1.09, + "learning_rate": 1.6585378181713655e-05, + "loss": 2.1585, + "step": 5382 + }, + { + "epoch": 1.09, + "learning_rate": 1.6584078180730247e-05, + "loss": 2.1426, + "step": 5383 + }, + { + "epoch": 1.09, + "learning_rate": 1.6582777983298343e-05, + "loss": 2.1691, + "step": 5384 + }, + { + "epoch": 1.09, + "learning_rate": 1.6581477589456737e-05, + "loss": 2.085, + "step": 5385 + }, + { + "epoch": 1.09, + "learning_rate": 1.6580176999244228e-05, + "loss": 2.049, + "step": 5386 + }, + { + "epoch": 1.09, + "learning_rate": 1.6578876212699618e-05, + "loss": 2.0944, + "step": 5387 + }, + { + "epoch": 1.09, + "learning_rate": 1.6577575229861723e-05, + "loss": 2.1088, + "step": 5388 + }, + { + "epoch": 1.09, + "learning_rate": 1.6576274050769364e-05, + "loss": 2.0693, + "step": 5389 + }, + { + "epoch": 1.09, + "learning_rate": 1.6574972675461348e-05, + "loss": 2.1511, + "step": 5390 + }, + { + "epoch": 1.09, + "learning_rate": 1.6573671103976524e-05, + "loss": 2.058, + "step": 5391 + }, + { + "epoch": 1.09, + "learning_rate": 1.6572369336353716e-05, + "loss": 2.1535, + "step": 5392 + }, + { + "epoch": 1.09, + "learning_rate": 1.6571067372631767e-05, + "loss": 2.0851, + "step": 5393 + }, + { + "epoch": 1.09, + "learning_rate": 1.656976521284952e-05, + "loss": 2.152, + "step": 5394 + }, + { + "epoch": 1.1, + "learning_rate": 1.656846285704583e-05, + "loss": 2.116, + "step": 5395 + }, + { + "epoch": 1.1, + "learning_rate": 1.656716030525956e-05, + "loss": 2.1519, + "step": 5396 + }, + { + "epoch": 1.1, + "learning_rate": 1.6565857557529567e-05, + "loss": 2.1093, + "step": 5397 + }, + { + "epoch": 1.1, + "learning_rate": 1.6564554613894723e-05, + "loss": 2.051, + "step": 5398 + }, + { + "epoch": 1.1, + "learning_rate": 1.6563251474393908e-05, + "loss": 2.069, + "step": 5399 + }, + { + "epoch": 1.1, + "learning_rate": 1.6561948139065994e-05, + "loss": 2.0926, + "step": 5400 + }, + { + "epoch": 1.1, + "learning_rate": 1.656064460794988e-05, + "loss": 2.1797, + "step": 5401 + }, + { + "epoch": 1.1, + "learning_rate": 1.6559340881084453e-05, + "loss": 2.1404, + "step": 5402 + }, + { + "epoch": 1.1, + "learning_rate": 1.6558036958508614e-05, + "loss": 2.1648, + "step": 5403 + }, + { + "epoch": 1.1, + "learning_rate": 1.6556732840261266e-05, + "loss": 2.1282, + "step": 5404 + }, + { + "epoch": 1.1, + "learning_rate": 1.6555428526381324e-05, + "loss": 2.1748, + "step": 5405 + }, + { + "epoch": 1.1, + "learning_rate": 1.6554124016907696e-05, + "loss": 2.0489, + "step": 5406 + }, + { + "epoch": 1.1, + "learning_rate": 1.6552819311879314e-05, + "loss": 2.0735, + "step": 5407 + }, + { + "epoch": 1.1, + "learning_rate": 1.6551514411335104e-05, + "loss": 2.1249, + "step": 5408 + }, + { + "epoch": 1.1, + "learning_rate": 1.6550209315313996e-05, + "loss": 2.113, + "step": 5409 + }, + { + "epoch": 1.1, + "learning_rate": 1.6548904023854932e-05, + "loss": 2.0839, + "step": 5410 + }, + { + "epoch": 1.1, + "learning_rate": 1.654759853699686e-05, + "loss": 2.0335, + "step": 5411 + }, + { + "epoch": 1.1, + "learning_rate": 1.654629285477873e-05, + "loss": 2.1436, + "step": 5412 + }, + { + "epoch": 1.1, + "learning_rate": 1.6544986977239502e-05, + "loss": 2.1401, + "step": 5413 + }, + { + "epoch": 1.1, + "learning_rate": 1.654368090441814e-05, + "loss": 2.124, + "step": 5414 + }, + { + "epoch": 1.1, + "learning_rate": 1.6542374636353605e-05, + "loss": 2.0845, + "step": 5415 + }, + { + "epoch": 1.1, + "learning_rate": 1.6541068173084876e-05, + "loss": 2.1843, + "step": 5416 + }, + { + "epoch": 1.1, + "learning_rate": 1.653976151465094e-05, + "loss": 2.2124, + "step": 5417 + }, + { + "epoch": 1.1, + "learning_rate": 1.6538454661090776e-05, + "loss": 2.149, + "step": 5418 + }, + { + "epoch": 1.1, + "learning_rate": 1.6537147612443384e-05, + "loss": 2.1685, + "step": 5419 + }, + { + "epoch": 1.1, + "learning_rate": 1.6535840368747755e-05, + "loss": 2.1893, + "step": 5420 + }, + { + "epoch": 1.1, + "learning_rate": 1.653453293004289e-05, + "loss": 2.1827, + "step": 5421 + }, + { + "epoch": 1.1, + "learning_rate": 1.653322529636781e-05, + "loss": 2.1509, + "step": 5422 + }, + { + "epoch": 1.1, + "learning_rate": 1.6531917467761528e-05, + "loss": 2.1577, + "step": 5423 + }, + { + "epoch": 1.1, + "learning_rate": 1.6530609444263057e-05, + "loss": 2.1047, + "step": 5424 + }, + { + "epoch": 1.1, + "learning_rate": 1.6529301225911433e-05, + "loss": 2.1416, + "step": 5425 + }, + { + "epoch": 1.1, + "learning_rate": 1.6527992812745687e-05, + "loss": 2.078, + "step": 5426 + }, + { + "epoch": 1.1, + "learning_rate": 1.6526684204804857e-05, + "loss": 2.1585, + "step": 5427 + }, + { + "epoch": 1.1, + "learning_rate": 1.6525375402127988e-05, + "loss": 2.1614, + "step": 5428 + }, + { + "epoch": 1.1, + "learning_rate": 1.652406640475413e-05, + "loss": 2.1152, + "step": 5429 + }, + { + "epoch": 1.1, + "learning_rate": 1.6522757212722346e-05, + "loss": 2.1145, + "step": 5430 + }, + { + "epoch": 1.1, + "learning_rate": 1.6521447826071686e-05, + "loss": 2.1078, + "step": 5431 + }, + { + "epoch": 1.1, + "learning_rate": 1.6520138244841225e-05, + "loss": 2.1196, + "step": 5432 + }, + { + "epoch": 1.1, + "learning_rate": 1.651882846907004e-05, + "loss": 2.0786, + "step": 5433 + }, + { + "epoch": 1.1, + "learning_rate": 1.6517518498797206e-05, + "loss": 2.1998, + "step": 5434 + }, + { + "epoch": 1.1, + "learning_rate": 1.6516208334061807e-05, + "loss": 2.1496, + "step": 5435 + }, + { + "epoch": 1.1, + "learning_rate": 1.651489797490294e-05, + "loss": 2.1842, + "step": 5436 + }, + { + "epoch": 1.1, + "learning_rate": 1.65135874213597e-05, + "loss": 2.1439, + "step": 5437 + }, + { + "epoch": 1.1, + "learning_rate": 1.6512276673471182e-05, + "loss": 2.1908, + "step": 5438 + }, + { + "epoch": 1.1, + "learning_rate": 1.6510965731276506e-05, + "loss": 2.1836, + "step": 5439 + }, + { + "epoch": 1.1, + "learning_rate": 1.6509654594814782e-05, + "loss": 2.1581, + "step": 5440 + }, + { + "epoch": 1.1, + "learning_rate": 1.650834326412513e-05, + "loss": 2.1224, + "step": 5441 + }, + { + "epoch": 1.1, + "learning_rate": 1.6507031739246674e-05, + "loss": 2.1676, + "step": 5442 + }, + { + "epoch": 1.1, + "learning_rate": 1.6505720020218553e-05, + "loss": 2.2067, + "step": 5443 + }, + { + "epoch": 1.11, + "learning_rate": 1.6504408107079895e-05, + "loss": 2.1188, + "step": 5444 + }, + { + "epoch": 1.11, + "learning_rate": 1.650309599986985e-05, + "loss": 2.1488, + "step": 5445 + }, + { + "epoch": 1.11, + "learning_rate": 1.6501783698627566e-05, + "loss": 2.1253, + "step": 5446 + }, + { + "epoch": 1.11, + "learning_rate": 1.6500471203392194e-05, + "loss": 2.1516, + "step": 5447 + }, + { + "epoch": 1.11, + "learning_rate": 1.64991585142029e-05, + "loss": 2.1387, + "step": 5448 + }, + { + "epoch": 1.11, + "learning_rate": 1.649784563109885e-05, + "loss": 2.1159, + "step": 5449 + }, + { + "epoch": 1.11, + "learning_rate": 1.6496532554119214e-05, + "loss": 2.1583, + "step": 5450 + }, + { + "epoch": 1.11, + "learning_rate": 1.649521928330317e-05, + "loss": 2.1705, + "step": 5451 + }, + { + "epoch": 1.11, + "learning_rate": 1.6493905818689905e-05, + "loss": 2.1519, + "step": 5452 + }, + { + "epoch": 1.11, + "learning_rate": 1.6492592160318604e-05, + "loss": 2.0968, + "step": 5453 + }, + { + "epoch": 1.11, + "learning_rate": 1.6491278308228468e-05, + "loss": 2.1674, + "step": 5454 + }, + { + "epoch": 1.11, + "learning_rate": 1.6489964262458693e-05, + "loss": 2.0981, + "step": 5455 + }, + { + "epoch": 1.11, + "learning_rate": 1.648865002304849e-05, + "loss": 2.2443, + "step": 5456 + }, + { + "epoch": 1.11, + "learning_rate": 1.648733559003707e-05, + "loss": 2.1896, + "step": 5457 + }, + { + "epoch": 1.11, + "learning_rate": 1.6486020963463652e-05, + "loss": 2.0917, + "step": 5458 + }, + { + "epoch": 1.11, + "learning_rate": 1.6484706143367463e-05, + "loss": 2.1552, + "step": 5459 + }, + { + "epoch": 1.11, + "learning_rate": 1.6483391129787725e-05, + "loss": 2.1613, + "step": 5460 + }, + { + "epoch": 1.11, + "learning_rate": 1.6482075922763687e-05, + "loss": 2.1769, + "step": 5461 + }, + { + "epoch": 1.11, + "learning_rate": 1.6480760522334577e-05, + "loss": 2.1671, + "step": 5462 + }, + { + "epoch": 1.11, + "learning_rate": 1.6479444928539652e-05, + "loss": 2.2551, + "step": 5463 + }, + { + "epoch": 1.11, + "learning_rate": 1.647812914141816e-05, + "loss": 2.1557, + "step": 5464 + }, + { + "epoch": 1.11, + "learning_rate": 1.6476813161009366e-05, + "loss": 2.1836, + "step": 5465 + }, + { + "epoch": 1.11, + "learning_rate": 1.6475496987352528e-05, + "loss": 2.219, + "step": 5466 + }, + { + "epoch": 1.11, + "learning_rate": 1.6474180620486915e-05, + "loss": 1.9496, + "step": 5467 + }, + { + "epoch": 1.11, + "learning_rate": 1.6472864060451814e-05, + "loss": 2.143, + "step": 5468 + }, + { + "epoch": 1.11, + "learning_rate": 1.64715473072865e-05, + "loss": 2.0637, + "step": 5469 + }, + { + "epoch": 1.11, + "learning_rate": 1.647023036103026e-05, + "loss": 2.1627, + "step": 5470 + }, + { + "epoch": 1.11, + "learning_rate": 1.646891322172239e-05, + "loss": 2.1988, + "step": 5471 + }, + { + "epoch": 1.11, + "learning_rate": 1.6467595889402187e-05, + "loss": 2.0158, + "step": 5472 + }, + { + "epoch": 1.11, + "learning_rate": 1.646627836410896e-05, + "loss": 2.1582, + "step": 5473 + }, + { + "epoch": 1.11, + "learning_rate": 1.6464960645882012e-05, + "loss": 2.1594, + "step": 5474 + }, + { + "epoch": 1.11, + "learning_rate": 1.646364273476067e-05, + "loss": 2.2082, + "step": 5475 + }, + { + "epoch": 1.11, + "learning_rate": 1.646232463078425e-05, + "loss": 2.0915, + "step": 5476 + }, + { + "epoch": 1.11, + "learning_rate": 1.646100633399208e-05, + "loss": 2.216, + "step": 5477 + }, + { + "epoch": 1.11, + "learning_rate": 1.64596878444235e-05, + "loss": 2.0412, + "step": 5478 + }, + { + "epoch": 1.11, + "learning_rate": 1.645836916211784e-05, + "loss": 2.1754, + "step": 5479 + }, + { + "epoch": 1.11, + "learning_rate": 1.6457050287114454e-05, + "loss": 2.1483, + "step": 5480 + }, + { + "epoch": 1.11, + "learning_rate": 1.6455731219452688e-05, + "loss": 2.0604, + "step": 5481 + }, + { + "epoch": 1.11, + "learning_rate": 1.64544119591719e-05, + "loss": 2.1322, + "step": 5482 + }, + { + "epoch": 1.11, + "learning_rate": 1.6453092506311455e-05, + "loss": 2.0747, + "step": 5483 + }, + { + "epoch": 1.11, + "learning_rate": 1.645177286091072e-05, + "loss": 2.0894, + "step": 5484 + }, + { + "epoch": 1.11, + "learning_rate": 1.6450453023009066e-05, + "loss": 2.0957, + "step": 5485 + }, + { + "epoch": 1.11, + "learning_rate": 1.644913299264588e-05, + "loss": 2.0818, + "step": 5486 + }, + { + "epoch": 1.11, + "learning_rate": 1.644781276986054e-05, + "loss": 2.1193, + "step": 5487 + }, + { + "epoch": 1.11, + "learning_rate": 1.6446492354692442e-05, + "loss": 2.0765, + "step": 5488 + }, + { + "epoch": 1.11, + "learning_rate": 1.644517174718098e-05, + "loss": 2.1415, + "step": 5489 + }, + { + "epoch": 1.11, + "learning_rate": 1.644385094736556e-05, + "loss": 2.0749, + "step": 5490 + }, + { + "epoch": 1.11, + "learning_rate": 1.6442529955285587e-05, + "loss": 2.2167, + "step": 5491 + }, + { + "epoch": 1.11, + "learning_rate": 1.6441208770980478e-05, + "loss": 2.1781, + "step": 5492 + }, + { + "epoch": 1.11, + "learning_rate": 1.6439887394489657e-05, + "loss": 2.156, + "step": 5493 + }, + { + "epoch": 1.12, + "learning_rate": 1.643856582585254e-05, + "loss": 2.2042, + "step": 5494 + }, + { + "epoch": 1.12, + "learning_rate": 1.6437244065108563e-05, + "loss": 2.1148, + "step": 5495 + }, + { + "epoch": 1.12, + "learning_rate": 1.6435922112297168e-05, + "loss": 2.1559, + "step": 5496 + }, + { + "epoch": 1.12, + "learning_rate": 1.643459996745779e-05, + "loss": 2.0909, + "step": 5497 + }, + { + "epoch": 1.12, + "learning_rate": 1.6433277630629888e-05, + "loss": 2.1413, + "step": 5498 + }, + { + "epoch": 1.12, + "learning_rate": 1.6431955101852905e-05, + "loss": 2.1529, + "step": 5499 + }, + { + "epoch": 1.12, + "learning_rate": 1.6430632381166306e-05, + "loss": 2.1776, + "step": 5500 + }, + { + "epoch": 1.12, + "learning_rate": 1.6429309468609557e-05, + "loss": 2.1265, + "step": 5501 + }, + { + "epoch": 1.12, + "learning_rate": 1.6427986364222132e-05, + "loss": 2.155, + "step": 5502 + }, + { + "epoch": 1.12, + "learning_rate": 1.6426663068043507e-05, + "loss": 2.1278, + "step": 5503 + }, + { + "epoch": 1.12, + "learning_rate": 1.6425339580113164e-05, + "loss": 2.2129, + "step": 5504 + }, + { + "epoch": 1.12, + "learning_rate": 1.642401590047059e-05, + "loss": 2.0934, + "step": 5505 + }, + { + "epoch": 1.12, + "learning_rate": 1.642269202915528e-05, + "loss": 2.1936, + "step": 5506 + }, + { + "epoch": 1.12, + "learning_rate": 1.6421367966206737e-05, + "loss": 2.1277, + "step": 5507 + }, + { + "epoch": 1.12, + "learning_rate": 1.6420043711664468e-05, + "loss": 2.1101, + "step": 5508 + }, + { + "epoch": 1.12, + "learning_rate": 1.6418719265567978e-05, + "loss": 2.1228, + "step": 5509 + }, + { + "epoch": 1.12, + "learning_rate": 1.6417394627956796e-05, + "loss": 2.1229, + "step": 5510 + }, + { + "epoch": 1.12, + "learning_rate": 1.6416069798870432e-05, + "loss": 2.0954, + "step": 5511 + }, + { + "epoch": 1.12, + "learning_rate": 1.6414744778348422e-05, + "loss": 2.1133, + "step": 5512 + }, + { + "epoch": 1.12, + "learning_rate": 1.6413419566430295e-05, + "loss": 2.1534, + "step": 5513 + }, + { + "epoch": 1.12, + "learning_rate": 1.64120941631556e-05, + "loss": 2.1462, + "step": 5514 + }, + { + "epoch": 1.12, + "learning_rate": 1.6410768568563877e-05, + "loss": 2.1327, + "step": 5515 + }, + { + "epoch": 1.12, + "learning_rate": 1.6409442782694677e-05, + "loss": 2.1298, + "step": 5516 + }, + { + "epoch": 1.12, + "learning_rate": 1.640811680558756e-05, + "loss": 2.1332, + "step": 5517 + }, + { + "epoch": 1.12, + "learning_rate": 1.640679063728209e-05, + "loss": 2.1327, + "step": 5518 + }, + { + "epoch": 1.12, + "learning_rate": 1.6405464277817834e-05, + "loss": 2.2041, + "step": 5519 + }, + { + "epoch": 1.12, + "learning_rate": 1.6404137727234366e-05, + "loss": 2.1513, + "step": 5520 + }, + { + "epoch": 1.12, + "learning_rate": 1.6402810985571266e-05, + "loss": 2.2539, + "step": 5521 + }, + { + "epoch": 1.12, + "learning_rate": 1.640148405286812e-05, + "loss": 2.1391, + "step": 5522 + }, + { + "epoch": 1.12, + "learning_rate": 1.640015692916452e-05, + "loss": 2.0856, + "step": 5523 + }, + { + "epoch": 1.12, + "learning_rate": 1.6398829614500064e-05, + "loss": 2.2427, + "step": 5524 + }, + { + "epoch": 1.12, + "learning_rate": 1.6397502108914355e-05, + "loss": 2.0264, + "step": 5525 + }, + { + "epoch": 1.12, + "learning_rate": 1.6396174412446996e-05, + "loss": 2.1067, + "step": 5526 + }, + { + "epoch": 1.12, + "learning_rate": 1.6394846525137612e-05, + "loss": 2.121, + "step": 5527 + }, + { + "epoch": 1.12, + "learning_rate": 1.6393518447025813e-05, + "loss": 2.199, + "step": 5528 + }, + { + "epoch": 1.12, + "learning_rate": 1.639219017815123e-05, + "loss": 2.1329, + "step": 5529 + }, + { + "epoch": 1.12, + "learning_rate": 1.6390861718553498e-05, + "loss": 2.1581, + "step": 5530 + }, + { + "epoch": 1.12, + "learning_rate": 1.6389533068272244e-05, + "loss": 2.095, + "step": 5531 + }, + { + "epoch": 1.12, + "learning_rate": 1.6388204227347117e-05, + "loss": 2.0968, + "step": 5532 + }, + { + "epoch": 1.12, + "learning_rate": 1.638687519581777e-05, + "loss": 2.1493, + "step": 5533 + }, + { + "epoch": 1.12, + "learning_rate": 1.6385545973723845e-05, + "loss": 2.1545, + "step": 5534 + }, + { + "epoch": 1.12, + "learning_rate": 1.6384216561105014e-05, + "loss": 2.1043, + "step": 5535 + }, + { + "epoch": 1.12, + "learning_rate": 1.6382886958000935e-05, + "loss": 2.186, + "step": 5536 + }, + { + "epoch": 1.12, + "learning_rate": 1.638155716445128e-05, + "loss": 2.0842, + "step": 5537 + }, + { + "epoch": 1.12, + "learning_rate": 1.638022718049573e-05, + "loss": 2.1791, + "step": 5538 + }, + { + "epoch": 1.12, + "learning_rate": 1.6378897006173967e-05, + "loss": 2.1394, + "step": 5539 + }, + { + "epoch": 1.12, + "learning_rate": 1.637756664152568e-05, + "loss": 2.1994, + "step": 5540 + }, + { + "epoch": 1.12, + "learning_rate": 1.637623608659055e-05, + "loss": 2.1887, + "step": 5541 + }, + { + "epoch": 1.12, + "learning_rate": 1.6374905341408296e-05, + "loss": 2.0928, + "step": 5542 + }, + { + "epoch": 1.13, + "learning_rate": 1.637357440601861e-05, + "loss": 2.1433, + "step": 5543 + }, + { + "epoch": 1.13, + "learning_rate": 1.6372243280461208e-05, + "loss": 2.176, + "step": 5544 + }, + { + "epoch": 1.13, + "learning_rate": 1.637091196477581e-05, + "loss": 2.1501, + "step": 5545 + }, + { + "epoch": 1.13, + "learning_rate": 1.636958045900213e-05, + "loss": 2.1043, + "step": 5546 + }, + { + "epoch": 1.13, + "learning_rate": 1.6368248763179902e-05, + "loss": 2.1341, + "step": 5547 + }, + { + "epoch": 1.13, + "learning_rate": 1.6366916877348854e-05, + "loss": 2.176, + "step": 5548 + }, + { + "epoch": 1.13, + "learning_rate": 1.6365584801548736e-05, + "loss": 2.1265, + "step": 5549 + }, + { + "epoch": 1.13, + "learning_rate": 1.6364252535819284e-05, + "loss": 2.1681, + "step": 5550 + }, + { + "epoch": 1.13, + "learning_rate": 1.6362920080200248e-05, + "loss": 2.1431, + "step": 5551 + }, + { + "epoch": 1.13, + "learning_rate": 1.636158743473139e-05, + "loss": 2.0928, + "step": 5552 + }, + { + "epoch": 1.13, + "learning_rate": 1.636025459945247e-05, + "loss": 2.1159, + "step": 5553 + }, + { + "epoch": 1.13, + "learning_rate": 1.6358921574403258e-05, + "loss": 2.0997, + "step": 5554 + }, + { + "epoch": 1.13, + "learning_rate": 1.635758835962352e-05, + "loss": 2.1064, + "step": 5555 + }, + { + "epoch": 1.13, + "learning_rate": 1.6356254955153043e-05, + "loss": 2.1514, + "step": 5556 + }, + { + "epoch": 1.13, + "learning_rate": 1.6354921361031604e-05, + "loss": 2.1782, + "step": 5557 + }, + { + "epoch": 1.13, + "learning_rate": 1.6353587577298998e-05, + "loss": 2.1715, + "step": 5558 + }, + { + "epoch": 1.13, + "learning_rate": 1.6352253603995024e-05, + "loss": 2.1123, + "step": 5559 + }, + { + "epoch": 1.13, + "learning_rate": 1.6350919441159477e-05, + "loss": 2.1219, + "step": 5560 + }, + { + "epoch": 1.13, + "learning_rate": 1.6349585088832166e-05, + "loss": 2.1581, + "step": 5561 + }, + { + "epoch": 1.13, + "learning_rate": 1.634825054705291e-05, + "loss": 2.1136, + "step": 5562 + }, + { + "epoch": 1.13, + "learning_rate": 1.634691581586152e-05, + "loss": 2.1053, + "step": 5563 + }, + { + "epoch": 1.13, + "learning_rate": 1.6345580895297827e-05, + "loss": 2.0416, + "step": 5564 + }, + { + "epoch": 1.13, + "learning_rate": 1.6344245785401653e-05, + "loss": 2.1507, + "step": 5565 + }, + { + "epoch": 1.13, + "learning_rate": 1.634291048621284e-05, + "loss": 2.0976, + "step": 5566 + }, + { + "epoch": 1.13, + "learning_rate": 1.6341574997771224e-05, + "loss": 2.1455, + "step": 5567 + }, + { + "epoch": 1.13, + "learning_rate": 1.6340239320116654e-05, + "loss": 2.134, + "step": 5568 + }, + { + "epoch": 1.13, + "learning_rate": 1.6338903453288986e-05, + "loss": 2.0622, + "step": 5569 + }, + { + "epoch": 1.13, + "learning_rate": 1.6337567397328072e-05, + "loss": 2.1537, + "step": 5570 + }, + { + "epoch": 1.13, + "learning_rate": 1.633623115227378e-05, + "loss": 2.0802, + "step": 5571 + }, + { + "epoch": 1.13, + "learning_rate": 1.6334894718165983e-05, + "loss": 2.1557, + "step": 5572 + }, + { + "epoch": 1.13, + "learning_rate": 1.6333558095044545e-05, + "loss": 2.1825, + "step": 5573 + }, + { + "epoch": 1.13, + "learning_rate": 1.6332221282949357e-05, + "loss": 2.1121, + "step": 5574 + }, + { + "epoch": 1.13, + "learning_rate": 1.63308842819203e-05, + "loss": 2.1788, + "step": 5575 + }, + { + "epoch": 1.13, + "learning_rate": 1.632954709199727e-05, + "loss": 2.1558, + "step": 5576 + }, + { + "epoch": 1.13, + "learning_rate": 1.632820971322016e-05, + "loss": 2.1046, + "step": 5577 + }, + { + "epoch": 1.13, + "learning_rate": 1.6326872145628875e-05, + "loss": 2.0921, + "step": 5578 + }, + { + "epoch": 1.13, + "learning_rate": 1.6325534389263323e-05, + "loss": 2.0918, + "step": 5579 + }, + { + "epoch": 1.13, + "learning_rate": 1.632419644416342e-05, + "loss": 2.075, + "step": 5580 + }, + { + "epoch": 1.13, + "learning_rate": 1.632285831036909e-05, + "loss": 2.1532, + "step": 5581 + }, + { + "epoch": 1.13, + "learning_rate": 1.632151998792025e-05, + "loss": 2.1113, + "step": 5582 + }, + { + "epoch": 1.13, + "learning_rate": 1.6320181476856837e-05, + "loss": 2.073, + "step": 5583 + }, + { + "epoch": 1.13, + "learning_rate": 1.631884277721879e-05, + "loss": 2.1885, + "step": 5584 + }, + { + "epoch": 1.13, + "learning_rate": 1.6317503889046045e-05, + "loss": 2.1446, + "step": 5585 + }, + { + "epoch": 1.13, + "learning_rate": 1.6316164812378556e-05, + "loss": 2.1367, + "step": 5586 + }, + { + "epoch": 1.13, + "learning_rate": 1.6314825547256275e-05, + "loss": 2.1365, + "step": 5587 + }, + { + "epoch": 1.13, + "learning_rate": 1.6313486093719158e-05, + "loss": 2.1237, + "step": 5588 + }, + { + "epoch": 1.13, + "learning_rate": 1.631214645180718e-05, + "loss": 2.0802, + "step": 5589 + }, + { + "epoch": 1.13, + "learning_rate": 1.6310806621560294e-05, + "loss": 2.1786, + "step": 5590 + }, + { + "epoch": 1.13, + "learning_rate": 1.63094666030185e-05, + "loss": 2.0974, + "step": 5591 + }, + { + "epoch": 1.14, + "learning_rate": 1.630812639622176e-05, + "loss": 2.1533, + "step": 5592 + }, + { + "epoch": 1.14, + "learning_rate": 1.630678600121007e-05, + "loss": 2.0894, + "step": 5593 + }, + { + "epoch": 1.14, + "learning_rate": 1.6305445418023426e-05, + "loss": 2.096, + "step": 5594 + }, + { + "epoch": 1.14, + "learning_rate": 1.6304104646701818e-05, + "loss": 2.0618, + "step": 5595 + }, + { + "epoch": 1.14, + "learning_rate": 1.6302763687285262e-05, + "loss": 2.0546, + "step": 5596 + }, + { + "epoch": 1.14, + "learning_rate": 1.630142253981376e-05, + "loss": 2.0737, + "step": 5597 + }, + { + "epoch": 1.14, + "learning_rate": 1.630008120432733e-05, + "loss": 2.0989, + "step": 5598 + }, + { + "epoch": 1.14, + "learning_rate": 1.6298739680865988e-05, + "loss": 2.1518, + "step": 5599 + }, + { + "epoch": 1.14, + "learning_rate": 1.6297397969469774e-05, + "loss": 2.1401, + "step": 5600 + }, + { + "epoch": 1.14, + "learning_rate": 1.6296056070178707e-05, + "loss": 2.2067, + "step": 5601 + }, + { + "epoch": 1.14, + "learning_rate": 1.629471398303283e-05, + "loss": 2.211, + "step": 5602 + }, + { + "epoch": 1.14, + "learning_rate": 1.6293371708072188e-05, + "loss": 2.1313, + "step": 5603 + }, + { + "epoch": 1.14, + "learning_rate": 1.629202924533683e-05, + "loss": 2.148, + "step": 5604 + }, + { + "epoch": 1.14, + "learning_rate": 1.6290686594866805e-05, + "loss": 2.1986, + "step": 5605 + }, + { + "epoch": 1.14, + "learning_rate": 1.6289343756702187e-05, + "loss": 2.1192, + "step": 5606 + }, + { + "epoch": 1.14, + "learning_rate": 1.628800073088303e-05, + "loss": 2.0798, + "step": 5607 + }, + { + "epoch": 1.14, + "learning_rate": 1.6286657517449412e-05, + "loss": 2.1155, + "step": 5608 + }, + { + "epoch": 1.14, + "learning_rate": 1.628531411644141e-05, + "loss": 2.1196, + "step": 5609 + }, + { + "epoch": 1.14, + "learning_rate": 1.62839705278991e-05, + "loss": 2.164, + "step": 5610 + }, + { + "epoch": 1.14, + "learning_rate": 1.6282626751862575e-05, + "loss": 2.1576, + "step": 5611 + }, + { + "epoch": 1.14, + "learning_rate": 1.628128278837193e-05, + "loss": 2.0901, + "step": 5612 + }, + { + "epoch": 1.14, + "learning_rate": 1.6279938637467273e-05, + "loss": 2.0787, + "step": 5613 + }, + { + "epoch": 1.14, + "learning_rate": 1.627859429918869e-05, + "loss": 2.1865, + "step": 5614 + }, + { + "epoch": 1.14, + "learning_rate": 1.6277249773576307e-05, + "loss": 2.1429, + "step": 5615 + }, + { + "epoch": 1.14, + "learning_rate": 1.6275905060670237e-05, + "loss": 2.158, + "step": 5616 + }, + { + "epoch": 1.14, + "learning_rate": 1.6274560160510598e-05, + "loss": 2.1765, + "step": 5617 + }, + { + "epoch": 1.14, + "learning_rate": 1.6273215073137524e-05, + "loss": 2.1114, + "step": 5618 + }, + { + "epoch": 1.14, + "learning_rate": 1.6271869798591144e-05, + "loss": 2.1388, + "step": 5619 + }, + { + "epoch": 1.14, + "learning_rate": 1.62705243369116e-05, + "loss": 2.0003, + "step": 5620 + }, + { + "epoch": 1.14, + "learning_rate": 1.626917868813903e-05, + "loss": 2.0777, + "step": 5621 + }, + { + "epoch": 1.14, + "learning_rate": 1.626783285231359e-05, + "loss": 2.128, + "step": 5622 + }, + { + "epoch": 1.14, + "learning_rate": 1.6266486829475434e-05, + "loss": 2.1974, + "step": 5623 + }, + { + "epoch": 1.14, + "learning_rate": 1.626514061966472e-05, + "loss": 2.1724, + "step": 5624 + }, + { + "epoch": 1.14, + "learning_rate": 1.626379422292162e-05, + "loss": 2.1525, + "step": 5625 + }, + { + "epoch": 1.14, + "learning_rate": 1.6262447639286307e-05, + "loss": 2.0547, + "step": 5626 + }, + { + "epoch": 1.14, + "learning_rate": 1.6261100868798952e-05, + "loss": 2.1554, + "step": 5627 + }, + { + "epoch": 1.14, + "learning_rate": 1.6259753911499742e-05, + "loss": 2.1859, + "step": 5628 + }, + { + "epoch": 1.14, + "learning_rate": 1.625840676742887e-05, + "loss": 2.1401, + "step": 5629 + }, + { + "epoch": 1.14, + "learning_rate": 1.6257059436626523e-05, + "loss": 2.1628, + "step": 5630 + }, + { + "epoch": 1.14, + "learning_rate": 1.6255711919132908e-05, + "loss": 2.1119, + "step": 5631 + }, + { + "epoch": 1.14, + "learning_rate": 1.6254364214988224e-05, + "loss": 2.1271, + "step": 5632 + }, + { + "epoch": 1.14, + "learning_rate": 1.625301632423269e-05, + "loss": 2.1246, + "step": 5633 + }, + { + "epoch": 1.14, + "learning_rate": 1.625166824690652e-05, + "loss": 2.1064, + "step": 5634 + }, + { + "epoch": 1.14, + "learning_rate": 1.6250319983049933e-05, + "loss": 2.1193, + "step": 5635 + }, + { + "epoch": 1.14, + "learning_rate": 1.6248971532703158e-05, + "loss": 2.1493, + "step": 5636 + }, + { + "epoch": 1.14, + "learning_rate": 1.6247622895906433e-05, + "loss": 2.1263, + "step": 5637 + }, + { + "epoch": 1.14, + "learning_rate": 1.6246274072699994e-05, + "loss": 2.0677, + "step": 5638 + }, + { + "epoch": 1.14, + "learning_rate": 1.6244925063124086e-05, + "loss": 2.1822, + "step": 5639 + }, + { + "epoch": 1.14, + "learning_rate": 1.624357586721896e-05, + "loss": 2.0777, + "step": 5640 + }, + { + "epoch": 1.15, + "learning_rate": 1.624222648502487e-05, + "loss": 2.0902, + "step": 5641 + }, + { + "epoch": 1.15, + "learning_rate": 1.6240876916582077e-05, + "loss": 2.0991, + "step": 5642 + }, + { + "epoch": 1.15, + "learning_rate": 1.6239527161930853e-05, + "loss": 2.075, + "step": 5643 + }, + { + "epoch": 1.15, + "learning_rate": 1.6238177221111463e-05, + "loss": 2.087, + "step": 5644 + }, + { + "epoch": 1.15, + "learning_rate": 1.6236827094164193e-05, + "loss": 2.1332, + "step": 5645 + }, + { + "epoch": 1.15, + "learning_rate": 1.623547678112932e-05, + "loss": 2.1803, + "step": 5646 + }, + { + "epoch": 1.15, + "learning_rate": 1.6234126282047134e-05, + "loss": 2.0702, + "step": 5647 + }, + { + "epoch": 1.15, + "learning_rate": 1.6232775596957935e-05, + "loss": 2.1838, + "step": 5648 + }, + { + "epoch": 1.15, + "learning_rate": 1.623142472590202e-05, + "loss": 2.0724, + "step": 5649 + }, + { + "epoch": 1.15, + "learning_rate": 1.623007366891969e-05, + "loss": 2.0283, + "step": 5650 + }, + { + "epoch": 1.15, + "learning_rate": 1.6228722426051263e-05, + "loss": 2.0706, + "step": 5651 + }, + { + "epoch": 1.15, + "learning_rate": 1.6227370997337054e-05, + "loss": 2.1001, + "step": 5652 + }, + { + "epoch": 1.15, + "learning_rate": 1.6226019382817386e-05, + "loss": 2.1113, + "step": 5653 + }, + { + "epoch": 1.15, + "learning_rate": 1.6224667582532588e-05, + "loss": 2.0394, + "step": 5654 + }, + { + "epoch": 1.15, + "learning_rate": 1.622331559652299e-05, + "loss": 2.1526, + "step": 5655 + }, + { + "epoch": 1.15, + "learning_rate": 1.622196342482893e-05, + "loss": 2.1356, + "step": 5656 + }, + { + "epoch": 1.15, + "learning_rate": 1.622061106749076e-05, + "loss": 2.155, + "step": 5657 + }, + { + "epoch": 1.15, + "learning_rate": 1.621925852454882e-05, + "loss": 2.0736, + "step": 5658 + }, + { + "epoch": 1.15, + "learning_rate": 1.6217905796043474e-05, + "loss": 2.1046, + "step": 5659 + }, + { + "epoch": 1.15, + "learning_rate": 1.621655288201508e-05, + "loss": 2.1117, + "step": 5660 + }, + { + "epoch": 1.15, + "learning_rate": 1.621519978250401e-05, + "loss": 2.1639, + "step": 5661 + }, + { + "epoch": 1.15, + "learning_rate": 1.6213846497550626e-05, + "loss": 2.1893, + "step": 5662 + }, + { + "epoch": 1.15, + "learning_rate": 1.6212493027195316e-05, + "loss": 2.0838, + "step": 5663 + }, + { + "epoch": 1.15, + "learning_rate": 1.6211139371478454e-05, + "loss": 2.279, + "step": 5664 + }, + { + "epoch": 1.15, + "learning_rate": 1.6209785530440435e-05, + "loss": 2.0879, + "step": 5665 + }, + { + "epoch": 1.15, + "learning_rate": 1.6208431504121654e-05, + "loss": 2.1217, + "step": 5666 + }, + { + "epoch": 1.15, + "learning_rate": 1.6207077292562507e-05, + "loss": 2.0933, + "step": 5667 + }, + { + "epoch": 1.15, + "learning_rate": 1.6205722895803403e-05, + "loss": 2.0998, + "step": 5668 + }, + { + "epoch": 1.15, + "learning_rate": 1.6204368313884748e-05, + "loss": 2.1011, + "step": 5669 + }, + { + "epoch": 1.15, + "learning_rate": 1.6203013546846967e-05, + "loss": 2.1937, + "step": 5670 + }, + { + "epoch": 1.15, + "learning_rate": 1.6201658594730475e-05, + "loss": 2.1582, + "step": 5671 + }, + { + "epoch": 1.15, + "learning_rate": 1.6200303457575702e-05, + "loss": 2.1353, + "step": 5672 + }, + { + "epoch": 1.15, + "learning_rate": 1.619894813542308e-05, + "loss": 2.186, + "step": 5673 + }, + { + "epoch": 1.15, + "learning_rate": 1.6197592628313045e-05, + "loss": 2.1101, + "step": 5674 + }, + { + "epoch": 1.15, + "learning_rate": 1.619623693628605e-05, + "loss": 2.0819, + "step": 5675 + }, + { + "epoch": 1.15, + "learning_rate": 1.619488105938254e-05, + "loss": 2.0971, + "step": 5676 + }, + { + "epoch": 1.15, + "learning_rate": 1.6193524997642963e-05, + "loss": 2.0866, + "step": 5677 + }, + { + "epoch": 1.15, + "learning_rate": 1.6192168751107787e-05, + "loss": 2.1232, + "step": 5678 + }, + { + "epoch": 1.15, + "learning_rate": 1.6190812319817483e-05, + "loss": 2.168, + "step": 5679 + }, + { + "epoch": 1.15, + "learning_rate": 1.6189455703812512e-05, + "loss": 2.1569, + "step": 5680 + }, + { + "epoch": 1.15, + "learning_rate": 1.618809890313336e-05, + "loss": 2.0905, + "step": 5681 + }, + { + "epoch": 1.15, + "learning_rate": 1.6186741917820505e-05, + "loss": 2.1991, + "step": 5682 + }, + { + "epoch": 1.15, + "learning_rate": 1.618538474791443e-05, + "loss": 2.2071, + "step": 5683 + }, + { + "epoch": 1.15, + "learning_rate": 1.618402739345564e-05, + "loss": 2.0941, + "step": 5684 + }, + { + "epoch": 1.15, + "learning_rate": 1.618266985448463e-05, + "loss": 2.189, + "step": 5685 + }, + { + "epoch": 1.15, + "learning_rate": 1.6181312131041902e-05, + "loss": 2.0714, + "step": 5686 + }, + { + "epoch": 1.15, + "learning_rate": 1.617995422316797e-05, + "loss": 2.1176, + "step": 5687 + }, + { + "epoch": 1.15, + "learning_rate": 1.6178596130903345e-05, + "loss": 2.1127, + "step": 5688 + }, + { + "epoch": 1.15, + "learning_rate": 1.6177237854288553e-05, + "loss": 2.1014, + "step": 5689 + }, + { + "epoch": 1.15, + "learning_rate": 1.6175879393364118e-05, + "loss": 2.08, + "step": 5690 + }, + { + "epoch": 1.16, + "learning_rate": 1.6174520748170572e-05, + "loss": 2.1324, + "step": 5691 + }, + { + "epoch": 1.16, + "learning_rate": 1.617316191874846e-05, + "loss": 2.1775, + "step": 5692 + }, + { + "epoch": 1.16, + "learning_rate": 1.6171802905138312e-05, + "loss": 2.1392, + "step": 5693 + }, + { + "epoch": 1.16, + "learning_rate": 1.6170443707380685e-05, + "loss": 2.1851, + "step": 5694 + }, + { + "epoch": 1.16, + "learning_rate": 1.6169084325516134e-05, + "loss": 2.1305, + "step": 5695 + }, + { + "epoch": 1.16, + "learning_rate": 1.616772475958522e-05, + "loss": 2.0918, + "step": 5696 + }, + { + "epoch": 1.16, + "learning_rate": 1.6166365009628497e-05, + "loss": 2.1008, + "step": 5697 + }, + { + "epoch": 1.16, + "learning_rate": 1.616500507568655e-05, + "loss": 2.1676, + "step": 5698 + }, + { + "epoch": 1.16, + "learning_rate": 1.616364495779995e-05, + "loss": 2.1514, + "step": 5699 + }, + { + "epoch": 1.16, + "learning_rate": 1.6162284656009273e-05, + "loss": 2.0989, + "step": 5700 + }, + { + "epoch": 1.16, + "learning_rate": 1.6160924170355116e-05, + "loss": 2.1863, + "step": 5701 + }, + { + "epoch": 1.16, + "learning_rate": 1.6159563500878062e-05, + "loss": 2.1432, + "step": 5702 + }, + { + "epoch": 1.16, + "learning_rate": 1.615820264761872e-05, + "loss": 2.167, + "step": 5703 + }, + { + "epoch": 1.16, + "learning_rate": 1.615684161061768e-05, + "loss": 2.055, + "step": 5704 + }, + { + "epoch": 1.16, + "learning_rate": 1.6155480389915564e-05, + "loss": 2.1383, + "step": 5705 + }, + { + "epoch": 1.16, + "learning_rate": 1.615411898555298e-05, + "loss": 2.1372, + "step": 5706 + }, + { + "epoch": 1.16, + "learning_rate": 1.6152757397570548e-05, + "loss": 2.1989, + "step": 5707 + }, + { + "epoch": 1.16, + "learning_rate": 1.6151395626008897e-05, + "loss": 2.1215, + "step": 5708 + }, + { + "epoch": 1.16, + "learning_rate": 1.6150033670908655e-05, + "loss": 2.1326, + "step": 5709 + }, + { + "epoch": 1.16, + "learning_rate": 1.6148671532310457e-05, + "loss": 2.1114, + "step": 5710 + }, + { + "epoch": 1.16, + "learning_rate": 1.6147309210254948e-05, + "loss": 2.1554, + "step": 5711 + }, + { + "epoch": 1.16, + "learning_rate": 1.614594670478278e-05, + "loss": 2.109, + "step": 5712 + }, + { + "epoch": 1.16, + "learning_rate": 1.6144584015934598e-05, + "loss": 2.1534, + "step": 5713 + }, + { + "epoch": 1.16, + "learning_rate": 1.614322114375106e-05, + "loss": 2.1513, + "step": 5714 + }, + { + "epoch": 1.16, + "learning_rate": 1.6141858088272838e-05, + "loss": 2.114, + "step": 5715 + }, + { + "epoch": 1.16, + "learning_rate": 1.6140494849540594e-05, + "loss": 2.1435, + "step": 5716 + }, + { + "epoch": 1.16, + "learning_rate": 1.6139131427595006e-05, + "loss": 2.12, + "step": 5717 + }, + { + "epoch": 1.16, + "learning_rate": 1.6137767822476754e-05, + "loss": 2.1348, + "step": 5718 + }, + { + "epoch": 1.16, + "learning_rate": 1.613640403422652e-05, + "loss": 2.1791, + "step": 5719 + }, + { + "epoch": 1.16, + "learning_rate": 1.6135040062885003e-05, + "loss": 2.1452, + "step": 5720 + }, + { + "epoch": 1.16, + "learning_rate": 1.6133675908492895e-05, + "loss": 2.1023, + "step": 5721 + }, + { + "epoch": 1.16, + "learning_rate": 1.6132311571090895e-05, + "loss": 2.1277, + "step": 5722 + }, + { + "epoch": 1.16, + "learning_rate": 1.6130947050719716e-05, + "loss": 2.1883, + "step": 5723 + }, + { + "epoch": 1.16, + "learning_rate": 1.6129582347420067e-05, + "loss": 2.1806, + "step": 5724 + }, + { + "epoch": 1.16, + "learning_rate": 1.6128217461232675e-05, + "loss": 2.1256, + "step": 5725 + }, + { + "epoch": 1.16, + "learning_rate": 1.612685239219825e-05, + "loss": 2.1202, + "step": 5726 + }, + { + "epoch": 1.16, + "learning_rate": 1.612548714035753e-05, + "loss": 2.0866, + "step": 5727 + }, + { + "epoch": 1.16, + "learning_rate": 1.6124121705751258e-05, + "loss": 2.1879, + "step": 5728 + }, + { + "epoch": 1.16, + "learning_rate": 1.6122756088420154e-05, + "loss": 2.213, + "step": 5729 + }, + { + "epoch": 1.16, + "learning_rate": 1.612139028840498e-05, + "loss": 2.0943, + "step": 5730 + }, + { + "epoch": 1.16, + "learning_rate": 1.6120024305746484e-05, + "loss": 2.1856, + "step": 5731 + }, + { + "epoch": 1.16, + "learning_rate": 1.6118658140485416e-05, + "loss": 2.1592, + "step": 5732 + }, + { + "epoch": 1.16, + "learning_rate": 1.6117291792662547e-05, + "loss": 2.1422, + "step": 5733 + }, + { + "epoch": 1.16, + "learning_rate": 1.611592526231864e-05, + "loss": 2.1551, + "step": 5734 + }, + { + "epoch": 1.16, + "learning_rate": 1.611455854949447e-05, + "loss": 2.0591, + "step": 5735 + }, + { + "epoch": 1.16, + "learning_rate": 1.611319165423081e-05, + "loss": 2.0865, + "step": 5736 + }, + { + "epoch": 1.16, + "learning_rate": 1.611182457656845e-05, + "loss": 2.1965, + "step": 5737 + }, + { + "epoch": 1.16, + "learning_rate": 1.611045731654818e-05, + "loss": 2.1731, + "step": 5738 + }, + { + "epoch": 1.16, + "learning_rate": 1.6109089874210787e-05, + "loss": 2.0728, + "step": 5739 + }, + { + "epoch": 1.17, + "learning_rate": 1.610772224959708e-05, + "loss": 2.2441, + "step": 5740 + }, + { + "epoch": 1.17, + "learning_rate": 1.610635444274786e-05, + "loss": 2.146, + "step": 5741 + }, + { + "epoch": 1.17, + "learning_rate": 1.6104986453703943e-05, + "loss": 2.1247, + "step": 5742 + }, + { + "epoch": 1.17, + "learning_rate": 1.6103618282506137e-05, + "loss": 2.0992, + "step": 5743 + }, + { + "epoch": 1.17, + "learning_rate": 1.6102249929195272e-05, + "loss": 2.0639, + "step": 5744 + }, + { + "epoch": 1.17, + "learning_rate": 1.610088139381217e-05, + "loss": 2.162, + "step": 5745 + }, + { + "epoch": 1.17, + "learning_rate": 1.6099512676397665e-05, + "loss": 2.116, + "step": 5746 + }, + { + "epoch": 1.17, + "learning_rate": 1.60981437769926e-05, + "loss": 2.1678, + "step": 5747 + }, + { + "epoch": 1.17, + "learning_rate": 1.6096774695637812e-05, + "loss": 2.0852, + "step": 5748 + }, + { + "epoch": 1.17, + "learning_rate": 1.609540543237416e-05, + "loss": 2.021, + "step": 5749 + }, + { + "epoch": 1.17, + "learning_rate": 1.609403598724249e-05, + "loss": 2.1664, + "step": 5750 + }, + { + "epoch": 1.17, + "learning_rate": 1.6092666360283654e-05, + "loss": 2.2147, + "step": 5751 + }, + { + "epoch": 1.17, + "learning_rate": 1.6091296551538536e-05, + "loss": 2.1151, + "step": 5752 + }, + { + "epoch": 1.17, + "learning_rate": 1.6089926561047998e-05, + "loss": 2.1443, + "step": 5753 + }, + { + "epoch": 1.17, + "learning_rate": 1.6088556388852917e-05, + "loss": 2.0878, + "step": 5754 + }, + { + "epoch": 1.17, + "learning_rate": 1.608718603499417e-05, + "loss": 2.0891, + "step": 5755 + }, + { + "epoch": 1.17, + "learning_rate": 1.608581549951265e-05, + "loss": 2.091, + "step": 5756 + }, + { + "epoch": 1.17, + "learning_rate": 1.6084444782449248e-05, + "loss": 2.0707, + "step": 5757 + }, + { + "epoch": 1.17, + "learning_rate": 1.6083073883844858e-05, + "loss": 2.1791, + "step": 5758 + }, + { + "epoch": 1.17, + "learning_rate": 1.6081702803740393e-05, + "loss": 2.1176, + "step": 5759 + }, + { + "epoch": 1.17, + "learning_rate": 1.6080331542176754e-05, + "loss": 2.119, + "step": 5760 + }, + { + "epoch": 1.17, + "learning_rate": 1.6078960099194856e-05, + "loss": 2.1064, + "step": 5761 + }, + { + "epoch": 1.17, + "learning_rate": 1.6077588474835622e-05, + "loss": 2.1581, + "step": 5762 + }, + { + "epoch": 1.17, + "learning_rate": 1.6076216669139973e-05, + "loss": 2.2495, + "step": 5763 + }, + { + "epoch": 1.17, + "learning_rate": 1.6074844682148844e-05, + "loss": 2.0928, + "step": 5764 + }, + { + "epoch": 1.17, + "learning_rate": 1.607347251390316e-05, + "loss": 2.1005, + "step": 5765 + }, + { + "epoch": 1.17, + "learning_rate": 1.6072100164443876e-05, + "loss": 2.1011, + "step": 5766 + }, + { + "epoch": 1.17, + "learning_rate": 1.6070727633811934e-05, + "loss": 2.2004, + "step": 5767 + }, + { + "epoch": 1.17, + "learning_rate": 1.6069354922048282e-05, + "loss": 2.18, + "step": 5768 + }, + { + "epoch": 1.17, + "learning_rate": 1.606798202919388e-05, + "loss": 2.1603, + "step": 5769 + }, + { + "epoch": 1.17, + "learning_rate": 1.6066608955289695e-05, + "loss": 2.133, + "step": 5770 + }, + { + "epoch": 1.17, + "learning_rate": 1.606523570037669e-05, + "loss": 2.0917, + "step": 5771 + }, + { + "epoch": 1.17, + "learning_rate": 1.6063862264495836e-05, + "loss": 2.1244, + "step": 5772 + }, + { + "epoch": 1.17, + "learning_rate": 1.6062488647688122e-05, + "loss": 2.1325, + "step": 5773 + }, + { + "epoch": 1.17, + "learning_rate": 1.6061114849994526e-05, + "loss": 2.1547, + "step": 5774 + }, + { + "epoch": 1.17, + "learning_rate": 1.6059740871456035e-05, + "loss": 2.0531, + "step": 5775 + }, + { + "epoch": 1.17, + "learning_rate": 1.6058366712113652e-05, + "loss": 2.1709, + "step": 5776 + }, + { + "epoch": 1.17, + "learning_rate": 1.605699237200837e-05, + "loss": 2.084, + "step": 5777 + }, + { + "epoch": 1.17, + "learning_rate": 1.6055617851181202e-05, + "loss": 2.1071, + "step": 5778 + }, + { + "epoch": 1.17, + "learning_rate": 1.605424314967315e-05, + "loss": 2.0709, + "step": 5779 + }, + { + "epoch": 1.17, + "learning_rate": 1.6052868267525245e-05, + "loss": 2.1486, + "step": 5780 + }, + { + "epoch": 1.17, + "learning_rate": 1.6051493204778497e-05, + "loss": 2.159, + "step": 5781 + }, + { + "epoch": 1.17, + "learning_rate": 1.605011796147394e-05, + "loss": 2.1718, + "step": 5782 + }, + { + "epoch": 1.17, + "learning_rate": 1.60487425376526e-05, + "loss": 2.1752, + "step": 5783 + }, + { + "epoch": 1.17, + "learning_rate": 1.6047366933355524e-05, + "loss": 2.09, + "step": 5784 + }, + { + "epoch": 1.17, + "learning_rate": 1.6045991148623752e-05, + "loss": 2.13, + "step": 5785 + }, + { + "epoch": 1.17, + "learning_rate": 1.604461518349833e-05, + "loss": 2.1526, + "step": 5786 + }, + { + "epoch": 1.17, + "learning_rate": 1.6043239038020316e-05, + "loss": 2.0819, + "step": 5787 + }, + { + "epoch": 1.17, + "learning_rate": 1.6041862712230773e-05, + "loss": 2.053, + "step": 5788 + }, + { + "epoch": 1.18, + "learning_rate": 1.604048620617076e-05, + "loss": 2.0956, + "step": 5789 + }, + { + "epoch": 1.18, + "learning_rate": 1.603910951988135e-05, + "loss": 2.2181, + "step": 5790 + }, + { + "epoch": 1.18, + "learning_rate": 1.6037732653403623e-05, + "loss": 2.1497, + "step": 5791 + }, + { + "epoch": 1.18, + "learning_rate": 1.6036355606778653e-05, + "loss": 2.1374, + "step": 5792 + }, + { + "epoch": 1.18, + "learning_rate": 1.6034978380047534e-05, + "loss": 2.1279, + "step": 5793 + }, + { + "epoch": 1.18, + "learning_rate": 1.603360097325135e-05, + "loss": 2.17, + "step": 5794 + }, + { + "epoch": 1.18, + "learning_rate": 1.6032223386431204e-05, + "loss": 2.0915, + "step": 5795 + }, + { + "epoch": 1.18, + "learning_rate": 1.6030845619628205e-05, + "loss": 2.0969, + "step": 5796 + }, + { + "epoch": 1.18, + "learning_rate": 1.602946767288345e-05, + "loss": 2.1658, + "step": 5797 + }, + { + "epoch": 1.18, + "learning_rate": 1.602808954623806e-05, + "loss": 2.0755, + "step": 5798 + }, + { + "epoch": 1.18, + "learning_rate": 1.6026711239733147e-05, + "loss": 2.2112, + "step": 5799 + }, + { + "epoch": 1.18, + "learning_rate": 1.602533275340984e-05, + "loss": 2.1035, + "step": 5800 + }, + { + "epoch": 1.18, + "learning_rate": 1.602395408730927e-05, + "loss": 2.0668, + "step": 5801 + }, + { + "epoch": 1.18, + "learning_rate": 1.6022575241472576e-05, + "loss": 2.167, + "step": 5802 + }, + { + "epoch": 1.18, + "learning_rate": 1.6021196215940886e-05, + "loss": 2.1213, + "step": 5803 + }, + { + "epoch": 1.18, + "learning_rate": 1.601981701075536e-05, + "loss": 2.0938, + "step": 5804 + }, + { + "epoch": 1.18, + "learning_rate": 1.6018437625957135e-05, + "loss": 2.1577, + "step": 5805 + }, + { + "epoch": 1.18, + "learning_rate": 1.6017058061587377e-05, + "loss": 2.0716, + "step": 5806 + }, + { + "epoch": 1.18, + "learning_rate": 1.6015678317687245e-05, + "loss": 2.1062, + "step": 5807 + }, + { + "epoch": 1.18, + "learning_rate": 1.6014298394297907e-05, + "loss": 2.0969, + "step": 5808 + }, + { + "epoch": 1.18, + "learning_rate": 1.601291829146054e-05, + "loss": 2.1638, + "step": 5809 + }, + { + "epoch": 1.18, + "learning_rate": 1.6011538009216313e-05, + "loss": 2.1268, + "step": 5810 + }, + { + "epoch": 1.18, + "learning_rate": 1.6010157547606414e-05, + "loss": 2.1382, + "step": 5811 + }, + { + "epoch": 1.18, + "learning_rate": 1.6008776906672038e-05, + "loss": 2.0261, + "step": 5812 + }, + { + "epoch": 1.18, + "learning_rate": 1.6007396086454367e-05, + "loss": 2.1311, + "step": 5813 + }, + { + "epoch": 1.18, + "learning_rate": 1.6006015086994607e-05, + "loss": 2.1923, + "step": 5814 + }, + { + "epoch": 1.18, + "learning_rate": 1.600463390833396e-05, + "loss": 2.0837, + "step": 5815 + }, + { + "epoch": 1.18, + "learning_rate": 1.600325255051364e-05, + "loss": 2.1095, + "step": 5816 + }, + { + "epoch": 1.18, + "learning_rate": 1.6001871013574862e-05, + "loss": 2.1039, + "step": 5817 + }, + { + "epoch": 1.18, + "learning_rate": 1.6000489297558843e-05, + "loss": 2.0804, + "step": 5818 + }, + { + "epoch": 1.18, + "learning_rate": 1.5999107402506814e-05, + "loss": 2.1873, + "step": 5819 + }, + { + "epoch": 1.18, + "learning_rate": 1.599772532846e-05, + "loss": 2.1593, + "step": 5820 + }, + { + "epoch": 1.18, + "learning_rate": 1.599634307545964e-05, + "loss": 2.0931, + "step": 5821 + }, + { + "epoch": 1.18, + "learning_rate": 1.5994960643546982e-05, + "loss": 2.1991, + "step": 5822 + }, + { + "epoch": 1.18, + "learning_rate": 1.5993578032763267e-05, + "loss": 2.1514, + "step": 5823 + }, + { + "epoch": 1.18, + "learning_rate": 1.5992195243149754e-05, + "loss": 2.1166, + "step": 5824 + }, + { + "epoch": 1.18, + "learning_rate": 1.5990812274747695e-05, + "loss": 2.1792, + "step": 5825 + }, + { + "epoch": 1.18, + "learning_rate": 1.5989429127598353e-05, + "loss": 2.1221, + "step": 5826 + }, + { + "epoch": 1.18, + "learning_rate": 1.5988045801743003e-05, + "loss": 2.1588, + "step": 5827 + }, + { + "epoch": 1.18, + "learning_rate": 1.5986662297222912e-05, + "loss": 2.1672, + "step": 5828 + }, + { + "epoch": 1.18, + "learning_rate": 1.598527861407937e-05, + "loss": 2.1127, + "step": 5829 + }, + { + "epoch": 1.18, + "learning_rate": 1.5983894752353646e-05, + "loss": 2.1381, + "step": 5830 + }, + { + "epoch": 1.18, + "learning_rate": 1.5982510712087042e-05, + "loss": 2.0637, + "step": 5831 + }, + { + "epoch": 1.18, + "learning_rate": 1.5981126493320855e-05, + "loss": 2.1225, + "step": 5832 + }, + { + "epoch": 1.18, + "learning_rate": 1.5979742096096376e-05, + "loss": 2.1383, + "step": 5833 + }, + { + "epoch": 1.18, + "learning_rate": 1.597835752045492e-05, + "loss": 2.1437, + "step": 5834 + }, + { + "epoch": 1.18, + "learning_rate": 1.5976972766437796e-05, + "loss": 2.0612, + "step": 5835 + }, + { + "epoch": 1.18, + "learning_rate": 1.5975587834086317e-05, + "loss": 2.1895, + "step": 5836 + }, + { + "epoch": 1.18, + "learning_rate": 1.597420272344181e-05, + "loss": 2.1457, + "step": 5837 + }, + { + "epoch": 1.19, + "learning_rate": 1.59728174345456e-05, + "loss": 2.1388, + "step": 5838 + }, + { + "epoch": 1.19, + "learning_rate": 1.597143196743902e-05, + "loss": 2.1521, + "step": 5839 + }, + { + "epoch": 1.19, + "learning_rate": 1.5970046322163405e-05, + "loss": 2.1123, + "step": 5840 + }, + { + "epoch": 1.19, + "learning_rate": 1.59686604987601e-05, + "loss": 2.082, + "step": 5841 + }, + { + "epoch": 1.19, + "learning_rate": 1.596727449727046e-05, + "loss": 2.1582, + "step": 5842 + }, + { + "epoch": 1.19, + "learning_rate": 1.596588831773583e-05, + "loss": 2.1553, + "step": 5843 + }, + { + "epoch": 1.19, + "learning_rate": 1.5964501960197578e-05, + "loss": 2.1307, + "step": 5844 + }, + { + "epoch": 1.19, + "learning_rate": 1.5963115424697062e-05, + "loss": 2.1419, + "step": 5845 + }, + { + "epoch": 1.19, + "learning_rate": 1.5961728711275656e-05, + "loss": 2.1489, + "step": 5846 + }, + { + "epoch": 1.19, + "learning_rate": 1.5960341819974726e-05, + "loss": 2.1791, + "step": 5847 + }, + { + "epoch": 1.19, + "learning_rate": 1.5958954750835668e-05, + "loss": 2.1299, + "step": 5848 + }, + { + "epoch": 1.19, + "learning_rate": 1.5957567503899857e-05, + "loss": 2.0234, + "step": 5849 + }, + { + "epoch": 1.19, + "learning_rate": 1.5956180079208684e-05, + "loss": 2.1636, + "step": 5850 + }, + { + "epoch": 1.19, + "learning_rate": 1.595479247680355e-05, + "loss": 2.1633, + "step": 5851 + }, + { + "epoch": 1.19, + "learning_rate": 1.5953404696725853e-05, + "loss": 2.169, + "step": 5852 + }, + { + "epoch": 1.19, + "learning_rate": 1.5952016739017e-05, + "loss": 2.1421, + "step": 5853 + }, + { + "epoch": 1.19, + "learning_rate": 1.5950628603718416e-05, + "loss": 2.0986, + "step": 5854 + }, + { + "epoch": 1.19, + "learning_rate": 1.5949240290871496e-05, + "loss": 2.0508, + "step": 5855 + }, + { + "epoch": 1.19, + "learning_rate": 1.5947851800517682e-05, + "loss": 2.1153, + "step": 5856 + }, + { + "epoch": 1.19, + "learning_rate": 1.594646313269839e-05, + "loss": 2.2551, + "step": 5857 + }, + { + "epoch": 1.19, + "learning_rate": 1.5945074287455064e-05, + "loss": 2.1316, + "step": 5858 + }, + { + "epoch": 1.19, + "learning_rate": 1.5943685264829137e-05, + "loss": 2.0989, + "step": 5859 + }, + { + "epoch": 1.19, + "learning_rate": 1.594229606486205e-05, + "loss": 2.097, + "step": 5860 + }, + { + "epoch": 1.19, + "learning_rate": 1.594090668759526e-05, + "loss": 2.0864, + "step": 5861 + }, + { + "epoch": 1.19, + "learning_rate": 1.5939517133070213e-05, + "loss": 2.1349, + "step": 5862 + }, + { + "epoch": 1.19, + "learning_rate": 1.5938127401328377e-05, + "loss": 2.1437, + "step": 5863 + }, + { + "epoch": 1.19, + "learning_rate": 1.5936737492411214e-05, + "loss": 2.1233, + "step": 5864 + }, + { + "epoch": 1.19, + "learning_rate": 1.5935347406360192e-05, + "loss": 2.1839, + "step": 5865 + }, + { + "epoch": 1.19, + "learning_rate": 1.5933957143216794e-05, + "loss": 2.2062, + "step": 5866 + }, + { + "epoch": 1.19, + "learning_rate": 1.5932566703022494e-05, + "loss": 2.092, + "step": 5867 + }, + { + "epoch": 1.19, + "learning_rate": 1.593117608581878e-05, + "loss": 2.1012, + "step": 5868 + }, + { + "epoch": 1.19, + "learning_rate": 1.592978529164715e-05, + "loss": 2.1341, + "step": 5869 + }, + { + "epoch": 1.19, + "learning_rate": 1.592839432054909e-05, + "loss": 2.1439, + "step": 5870 + }, + { + "epoch": 1.19, + "learning_rate": 1.592700317256611e-05, + "loss": 2.1471, + "step": 5871 + }, + { + "epoch": 1.19, + "learning_rate": 1.5925611847739717e-05, + "loss": 2.094, + "step": 5872 + }, + { + "epoch": 1.19, + "learning_rate": 1.5924220346111422e-05, + "loss": 2.089, + "step": 5873 + }, + { + "epoch": 1.19, + "learning_rate": 1.5922828667722746e-05, + "loss": 2.1135, + "step": 5874 + }, + { + "epoch": 1.19, + "learning_rate": 1.59214368126152e-05, + "loss": 2.086, + "step": 5875 + }, + { + "epoch": 1.19, + "learning_rate": 1.5920044780830337e-05, + "loss": 2.2003, + "step": 5876 + }, + { + "epoch": 1.19, + "learning_rate": 1.5918652572409666e-05, + "loss": 2.069, + "step": 5877 + }, + { + "epoch": 1.19, + "learning_rate": 1.591726018739474e-05, + "loss": 2.1604, + "step": 5878 + }, + { + "epoch": 1.19, + "learning_rate": 1.5915867625827102e-05, + "loss": 2.1198, + "step": 5879 + }, + { + "epoch": 1.19, + "learning_rate": 1.5914474887748297e-05, + "loss": 2.1421, + "step": 5880 + }, + { + "epoch": 1.19, + "learning_rate": 1.5913081973199884e-05, + "loss": 2.1827, + "step": 5881 + }, + { + "epoch": 1.19, + "learning_rate": 1.591168888222342e-05, + "loss": 2.0772, + "step": 5882 + }, + { + "epoch": 1.19, + "learning_rate": 1.5910295614860477e-05, + "loss": 2.161, + "step": 5883 + }, + { + "epoch": 1.19, + "learning_rate": 1.5908902171152617e-05, + "loss": 2.1451, + "step": 5884 + }, + { + "epoch": 1.19, + "learning_rate": 1.5907508551141423e-05, + "loss": 2.0725, + "step": 5885 + }, + { + "epoch": 1.19, + "learning_rate": 1.5906114754868473e-05, + "loss": 2.1053, + "step": 5886 + }, + { + "epoch": 1.19, + "learning_rate": 1.5904720782375353e-05, + "loss": 2.1607, + "step": 5887 + }, + { + "epoch": 1.2, + "learning_rate": 1.590332663370366e-05, + "loss": 2.0509, + "step": 5888 + }, + { + "epoch": 1.2, + "learning_rate": 1.590193230889498e-05, + "loss": 2.1485, + "step": 5889 + }, + { + "epoch": 1.2, + "learning_rate": 1.590053780799093e-05, + "loss": 2.1454, + "step": 5890 + }, + { + "epoch": 1.2, + "learning_rate": 1.589914313103311e-05, + "loss": 2.1976, + "step": 5891 + }, + { + "epoch": 1.2, + "learning_rate": 1.589774827806313e-05, + "loss": 2.1815, + "step": 5892 + }, + { + "epoch": 1.2, + "learning_rate": 1.589635324912261e-05, + "loss": 2.0404, + "step": 5893 + }, + { + "epoch": 1.2, + "learning_rate": 1.5894958044253173e-05, + "loss": 2.242, + "step": 5894 + }, + { + "epoch": 1.2, + "learning_rate": 1.5893562663496455e-05, + "loss": 2.121, + "step": 5895 + }, + { + "epoch": 1.2, + "learning_rate": 1.5892167106894077e-05, + "loss": 2.1862, + "step": 5896 + }, + { + "epoch": 1.2, + "learning_rate": 1.589077137448769e-05, + "loss": 2.1562, + "step": 5897 + }, + { + "epoch": 1.2, + "learning_rate": 1.588937546631893e-05, + "loss": 2.1952, + "step": 5898 + }, + { + "epoch": 1.2, + "learning_rate": 1.5887979382429453e-05, + "loss": 2.0874, + "step": 5899 + }, + { + "epoch": 1.2, + "learning_rate": 1.588658312286091e-05, + "loss": 2.1728, + "step": 5900 + }, + { + "epoch": 1.2, + "learning_rate": 1.588518668765496e-05, + "loss": 2.099, + "step": 5901 + }, + { + "epoch": 1.2, + "learning_rate": 1.5883790076853272e-05, + "loss": 2.1534, + "step": 5902 + }, + { + "epoch": 1.2, + "learning_rate": 1.5882393290497515e-05, + "loss": 2.0569, + "step": 5903 + }, + { + "epoch": 1.2, + "learning_rate": 1.588099632862936e-05, + "loss": 2.0659, + "step": 5904 + }, + { + "epoch": 1.2, + "learning_rate": 1.5879599191290496e-05, + "loss": 2.1676, + "step": 5905 + }, + { + "epoch": 1.2, + "learning_rate": 1.5878201878522606e-05, + "loss": 2.0766, + "step": 5906 + }, + { + "epoch": 1.2, + "learning_rate": 1.587680439036738e-05, + "loss": 2.1592, + "step": 5907 + }, + { + "epoch": 1.2, + "learning_rate": 1.5875406726866514e-05, + "loss": 2.129, + "step": 5908 + }, + { + "epoch": 1.2, + "learning_rate": 1.5874008888061715e-05, + "loss": 2.1807, + "step": 5909 + }, + { + "epoch": 1.2, + "learning_rate": 1.5872610873994685e-05, + "loss": 2.1755, + "step": 5910 + }, + { + "epoch": 1.2, + "learning_rate": 1.587121268470714e-05, + "loss": 2.1727, + "step": 5911 + }, + { + "epoch": 1.2, + "learning_rate": 1.5869814320240792e-05, + "loss": 2.1415, + "step": 5912 + }, + { + "epoch": 1.2, + "learning_rate": 1.586841578063737e-05, + "loss": 2.1367, + "step": 5913 + }, + { + "epoch": 1.2, + "learning_rate": 1.58670170659386e-05, + "loss": 2.1136, + "step": 5914 + }, + { + "epoch": 1.2, + "learning_rate": 1.586561817618622e-05, + "loss": 2.1328, + "step": 5915 + }, + { + "epoch": 1.2, + "learning_rate": 1.5864219111421958e-05, + "loss": 2.127, + "step": 5916 + }, + { + "epoch": 1.2, + "learning_rate": 1.586281987168756e-05, + "loss": 2.0731, + "step": 5917 + }, + { + "epoch": 1.2, + "learning_rate": 1.5861420457024787e-05, + "loss": 2.1242, + "step": 5918 + }, + { + "epoch": 1.2, + "learning_rate": 1.5860020867475385e-05, + "loss": 2.1729, + "step": 5919 + }, + { + "epoch": 1.2, + "learning_rate": 1.5858621103081106e-05, + "loss": 2.1802, + "step": 5920 + }, + { + "epoch": 1.2, + "learning_rate": 1.5857221163883723e-05, + "loss": 2.1759, + "step": 5921 + }, + { + "epoch": 1.2, + "learning_rate": 1.585582104992501e-05, + "loss": 2.0967, + "step": 5922 + }, + { + "epoch": 1.2, + "learning_rate": 1.5854420761246734e-05, + "loss": 2.103, + "step": 5923 + }, + { + "epoch": 1.2, + "learning_rate": 1.5853020297890673e-05, + "loss": 2.0065, + "step": 5924 + }, + { + "epoch": 1.2, + "learning_rate": 1.5851619659898623e-05, + "loss": 2.1254, + "step": 5925 + }, + { + "epoch": 1.2, + "learning_rate": 1.5850218847312372e-05, + "loss": 2.1384, + "step": 5926 + }, + { + "epoch": 1.2, + "learning_rate": 1.5848817860173708e-05, + "loss": 2.135, + "step": 5927 + }, + { + "epoch": 1.2, + "learning_rate": 1.5847416698524437e-05, + "loss": 2.0993, + "step": 5928 + }, + { + "epoch": 1.2, + "learning_rate": 1.5846015362406365e-05, + "loss": 2.09, + "step": 5929 + }, + { + "epoch": 1.2, + "learning_rate": 1.5844613851861306e-05, + "loss": 2.1173, + "step": 5930 + }, + { + "epoch": 1.2, + "learning_rate": 1.5843212166931075e-05, + "loss": 2.1573, + "step": 5931 + }, + { + "epoch": 1.2, + "learning_rate": 1.5841810307657493e-05, + "loss": 2.1627, + "step": 5932 + }, + { + "epoch": 1.2, + "learning_rate": 1.584040827408239e-05, + "loss": 2.143, + "step": 5933 + }, + { + "epoch": 1.2, + "learning_rate": 1.583900606624759e-05, + "loss": 2.1315, + "step": 5934 + }, + { + "epoch": 1.2, + "learning_rate": 1.5837603684194943e-05, + "loss": 2.1559, + "step": 5935 + }, + { + "epoch": 1.2, + "learning_rate": 1.583620112796628e-05, + "loss": 2.1317, + "step": 5936 + }, + { + "epoch": 1.21, + "learning_rate": 1.583479839760346e-05, + "loss": 2.0798, + "step": 5937 + }, + { + "epoch": 1.21, + "learning_rate": 1.5833395493148328e-05, + "loss": 2.1832, + "step": 5938 + }, + { + "epoch": 1.21, + "learning_rate": 1.5831992414642745e-05, + "loss": 2.1709, + "step": 5939 + }, + { + "epoch": 1.21, + "learning_rate": 1.5830589162128574e-05, + "loss": 2.1325, + "step": 5940 + }, + { + "epoch": 1.21, + "learning_rate": 1.5829185735647684e-05, + "loss": 2.1527, + "step": 5941 + }, + { + "epoch": 1.21, + "learning_rate": 1.5827782135241948e-05, + "loss": 2.1718, + "step": 5942 + }, + { + "epoch": 1.21, + "learning_rate": 1.5826378360953246e-05, + "loss": 2.1257, + "step": 5943 + }, + { + "epoch": 1.21, + "learning_rate": 1.5824974412823463e-05, + "loss": 2.0987, + "step": 5944 + }, + { + "epoch": 1.21, + "learning_rate": 1.5823570290894492e-05, + "loss": 2.0877, + "step": 5945 + }, + { + "epoch": 1.21, + "learning_rate": 1.582216599520822e-05, + "loss": 2.1517, + "step": 5946 + }, + { + "epoch": 1.21, + "learning_rate": 1.5820761525806547e-05, + "loss": 2.0619, + "step": 5947 + }, + { + "epoch": 1.21, + "learning_rate": 1.5819356882731387e-05, + "loss": 2.1075, + "step": 5948 + }, + { + "epoch": 1.21, + "learning_rate": 1.581795206602464e-05, + "loss": 2.1122, + "step": 5949 + }, + { + "epoch": 1.21, + "learning_rate": 1.5816547075728227e-05, + "loss": 2.1714, + "step": 5950 + }, + { + "epoch": 1.21, + "learning_rate": 1.581514191188407e-05, + "loss": 2.1122, + "step": 5951 + }, + { + "epoch": 1.21, + "learning_rate": 1.581373657453409e-05, + "loss": 2.1548, + "step": 5952 + }, + { + "epoch": 1.21, + "learning_rate": 1.5812331063720218e-05, + "loss": 2.0907, + "step": 5953 + }, + { + "epoch": 1.21, + "learning_rate": 1.5810925379484394e-05, + "loss": 2.2196, + "step": 5954 + }, + { + "epoch": 1.21, + "learning_rate": 1.580951952186856e-05, + "loss": 2.1153, + "step": 5955 + }, + { + "epoch": 1.21, + "learning_rate": 1.5808113490914652e-05, + "loss": 2.1777, + "step": 5956 + }, + { + "epoch": 1.21, + "learning_rate": 1.5806707286664638e-05, + "loss": 2.0901, + "step": 5957 + }, + { + "epoch": 1.21, + "learning_rate": 1.5805300909160465e-05, + "loss": 2.1533, + "step": 5958 + }, + { + "epoch": 1.21, + "learning_rate": 1.5803894358444096e-05, + "loss": 2.1629, + "step": 5959 + }, + { + "epoch": 1.21, + "learning_rate": 1.5802487634557495e-05, + "loss": 2.1128, + "step": 5960 + }, + { + "epoch": 1.21, + "learning_rate": 1.5801080737542638e-05, + "loss": 2.1377, + "step": 5961 + }, + { + "epoch": 1.21, + "learning_rate": 1.57996736674415e-05, + "loss": 2.1761, + "step": 5962 + }, + { + "epoch": 1.21, + "learning_rate": 1.579826642429607e-05, + "loss": 2.142, + "step": 5963 + }, + { + "epoch": 1.21, + "learning_rate": 1.579685900814833e-05, + "loss": 2.129, + "step": 5964 + }, + { + "epoch": 1.21, + "learning_rate": 1.5795451419040278e-05, + "loss": 2.1155, + "step": 5965 + }, + { + "epoch": 1.21, + "learning_rate": 1.5794043657013905e-05, + "loss": 2.0232, + "step": 5966 + }, + { + "epoch": 1.21, + "learning_rate": 1.5792635722111218e-05, + "loss": 2.0802, + "step": 5967 + }, + { + "epoch": 1.21, + "learning_rate": 1.5791227614374226e-05, + "loss": 2.1557, + "step": 5968 + }, + { + "epoch": 1.21, + "learning_rate": 1.578981933384494e-05, + "loss": 2.0569, + "step": 5969 + }, + { + "epoch": 1.21, + "learning_rate": 1.578841088056538e-05, + "loss": 2.039, + "step": 5970 + }, + { + "epoch": 1.21, + "learning_rate": 1.578700225457757e-05, + "loss": 2.2138, + "step": 5971 + }, + { + "epoch": 1.21, + "learning_rate": 1.5785593455923542e-05, + "loss": 2.0317, + "step": 5972 + }, + { + "epoch": 1.21, + "learning_rate": 1.5784184484645324e-05, + "loss": 2.1103, + "step": 5973 + }, + { + "epoch": 1.21, + "learning_rate": 1.5782775340784962e-05, + "loss": 2.163, + "step": 5974 + }, + { + "epoch": 1.21, + "learning_rate": 1.5781366024384498e-05, + "loss": 2.1529, + "step": 5975 + }, + { + "epoch": 1.21, + "learning_rate": 1.5779956535485977e-05, + "loss": 2.1675, + "step": 5976 + }, + { + "epoch": 1.21, + "learning_rate": 1.577854687413146e-05, + "loss": 2.1675, + "step": 5977 + }, + { + "epoch": 1.21, + "learning_rate": 1.5777137040363007e-05, + "loss": 2.103, + "step": 5978 + }, + { + "epoch": 1.21, + "learning_rate": 1.5775727034222675e-05, + "loss": 2.144, + "step": 5979 + }, + { + "epoch": 1.21, + "learning_rate": 1.5774316855752548e-05, + "loss": 2.1637, + "step": 5980 + }, + { + "epoch": 1.21, + "learning_rate": 1.5772906504994686e-05, + "loss": 2.2256, + "step": 5981 + }, + { + "epoch": 1.21, + "learning_rate": 1.577149598199118e-05, + "loss": 2.1042, + "step": 5982 + }, + { + "epoch": 1.21, + "learning_rate": 1.577008528678411e-05, + "loss": 2.0528, + "step": 5983 + }, + { + "epoch": 1.21, + "learning_rate": 1.5768674419415576e-05, + "loss": 2.1702, + "step": 5984 + }, + { + "epoch": 1.21, + "learning_rate": 1.5767263379927663e-05, + "loss": 2.1817, + "step": 5985 + }, + { + "epoch": 1.22, + "learning_rate": 1.5765852168362475e-05, + "loss": 2.1063, + "step": 5986 + }, + { + "epoch": 1.22, + "learning_rate": 1.576444078476212e-05, + "loss": 2.0612, + "step": 5987 + }, + { + "epoch": 1.22, + "learning_rate": 1.576302922916871e-05, + "loss": 2.119, + "step": 5988 + }, + { + "epoch": 1.22, + "learning_rate": 1.5761617501624358e-05, + "loss": 2.093, + "step": 5989 + }, + { + "epoch": 1.22, + "learning_rate": 1.576020560217119e-05, + "loss": 2.1481, + "step": 5990 + }, + { + "epoch": 1.22, + "learning_rate": 1.575879353085133e-05, + "loss": 2.1806, + "step": 5991 + }, + { + "epoch": 1.22, + "learning_rate": 1.5757381287706908e-05, + "loss": 2.0793, + "step": 5992 + }, + { + "epoch": 1.22, + "learning_rate": 1.5755968872780064e-05, + "loss": 2.2074, + "step": 5993 + }, + { + "epoch": 1.22, + "learning_rate": 1.575455628611294e-05, + "loss": 2.1344, + "step": 5994 + }, + { + "epoch": 1.22, + "learning_rate": 1.5753143527747684e-05, + "loss": 2.1798, + "step": 5995 + }, + { + "epoch": 1.22, + "learning_rate": 1.5751730597726447e-05, + "loss": 2.1255, + "step": 5996 + }, + { + "epoch": 1.22, + "learning_rate": 1.5750317496091385e-05, + "loss": 2.1509, + "step": 5997 + }, + { + "epoch": 1.22, + "learning_rate": 1.574890422288466e-05, + "loss": 2.1337, + "step": 5998 + }, + { + "epoch": 1.22, + "learning_rate": 1.5747490778148444e-05, + "loss": 2.1048, + "step": 5999 + }, + { + "epoch": 1.22, + "learning_rate": 1.574607716192491e-05, + "loss": 2.13, + "step": 6000 + }, + { + "epoch": 1.22, + "learning_rate": 1.574466337425623e-05, + "loss": 2.1405, + "step": 6001 + }, + { + "epoch": 1.22, + "learning_rate": 1.5743249415184586e-05, + "loss": 2.0732, + "step": 6002 + }, + { + "epoch": 1.22, + "learning_rate": 1.5741835284752177e-05, + "loss": 2.0635, + "step": 6003 + }, + { + "epoch": 1.22, + "learning_rate": 1.5740420983001187e-05, + "loss": 2.1086, + "step": 6004 + }, + { + "epoch": 1.22, + "learning_rate": 1.573900650997382e-05, + "loss": 2.1228, + "step": 6005 + }, + { + "epoch": 1.22, + "learning_rate": 1.5737591865712275e-05, + "loss": 2.1663, + "step": 6006 + }, + { + "epoch": 1.22, + "learning_rate": 1.573617705025876e-05, + "loss": 2.1759, + "step": 6007 + }, + { + "epoch": 1.22, + "learning_rate": 1.573476206365549e-05, + "loss": 2.1106, + "step": 6008 + }, + { + "epoch": 1.22, + "learning_rate": 1.573334690594469e-05, + "loss": 2.1639, + "step": 6009 + }, + { + "epoch": 1.22, + "learning_rate": 1.573193157716858e-05, + "loss": 2.1983, + "step": 6010 + }, + { + "epoch": 1.22, + "learning_rate": 1.5730516077369384e-05, + "loss": 2.1605, + "step": 6011 + }, + { + "epoch": 1.22, + "learning_rate": 1.5729100406589338e-05, + "loss": 2.116, + "step": 6012 + }, + { + "epoch": 1.22, + "learning_rate": 1.5727684564870685e-05, + "loss": 2.1266, + "step": 6013 + }, + { + "epoch": 1.22, + "learning_rate": 1.572626855225567e-05, + "loss": 2.0872, + "step": 6014 + }, + { + "epoch": 1.22, + "learning_rate": 1.572485236878654e-05, + "loss": 2.112, + "step": 6015 + }, + { + "epoch": 1.22, + "learning_rate": 1.5723436014505545e-05, + "loss": 2.1496, + "step": 6016 + }, + { + "epoch": 1.22, + "learning_rate": 1.572201948945495e-05, + "loss": 2.1544, + "step": 6017 + }, + { + "epoch": 1.22, + "learning_rate": 1.572060279367702e-05, + "loss": 2.1831, + "step": 6018 + }, + { + "epoch": 1.22, + "learning_rate": 1.5719185927214024e-05, + "loss": 2.106, + "step": 6019 + }, + { + "epoch": 1.22, + "learning_rate": 1.5717768890108235e-05, + "loss": 2.091, + "step": 6020 + }, + { + "epoch": 1.22, + "learning_rate": 1.5716351682401936e-05, + "loss": 2.1178, + "step": 6021 + }, + { + "epoch": 1.22, + "learning_rate": 1.571493430413741e-05, + "loss": 2.1314, + "step": 6022 + }, + { + "epoch": 1.22, + "learning_rate": 1.5713516755356947e-05, + "loss": 2.1306, + "step": 6023 + }, + { + "epoch": 1.22, + "learning_rate": 1.5712099036102847e-05, + "loss": 2.107, + "step": 6024 + }, + { + "epoch": 1.22, + "learning_rate": 1.57106811464174e-05, + "loss": 2.1152, + "step": 6025 + }, + { + "epoch": 1.22, + "learning_rate": 1.5709263086342924e-05, + "loss": 2.0922, + "step": 6026 + }, + { + "epoch": 1.22, + "learning_rate": 1.570784485592172e-05, + "loss": 2.1232, + "step": 6027 + }, + { + "epoch": 1.22, + "learning_rate": 1.5706426455196106e-05, + "loss": 2.1942, + "step": 6028 + }, + { + "epoch": 1.22, + "learning_rate": 1.5705007884208402e-05, + "loss": 2.1297, + "step": 6029 + }, + { + "epoch": 1.22, + "learning_rate": 1.570358914300094e-05, + "loss": 2.1548, + "step": 6030 + }, + { + "epoch": 1.22, + "learning_rate": 1.5702170231616045e-05, + "loss": 2.1229, + "step": 6031 + }, + { + "epoch": 1.22, + "learning_rate": 1.5700751150096054e-05, + "loss": 2.1905, + "step": 6032 + }, + { + "epoch": 1.22, + "learning_rate": 1.5699331898483306e-05, + "loss": 2.144, + "step": 6033 + }, + { + "epoch": 1.22, + "learning_rate": 1.5697912476820152e-05, + "loss": 2.1349, + "step": 6034 + }, + { + "epoch": 1.23, + "learning_rate": 1.5696492885148938e-05, + "loss": 2.0847, + "step": 6035 + }, + { + "epoch": 1.23, + "learning_rate": 1.5695073123512025e-05, + "loss": 2.1197, + "step": 6036 + }, + { + "epoch": 1.23, + "learning_rate": 1.569365319195177e-05, + "loss": 2.1873, + "step": 6037 + }, + { + "epoch": 1.23, + "learning_rate": 1.569223309051054e-05, + "loss": 2.1615, + "step": 6038 + }, + { + "epoch": 1.23, + "learning_rate": 1.569081281923071e-05, + "loss": 2.1339, + "step": 6039 + }, + { + "epoch": 1.23, + "learning_rate": 1.568939237815465e-05, + "loss": 2.1254, + "step": 6040 + }, + { + "epoch": 1.23, + "learning_rate": 1.5687971767324755e-05, + "loss": 2.1564, + "step": 6041 + }, + { + "epoch": 1.23, + "learning_rate": 1.5686550986783395e-05, + "loss": 2.1526, + "step": 6042 + }, + { + "epoch": 1.23, + "learning_rate": 1.568513003657297e-05, + "loss": 2.1362, + "step": 6043 + }, + { + "epoch": 1.23, + "learning_rate": 1.5683708916735877e-05, + "loss": 2.1438, + "step": 6044 + }, + { + "epoch": 1.23, + "learning_rate": 1.5682287627314513e-05, + "loss": 2.1395, + "step": 6045 + }, + { + "epoch": 1.23, + "learning_rate": 1.5680866168351293e-05, + "loss": 2.122, + "step": 6046 + }, + { + "epoch": 1.23, + "learning_rate": 1.5679444539888624e-05, + "loss": 2.0339, + "step": 6047 + }, + { + "epoch": 1.23, + "learning_rate": 1.567802274196892e-05, + "loss": 2.0729, + "step": 6048 + }, + { + "epoch": 1.23, + "learning_rate": 1.567660077463461e-05, + "loss": 2.096, + "step": 6049 + }, + { + "epoch": 1.23, + "learning_rate": 1.5675178637928114e-05, + "loss": 2.1281, + "step": 6050 + }, + { + "epoch": 1.23, + "learning_rate": 1.5673756331891868e-05, + "loss": 2.1788, + "step": 6051 + }, + { + "epoch": 1.23, + "learning_rate": 1.5672333856568315e-05, + "loss": 2.1491, + "step": 6052 + }, + { + "epoch": 1.23, + "learning_rate": 1.5670911211999885e-05, + "loss": 2.1593, + "step": 6053 + }, + { + "epoch": 1.23, + "learning_rate": 1.5669488398229034e-05, + "loss": 2.1201, + "step": 6054 + }, + { + "epoch": 1.23, + "learning_rate": 1.566806541529821e-05, + "loss": 2.0677, + "step": 6055 + }, + { + "epoch": 1.23, + "learning_rate": 1.5666642263249873e-05, + "loss": 2.0539, + "step": 6056 + }, + { + "epoch": 1.23, + "learning_rate": 1.5665218942126483e-05, + "loss": 2.0841, + "step": 6057 + }, + { + "epoch": 1.23, + "learning_rate": 1.5663795451970513e-05, + "loss": 2.1948, + "step": 6058 + }, + { + "epoch": 1.23, + "learning_rate": 1.5662371792824426e-05, + "loss": 2.1219, + "step": 6059 + }, + { + "epoch": 1.23, + "learning_rate": 1.566094796473071e-05, + "loss": 2.0626, + "step": 6060 + }, + { + "epoch": 1.23, + "learning_rate": 1.565952396773184e-05, + "loss": 2.1564, + "step": 6061 + }, + { + "epoch": 1.23, + "learning_rate": 1.565809980187031e-05, + "loss": 2.1955, + "step": 6062 + }, + { + "epoch": 1.23, + "learning_rate": 1.565667546718861e-05, + "loss": 2.0932, + "step": 6063 + }, + { + "epoch": 1.23, + "learning_rate": 1.565525096372923e-05, + "loss": 2.1538, + "step": 6064 + }, + { + "epoch": 1.23, + "learning_rate": 1.5653826291534683e-05, + "loss": 2.195, + "step": 6065 + }, + { + "epoch": 1.23, + "learning_rate": 1.5652401450647476e-05, + "loss": 2.0503, + "step": 6066 + }, + { + "epoch": 1.23, + "learning_rate": 1.5650976441110118e-05, + "loss": 2.0803, + "step": 6067 + }, + { + "epoch": 1.23, + "learning_rate": 1.564955126296513e-05, + "loss": 2.1426, + "step": 6068 + }, + { + "epoch": 1.23, + "learning_rate": 1.564812591625503e-05, + "loss": 2.1682, + "step": 6069 + }, + { + "epoch": 1.23, + "learning_rate": 1.564670040102235e-05, + "loss": 2.0639, + "step": 6070 + }, + { + "epoch": 1.23, + "learning_rate": 1.564527471730962e-05, + "loss": 2.1381, + "step": 6071 + }, + { + "epoch": 1.23, + "learning_rate": 1.5643848865159387e-05, + "loss": 2.1412, + "step": 6072 + }, + { + "epoch": 1.23, + "learning_rate": 1.5642422844614178e-05, + "loss": 2.0766, + "step": 6073 + }, + { + "epoch": 1.23, + "learning_rate": 1.5640996655716557e-05, + "loss": 2.073, + "step": 6074 + }, + { + "epoch": 1.23, + "learning_rate": 1.5639570298509067e-05, + "loss": 2.0712, + "step": 6075 + }, + { + "epoch": 1.23, + "learning_rate": 1.563814377303427e-05, + "loss": 2.1427, + "step": 6076 + }, + { + "epoch": 1.23, + "learning_rate": 1.5636717079334727e-05, + "loss": 2.1442, + "step": 6077 + }, + { + "epoch": 1.23, + "learning_rate": 1.563529021745301e-05, + "loss": 2.1006, + "step": 6078 + }, + { + "epoch": 1.23, + "learning_rate": 1.563386318743169e-05, + "loss": 2.1787, + "step": 6079 + }, + { + "epoch": 1.23, + "learning_rate": 1.563243598931334e-05, + "loss": 2.0202, + "step": 6080 + }, + { + "epoch": 1.23, + "learning_rate": 1.5631008623140552e-05, + "loss": 2.1626, + "step": 6081 + }, + { + "epoch": 1.23, + "learning_rate": 1.5629581088955907e-05, + "loss": 2.099, + "step": 6082 + }, + { + "epoch": 1.23, + "learning_rate": 1.5628153386802002e-05, + "loss": 2.0934, + "step": 6083 + }, + { + "epoch": 1.23, + "learning_rate": 1.5626725516721437e-05, + "loss": 2.1108, + "step": 6084 + }, + { + "epoch": 1.24, + "learning_rate": 1.562529747875681e-05, + "loss": 2.0849, + "step": 6085 + }, + { + "epoch": 1.24, + "learning_rate": 1.5623869272950733e-05, + "loss": 2.1076, + "step": 6086 + }, + { + "epoch": 1.24, + "learning_rate": 1.5622440899345818e-05, + "loss": 2.1732, + "step": 6087 + }, + { + "epoch": 1.24, + "learning_rate": 1.5621012357984687e-05, + "loss": 2.103, + "step": 6088 + }, + { + "epoch": 1.24, + "learning_rate": 1.5619583648909954e-05, + "loss": 2.1828, + "step": 6089 + }, + { + "epoch": 1.24, + "learning_rate": 1.5618154772164257e-05, + "loss": 2.1096, + "step": 6090 + }, + { + "epoch": 1.24, + "learning_rate": 1.561672572779022e-05, + "loss": 2.1576, + "step": 6091 + }, + { + "epoch": 1.24, + "learning_rate": 1.561529651583049e-05, + "loss": 2.1772, + "step": 6092 + }, + { + "epoch": 1.24, + "learning_rate": 1.561386713632771e-05, + "loss": 2.0874, + "step": 6093 + }, + { + "epoch": 1.24, + "learning_rate": 1.5612437589324523e-05, + "loss": 2.1287, + "step": 6094 + }, + { + "epoch": 1.24, + "learning_rate": 1.5611007874863583e-05, + "loss": 2.1883, + "step": 6095 + }, + { + "epoch": 1.24, + "learning_rate": 1.5609577992987547e-05, + "loss": 2.2019, + "step": 6096 + }, + { + "epoch": 1.24, + "learning_rate": 1.5608147943739084e-05, + "loss": 2.133, + "step": 6097 + }, + { + "epoch": 1.24, + "learning_rate": 1.5606717727160855e-05, + "loss": 2.1488, + "step": 6098 + }, + { + "epoch": 1.24, + "learning_rate": 1.5605287343295544e-05, + "loss": 2.2165, + "step": 6099 + }, + { + "epoch": 1.24, + "learning_rate": 1.5603856792185817e-05, + "loss": 2.0854, + "step": 6100 + }, + { + "epoch": 1.24, + "learning_rate": 1.5602426073874364e-05, + "loss": 2.0673, + "step": 6101 + }, + { + "epoch": 1.24, + "learning_rate": 1.560099518840387e-05, + "loss": 2.1008, + "step": 6102 + }, + { + "epoch": 1.24, + "learning_rate": 1.5599564135817034e-05, + "loss": 2.118, + "step": 6103 + }, + { + "epoch": 1.24, + "learning_rate": 1.559813291615655e-05, + "loss": 2.0604, + "step": 6104 + }, + { + "epoch": 1.24, + "learning_rate": 1.559670152946512e-05, + "loss": 2.1186, + "step": 6105 + }, + { + "epoch": 1.24, + "learning_rate": 1.559526997578545e-05, + "loss": 2.1826, + "step": 6106 + }, + { + "epoch": 1.24, + "learning_rate": 1.559383825516026e-05, + "loss": 2.1524, + "step": 6107 + }, + { + "epoch": 1.24, + "learning_rate": 1.5592406367632262e-05, + "loss": 2.2339, + "step": 6108 + }, + { + "epoch": 1.24, + "learning_rate": 1.5590974313244184e-05, + "loss": 2.0618, + "step": 6109 + }, + { + "epoch": 1.24, + "learning_rate": 1.558954209203875e-05, + "loss": 2.1669, + "step": 6110 + }, + { + "epoch": 1.24, + "learning_rate": 1.55881097040587e-05, + "loss": 2.1181, + "step": 6111 + }, + { + "epoch": 1.24, + "learning_rate": 1.5586677149346764e-05, + "loss": 2.1154, + "step": 6112 + }, + { + "epoch": 1.24, + "learning_rate": 1.558524442794569e-05, + "loss": 2.0968, + "step": 6113 + }, + { + "epoch": 1.24, + "learning_rate": 1.5583811539898216e-05, + "loss": 2.0263, + "step": 6114 + }, + { + "epoch": 1.24, + "learning_rate": 1.558237848524711e-05, + "loss": 2.1686, + "step": 6115 + }, + { + "epoch": 1.24, + "learning_rate": 1.558094526403512e-05, + "loss": 2.1686, + "step": 6116 + }, + { + "epoch": 1.24, + "learning_rate": 1.5579511876305016e-05, + "loss": 2.2159, + "step": 6117 + }, + { + "epoch": 1.24, + "learning_rate": 1.5578078322099558e-05, + "loss": 2.1405, + "step": 6118 + }, + { + "epoch": 1.24, + "learning_rate": 1.557664460146152e-05, + "loss": 2.1036, + "step": 6119 + }, + { + "epoch": 1.24, + "learning_rate": 1.5575210714433687e-05, + "loss": 2.0829, + "step": 6120 + }, + { + "epoch": 1.24, + "learning_rate": 1.557377666105883e-05, + "loss": 2.1122, + "step": 6121 + }, + { + "epoch": 1.24, + "learning_rate": 1.557234244137975e-05, + "loss": 2.1568, + "step": 6122 + }, + { + "epoch": 1.24, + "learning_rate": 1.5570908055439234e-05, + "loss": 2.1421, + "step": 6123 + }, + { + "epoch": 1.24, + "learning_rate": 1.5569473503280076e-05, + "loss": 2.1636, + "step": 6124 + }, + { + "epoch": 1.24, + "learning_rate": 1.5568038784945078e-05, + "loss": 2.1484, + "step": 6125 + }, + { + "epoch": 1.24, + "learning_rate": 1.5566603900477056e-05, + "loss": 2.0469, + "step": 6126 + }, + { + "epoch": 1.24, + "learning_rate": 1.5565168849918816e-05, + "loss": 2.1771, + "step": 6127 + }, + { + "epoch": 1.24, + "learning_rate": 1.5563733633313177e-05, + "loss": 2.1609, + "step": 6128 + }, + { + "epoch": 1.24, + "learning_rate": 1.556229825070296e-05, + "loss": 2.1215, + "step": 6129 + }, + { + "epoch": 1.24, + "learning_rate": 1.5560862702130995e-05, + "loss": 2.1493, + "step": 6130 + }, + { + "epoch": 1.24, + "learning_rate": 1.5559426987640115e-05, + "loss": 2.1424, + "step": 6131 + }, + { + "epoch": 1.24, + "learning_rate": 1.5557991107273153e-05, + "loss": 2.0991, + "step": 6132 + }, + { + "epoch": 1.24, + "learning_rate": 1.5556555061072956e-05, + "loss": 2.0815, + "step": 6133 + }, + { + "epoch": 1.25, + "learning_rate": 1.5555118849082366e-05, + "loss": 2.112, + "step": 6134 + }, + { + "epoch": 1.25, + "learning_rate": 1.5553682471344237e-05, + "loss": 2.1062, + "step": 6135 + }, + { + "epoch": 1.25, + "learning_rate": 1.555224592790143e-05, + "loss": 2.175, + "step": 6136 + }, + { + "epoch": 1.25, + "learning_rate": 1.55508092187968e-05, + "loss": 2.0324, + "step": 6137 + }, + { + "epoch": 1.25, + "learning_rate": 1.554937234407322e-05, + "loss": 2.103, + "step": 6138 + }, + { + "epoch": 1.25, + "learning_rate": 1.554793530377356e-05, + "loss": 2.1778, + "step": 6139 + }, + { + "epoch": 1.25, + "learning_rate": 1.5546498097940697e-05, + "loss": 2.1558, + "step": 6140 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545060726617512e-05, + "loss": 2.1796, + "step": 6141 + }, + { + "epoch": 1.25, + "learning_rate": 1.554362318984689e-05, + "loss": 2.1534, + "step": 6142 + }, + { + "epoch": 1.25, + "learning_rate": 1.554218548767173e-05, + "loss": 2.0871, + "step": 6143 + }, + { + "epoch": 1.25, + "learning_rate": 1.554074762013492e-05, + "loss": 2.0544, + "step": 6144 + }, + { + "epoch": 1.25, + "learning_rate": 1.5539309587279364e-05, + "loss": 2.1162, + "step": 6145 + }, + { + "epoch": 1.25, + "learning_rate": 1.553787138914797e-05, + "loss": 2.1211, + "step": 6146 + }, + { + "epoch": 1.25, + "learning_rate": 1.553643302578365e-05, + "loss": 2.1566, + "step": 6147 + }, + { + "epoch": 1.25, + "learning_rate": 1.5534994497229316e-05, + "loss": 2.0608, + "step": 6148 + }, + { + "epoch": 1.25, + "learning_rate": 1.5533555803527892e-05, + "loss": 2.0687, + "step": 6149 + }, + { + "epoch": 1.25, + "learning_rate": 1.5532116944722308e-05, + "loss": 2.0577, + "step": 6150 + }, + { + "epoch": 1.25, + "learning_rate": 1.553067792085549e-05, + "loss": 2.0768, + "step": 6151 + }, + { + "epoch": 1.25, + "learning_rate": 1.5529238731970377e-05, + "loss": 2.1844, + "step": 6152 + }, + { + "epoch": 1.25, + "learning_rate": 1.5527799378109905e-05, + "loss": 2.1464, + "step": 6153 + }, + { + "epoch": 1.25, + "learning_rate": 1.5526359859317024e-05, + "loss": 2.133, + "step": 6154 + }, + { + "epoch": 1.25, + "learning_rate": 1.5524920175634686e-05, + "loss": 2.1251, + "step": 6155 + }, + { + "epoch": 1.25, + "learning_rate": 1.5523480327105842e-05, + "loss": 2.0376, + "step": 6156 + }, + { + "epoch": 1.25, + "learning_rate": 1.5522040313773456e-05, + "loss": 2.1737, + "step": 6157 + }, + { + "epoch": 1.25, + "learning_rate": 1.5520600135680494e-05, + "loss": 2.1414, + "step": 6158 + }, + { + "epoch": 1.25, + "learning_rate": 1.5519159792869923e-05, + "loss": 2.0728, + "step": 6159 + }, + { + "epoch": 1.25, + "learning_rate": 1.5517719285384723e-05, + "loss": 2.1369, + "step": 6160 + }, + { + "epoch": 1.25, + "learning_rate": 1.5516278613267872e-05, + "loss": 2.112, + "step": 6161 + }, + { + "epoch": 1.25, + "learning_rate": 1.5514837776562358e-05, + "loss": 2.0927, + "step": 6162 + }, + { + "epoch": 1.25, + "learning_rate": 1.5513396775311163e-05, + "loss": 2.1867, + "step": 6163 + }, + { + "epoch": 1.25, + "learning_rate": 1.551195560955729e-05, + "loss": 2.2582, + "step": 6164 + }, + { + "epoch": 1.25, + "learning_rate": 1.5510514279343736e-05, + "loss": 2.0916, + "step": 6165 + }, + { + "epoch": 1.25, + "learning_rate": 1.5509072784713505e-05, + "loss": 2.1604, + "step": 6166 + }, + { + "epoch": 1.25, + "learning_rate": 1.550763112570961e-05, + "loss": 2.1566, + "step": 6167 + }, + { + "epoch": 1.25, + "learning_rate": 1.550618930237506e-05, + "loss": 2.1579, + "step": 6168 + }, + { + "epoch": 1.25, + "learning_rate": 1.550474731475288e-05, + "loss": 2.0669, + "step": 6169 + }, + { + "epoch": 1.25, + "learning_rate": 1.550330516288609e-05, + "loss": 2.083, + "step": 6170 + }, + { + "epoch": 1.25, + "learning_rate": 1.5501862846817724e-05, + "loss": 2.1515, + "step": 6171 + }, + { + "epoch": 1.25, + "learning_rate": 1.5500420366590818e-05, + "loss": 2.166, + "step": 6172 + }, + { + "epoch": 1.25, + "learning_rate": 1.54989777222484e-05, + "loss": 2.1512, + "step": 6173 + }, + { + "epoch": 1.25, + "learning_rate": 1.549753491383352e-05, + "loss": 2.1663, + "step": 6174 + }, + { + "epoch": 1.25, + "learning_rate": 1.5496091941389234e-05, + "loss": 2.1441, + "step": 6175 + }, + { + "epoch": 1.25, + "learning_rate": 1.549464880495859e-05, + "loss": 2.1448, + "step": 6176 + }, + { + "epoch": 1.25, + "learning_rate": 1.5493205504584645e-05, + "loss": 2.1194, + "step": 6177 + }, + { + "epoch": 1.25, + "learning_rate": 1.5491762040310466e-05, + "loss": 2.1333, + "step": 6178 + }, + { + "epoch": 1.25, + "learning_rate": 1.5490318412179116e-05, + "loss": 2.1591, + "step": 6179 + }, + { + "epoch": 1.25, + "learning_rate": 1.5488874620233674e-05, + "loss": 2.0948, + "step": 6180 + }, + { + "epoch": 1.25, + "learning_rate": 1.5487430664517214e-05, + "loss": 2.0793, + "step": 6181 + }, + { + "epoch": 1.25, + "learning_rate": 1.5485986545072826e-05, + "loss": 2.1516, + "step": 6182 + }, + { + "epoch": 1.26, + "learning_rate": 1.5484542261943593e-05, + "loss": 2.0738, + "step": 6183 + }, + { + "epoch": 1.26, + "learning_rate": 1.5483097815172605e-05, + "loss": 2.1715, + "step": 6184 + }, + { + "epoch": 1.26, + "learning_rate": 1.5481653204802967e-05, + "loss": 2.0527, + "step": 6185 + }, + { + "epoch": 1.26, + "learning_rate": 1.5480208430877776e-05, + "loss": 2.1208, + "step": 6186 + }, + { + "epoch": 1.26, + "learning_rate": 1.547876349344014e-05, + "loss": 2.1029, + "step": 6187 + }, + { + "epoch": 1.26, + "learning_rate": 1.5477318392533178e-05, + "loss": 2.1542, + "step": 6188 + }, + { + "epoch": 1.26, + "learning_rate": 1.54758731282e-05, + "loss": 2.0991, + "step": 6189 + }, + { + "epoch": 1.26, + "learning_rate": 1.547442770048373e-05, + "loss": 2.1176, + "step": 6190 + }, + { + "epoch": 1.26, + "learning_rate": 1.5472982109427495e-05, + "loss": 2.1864, + "step": 6191 + }, + { + "epoch": 1.26, + "learning_rate": 1.547153635507443e-05, + "loss": 2.1523, + "step": 6192 + }, + { + "epoch": 1.26, + "learning_rate": 1.5470090437467667e-05, + "loss": 2.0168, + "step": 6193 + }, + { + "epoch": 1.26, + "learning_rate": 1.5468644356650354e-05, + "loss": 2.1898, + "step": 6194 + }, + { + "epoch": 1.26, + "learning_rate": 1.5467198112665632e-05, + "loss": 2.1439, + "step": 6195 + }, + { + "epoch": 1.26, + "learning_rate": 1.5465751705556652e-05, + "loss": 2.1402, + "step": 6196 + }, + { + "epoch": 1.26, + "learning_rate": 1.546430513536658e-05, + "loss": 2.1138, + "step": 6197 + }, + { + "epoch": 1.26, + "learning_rate": 1.546285840213856e-05, + "loss": 2.0499, + "step": 6198 + }, + { + "epoch": 1.26, + "learning_rate": 1.5461411505915772e-05, + "loss": 2.1964, + "step": 6199 + }, + { + "epoch": 1.26, + "learning_rate": 1.545996444674138e-05, + "loss": 2.0689, + "step": 6200 + }, + { + "epoch": 1.26, + "learning_rate": 1.545851722465857e-05, + "loss": 2.2016, + "step": 6201 + }, + { + "epoch": 1.26, + "learning_rate": 1.545706983971051e-05, + "loss": 2.153, + "step": 6202 + }, + { + "epoch": 1.26, + "learning_rate": 1.545562229194039e-05, + "loss": 2.1599, + "step": 6203 + }, + { + "epoch": 1.26, + "learning_rate": 1.54541745813914e-05, + "loss": 2.1659, + "step": 6204 + }, + { + "epoch": 1.26, + "learning_rate": 1.545272670810674e-05, + "loss": 2.1122, + "step": 6205 + }, + { + "epoch": 1.26, + "learning_rate": 1.54512786721296e-05, + "loss": 2.065, + "step": 6206 + }, + { + "epoch": 1.26, + "learning_rate": 1.54498304735032e-05, + "loss": 2.0468, + "step": 6207 + }, + { + "epoch": 1.26, + "learning_rate": 1.544838211227073e-05, + "loss": 2.1806, + "step": 6208 + }, + { + "epoch": 1.26, + "learning_rate": 1.5446933588475425e-05, + "loss": 2.1139, + "step": 6209 + }, + { + "epoch": 1.26, + "learning_rate": 1.544548490216049e-05, + "loss": 2.0893, + "step": 6210 + }, + { + "epoch": 1.26, + "learning_rate": 1.544403605336916e-05, + "loss": 2.1384, + "step": 6211 + }, + { + "epoch": 1.26, + "learning_rate": 1.544258704214465e-05, + "loss": 2.1299, + "step": 6212 + }, + { + "epoch": 1.26, + "learning_rate": 1.5441137868530208e-05, + "loss": 2.1319, + "step": 6213 + }, + { + "epoch": 1.26, + "learning_rate": 1.5439688532569066e-05, + "loss": 2.1577, + "step": 6214 + }, + { + "epoch": 1.26, + "learning_rate": 1.543823903430447e-05, + "loss": 2.1772, + "step": 6215 + }, + { + "epoch": 1.26, + "learning_rate": 1.5436789373779662e-05, + "loss": 2.0563, + "step": 6216 + }, + { + "epoch": 1.26, + "learning_rate": 1.5435339551037905e-05, + "loss": 2.1422, + "step": 6217 + }, + { + "epoch": 1.26, + "learning_rate": 1.5433889566122454e-05, + "loss": 2.1961, + "step": 6218 + }, + { + "epoch": 1.26, + "learning_rate": 1.543243941907657e-05, + "loss": 2.2366, + "step": 6219 + }, + { + "epoch": 1.26, + "learning_rate": 1.5430989109943523e-05, + "loss": 2.1049, + "step": 6220 + }, + { + "epoch": 1.26, + "learning_rate": 1.5429538638766583e-05, + "loss": 2.2169, + "step": 6221 + }, + { + "epoch": 1.26, + "learning_rate": 1.5428088005589028e-05, + "loss": 2.1134, + "step": 6222 + }, + { + "epoch": 1.26, + "learning_rate": 1.5426637210454142e-05, + "loss": 2.0481, + "step": 6223 + }, + { + "epoch": 1.26, + "learning_rate": 1.5425186253405215e-05, + "loss": 2.1434, + "step": 6224 + }, + { + "epoch": 1.26, + "learning_rate": 1.5423735134485537e-05, + "loss": 2.1341, + "step": 6225 + }, + { + "epoch": 1.26, + "learning_rate": 1.54222838537384e-05, + "loss": 2.1548, + "step": 6226 + }, + { + "epoch": 1.26, + "learning_rate": 1.542083241120711e-05, + "loss": 2.0997, + "step": 6227 + }, + { + "epoch": 1.26, + "learning_rate": 1.5419380806934972e-05, + "loss": 2.0651, + "step": 6228 + }, + { + "epoch": 1.26, + "learning_rate": 1.54179290409653e-05, + "loss": 2.1587, + "step": 6229 + }, + { + "epoch": 1.26, + "learning_rate": 1.5416477113341412e-05, + "loss": 2.1746, + "step": 6230 + }, + { + "epoch": 1.26, + "learning_rate": 1.5415025024106624e-05, + "loss": 2.104, + "step": 6231 + }, + { + "epoch": 1.26, + "learning_rate": 1.5413572773304263e-05, + "loss": 2.1389, + "step": 6232 + }, + { + "epoch": 1.27, + "learning_rate": 1.5412120360977662e-05, + "loss": 2.1279, + "step": 6233 + }, + { + "epoch": 1.27, + "learning_rate": 1.5410667787170152e-05, + "loss": 2.1601, + "step": 6234 + }, + { + "epoch": 1.27, + "learning_rate": 1.5409215051925077e-05, + "loss": 2.1315, + "step": 6235 + }, + { + "epoch": 1.27, + "learning_rate": 1.540776215528578e-05, + "loss": 2.155, + "step": 6236 + }, + { + "epoch": 1.27, + "learning_rate": 1.540630909729561e-05, + "loss": 2.0883, + "step": 6237 + }, + { + "epoch": 1.27, + "learning_rate": 1.540485587799793e-05, + "loss": 2.0126, + "step": 6238 + }, + { + "epoch": 1.27, + "learning_rate": 1.540340249743609e-05, + "loss": 2.1826, + "step": 6239 + }, + { + "epoch": 1.27, + "learning_rate": 1.540194895565346e-05, + "loss": 2.0982, + "step": 6240 + }, + { + "epoch": 1.27, + "learning_rate": 1.5400495252693404e-05, + "loss": 2.1602, + "step": 6241 + }, + { + "epoch": 1.27, + "learning_rate": 1.5399041388599304e-05, + "loss": 2.1192, + "step": 6242 + }, + { + "epoch": 1.27, + "learning_rate": 1.5397587363414527e-05, + "loss": 2.1154, + "step": 6243 + }, + { + "epoch": 1.27, + "learning_rate": 1.5396133177182467e-05, + "loss": 2.219, + "step": 6244 + }, + { + "epoch": 1.27, + "learning_rate": 1.539467882994651e-05, + "loss": 2.0868, + "step": 6245 + }, + { + "epoch": 1.27, + "learning_rate": 1.5393224321750045e-05, + "loss": 2.1556, + "step": 6246 + }, + { + "epoch": 1.27, + "learning_rate": 1.5391769652636476e-05, + "loss": 2.0668, + "step": 6247 + }, + { + "epoch": 1.27, + "learning_rate": 1.53903148226492e-05, + "loss": 2.1094, + "step": 6248 + }, + { + "epoch": 1.27, + "learning_rate": 1.5388859831831632e-05, + "loss": 2.1335, + "step": 6249 + }, + { + "epoch": 1.27, + "learning_rate": 1.5387404680227175e-05, + "loss": 2.0885, + "step": 6250 + }, + { + "epoch": 1.27, + "learning_rate": 1.5385949367879252e-05, + "loss": 2.126, + "step": 6251 + }, + { + "epoch": 1.27, + "learning_rate": 1.5384493894831287e-05, + "loss": 2.1112, + "step": 6252 + }, + { + "epoch": 1.27, + "learning_rate": 1.53830382611267e-05, + "loss": 2.0797, + "step": 6253 + }, + { + "epoch": 1.27, + "learning_rate": 1.538158246680893e-05, + "loss": 2.1722, + "step": 6254 + }, + { + "epoch": 1.27, + "learning_rate": 1.5380126511921404e-05, + "loss": 2.1329, + "step": 6255 + }, + { + "epoch": 1.27, + "learning_rate": 1.5378670396507575e-05, + "loss": 2.0494, + "step": 6256 + }, + { + "epoch": 1.27, + "learning_rate": 1.5377214120610882e-05, + "loss": 2.0726, + "step": 6257 + }, + { + "epoch": 1.27, + "learning_rate": 1.5375757684274775e-05, + "loss": 2.1973, + "step": 6258 + }, + { + "epoch": 1.27, + "learning_rate": 1.5374301087542714e-05, + "loss": 2.0675, + "step": 6259 + }, + { + "epoch": 1.27, + "learning_rate": 1.5372844330458154e-05, + "loss": 2.147, + "step": 6260 + }, + { + "epoch": 1.27, + "learning_rate": 1.5371387413064566e-05, + "loss": 2.1414, + "step": 6261 + }, + { + "epoch": 1.27, + "learning_rate": 1.5369930335405415e-05, + "loss": 2.111, + "step": 6262 + }, + { + "epoch": 1.27, + "learning_rate": 1.5368473097524175e-05, + "loss": 2.1683, + "step": 6263 + }, + { + "epoch": 1.27, + "learning_rate": 1.5367015699464332e-05, + "loss": 2.093, + "step": 6264 + }, + { + "epoch": 1.27, + "learning_rate": 1.5365558141269367e-05, + "loss": 2.1484, + "step": 6265 + }, + { + "epoch": 1.27, + "learning_rate": 1.5364100422982762e-05, + "loss": 2.1546, + "step": 6266 + }, + { + "epoch": 1.27, + "learning_rate": 1.5362642544648022e-05, + "loss": 2.1483, + "step": 6267 + }, + { + "epoch": 1.27, + "learning_rate": 1.536118450630864e-05, + "loss": 2.0161, + "step": 6268 + }, + { + "epoch": 1.27, + "learning_rate": 1.5359726308008118e-05, + "loss": 2.1067, + "step": 6269 + }, + { + "epoch": 1.27, + "learning_rate": 1.5358267949789968e-05, + "loss": 2.0658, + "step": 6270 + }, + { + "epoch": 1.27, + "learning_rate": 1.53568094316977e-05, + "loss": 2.0425, + "step": 6271 + }, + { + "epoch": 1.27, + "learning_rate": 1.5355350753774835e-05, + "loss": 2.2208, + "step": 6272 + }, + { + "epoch": 1.27, + "learning_rate": 1.535389191606489e-05, + "loss": 2.0383, + "step": 6273 + }, + { + "epoch": 1.27, + "learning_rate": 1.5352432918611392e-05, + "loss": 2.1184, + "step": 6274 + }, + { + "epoch": 1.27, + "learning_rate": 1.535097376145788e-05, + "loss": 2.1061, + "step": 6275 + }, + { + "epoch": 1.27, + "learning_rate": 1.5349514444647887e-05, + "loss": 2.1144, + "step": 6276 + }, + { + "epoch": 1.27, + "learning_rate": 1.5348054968224958e-05, + "loss": 2.1076, + "step": 6277 + }, + { + "epoch": 1.27, + "learning_rate": 1.534659533223263e-05, + "loss": 2.2049, + "step": 6278 + }, + { + "epoch": 1.27, + "learning_rate": 1.534513553671446e-05, + "loss": 2.112, + "step": 6279 + }, + { + "epoch": 1.27, + "learning_rate": 1.5343675581714003e-05, + "loss": 2.1062, + "step": 6280 + }, + { + "epoch": 1.27, + "learning_rate": 1.534221546727482e-05, + "loss": 2.0774, + "step": 6281 + }, + { + "epoch": 1.28, + "learning_rate": 1.534075519344048e-05, + "loss": 2.2493, + "step": 6282 + }, + { + "epoch": 1.28, + "learning_rate": 1.5339294760254548e-05, + "loss": 2.152, + "step": 6283 + }, + { + "epoch": 1.28, + "learning_rate": 1.53378341677606e-05, + "loss": 2.1693, + "step": 6284 + }, + { + "epoch": 1.28, + "learning_rate": 1.533637341600221e-05, + "loss": 2.1566, + "step": 6285 + }, + { + "epoch": 1.28, + "learning_rate": 1.533491250502298e-05, + "loss": 2.172, + "step": 6286 + }, + { + "epoch": 1.28, + "learning_rate": 1.533345143486648e-05, + "loss": 2.1604, + "step": 6287 + }, + { + "epoch": 1.28, + "learning_rate": 1.5331990205576307e-05, + "loss": 2.1624, + "step": 6288 + }, + { + "epoch": 1.28, + "learning_rate": 1.5330528817196068e-05, + "loss": 2.0309, + "step": 6289 + }, + { + "epoch": 1.28, + "learning_rate": 1.5329067269769363e-05, + "loss": 2.2005, + "step": 6290 + }, + { + "epoch": 1.28, + "learning_rate": 1.5327605563339796e-05, + "loss": 2.1013, + "step": 6291 + }, + { + "epoch": 1.28, + "learning_rate": 1.5326143697950984e-05, + "loss": 2.1386, + "step": 6292 + }, + { + "epoch": 1.28, + "learning_rate": 1.532468167364654e-05, + "loss": 2.1853, + "step": 6293 + }, + { + "epoch": 1.28, + "learning_rate": 1.5323219490470093e-05, + "loss": 2.0756, + "step": 6294 + }, + { + "epoch": 1.28, + "learning_rate": 1.532175714846526e-05, + "loss": 2.1963, + "step": 6295 + }, + { + "epoch": 1.28, + "learning_rate": 1.5320294647675686e-05, + "loss": 2.0763, + "step": 6296 + }, + { + "epoch": 1.28, + "learning_rate": 1.5318831988145e-05, + "loss": 2.0417, + "step": 6297 + }, + { + "epoch": 1.28, + "learning_rate": 1.5317369169916844e-05, + "loss": 2.1988, + "step": 6298 + }, + { + "epoch": 1.28, + "learning_rate": 1.531590619303486e-05, + "loss": 2.2153, + "step": 6299 + }, + { + "epoch": 1.28, + "learning_rate": 1.5314443057542703e-05, + "loss": 2.1243, + "step": 6300 + }, + { + "epoch": 1.28, + "learning_rate": 1.5312979763484033e-05, + "loss": 2.1914, + "step": 6301 + }, + { + "epoch": 1.28, + "learning_rate": 1.53115163109025e-05, + "loss": 2.0991, + "step": 6302 + }, + { + "epoch": 1.28, + "learning_rate": 1.531005269984178e-05, + "loss": 2.1179, + "step": 6303 + }, + { + "epoch": 1.28, + "learning_rate": 1.5308588930345527e-05, + "loss": 2.1279, + "step": 6304 + }, + { + "epoch": 1.28, + "learning_rate": 1.5307125002457432e-05, + "loss": 2.1901, + "step": 6305 + }, + { + "epoch": 1.28, + "learning_rate": 1.5305660916221162e-05, + "loss": 2.1007, + "step": 6306 + }, + { + "epoch": 1.28, + "learning_rate": 1.530419667168041e-05, + "loss": 2.1383, + "step": 6307 + }, + { + "epoch": 1.28, + "learning_rate": 1.5302732268878858e-05, + "loss": 2.1161, + "step": 6308 + }, + { + "epoch": 1.28, + "learning_rate": 1.5301267707860205e-05, + "loss": 2.0462, + "step": 6309 + }, + { + "epoch": 1.28, + "learning_rate": 1.5299802988668143e-05, + "loss": 2.0655, + "step": 6310 + }, + { + "epoch": 1.28, + "learning_rate": 1.5298338111346377e-05, + "loss": 2.207, + "step": 6311 + }, + { + "epoch": 1.28, + "learning_rate": 1.5296873075938617e-05, + "loss": 2.1419, + "step": 6312 + }, + { + "epoch": 1.28, + "learning_rate": 1.529540788248857e-05, + "loss": 2.0535, + "step": 6313 + }, + { + "epoch": 1.28, + "learning_rate": 1.5293942531039955e-05, + "loss": 2.1499, + "step": 6314 + }, + { + "epoch": 1.28, + "learning_rate": 1.5292477021636498e-05, + "loss": 2.1681, + "step": 6315 + }, + { + "epoch": 1.28, + "learning_rate": 1.529101135432192e-05, + "loss": 2.0816, + "step": 6316 + }, + { + "epoch": 1.28, + "learning_rate": 1.528954552913995e-05, + "loss": 2.0663, + "step": 6317 + }, + { + "epoch": 1.28, + "learning_rate": 1.528807954613433e-05, + "loss": 2.0866, + "step": 6318 + }, + { + "epoch": 1.28, + "learning_rate": 1.52866134053488e-05, + "loss": 2.115, + "step": 6319 + }, + { + "epoch": 1.28, + "learning_rate": 1.5285147106827096e-05, + "loss": 2.0987, + "step": 6320 + }, + { + "epoch": 1.28, + "learning_rate": 1.528368065061298e-05, + "loss": 2.1337, + "step": 6321 + }, + { + "epoch": 1.28, + "learning_rate": 1.5282214036750197e-05, + "loss": 2.135, + "step": 6322 + }, + { + "epoch": 1.28, + "learning_rate": 1.5280747265282512e-05, + "loss": 2.1462, + "step": 6323 + }, + { + "epoch": 1.28, + "learning_rate": 1.5279280336253686e-05, + "loss": 2.029, + "step": 6324 + }, + { + "epoch": 1.28, + "learning_rate": 1.527781324970749e-05, + "loss": 2.0651, + "step": 6325 + }, + { + "epoch": 1.28, + "learning_rate": 1.5276346005687695e-05, + "loss": 2.0658, + "step": 6326 + }, + { + "epoch": 1.28, + "learning_rate": 1.5274878604238083e-05, + "loss": 2.1464, + "step": 6327 + }, + { + "epoch": 1.28, + "learning_rate": 1.5273411045402425e-05, + "loss": 2.1946, + "step": 6328 + }, + { + "epoch": 1.28, + "learning_rate": 1.5271943329224525e-05, + "loss": 2.1156, + "step": 6329 + }, + { + "epoch": 1.28, + "learning_rate": 1.5270475455748165e-05, + "loss": 2.1348, + "step": 6330 + }, + { + "epoch": 1.29, + "learning_rate": 1.5269007425017143e-05, + "loss": 2.2444, + "step": 6331 + }, + { + "epoch": 1.29, + "learning_rate": 1.5267539237075265e-05, + "loss": 2.1848, + "step": 6332 + }, + { + "epoch": 1.29, + "learning_rate": 1.5266070891966328e-05, + "loss": 2.1291, + "step": 6333 + }, + { + "epoch": 1.29, + "learning_rate": 1.5264602389734152e-05, + "loss": 2.0312, + "step": 6334 + }, + { + "epoch": 1.29, + "learning_rate": 1.526313373042255e-05, + "loss": 2.0846, + "step": 6335 + }, + { + "epoch": 1.29, + "learning_rate": 1.526166491407534e-05, + "loss": 2.0719, + "step": 6336 + }, + { + "epoch": 1.29, + "learning_rate": 1.5260195940736346e-05, + "loss": 2.1114, + "step": 6337 + }, + { + "epoch": 1.29, + "learning_rate": 1.5258726810449402e-05, + "loss": 2.1015, + "step": 6338 + }, + { + "epoch": 1.29, + "learning_rate": 1.525725752325834e-05, + "loss": 2.1853, + "step": 6339 + }, + { + "epoch": 1.29, + "learning_rate": 1.5255788079207e-05, + "loss": 2.1523, + "step": 6340 + }, + { + "epoch": 1.29, + "learning_rate": 1.5254318478339226e-05, + "loss": 2.1241, + "step": 6341 + }, + { + "epoch": 1.29, + "learning_rate": 1.5252848720698861e-05, + "loss": 2.1263, + "step": 6342 + }, + { + "epoch": 1.29, + "learning_rate": 1.5251378806329764e-05, + "loss": 2.1568, + "step": 6343 + }, + { + "epoch": 1.29, + "learning_rate": 1.5249908735275791e-05, + "loss": 2.0741, + "step": 6344 + }, + { + "epoch": 1.29, + "learning_rate": 1.5248438507580806e-05, + "loss": 2.0836, + "step": 6345 + }, + { + "epoch": 1.29, + "learning_rate": 1.524696812328867e-05, + "loss": 2.1301, + "step": 6346 + }, + { + "epoch": 1.29, + "learning_rate": 1.5245497582443263e-05, + "loss": 2.1936, + "step": 6347 + }, + { + "epoch": 1.29, + "learning_rate": 1.5244026885088455e-05, + "loss": 2.1214, + "step": 6348 + }, + { + "epoch": 1.29, + "learning_rate": 1.524255603126813e-05, + "loss": 2.1407, + "step": 6349 + }, + { + "epoch": 1.29, + "learning_rate": 1.5241085021026171e-05, + "loss": 2.1583, + "step": 6350 + }, + { + "epoch": 1.29, + "learning_rate": 1.5239613854406474e-05, + "loss": 2.1952, + "step": 6351 + }, + { + "epoch": 1.29, + "learning_rate": 1.5238142531452929e-05, + "loss": 2.161, + "step": 6352 + }, + { + "epoch": 1.29, + "learning_rate": 1.5236671052209437e-05, + "loss": 2.1377, + "step": 6353 + }, + { + "epoch": 1.29, + "learning_rate": 1.5235199416719902e-05, + "loss": 2.0665, + "step": 6354 + }, + { + "epoch": 1.29, + "learning_rate": 1.5233727625028236e-05, + "loss": 2.0855, + "step": 6355 + }, + { + "epoch": 1.29, + "learning_rate": 1.5232255677178352e-05, + "loss": 2.1581, + "step": 6356 + }, + { + "epoch": 1.29, + "learning_rate": 1.523078357321416e-05, + "loss": 2.1602, + "step": 6357 + }, + { + "epoch": 1.29, + "learning_rate": 1.5229311313179595e-05, + "loss": 2.2117, + "step": 6358 + }, + { + "epoch": 1.29, + "learning_rate": 1.5227838897118577e-05, + "loss": 2.1907, + "step": 6359 + }, + { + "epoch": 1.29, + "learning_rate": 1.5226366325075042e-05, + "loss": 2.0917, + "step": 6360 + }, + { + "epoch": 1.29, + "learning_rate": 1.5224893597092926e-05, + "loss": 2.05, + "step": 6361 + }, + { + "epoch": 1.29, + "learning_rate": 1.522342071321617e-05, + "loss": 2.0871, + "step": 6362 + }, + { + "epoch": 1.29, + "learning_rate": 1.5221947673488721e-05, + "loss": 2.1749, + "step": 6363 + }, + { + "epoch": 1.29, + "learning_rate": 1.522047447795453e-05, + "loss": 2.0006, + "step": 6364 + }, + { + "epoch": 1.29, + "learning_rate": 1.521900112665755e-05, + "loss": 2.1782, + "step": 6365 + }, + { + "epoch": 1.29, + "learning_rate": 1.5217527619641745e-05, + "loss": 2.1808, + "step": 6366 + }, + { + "epoch": 1.29, + "learning_rate": 1.5216053956951081e-05, + "loss": 2.0902, + "step": 6367 + }, + { + "epoch": 1.29, + "learning_rate": 1.5214580138629522e-05, + "loss": 2.119, + "step": 6368 + }, + { + "epoch": 1.29, + "learning_rate": 1.5213106164721045e-05, + "loss": 2.1613, + "step": 6369 + }, + { + "epoch": 1.29, + "learning_rate": 1.5211632035269628e-05, + "loss": 2.142, + "step": 6370 + }, + { + "epoch": 1.29, + "learning_rate": 1.5210157750319257e-05, + "loss": 2.1597, + "step": 6371 + }, + { + "epoch": 1.29, + "learning_rate": 1.5208683309913918e-05, + "loss": 2.1326, + "step": 6372 + }, + { + "epoch": 1.29, + "learning_rate": 1.5207208714097605e-05, + "loss": 2.1502, + "step": 6373 + }, + { + "epoch": 1.29, + "learning_rate": 1.5205733962914313e-05, + "loss": 2.1229, + "step": 6374 + }, + { + "epoch": 1.29, + "learning_rate": 1.5204259056408046e-05, + "loss": 2.1435, + "step": 6375 + }, + { + "epoch": 1.29, + "learning_rate": 1.5202783994622814e-05, + "loss": 2.0711, + "step": 6376 + }, + { + "epoch": 1.29, + "learning_rate": 1.5201308777602619e-05, + "loss": 2.2528, + "step": 6377 + }, + { + "epoch": 1.29, + "learning_rate": 1.5199833405391485e-05, + "loss": 2.1523, + "step": 6378 + }, + { + "epoch": 1.29, + "learning_rate": 1.5198357878033427e-05, + "loss": 2.1953, + "step": 6379 + }, + { + "epoch": 1.3, + "learning_rate": 1.5196882195572478e-05, + "loss": 2.1182, + "step": 6380 + }, + { + "epoch": 1.3, + "learning_rate": 1.519540635805266e-05, + "loss": 2.1674, + "step": 6381 + }, + { + "epoch": 1.3, + "learning_rate": 1.5193930365518011e-05, + "loss": 2.1633, + "step": 6382 + }, + { + "epoch": 1.3, + "learning_rate": 1.5192454218012569e-05, + "loss": 2.148, + "step": 6383 + }, + { + "epoch": 1.3, + "learning_rate": 1.5190977915580375e-05, + "loss": 2.1273, + "step": 6384 + }, + { + "epoch": 1.3, + "learning_rate": 1.5189501458265486e-05, + "loss": 2.1172, + "step": 6385 + }, + { + "epoch": 1.3, + "learning_rate": 1.5188024846111945e-05, + "loss": 2.1469, + "step": 6386 + }, + { + "epoch": 1.3, + "learning_rate": 1.5186548079163817e-05, + "loss": 2.1198, + "step": 6387 + }, + { + "epoch": 1.3, + "learning_rate": 1.5185071157465157e-05, + "loss": 2.0274, + "step": 6388 + }, + { + "epoch": 1.3, + "learning_rate": 1.5183594081060038e-05, + "loss": 2.126, + "step": 6389 + }, + { + "epoch": 1.3, + "learning_rate": 1.5182116849992528e-05, + "loss": 2.1979, + "step": 6390 + }, + { + "epoch": 1.3, + "learning_rate": 1.5180639464306706e-05, + "loss": 2.068, + "step": 6391 + }, + { + "epoch": 1.3, + "learning_rate": 1.5179161924046648e-05, + "loss": 2.0696, + "step": 6392 + }, + { + "epoch": 1.3, + "learning_rate": 1.5177684229256445e-05, + "loss": 2.1697, + "step": 6393 + }, + { + "epoch": 1.3, + "learning_rate": 1.5176206379980181e-05, + "loss": 2.1345, + "step": 6394 + }, + { + "epoch": 1.3, + "learning_rate": 1.5174728376261953e-05, + "loss": 2.1288, + "step": 6395 + }, + { + "epoch": 1.3, + "learning_rate": 1.5173250218145862e-05, + "loss": 2.1691, + "step": 6396 + }, + { + "epoch": 1.3, + "learning_rate": 1.517177190567601e-05, + "loss": 2.178, + "step": 6397 + }, + { + "epoch": 1.3, + "learning_rate": 1.5170293438896507e-05, + "loss": 2.0621, + "step": 6398 + }, + { + "epoch": 1.3, + "learning_rate": 1.5168814817851459e-05, + "loss": 2.099, + "step": 6399 + }, + { + "epoch": 1.3, + "learning_rate": 1.516733604258499e-05, + "loss": 2.114, + "step": 6400 + }, + { + "epoch": 1.3, + "learning_rate": 1.516585711314122e-05, + "loss": 2.0875, + "step": 6401 + }, + { + "epoch": 1.3, + "learning_rate": 1.5164378029564279e-05, + "loss": 2.1309, + "step": 6402 + }, + { + "epoch": 1.3, + "learning_rate": 1.5162898791898295e-05, + "loss": 2.1373, + "step": 6403 + }, + { + "epoch": 1.3, + "learning_rate": 1.5161419400187403e-05, + "loss": 2.1411, + "step": 6404 + }, + { + "epoch": 1.3, + "learning_rate": 1.5159939854475746e-05, + "loss": 2.1113, + "step": 6405 + }, + { + "epoch": 1.3, + "learning_rate": 1.5158460154807466e-05, + "loss": 2.1033, + "step": 6406 + }, + { + "epoch": 1.3, + "learning_rate": 1.5156980301226716e-05, + "loss": 2.1304, + "step": 6407 + }, + { + "epoch": 1.3, + "learning_rate": 1.5155500293777647e-05, + "loss": 2.1571, + "step": 6408 + }, + { + "epoch": 1.3, + "learning_rate": 1.5154020132504424e-05, + "loss": 2.1949, + "step": 6409 + }, + { + "epoch": 1.3, + "learning_rate": 1.51525398174512e-05, + "loss": 2.1594, + "step": 6410 + }, + { + "epoch": 1.3, + "learning_rate": 1.5151059348662154e-05, + "loss": 2.0563, + "step": 6411 + }, + { + "epoch": 1.3, + "learning_rate": 1.514957872618145e-05, + "loss": 2.1789, + "step": 6412 + }, + { + "epoch": 1.3, + "learning_rate": 1.514809795005327e-05, + "loss": 2.1183, + "step": 6413 + }, + { + "epoch": 1.3, + "learning_rate": 1.5146617020321797e-05, + "loss": 2.0833, + "step": 6414 + }, + { + "epoch": 1.3, + "learning_rate": 1.5145135937031215e-05, + "loss": 2.0944, + "step": 6415 + }, + { + "epoch": 1.3, + "learning_rate": 1.5143654700225711e-05, + "loss": 2.0409, + "step": 6416 + }, + { + "epoch": 1.3, + "learning_rate": 1.5142173309949488e-05, + "loss": 2.0909, + "step": 6417 + }, + { + "epoch": 1.3, + "learning_rate": 1.5140691766246742e-05, + "loss": 2.0538, + "step": 6418 + }, + { + "epoch": 1.3, + "learning_rate": 1.5139210069161676e-05, + "loss": 2.151, + "step": 6419 + }, + { + "epoch": 1.3, + "learning_rate": 1.5137728218738504e-05, + "loss": 2.1263, + "step": 6420 + }, + { + "epoch": 1.3, + "learning_rate": 1.5136246215021438e-05, + "loss": 2.0704, + "step": 6421 + }, + { + "epoch": 1.3, + "learning_rate": 1.5134764058054692e-05, + "loss": 2.128, + "step": 6422 + }, + { + "epoch": 1.3, + "learning_rate": 1.5133281747882495e-05, + "loss": 2.128, + "step": 6423 + }, + { + "epoch": 1.3, + "learning_rate": 1.513179928454907e-05, + "loss": 2.1428, + "step": 6424 + }, + { + "epoch": 1.3, + "learning_rate": 1.5130316668098655e-05, + "loss": 2.1614, + "step": 6425 + }, + { + "epoch": 1.3, + "learning_rate": 1.5128833898575482e-05, + "loss": 2.2033, + "step": 6426 + }, + { + "epoch": 1.3, + "learning_rate": 1.5127350976023793e-05, + "loss": 2.0988, + "step": 6427 + }, + { + "epoch": 1.3, + "learning_rate": 1.5125867900487835e-05, + "loss": 2.0675, + "step": 6428 + }, + { + "epoch": 1.3, + "learning_rate": 1.5124384672011854e-05, + "loss": 2.0839, + "step": 6429 + }, + { + "epoch": 1.31, + "learning_rate": 1.5122901290640111e-05, + "loss": 2.1099, + "step": 6430 + }, + { + "epoch": 1.31, + "learning_rate": 1.5121417756416862e-05, + "loss": 2.1167, + "step": 6431 + }, + { + "epoch": 1.31, + "learning_rate": 1.5119934069386371e-05, + "loss": 2.0828, + "step": 6432 + }, + { + "epoch": 1.31, + "learning_rate": 1.5118450229592908e-05, + "loss": 2.1144, + "step": 6433 + }, + { + "epoch": 1.31, + "learning_rate": 1.5116966237080746e-05, + "loss": 2.1625, + "step": 6434 + }, + { + "epoch": 1.31, + "learning_rate": 1.5115482091894164e-05, + "loss": 2.1953, + "step": 6435 + }, + { + "epoch": 1.31, + "learning_rate": 1.5113997794077442e-05, + "loss": 2.1535, + "step": 6436 + }, + { + "epoch": 1.31, + "learning_rate": 1.5112513343674869e-05, + "loss": 2.0969, + "step": 6437 + }, + { + "epoch": 1.31, + "learning_rate": 1.5111028740730729e-05, + "loss": 2.1284, + "step": 6438 + }, + { + "epoch": 1.31, + "learning_rate": 1.510954398528933e-05, + "loss": 2.13, + "step": 6439 + }, + { + "epoch": 1.31, + "learning_rate": 1.5108059077394965e-05, + "loss": 2.0364, + "step": 6440 + }, + { + "epoch": 1.31, + "learning_rate": 1.5106574017091942e-05, + "loss": 2.1923, + "step": 6441 + }, + { + "epoch": 1.31, + "learning_rate": 1.5105088804424569e-05, + "loss": 2.1861, + "step": 6442 + }, + { + "epoch": 1.31, + "learning_rate": 1.5103603439437158e-05, + "loss": 2.2273, + "step": 6443 + }, + { + "epoch": 1.31, + "learning_rate": 1.5102117922174032e-05, + "loss": 2.175, + "step": 6444 + }, + { + "epoch": 1.31, + "learning_rate": 1.5100632252679513e-05, + "loss": 2.1033, + "step": 6445 + }, + { + "epoch": 1.31, + "learning_rate": 1.5099146430997924e-05, + "loss": 2.0869, + "step": 6446 + }, + { + "epoch": 1.31, + "learning_rate": 1.5097660457173604e-05, + "loss": 2.1374, + "step": 6447 + }, + { + "epoch": 1.31, + "learning_rate": 1.5096174331250886e-05, + "loss": 2.0406, + "step": 6448 + }, + { + "epoch": 1.31, + "learning_rate": 1.5094688053274114e-05, + "loss": 2.1287, + "step": 6449 + }, + { + "epoch": 1.31, + "learning_rate": 1.5093201623287631e-05, + "loss": 2.112, + "step": 6450 + }, + { + "epoch": 1.31, + "learning_rate": 1.509171504133579e-05, + "loss": 2.0364, + "step": 6451 + }, + { + "epoch": 1.31, + "learning_rate": 1.5090228307462946e-05, + "loss": 2.0709, + "step": 6452 + }, + { + "epoch": 1.31, + "learning_rate": 1.5088741421713456e-05, + "loss": 2.123, + "step": 6453 + }, + { + "epoch": 1.31, + "learning_rate": 1.5087254384131686e-05, + "loss": 2.0954, + "step": 6454 + }, + { + "epoch": 1.31, + "learning_rate": 1.5085767194762003e-05, + "loss": 2.202, + "step": 6455 + }, + { + "epoch": 1.31, + "learning_rate": 1.5084279853648784e-05, + "loss": 2.1855, + "step": 6456 + }, + { + "epoch": 1.31, + "learning_rate": 1.5082792360836402e-05, + "loss": 2.0939, + "step": 6457 + }, + { + "epoch": 1.31, + "learning_rate": 1.5081304716369238e-05, + "loss": 2.1456, + "step": 6458 + }, + { + "epoch": 1.31, + "learning_rate": 1.5079816920291688e-05, + "loss": 2.1951, + "step": 6459 + }, + { + "epoch": 1.31, + "learning_rate": 1.5078328972648132e-05, + "loss": 2.2015, + "step": 6460 + }, + { + "epoch": 1.31, + "learning_rate": 1.5076840873482975e-05, + "loss": 2.0861, + "step": 6461 + }, + { + "epoch": 1.31, + "learning_rate": 1.507535262284061e-05, + "loss": 2.0707, + "step": 6462 + }, + { + "epoch": 1.31, + "learning_rate": 1.5073864220765446e-05, + "loss": 2.1333, + "step": 6463 + }, + { + "epoch": 1.31, + "learning_rate": 1.5072375667301893e-05, + "loss": 2.1133, + "step": 6464 + }, + { + "epoch": 1.31, + "learning_rate": 1.507088696249436e-05, + "loss": 2.0167, + "step": 6465 + }, + { + "epoch": 1.31, + "learning_rate": 1.506939810638727e-05, + "loss": 2.1091, + "step": 6466 + }, + { + "epoch": 1.31, + "learning_rate": 1.5067909099025044e-05, + "loss": 2.0878, + "step": 6467 + }, + { + "epoch": 1.31, + "learning_rate": 1.5066419940452108e-05, + "loss": 2.1671, + "step": 6468 + }, + { + "epoch": 1.31, + "learning_rate": 1.5064930630712897e-05, + "loss": 2.0882, + "step": 6469 + }, + { + "epoch": 1.31, + "learning_rate": 1.5063441169851847e-05, + "loss": 2.1488, + "step": 6470 + }, + { + "epoch": 1.31, + "learning_rate": 1.5061951557913394e-05, + "loss": 2.2144, + "step": 6471 + }, + { + "epoch": 1.31, + "learning_rate": 1.5060461794941992e-05, + "loss": 2.1172, + "step": 6472 + }, + { + "epoch": 1.31, + "learning_rate": 1.5058971880982084e-05, + "loss": 2.0862, + "step": 6473 + }, + { + "epoch": 1.31, + "learning_rate": 1.5057481816078124e-05, + "loss": 2.1686, + "step": 6474 + }, + { + "epoch": 1.31, + "learning_rate": 1.5055991600274574e-05, + "loss": 2.1326, + "step": 6475 + }, + { + "epoch": 1.31, + "learning_rate": 1.5054501233615897e-05, + "loss": 2.1362, + "step": 6476 + }, + { + "epoch": 1.31, + "learning_rate": 1.5053010716146564e-05, + "loss": 2.0404, + "step": 6477 + }, + { + "epoch": 1.31, + "learning_rate": 1.505152004791104e-05, + "loss": 2.1866, + "step": 6478 + }, + { + "epoch": 1.32, + "learning_rate": 1.5050029228953808e-05, + "loss": 2.1317, + "step": 6479 + }, + { + "epoch": 1.32, + "learning_rate": 1.5048538259319347e-05, + "loss": 2.0395, + "step": 6480 + }, + { + "epoch": 1.32, + "learning_rate": 1.5047047139052144e-05, + "loss": 2.1762, + "step": 6481 + }, + { + "epoch": 1.32, + "learning_rate": 1.5045555868196688e-05, + "loss": 2.1194, + "step": 6482 + }, + { + "epoch": 1.32, + "learning_rate": 1.5044064446797473e-05, + "loss": 2.1498, + "step": 6483 + }, + { + "epoch": 1.32, + "learning_rate": 1.5042572874899004e-05, + "loss": 2.1212, + "step": 6484 + }, + { + "epoch": 1.32, + "learning_rate": 1.504108115254578e-05, + "loss": 2.0638, + "step": 6485 + }, + { + "epoch": 1.32, + "learning_rate": 1.5039589279782307e-05, + "loss": 2.1438, + "step": 6486 + }, + { + "epoch": 1.32, + "learning_rate": 1.5038097256653105e-05, + "loss": 2.0668, + "step": 6487 + }, + { + "epoch": 1.32, + "learning_rate": 1.5036605083202687e-05, + "loss": 2.0619, + "step": 6488 + }, + { + "epoch": 1.32, + "learning_rate": 1.5035112759475574e-05, + "loss": 2.1379, + "step": 6489 + }, + { + "epoch": 1.32, + "learning_rate": 1.5033620285516295e-05, + "loss": 2.1425, + "step": 6490 + }, + { + "epoch": 1.32, + "learning_rate": 1.503212766136938e-05, + "loss": 2.1682, + "step": 6491 + }, + { + "epoch": 1.32, + "learning_rate": 1.5030634887079365e-05, + "loss": 2.1249, + "step": 6492 + }, + { + "epoch": 1.32, + "learning_rate": 1.5029141962690785e-05, + "loss": 2.0935, + "step": 6493 + }, + { + "epoch": 1.32, + "learning_rate": 1.5027648888248188e-05, + "loss": 2.0736, + "step": 6494 + }, + { + "epoch": 1.32, + "learning_rate": 1.5026155663796123e-05, + "loss": 2.1826, + "step": 6495 + }, + { + "epoch": 1.32, + "learning_rate": 1.5024662289379147e-05, + "loss": 2.166, + "step": 6496 + }, + { + "epoch": 1.32, + "learning_rate": 1.5023168765041808e-05, + "loss": 2.1215, + "step": 6497 + }, + { + "epoch": 1.32, + "learning_rate": 1.5021675090828675e-05, + "loss": 2.0224, + "step": 6498 + }, + { + "epoch": 1.32, + "learning_rate": 1.5020181266784311e-05, + "loss": 2.153, + "step": 6499 + }, + { + "epoch": 1.32, + "learning_rate": 1.5018687292953294e-05, + "loss": 2.0798, + "step": 6500 + }, + { + "epoch": 1.32, + "learning_rate": 1.501719316938019e-05, + "loss": 2.169, + "step": 6501 + }, + { + "epoch": 1.32, + "learning_rate": 1.5015698896109588e-05, + "loss": 2.0917, + "step": 6502 + }, + { + "epoch": 1.32, + "learning_rate": 1.5014204473186064e-05, + "loss": 2.1905, + "step": 6503 + }, + { + "epoch": 1.32, + "learning_rate": 1.5012709900654216e-05, + "loss": 2.1073, + "step": 6504 + }, + { + "epoch": 1.32, + "learning_rate": 1.5011215178558629e-05, + "loss": 2.1445, + "step": 6505 + }, + { + "epoch": 1.32, + "learning_rate": 1.5009720306943907e-05, + "loss": 2.1676, + "step": 6506 + }, + { + "epoch": 1.32, + "learning_rate": 1.5008225285854648e-05, + "loss": 2.1899, + "step": 6507 + }, + { + "epoch": 1.32, + "learning_rate": 1.5006730115335462e-05, + "loss": 2.0782, + "step": 6508 + }, + { + "epoch": 1.32, + "learning_rate": 1.5005234795430958e-05, + "loss": 2.2087, + "step": 6509 + }, + { + "epoch": 1.32, + "learning_rate": 1.500373932618575e-05, + "loss": 2.137, + "step": 6510 + }, + { + "epoch": 1.32, + "learning_rate": 1.5002243707644466e-05, + "loss": 2.0894, + "step": 6511 + }, + { + "epoch": 1.32, + "learning_rate": 1.5000747939851726e-05, + "loss": 2.0366, + "step": 6512 + }, + { + "epoch": 1.32, + "learning_rate": 1.4999252022852154e-05, + "loss": 2.0946, + "step": 6513 + }, + { + "epoch": 1.32, + "learning_rate": 1.4997755956690387e-05, + "loss": 2.1877, + "step": 6514 + }, + { + "epoch": 1.32, + "learning_rate": 1.4996259741411068e-05, + "loss": 2.0727, + "step": 6515 + }, + { + "epoch": 1.32, + "learning_rate": 1.4994763377058834e-05, + "loss": 2.0981, + "step": 6516 + }, + { + "epoch": 1.32, + "learning_rate": 1.4993266863678333e-05, + "loss": 2.133, + "step": 6517 + }, + { + "epoch": 1.32, + "learning_rate": 1.4991770201314218e-05, + "loss": 2.1319, + "step": 6518 + }, + { + "epoch": 1.32, + "learning_rate": 1.4990273390011139e-05, + "loss": 2.1014, + "step": 6519 + }, + { + "epoch": 1.32, + "learning_rate": 1.4988776429813764e-05, + "loss": 2.1231, + "step": 6520 + }, + { + "epoch": 1.32, + "learning_rate": 1.4987279320766755e-05, + "loss": 2.1674, + "step": 6521 + }, + { + "epoch": 1.32, + "learning_rate": 1.498578206291478e-05, + "loss": 2.098, + "step": 6522 + }, + { + "epoch": 1.32, + "learning_rate": 1.498428465630251e-05, + "loss": 2.0768, + "step": 6523 + }, + { + "epoch": 1.32, + "learning_rate": 1.498278710097463e-05, + "loss": 2.2325, + "step": 6524 + }, + { + "epoch": 1.32, + "learning_rate": 1.4981289396975816e-05, + "loss": 2.1038, + "step": 6525 + }, + { + "epoch": 1.32, + "learning_rate": 1.4979791544350759e-05, + "loss": 2.1657, + "step": 6526 + }, + { + "epoch": 1.32, + "learning_rate": 1.4978293543144144e-05, + "loss": 2.0122, + "step": 6527 + }, + { + "epoch": 1.33, + "learning_rate": 1.4976795393400676e-05, + "loss": 2.2092, + "step": 6528 + }, + { + "epoch": 1.33, + "learning_rate": 1.4975297095165048e-05, + "loss": 2.0559, + "step": 6529 + }, + { + "epoch": 1.33, + "learning_rate": 1.4973798648481967e-05, + "loss": 2.1634, + "step": 6530 + }, + { + "epoch": 1.33, + "learning_rate": 1.4972300053396143e-05, + "loss": 2.1619, + "step": 6531 + }, + { + "epoch": 1.33, + "learning_rate": 1.4970801309952285e-05, + "loss": 2.1694, + "step": 6532 + }, + { + "epoch": 1.33, + "learning_rate": 1.496930241819512e-05, + "loss": 2.0756, + "step": 6533 + }, + { + "epoch": 1.33, + "learning_rate": 1.4967803378169361e-05, + "loss": 2.0743, + "step": 6534 + }, + { + "epoch": 1.33, + "learning_rate": 1.4966304189919739e-05, + "loss": 2.1488, + "step": 6535 + }, + { + "epoch": 1.33, + "learning_rate": 1.4964804853490981e-05, + "loss": 2.0489, + "step": 6536 + }, + { + "epoch": 1.33, + "learning_rate": 1.4963305368927828e-05, + "loss": 2.138, + "step": 6537 + }, + { + "epoch": 1.33, + "learning_rate": 1.4961805736275019e-05, + "loss": 2.1853, + "step": 6538 + }, + { + "epoch": 1.33, + "learning_rate": 1.4960305955577297e-05, + "loss": 2.0578, + "step": 6539 + }, + { + "epoch": 1.33, + "learning_rate": 1.4958806026879411e-05, + "loss": 2.0879, + "step": 6540 + }, + { + "epoch": 1.33, + "learning_rate": 1.4957305950226111e-05, + "loss": 2.0793, + "step": 6541 + }, + { + "epoch": 1.33, + "learning_rate": 1.4955805725662162e-05, + "loss": 2.1113, + "step": 6542 + }, + { + "epoch": 1.33, + "learning_rate": 1.495430535323232e-05, + "loss": 2.1017, + "step": 6543 + }, + { + "epoch": 1.33, + "learning_rate": 1.4952804832981353e-05, + "loss": 2.0524, + "step": 6544 + }, + { + "epoch": 1.33, + "learning_rate": 1.4951304164954033e-05, + "loss": 2.0955, + "step": 6545 + }, + { + "epoch": 1.33, + "learning_rate": 1.4949803349195135e-05, + "loss": 2.0926, + "step": 6546 + }, + { + "epoch": 1.33, + "learning_rate": 1.4948302385749435e-05, + "loss": 2.0764, + "step": 6547 + }, + { + "epoch": 1.33, + "learning_rate": 1.4946801274661726e-05, + "loss": 2.1489, + "step": 6548 + }, + { + "epoch": 1.33, + "learning_rate": 1.4945300015976789e-05, + "loss": 2.1499, + "step": 6549 + }, + { + "epoch": 1.33, + "learning_rate": 1.4943798609739417e-05, + "loss": 2.1066, + "step": 6550 + }, + { + "epoch": 1.33, + "learning_rate": 1.494229705599441e-05, + "loss": 2.1038, + "step": 6551 + }, + { + "epoch": 1.33, + "learning_rate": 1.4940795354786568e-05, + "loss": 2.1481, + "step": 6552 + }, + { + "epoch": 1.33, + "learning_rate": 1.49392935061607e-05, + "loss": 2.1135, + "step": 6553 + }, + { + "epoch": 1.33, + "learning_rate": 1.4937791510161615e-05, + "loss": 2.1267, + "step": 6554 + }, + { + "epoch": 1.33, + "learning_rate": 1.4936289366834123e-05, + "loss": 2.115, + "step": 6555 + }, + { + "epoch": 1.33, + "learning_rate": 1.4934787076223049e-05, + "loss": 2.0877, + "step": 6556 + }, + { + "epoch": 1.33, + "learning_rate": 1.4933284638373216e-05, + "loss": 2.1282, + "step": 6557 + }, + { + "epoch": 1.33, + "learning_rate": 1.4931782053329453e-05, + "loss": 2.1022, + "step": 6558 + }, + { + "epoch": 1.33, + "learning_rate": 1.4930279321136592e-05, + "loss": 2.1198, + "step": 6559 + }, + { + "epoch": 1.33, + "learning_rate": 1.4928776441839463e-05, + "loss": 2.1816, + "step": 6560 + }, + { + "epoch": 1.33, + "learning_rate": 1.4927273415482916e-05, + "loss": 2.1155, + "step": 6561 + }, + { + "epoch": 1.33, + "learning_rate": 1.4925770242111796e-05, + "loss": 2.1439, + "step": 6562 + }, + { + "epoch": 1.33, + "learning_rate": 1.4924266921770948e-05, + "loss": 2.1968, + "step": 6563 + }, + { + "epoch": 1.33, + "learning_rate": 1.492276345450523e-05, + "loss": 2.166, + "step": 6564 + }, + { + "epoch": 1.33, + "learning_rate": 1.4921259840359498e-05, + "loss": 2.1198, + "step": 6565 + }, + { + "epoch": 1.33, + "learning_rate": 1.491975607937862e-05, + "loss": 2.0932, + "step": 6566 + }, + { + "epoch": 1.33, + "learning_rate": 1.4918252171607458e-05, + "loss": 2.1533, + "step": 6567 + }, + { + "epoch": 1.33, + "learning_rate": 1.491674811709089e-05, + "loss": 2.1978, + "step": 6568 + }, + { + "epoch": 1.33, + "learning_rate": 1.4915243915873787e-05, + "loss": 2.0808, + "step": 6569 + }, + { + "epoch": 1.33, + "learning_rate": 1.4913739568001034e-05, + "loss": 2.0118, + "step": 6570 + }, + { + "epoch": 1.33, + "learning_rate": 1.4912235073517511e-05, + "loss": 2.1046, + "step": 6571 + }, + { + "epoch": 1.33, + "learning_rate": 1.4910730432468112e-05, + "loss": 2.0884, + "step": 6572 + }, + { + "epoch": 1.33, + "learning_rate": 1.490922564489773e-05, + "loss": 2.155, + "step": 6573 + }, + { + "epoch": 1.33, + "learning_rate": 1.4907720710851257e-05, + "loss": 2.1707, + "step": 6574 + }, + { + "epoch": 1.33, + "learning_rate": 1.4906215630373606e-05, + "loss": 2.1844, + "step": 6575 + }, + { + "epoch": 1.33, + "learning_rate": 1.490471040350968e-05, + "loss": 2.1395, + "step": 6576 + }, + { + "epoch": 1.34, + "learning_rate": 1.4903205030304384e-05, + "loss": 2.1619, + "step": 6577 + }, + { + "epoch": 1.34, + "learning_rate": 1.490169951080264e-05, + "loss": 2.1067, + "step": 6578 + }, + { + "epoch": 1.34, + "learning_rate": 1.4900193845049366e-05, + "loss": 2.1206, + "step": 6579 + }, + { + "epoch": 1.34, + "learning_rate": 1.4898688033089492e-05, + "loss": 2.2113, + "step": 6580 + }, + { + "epoch": 1.34, + "learning_rate": 1.489718207496794e-05, + "loss": 2.1136, + "step": 6581 + }, + { + "epoch": 1.34, + "learning_rate": 1.4895675970729641e-05, + "loss": 2.0351, + "step": 6582 + }, + { + "epoch": 1.34, + "learning_rate": 1.489416972041954e-05, + "loss": 2.1895, + "step": 6583 + }, + { + "epoch": 1.34, + "learning_rate": 1.4892663324082574e-05, + "loss": 2.085, + "step": 6584 + }, + { + "epoch": 1.34, + "learning_rate": 1.4891156781763694e-05, + "loss": 2.1317, + "step": 6585 + }, + { + "epoch": 1.34, + "learning_rate": 1.4889650093507842e-05, + "loss": 2.1876, + "step": 6586 + }, + { + "epoch": 1.34, + "learning_rate": 1.4888143259359979e-05, + "loss": 2.2079, + "step": 6587 + }, + { + "epoch": 1.34, + "learning_rate": 1.4886636279365067e-05, + "loss": 2.1715, + "step": 6588 + }, + { + "epoch": 1.34, + "learning_rate": 1.4885129153568063e-05, + "loss": 2.1276, + "step": 6589 + }, + { + "epoch": 1.34, + "learning_rate": 1.488362188201394e-05, + "loss": 2.222, + "step": 6590 + }, + { + "epoch": 1.34, + "learning_rate": 1.4882114464747669e-05, + "loss": 2.1425, + "step": 6591 + }, + { + "epoch": 1.34, + "learning_rate": 1.4880606901814221e-05, + "loss": 2.2003, + "step": 6592 + }, + { + "epoch": 1.34, + "learning_rate": 1.4879099193258587e-05, + "loss": 2.1583, + "step": 6593 + }, + { + "epoch": 1.34, + "learning_rate": 1.4877591339125743e-05, + "loss": 2.1307, + "step": 6594 + }, + { + "epoch": 1.34, + "learning_rate": 1.4876083339460685e-05, + "loss": 2.135, + "step": 6595 + }, + { + "epoch": 1.34, + "learning_rate": 1.4874575194308408e-05, + "loss": 2.2943, + "step": 6596 + }, + { + "epoch": 1.34, + "learning_rate": 1.4873066903713902e-05, + "loss": 2.1747, + "step": 6597 + }, + { + "epoch": 1.34, + "learning_rate": 1.4871558467722178e-05, + "loss": 2.0972, + "step": 6598 + }, + { + "epoch": 1.34, + "learning_rate": 1.487004988637824e-05, + "loss": 2.1543, + "step": 6599 + }, + { + "epoch": 1.34, + "learning_rate": 1.4868541159727099e-05, + "loss": 2.062, + "step": 6600 + }, + { + "epoch": 1.34, + "learning_rate": 1.4867032287813772e-05, + "loss": 2.1253, + "step": 6601 + }, + { + "epoch": 1.34, + "learning_rate": 1.4865523270683278e-05, + "loss": 2.1465, + "step": 6602 + }, + { + "epoch": 1.34, + "learning_rate": 1.486401410838064e-05, + "loss": 2.1254, + "step": 6603 + }, + { + "epoch": 1.34, + "learning_rate": 1.486250480095089e-05, + "loss": 2.1056, + "step": 6604 + }, + { + "epoch": 1.34, + "learning_rate": 1.486099534843906e-05, + "loss": 2.1033, + "step": 6605 + }, + { + "epoch": 1.34, + "learning_rate": 1.4859485750890184e-05, + "loss": 2.2006, + "step": 6606 + }, + { + "epoch": 1.34, + "learning_rate": 1.485797600834931e-05, + "loss": 2.1743, + "step": 6607 + }, + { + "epoch": 1.34, + "learning_rate": 1.4856466120861479e-05, + "loss": 2.1275, + "step": 6608 + }, + { + "epoch": 1.34, + "learning_rate": 1.4854956088471743e-05, + "loss": 2.0713, + "step": 6609 + }, + { + "epoch": 1.34, + "learning_rate": 1.4853445911225157e-05, + "loss": 2.1173, + "step": 6610 + }, + { + "epoch": 1.34, + "learning_rate": 1.4851935589166782e-05, + "loss": 2.0971, + "step": 6611 + }, + { + "epoch": 1.34, + "learning_rate": 1.4850425122341676e-05, + "loss": 2.2063, + "step": 6612 + }, + { + "epoch": 1.34, + "learning_rate": 1.4848914510794912e-05, + "loss": 2.139, + "step": 6613 + }, + { + "epoch": 1.34, + "learning_rate": 1.4847403754571557e-05, + "loss": 2.1173, + "step": 6614 + }, + { + "epoch": 1.34, + "learning_rate": 1.4845892853716692e-05, + "loss": 2.0591, + "step": 6615 + }, + { + "epoch": 1.34, + "learning_rate": 1.4844381808275392e-05, + "loss": 2.0779, + "step": 6616 + }, + { + "epoch": 1.34, + "learning_rate": 1.4842870618292751e-05, + "loss": 2.0739, + "step": 6617 + }, + { + "epoch": 1.34, + "learning_rate": 1.484135928381385e-05, + "loss": 2.1875, + "step": 6618 + }, + { + "epoch": 1.34, + "learning_rate": 1.4839847804883785e-05, + "loss": 2.1054, + "step": 6619 + }, + { + "epoch": 1.34, + "learning_rate": 1.4838336181547655e-05, + "loss": 2.0981, + "step": 6620 + }, + { + "epoch": 1.34, + "learning_rate": 1.483682441385056e-05, + "loss": 2.1331, + "step": 6621 + }, + { + "epoch": 1.34, + "learning_rate": 1.4835312501837608e-05, + "loss": 2.1218, + "step": 6622 + }, + { + "epoch": 1.34, + "learning_rate": 1.4833800445553911e-05, + "loss": 2.1987, + "step": 6623 + }, + { + "epoch": 1.34, + "learning_rate": 1.4832288245044581e-05, + "loss": 2.1113, + "step": 6624 + }, + { + "epoch": 1.34, + "learning_rate": 1.4830775900354739e-05, + "loss": 2.1123, + "step": 6625 + }, + { + "epoch": 1.34, + "learning_rate": 1.4829263411529508e-05, + "loss": 2.1235, + "step": 6626 + }, + { + "epoch": 1.35, + "learning_rate": 1.4827750778614019e-05, + "loss": 2.1159, + "step": 6627 + }, + { + "epoch": 1.35, + "learning_rate": 1.48262380016534e-05, + "loss": 2.1664, + "step": 6628 + }, + { + "epoch": 1.35, + "learning_rate": 1.482472508069279e-05, + "loss": 2.125, + "step": 6629 + }, + { + "epoch": 1.35, + "learning_rate": 1.4823212015777329e-05, + "loss": 2.196, + "step": 6630 + }, + { + "epoch": 1.35, + "learning_rate": 1.4821698806952163e-05, + "loss": 2.1376, + "step": 6631 + }, + { + "epoch": 1.35, + "learning_rate": 1.482018545426244e-05, + "loss": 2.1734, + "step": 6632 + }, + { + "epoch": 1.35, + "learning_rate": 1.4818671957753315e-05, + "loss": 2.1013, + "step": 6633 + }, + { + "epoch": 1.35, + "learning_rate": 1.4817158317469947e-05, + "loss": 2.2742, + "step": 6634 + }, + { + "epoch": 1.35, + "learning_rate": 1.4815644533457496e-05, + "loss": 2.1271, + "step": 6635 + }, + { + "epoch": 1.35, + "learning_rate": 1.4814130605761128e-05, + "loss": 2.0874, + "step": 6636 + }, + { + "epoch": 1.35, + "learning_rate": 1.4812616534426019e-05, + "loss": 2.0737, + "step": 6637 + }, + { + "epoch": 1.35, + "learning_rate": 1.481110231949734e-05, + "loss": 2.1849, + "step": 6638 + }, + { + "epoch": 1.35, + "learning_rate": 1.480958796102027e-05, + "loss": 2.1258, + "step": 6639 + }, + { + "epoch": 1.35, + "learning_rate": 1.4808073459039996e-05, + "loss": 2.1407, + "step": 6640 + }, + { + "epoch": 1.35, + "learning_rate": 1.4806558813601702e-05, + "loss": 2.144, + "step": 6641 + }, + { + "epoch": 1.35, + "learning_rate": 1.4805044024750584e-05, + "loss": 2.1417, + "step": 6642 + }, + { + "epoch": 1.35, + "learning_rate": 1.4803529092531838e-05, + "loss": 2.0923, + "step": 6643 + }, + { + "epoch": 1.35, + "learning_rate": 1.4802014016990661e-05, + "loss": 2.1562, + "step": 6644 + }, + { + "epoch": 1.35, + "learning_rate": 1.4800498798172263e-05, + "loss": 2.1427, + "step": 6645 + }, + { + "epoch": 1.35, + "learning_rate": 1.479898343612185e-05, + "loss": 2.1817, + "step": 6646 + }, + { + "epoch": 1.35, + "learning_rate": 1.4797467930884638e-05, + "loss": 2.0948, + "step": 6647 + }, + { + "epoch": 1.35, + "learning_rate": 1.4795952282505846e-05, + "loss": 2.1393, + "step": 6648 + }, + { + "epoch": 1.35, + "learning_rate": 1.4794436491030692e-05, + "loss": 2.1254, + "step": 6649 + }, + { + "epoch": 1.35, + "learning_rate": 1.4792920556504407e-05, + "loss": 2.1307, + "step": 6650 + }, + { + "epoch": 1.35, + "learning_rate": 1.4791404478972218e-05, + "loss": 2.2172, + "step": 6651 + }, + { + "epoch": 1.35, + "learning_rate": 1.4789888258479364e-05, + "loss": 2.1418, + "step": 6652 + }, + { + "epoch": 1.35, + "learning_rate": 1.478837189507108e-05, + "loss": 2.1837, + "step": 6653 + }, + { + "epoch": 1.35, + "learning_rate": 1.4786855388792613e-05, + "loss": 2.128, + "step": 6654 + }, + { + "epoch": 1.35, + "learning_rate": 1.478533873968921e-05, + "loss": 2.1609, + "step": 6655 + }, + { + "epoch": 1.35, + "learning_rate": 1.478382194780612e-05, + "loss": 2.1302, + "step": 6656 + }, + { + "epoch": 1.35, + "learning_rate": 1.4782305013188607e-05, + "loss": 2.0989, + "step": 6657 + }, + { + "epoch": 1.35, + "learning_rate": 1.4780787935881921e-05, + "loss": 2.1285, + "step": 6658 + }, + { + "epoch": 1.35, + "learning_rate": 1.4779270715931338e-05, + "loss": 2.2302, + "step": 6659 + }, + { + "epoch": 1.35, + "learning_rate": 1.4777753353382121e-05, + "loss": 2.1123, + "step": 6660 + }, + { + "epoch": 1.35, + "learning_rate": 1.4776235848279541e-05, + "loss": 2.1068, + "step": 6661 + }, + { + "epoch": 1.35, + "learning_rate": 1.477471820066888e-05, + "loss": 2.174, + "step": 6662 + }, + { + "epoch": 1.35, + "learning_rate": 1.4773200410595419e-05, + "loss": 2.2427, + "step": 6663 + }, + { + "epoch": 1.35, + "learning_rate": 1.4771682478104446e-05, + "loss": 2.1689, + "step": 6664 + }, + { + "epoch": 1.35, + "learning_rate": 1.4770164403241247e-05, + "loss": 2.1599, + "step": 6665 + }, + { + "epoch": 1.35, + "learning_rate": 1.4768646186051116e-05, + "loss": 2.2178, + "step": 6666 + }, + { + "epoch": 1.35, + "learning_rate": 1.476712782657936e-05, + "loss": 2.0907, + "step": 6667 + }, + { + "epoch": 1.35, + "learning_rate": 1.4765609324871275e-05, + "loss": 2.1788, + "step": 6668 + }, + { + "epoch": 1.35, + "learning_rate": 1.4764090680972173e-05, + "loss": 2.0399, + "step": 6669 + }, + { + "epoch": 1.35, + "learning_rate": 1.4762571894927359e-05, + "loss": 2.1518, + "step": 6670 + }, + { + "epoch": 1.35, + "learning_rate": 1.4761052966782154e-05, + "loss": 2.1144, + "step": 6671 + }, + { + "epoch": 1.35, + "learning_rate": 1.4759533896581874e-05, + "loss": 2.0845, + "step": 6672 + }, + { + "epoch": 1.35, + "learning_rate": 1.475801468437185e-05, + "loss": 2.1514, + "step": 6673 + }, + { + "epoch": 1.35, + "learning_rate": 1.4756495330197407e-05, + "loss": 2.0571, + "step": 6674 + }, + { + "epoch": 1.35, + "learning_rate": 1.4754975834103877e-05, + "loss": 2.1715, + "step": 6675 + }, + { + "epoch": 1.36, + "learning_rate": 1.4753456196136598e-05, + "loss": 2.1717, + "step": 6676 + }, + { + "epoch": 1.36, + "learning_rate": 1.4751936416340912e-05, + "loss": 2.1222, + "step": 6677 + }, + { + "epoch": 1.36, + "learning_rate": 1.475041649476216e-05, + "loss": 2.1599, + "step": 6678 + }, + { + "epoch": 1.36, + "learning_rate": 1.4748896431445701e-05, + "loss": 2.1738, + "step": 6679 + }, + { + "epoch": 1.36, + "learning_rate": 1.474737622643688e-05, + "loss": 2.1089, + "step": 6680 + }, + { + "epoch": 1.36, + "learning_rate": 1.4745855879781056e-05, + "loss": 2.1035, + "step": 6681 + }, + { + "epoch": 1.36, + "learning_rate": 1.4744335391523598e-05, + "loss": 2.0872, + "step": 6682 + }, + { + "epoch": 1.36, + "learning_rate": 1.4742814761709867e-05, + "loss": 2.1519, + "step": 6683 + }, + { + "epoch": 1.36, + "learning_rate": 1.4741293990385237e-05, + "loss": 2.1681, + "step": 6684 + }, + { + "epoch": 1.36, + "learning_rate": 1.473977307759508e-05, + "loss": 2.0475, + "step": 6685 + }, + { + "epoch": 1.36, + "learning_rate": 1.4738252023384778e-05, + "loss": 2.128, + "step": 6686 + }, + { + "epoch": 1.36, + "learning_rate": 1.4736730827799715e-05, + "loss": 2.0802, + "step": 6687 + }, + { + "epoch": 1.36, + "learning_rate": 1.4735209490885272e-05, + "loss": 2.1877, + "step": 6688 + }, + { + "epoch": 1.36, + "learning_rate": 1.473368801268685e-05, + "loss": 2.1397, + "step": 6689 + }, + { + "epoch": 1.36, + "learning_rate": 1.473216639324984e-05, + "loss": 2.1107, + "step": 6690 + }, + { + "epoch": 1.36, + "learning_rate": 1.4730644632619646e-05, + "loss": 2.0739, + "step": 6691 + }, + { + "epoch": 1.36, + "learning_rate": 1.472912273084167e-05, + "loss": 2.2257, + "step": 6692 + }, + { + "epoch": 1.36, + "learning_rate": 1.4727600687961321e-05, + "loss": 2.1289, + "step": 6693 + }, + { + "epoch": 1.36, + "learning_rate": 1.472607850402401e-05, + "loss": 2.0613, + "step": 6694 + }, + { + "epoch": 1.36, + "learning_rate": 1.472455617907516e-05, + "loss": 2.1038, + "step": 6695 + }, + { + "epoch": 1.36, + "learning_rate": 1.4723033713160186e-05, + "loss": 2.0853, + "step": 6696 + }, + { + "epoch": 1.36, + "learning_rate": 1.4721511106324517e-05, + "loss": 2.1378, + "step": 6697 + }, + { + "epoch": 1.36, + "learning_rate": 1.4719988358613584e-05, + "loss": 2.1056, + "step": 6698 + }, + { + "epoch": 1.36, + "learning_rate": 1.471846547007282e-05, + "loss": 2.0995, + "step": 6699 + }, + { + "epoch": 1.36, + "learning_rate": 1.4716942440747663e-05, + "loss": 2.1738, + "step": 6700 + }, + { + "epoch": 1.36, + "learning_rate": 1.4715419270683553e-05, + "loss": 2.1288, + "step": 6701 + }, + { + "epoch": 1.36, + "learning_rate": 1.4713895959925944e-05, + "loss": 2.1157, + "step": 6702 + }, + { + "epoch": 1.36, + "learning_rate": 1.4712372508520277e-05, + "loss": 2.0691, + "step": 6703 + }, + { + "epoch": 1.36, + "learning_rate": 1.4710848916512015e-05, + "loss": 2.1094, + "step": 6704 + }, + { + "epoch": 1.36, + "learning_rate": 1.4709325183946613e-05, + "loss": 2.1808, + "step": 6705 + }, + { + "epoch": 1.36, + "learning_rate": 1.4707801310869536e-05, + "loss": 2.1959, + "step": 6706 + }, + { + "epoch": 1.36, + "learning_rate": 1.4706277297326253e-05, + "loss": 2.1265, + "step": 6707 + }, + { + "epoch": 1.36, + "learning_rate": 1.4704753143362229e-05, + "loss": 2.1575, + "step": 6708 + }, + { + "epoch": 1.36, + "learning_rate": 1.470322884902295e-05, + "loss": 2.1252, + "step": 6709 + }, + { + "epoch": 1.36, + "learning_rate": 1.470170441435389e-05, + "loss": 2.1164, + "step": 6710 + }, + { + "epoch": 1.36, + "learning_rate": 1.4700179839400536e-05, + "loss": 2.1762, + "step": 6711 + }, + { + "epoch": 1.36, + "learning_rate": 1.4698655124208377e-05, + "loss": 2.1012, + "step": 6712 + }, + { + "epoch": 1.36, + "learning_rate": 1.4697130268822901e-05, + "loss": 2.1533, + "step": 6713 + }, + { + "epoch": 1.36, + "learning_rate": 1.4695605273289608e-05, + "loss": 2.1492, + "step": 6714 + }, + { + "epoch": 1.36, + "learning_rate": 1.4694080137654002e-05, + "loss": 2.1486, + "step": 6715 + }, + { + "epoch": 1.36, + "learning_rate": 1.4692554861961583e-05, + "loss": 2.0811, + "step": 6716 + }, + { + "epoch": 1.36, + "learning_rate": 1.4691029446257865e-05, + "loss": 2.0551, + "step": 6717 + }, + { + "epoch": 1.36, + "learning_rate": 1.468950389058836e-05, + "loss": 2.0989, + "step": 6718 + }, + { + "epoch": 1.36, + "learning_rate": 1.4687978194998586e-05, + "loss": 2.1201, + "step": 6719 + }, + { + "epoch": 1.36, + "learning_rate": 1.4686452359534062e-05, + "loss": 2.1723, + "step": 6720 + }, + { + "epoch": 1.36, + "learning_rate": 1.4684926384240322e-05, + "loss": 2.0429, + "step": 6721 + }, + { + "epoch": 1.36, + "learning_rate": 1.4683400269162889e-05, + "loss": 2.1556, + "step": 6722 + }, + { + "epoch": 1.36, + "learning_rate": 1.46818740143473e-05, + "loss": 2.1797, + "step": 6723 + }, + { + "epoch": 1.36, + "learning_rate": 1.4680347619839093e-05, + "loss": 2.2343, + "step": 6724 + }, + { + "epoch": 1.37, + "learning_rate": 1.467882108568381e-05, + "loss": 2.0935, + "step": 6725 + }, + { + "epoch": 1.37, + "learning_rate": 1.4677294411927004e-05, + "loss": 2.0913, + "step": 6726 + }, + { + "epoch": 1.37, + "learning_rate": 1.4675767598614219e-05, + "loss": 2.1165, + "step": 6727 + }, + { + "epoch": 1.37, + "learning_rate": 1.4674240645791011e-05, + "loss": 2.1027, + "step": 6728 + }, + { + "epoch": 1.37, + "learning_rate": 1.4672713553502946e-05, + "loss": 2.1405, + "step": 6729 + }, + { + "epoch": 1.37, + "learning_rate": 1.467118632179558e-05, + "loss": 2.1101, + "step": 6730 + }, + { + "epoch": 1.37, + "learning_rate": 1.4669658950714487e-05, + "loss": 2.1634, + "step": 6731 + }, + { + "epoch": 1.37, + "learning_rate": 1.4668131440305235e-05, + "loss": 2.2113, + "step": 6732 + }, + { + "epoch": 1.37, + "learning_rate": 1.46666037906134e-05, + "loss": 2.0661, + "step": 6733 + }, + { + "epoch": 1.37, + "learning_rate": 1.4665076001684566e-05, + "loss": 2.0646, + "step": 6734 + }, + { + "epoch": 1.37, + "learning_rate": 1.4663548073564316e-05, + "loss": 2.1638, + "step": 6735 + }, + { + "epoch": 1.37, + "learning_rate": 1.4662020006298235e-05, + "loss": 2.0504, + "step": 6736 + }, + { + "epoch": 1.37, + "learning_rate": 1.466049179993192e-05, + "loss": 2.1707, + "step": 6737 + }, + { + "epoch": 1.37, + "learning_rate": 1.4658963454510968e-05, + "loss": 2.0893, + "step": 6738 + }, + { + "epoch": 1.37, + "learning_rate": 1.4657434970080977e-05, + "loss": 2.1111, + "step": 6739 + }, + { + "epoch": 1.37, + "learning_rate": 1.4655906346687552e-05, + "loss": 2.1878, + "step": 6740 + }, + { + "epoch": 1.37, + "learning_rate": 1.465437758437631e-05, + "loss": 2.097, + "step": 6741 + }, + { + "epoch": 1.37, + "learning_rate": 1.4652848683192854e-05, + "loss": 2.0961, + "step": 6742 + }, + { + "epoch": 1.37, + "learning_rate": 1.4651319643182808e-05, + "loss": 2.1215, + "step": 6743 + }, + { + "epoch": 1.37, + "learning_rate": 1.4649790464391796e-05, + "loss": 2.0146, + "step": 6744 + }, + { + "epoch": 1.37, + "learning_rate": 1.4648261146865435e-05, + "loss": 2.1264, + "step": 6745 + }, + { + "epoch": 1.37, + "learning_rate": 1.4646731690649364e-05, + "loss": 2.1473, + "step": 6746 + }, + { + "epoch": 1.37, + "learning_rate": 1.4645202095789213e-05, + "loss": 2.1665, + "step": 6747 + }, + { + "epoch": 1.37, + "learning_rate": 1.4643672362330619e-05, + "loss": 2.1177, + "step": 6748 + }, + { + "epoch": 1.37, + "learning_rate": 1.464214249031923e-05, + "loss": 2.0916, + "step": 6749 + }, + { + "epoch": 1.37, + "learning_rate": 1.4640612479800686e-05, + "loss": 2.1368, + "step": 6750 + }, + { + "epoch": 1.37, + "learning_rate": 1.4639082330820645e-05, + "loss": 2.122, + "step": 6751 + }, + { + "epoch": 1.37, + "learning_rate": 1.4637552043424752e-05, + "loss": 2.1311, + "step": 6752 + }, + { + "epoch": 1.37, + "learning_rate": 1.4636021617658677e-05, + "loss": 2.1567, + "step": 6753 + }, + { + "epoch": 1.37, + "learning_rate": 1.4634491053568077e-05, + "loss": 2.0006, + "step": 6754 + }, + { + "epoch": 1.37, + "learning_rate": 1.463296035119862e-05, + "loss": 2.1724, + "step": 6755 + }, + { + "epoch": 1.37, + "learning_rate": 1.4631429510595978e-05, + "loss": 2.185, + "step": 6756 + }, + { + "epoch": 1.37, + "learning_rate": 1.4629898531805824e-05, + "loss": 2.1175, + "step": 6757 + }, + { + "epoch": 1.37, + "learning_rate": 1.4628367414873843e-05, + "loss": 2.0968, + "step": 6758 + }, + { + "epoch": 1.37, + "learning_rate": 1.4626836159845714e-05, + "loss": 2.0698, + "step": 6759 + }, + { + "epoch": 1.37, + "learning_rate": 1.462530476676713e-05, + "loss": 2.0869, + "step": 6760 + }, + { + "epoch": 1.37, + "learning_rate": 1.4623773235683775e-05, + "loss": 2.1451, + "step": 6761 + }, + { + "epoch": 1.37, + "learning_rate": 1.4622241566641353e-05, + "loss": 2.1298, + "step": 6762 + }, + { + "epoch": 1.37, + "learning_rate": 1.4620709759685558e-05, + "loss": 2.1384, + "step": 6763 + }, + { + "epoch": 1.37, + "learning_rate": 1.4619177814862102e-05, + "loss": 2.1694, + "step": 6764 + }, + { + "epoch": 1.37, + "learning_rate": 1.4617645732216686e-05, + "loss": 2.0836, + "step": 6765 + }, + { + "epoch": 1.37, + "learning_rate": 1.4616113511795028e-05, + "loss": 2.238, + "step": 6766 + }, + { + "epoch": 1.37, + "learning_rate": 1.4614581153642839e-05, + "loss": 2.1686, + "step": 6767 + }, + { + "epoch": 1.37, + "learning_rate": 1.4613048657805846e-05, + "loss": 2.0953, + "step": 6768 + }, + { + "epoch": 1.37, + "learning_rate": 1.461151602432977e-05, + "loss": 2.1604, + "step": 6769 + }, + { + "epoch": 1.37, + "learning_rate": 1.460998325326034e-05, + "loss": 2.1488, + "step": 6770 + }, + { + "epoch": 1.37, + "learning_rate": 1.4608450344643292e-05, + "loss": 2.1823, + "step": 6771 + }, + { + "epoch": 1.37, + "learning_rate": 1.460691729852436e-05, + "loss": 2.0605, + "step": 6772 + }, + { + "epoch": 1.37, + "learning_rate": 1.460538411494929e-05, + "loss": 2.1914, + "step": 6773 + }, + { + "epoch": 1.38, + "learning_rate": 1.4603850793963823e-05, + "loss": 2.0935, + "step": 6774 + }, + { + "epoch": 1.38, + "learning_rate": 1.4602317335613707e-05, + "loss": 2.1682, + "step": 6775 + }, + { + "epoch": 1.38, + "learning_rate": 1.4600783739944702e-05, + "loss": 2.1524, + "step": 6776 + }, + { + "epoch": 1.38, + "learning_rate": 1.459925000700256e-05, + "loss": 2.1301, + "step": 6777 + }, + { + "epoch": 1.38, + "learning_rate": 1.4597716136833044e-05, + "loss": 2.1125, + "step": 6778 + }, + { + "epoch": 1.38, + "learning_rate": 1.4596182129481924e-05, + "loss": 2.1818, + "step": 6779 + }, + { + "epoch": 1.38, + "learning_rate": 1.4594647984994966e-05, + "loss": 2.173, + "step": 6780 + }, + { + "epoch": 1.38, + "learning_rate": 1.4593113703417943e-05, + "loss": 2.1036, + "step": 6781 + }, + { + "epoch": 1.38, + "learning_rate": 1.4591579284796638e-05, + "loss": 2.1669, + "step": 6782 + }, + { + "epoch": 1.38, + "learning_rate": 1.4590044729176828e-05, + "loss": 2.1457, + "step": 6783 + }, + { + "epoch": 1.38, + "learning_rate": 1.4588510036604304e-05, + "loss": 2.173, + "step": 6784 + }, + { + "epoch": 1.38, + "learning_rate": 1.4586975207124854e-05, + "loss": 2.2281, + "step": 6785 + }, + { + "epoch": 1.38, + "learning_rate": 1.4585440240784274e-05, + "loss": 2.1908, + "step": 6786 + }, + { + "epoch": 1.38, + "learning_rate": 1.4583905137628361e-05, + "loss": 2.1007, + "step": 6787 + }, + { + "epoch": 1.38, + "learning_rate": 1.4582369897702919e-05, + "loss": 2.1311, + "step": 6788 + }, + { + "epoch": 1.38, + "learning_rate": 1.4580834521053755e-05, + "loss": 2.1473, + "step": 6789 + }, + { + "epoch": 1.38, + "learning_rate": 1.4579299007726677e-05, + "loss": 2.1215, + "step": 6790 + }, + { + "epoch": 1.38, + "learning_rate": 1.4577763357767505e-05, + "loss": 2.1228, + "step": 6791 + }, + { + "epoch": 1.38, + "learning_rate": 1.457622757122205e-05, + "loss": 2.0669, + "step": 6792 + }, + { + "epoch": 1.38, + "learning_rate": 1.4574691648136145e-05, + "loss": 2.0842, + "step": 6793 + }, + { + "epoch": 1.38, + "learning_rate": 1.457315558855561e-05, + "loss": 2.1778, + "step": 6794 + }, + { + "epoch": 1.38, + "learning_rate": 1.4571619392526279e-05, + "loss": 2.1689, + "step": 6795 + }, + { + "epoch": 1.38, + "learning_rate": 1.4570083060093989e-05, + "loss": 2.1119, + "step": 6796 + }, + { + "epoch": 1.38, + "learning_rate": 1.4568546591304575e-05, + "loss": 2.1405, + "step": 6797 + }, + { + "epoch": 1.38, + "learning_rate": 1.4567009986203883e-05, + "loss": 2.1737, + "step": 6798 + }, + { + "epoch": 1.38, + "learning_rate": 1.4565473244837763e-05, + "loss": 2.1094, + "step": 6799 + }, + { + "epoch": 1.38, + "learning_rate": 1.4563936367252063e-05, + "loss": 2.1459, + "step": 6800 + }, + { + "epoch": 1.38, + "learning_rate": 1.4562399353492636e-05, + "loss": 2.1511, + "step": 6801 + }, + { + "epoch": 1.38, + "learning_rate": 1.456086220360535e-05, + "loss": 2.1769, + "step": 6802 + }, + { + "epoch": 1.38, + "learning_rate": 1.4559324917636063e-05, + "loss": 2.1038, + "step": 6803 + }, + { + "epoch": 1.38, + "learning_rate": 1.4557787495630642e-05, + "loss": 2.203, + "step": 6804 + }, + { + "epoch": 1.38, + "learning_rate": 1.4556249937634963e-05, + "loss": 2.0944, + "step": 6805 + }, + { + "epoch": 1.38, + "learning_rate": 1.45547122436949e-05, + "loss": 2.1428, + "step": 6806 + }, + { + "epoch": 1.38, + "learning_rate": 1.4553174413856331e-05, + "loss": 2.1362, + "step": 6807 + }, + { + "epoch": 1.38, + "learning_rate": 1.4551636448165142e-05, + "loss": 2.1486, + "step": 6808 + }, + { + "epoch": 1.38, + "learning_rate": 1.4550098346667223e-05, + "loss": 2.1052, + "step": 6809 + }, + { + "epoch": 1.38, + "learning_rate": 1.4548560109408465e-05, + "loss": 2.1544, + "step": 6810 + }, + { + "epoch": 1.38, + "learning_rate": 1.4547021736434763e-05, + "loss": 2.1473, + "step": 6811 + }, + { + "epoch": 1.38, + "learning_rate": 1.4545483227792017e-05, + "loss": 2.0625, + "step": 6812 + }, + { + "epoch": 1.38, + "learning_rate": 1.4543944583526132e-05, + "loss": 2.1369, + "step": 6813 + }, + { + "epoch": 1.38, + "learning_rate": 1.4542405803683018e-05, + "loss": 2.1458, + "step": 6814 + }, + { + "epoch": 1.38, + "learning_rate": 1.4540866888308585e-05, + "loss": 2.2177, + "step": 6815 + }, + { + "epoch": 1.38, + "learning_rate": 1.4539327837448751e-05, + "loss": 2.1395, + "step": 6816 + }, + { + "epoch": 1.38, + "learning_rate": 1.4537788651149436e-05, + "loss": 2.1847, + "step": 6817 + }, + { + "epoch": 1.38, + "learning_rate": 1.4536249329456564e-05, + "loss": 2.1585, + "step": 6818 + }, + { + "epoch": 1.38, + "learning_rate": 1.4534709872416064e-05, + "loss": 2.1506, + "step": 6819 + }, + { + "epoch": 1.38, + "learning_rate": 1.453317028007387e-05, + "loss": 2.1909, + "step": 6820 + }, + { + "epoch": 1.38, + "learning_rate": 1.4531630552475915e-05, + "loss": 2.1578, + "step": 6821 + }, + { + "epoch": 1.38, + "learning_rate": 1.4530090689668143e-05, + "loss": 2.133, + "step": 6822 + }, + { + "epoch": 1.38, + "learning_rate": 1.4528550691696498e-05, + "loss": 2.1633, + "step": 6823 + }, + { + "epoch": 1.39, + "learning_rate": 1.4527010558606932e-05, + "loss": 2.2018, + "step": 6824 + }, + { + "epoch": 1.39, + "learning_rate": 1.452547029044539e-05, + "loss": 2.1035, + "step": 6825 + }, + { + "epoch": 1.39, + "learning_rate": 1.4523929887257835e-05, + "loss": 2.1434, + "step": 6826 + }, + { + "epoch": 1.39, + "learning_rate": 1.4522389349090225e-05, + "loss": 2.0418, + "step": 6827 + }, + { + "epoch": 1.39, + "learning_rate": 1.4520848675988526e-05, + "loss": 2.1319, + "step": 6828 + }, + { + "epoch": 1.39, + "learning_rate": 1.451930786799871e-05, + "loss": 2.1463, + "step": 6829 + }, + { + "epoch": 1.39, + "learning_rate": 1.4517766925166743e-05, + "loss": 2.1203, + "step": 6830 + }, + { + "epoch": 1.39, + "learning_rate": 1.4516225847538609e-05, + "loss": 2.1528, + "step": 6831 + }, + { + "epoch": 1.39, + "learning_rate": 1.4514684635160283e-05, + "loss": 2.163, + "step": 6832 + }, + { + "epoch": 1.39, + "learning_rate": 1.4513143288077756e-05, + "loss": 2.1053, + "step": 6833 + }, + { + "epoch": 1.39, + "learning_rate": 1.451160180633701e-05, + "loss": 2.0818, + "step": 6834 + }, + { + "epoch": 1.39, + "learning_rate": 1.4510060189984043e-05, + "loss": 2.0606, + "step": 6835 + }, + { + "epoch": 1.39, + "learning_rate": 1.4508518439064854e-05, + "loss": 2.1423, + "step": 6836 + }, + { + "epoch": 1.39, + "learning_rate": 1.4506976553625438e-05, + "loss": 2.1138, + "step": 6837 + }, + { + "epoch": 1.39, + "learning_rate": 1.4505434533711804e-05, + "loss": 2.1664, + "step": 6838 + }, + { + "epoch": 1.39, + "learning_rate": 1.450389237936996e-05, + "loss": 2.1434, + "step": 6839 + }, + { + "epoch": 1.39, + "learning_rate": 1.4502350090645919e-05, + "loss": 2.095, + "step": 6840 + }, + { + "epoch": 1.39, + "learning_rate": 1.4500807667585696e-05, + "loss": 2.1122, + "step": 6841 + }, + { + "epoch": 1.39, + "learning_rate": 1.4499265110235319e-05, + "loss": 2.1837, + "step": 6842 + }, + { + "epoch": 1.39, + "learning_rate": 1.4497722418640805e-05, + "loss": 2.1684, + "step": 6843 + }, + { + "epoch": 1.39, + "learning_rate": 1.449617959284819e-05, + "loss": 2.1515, + "step": 6844 + }, + { + "epoch": 1.39, + "learning_rate": 1.4494636632903501e-05, + "loss": 2.0999, + "step": 6845 + }, + { + "epoch": 1.39, + "learning_rate": 1.449309353885278e-05, + "loss": 2.2503, + "step": 6846 + }, + { + "epoch": 1.39, + "learning_rate": 1.4491550310742067e-05, + "loss": 2.1072, + "step": 6847 + }, + { + "epoch": 1.39, + "learning_rate": 1.4490006948617405e-05, + "loss": 2.1057, + "step": 6848 + }, + { + "epoch": 1.39, + "learning_rate": 1.4488463452524844e-05, + "loss": 2.0855, + "step": 6849 + }, + { + "epoch": 1.39, + "learning_rate": 1.448691982251044e-05, + "loss": 2.1379, + "step": 6850 + }, + { + "epoch": 1.39, + "learning_rate": 1.4485376058620245e-05, + "loss": 2.0868, + "step": 6851 + }, + { + "epoch": 1.39, + "learning_rate": 1.4483832160900325e-05, + "loss": 2.1169, + "step": 6852 + }, + { + "epoch": 1.39, + "learning_rate": 1.4482288129396742e-05, + "loss": 2.1415, + "step": 6853 + }, + { + "epoch": 1.39, + "learning_rate": 1.4480743964155565e-05, + "loss": 2.1051, + "step": 6854 + }, + { + "epoch": 1.39, + "learning_rate": 1.4479199665222869e-05, + "loss": 2.186, + "step": 6855 + }, + { + "epoch": 1.39, + "learning_rate": 1.4477655232644732e-05, + "loss": 2.17, + "step": 6856 + }, + { + "epoch": 1.39, + "learning_rate": 1.4476110666467232e-05, + "loss": 2.1966, + "step": 6857 + }, + { + "epoch": 1.39, + "learning_rate": 1.4474565966736458e-05, + "loss": 2.0787, + "step": 6858 + }, + { + "epoch": 1.39, + "learning_rate": 1.4473021133498492e-05, + "loss": 2.1195, + "step": 6859 + }, + { + "epoch": 1.39, + "learning_rate": 1.4471476166799437e-05, + "loss": 2.1572, + "step": 6860 + }, + { + "epoch": 1.39, + "learning_rate": 1.4469931066685381e-05, + "loss": 2.1168, + "step": 6861 + }, + { + "epoch": 1.39, + "learning_rate": 1.446838583320243e-05, + "loss": 2.107, + "step": 6862 + }, + { + "epoch": 1.39, + "learning_rate": 1.4466840466396688e-05, + "loss": 2.0838, + "step": 6863 + }, + { + "epoch": 1.39, + "learning_rate": 1.446529496631426e-05, + "loss": 2.0649, + "step": 6864 + }, + { + "epoch": 1.39, + "learning_rate": 1.4463749333001267e-05, + "loss": 2.1596, + "step": 6865 + }, + { + "epoch": 1.39, + "learning_rate": 1.446220356650382e-05, + "loss": 2.1348, + "step": 6866 + }, + { + "epoch": 1.39, + "learning_rate": 1.446065766686804e-05, + "loss": 2.1535, + "step": 6867 + }, + { + "epoch": 1.39, + "learning_rate": 1.4459111634140052e-05, + "loss": 2.1288, + "step": 6868 + }, + { + "epoch": 1.39, + "learning_rate": 1.4457565468365988e-05, + "loss": 2.2036, + "step": 6869 + }, + { + "epoch": 1.39, + "learning_rate": 1.445601916959198e-05, + "loss": 2.1191, + "step": 6870 + }, + { + "epoch": 1.39, + "learning_rate": 1.445447273786416e-05, + "loss": 2.0239, + "step": 6871 + }, + { + "epoch": 1.39, + "learning_rate": 1.4452926173228678e-05, + "loss": 2.1375, + "step": 6872 + }, + { + "epoch": 1.4, + "learning_rate": 1.445137947573167e-05, + "loss": 2.1622, + "step": 6873 + }, + { + "epoch": 1.4, + "learning_rate": 1.4449832645419286e-05, + "loss": 2.1272, + "step": 6874 + }, + { + "epoch": 1.4, + "learning_rate": 1.4448285682337683e-05, + "loss": 2.042, + "step": 6875 + }, + { + "epoch": 1.4, + "learning_rate": 1.4446738586533013e-05, + "loss": 2.1493, + "step": 6876 + }, + { + "epoch": 1.4, + "learning_rate": 1.4445191358051437e-05, + "loss": 2.1642, + "step": 6877 + }, + { + "epoch": 1.4, + "learning_rate": 1.4443643996939125e-05, + "loss": 2.2171, + "step": 6878 + }, + { + "epoch": 1.4, + "learning_rate": 1.444209650324224e-05, + "loss": 2.1103, + "step": 6879 + }, + { + "epoch": 1.4, + "learning_rate": 1.4440548877006954e-05, + "loss": 2.1828, + "step": 6880 + }, + { + "epoch": 1.4, + "learning_rate": 1.4439001118279445e-05, + "loss": 2.1494, + "step": 6881 + }, + { + "epoch": 1.4, + "learning_rate": 1.4437453227105898e-05, + "loss": 2.0691, + "step": 6882 + }, + { + "epoch": 1.4, + "learning_rate": 1.443590520353249e-05, + "loss": 2.1893, + "step": 6883 + }, + { + "epoch": 1.4, + "learning_rate": 1.4434357047605409e-05, + "loss": 2.0373, + "step": 6884 + }, + { + "epoch": 1.4, + "learning_rate": 1.4432808759370855e-05, + "loss": 2.218, + "step": 6885 + }, + { + "epoch": 1.4, + "learning_rate": 1.4431260338875015e-05, + "loss": 2.1835, + "step": 6886 + }, + { + "epoch": 1.4, + "learning_rate": 1.4429711786164098e-05, + "loss": 2.195, + "step": 6887 + }, + { + "epoch": 1.4, + "learning_rate": 1.4428163101284297e-05, + "loss": 2.0675, + "step": 6888 + }, + { + "epoch": 1.4, + "learning_rate": 1.4426614284281833e-05, + "loss": 2.1741, + "step": 6889 + }, + { + "epoch": 1.4, + "learning_rate": 1.4425065335202905e-05, + "loss": 2.1645, + "step": 6890 + }, + { + "epoch": 1.4, + "learning_rate": 1.4423516254093742e-05, + "loss": 2.0821, + "step": 6891 + }, + { + "epoch": 1.4, + "learning_rate": 1.442196704100055e-05, + "loss": 2.1136, + "step": 6892 + }, + { + "epoch": 1.4, + "learning_rate": 1.4420417695969564e-05, + "loss": 2.155, + "step": 6893 + }, + { + "epoch": 1.4, + "learning_rate": 1.4418868219047007e-05, + "loss": 2.1918, + "step": 6894 + }, + { + "epoch": 1.4, + "learning_rate": 1.441731861027911e-05, + "loss": 2.1123, + "step": 6895 + }, + { + "epoch": 1.4, + "learning_rate": 1.4415768869712108e-05, + "loss": 2.1507, + "step": 6896 + }, + { + "epoch": 1.4, + "learning_rate": 1.4414218997392243e-05, + "loss": 2.1122, + "step": 6897 + }, + { + "epoch": 1.4, + "learning_rate": 1.4412668993365757e-05, + "loss": 2.1213, + "step": 6898 + }, + { + "epoch": 1.4, + "learning_rate": 1.44111188576789e-05, + "loss": 2.0758, + "step": 6899 + }, + { + "epoch": 1.4, + "learning_rate": 1.440956859037792e-05, + "loss": 2.0704, + "step": 6900 + }, + { + "epoch": 1.4, + "learning_rate": 1.440801819150907e-05, + "loss": 2.1418, + "step": 6901 + }, + { + "epoch": 1.4, + "learning_rate": 1.4406467661118615e-05, + "loss": 2.1238, + "step": 6902 + }, + { + "epoch": 1.4, + "learning_rate": 1.4404916999252812e-05, + "loss": 2.1892, + "step": 6903 + }, + { + "epoch": 1.4, + "learning_rate": 1.4403366205957933e-05, + "loss": 2.1188, + "step": 6904 + }, + { + "epoch": 1.4, + "learning_rate": 1.4401815281280249e-05, + "loss": 2.0788, + "step": 6905 + }, + { + "epoch": 1.4, + "learning_rate": 1.440026422526603e-05, + "loss": 2.1636, + "step": 6906 + }, + { + "epoch": 1.4, + "learning_rate": 1.4398713037961557e-05, + "loss": 2.1595, + "step": 6907 + }, + { + "epoch": 1.4, + "learning_rate": 1.4397161719413114e-05, + "loss": 2.1783, + "step": 6908 + }, + { + "epoch": 1.4, + "learning_rate": 1.4395610269666987e-05, + "loss": 2.0713, + "step": 6909 + }, + { + "epoch": 1.4, + "learning_rate": 1.4394058688769463e-05, + "loss": 2.1454, + "step": 6910 + }, + { + "epoch": 1.4, + "learning_rate": 1.4392506976766844e-05, + "loss": 2.0232, + "step": 6911 + }, + { + "epoch": 1.4, + "learning_rate": 1.439095513370542e-05, + "loss": 2.1512, + "step": 6912 + }, + { + "epoch": 1.4, + "learning_rate": 1.4389403159631499e-05, + "loss": 2.1689, + "step": 6913 + }, + { + "epoch": 1.4, + "learning_rate": 1.4387851054591384e-05, + "loss": 2.1227, + "step": 6914 + }, + { + "epoch": 1.4, + "learning_rate": 1.4386298818631388e-05, + "loss": 2.0776, + "step": 6915 + }, + { + "epoch": 1.4, + "learning_rate": 1.438474645179782e-05, + "loss": 2.163, + "step": 6916 + }, + { + "epoch": 1.4, + "learning_rate": 1.4383193954137e-05, + "loss": 2.0901, + "step": 6917 + }, + { + "epoch": 1.4, + "learning_rate": 1.4381641325695253e-05, + "loss": 2.2023, + "step": 6918 + }, + { + "epoch": 1.4, + "learning_rate": 1.4380088566518902e-05, + "loss": 2.143, + "step": 6919 + }, + { + "epoch": 1.4, + "learning_rate": 1.4378535676654274e-05, + "loss": 2.0903, + "step": 6920 + }, + { + "epoch": 1.4, + "learning_rate": 1.4376982656147707e-05, + "loss": 2.1786, + "step": 6921 + }, + { + "epoch": 1.41, + "learning_rate": 1.4375429505045537e-05, + "loss": 2.1327, + "step": 6922 + }, + { + "epoch": 1.41, + "learning_rate": 1.43738762233941e-05, + "loss": 2.1268, + "step": 6923 + }, + { + "epoch": 1.41, + "learning_rate": 1.4372322811239752e-05, + "loss": 2.1976, + "step": 6924 + }, + { + "epoch": 1.41, + "learning_rate": 1.4370769268628832e-05, + "loss": 2.1601, + "step": 6925 + }, + { + "epoch": 1.41, + "learning_rate": 1.4369215595607697e-05, + "loss": 2.1689, + "step": 6926 + }, + { + "epoch": 1.41, + "learning_rate": 1.4367661792222705e-05, + "loss": 2.1505, + "step": 6927 + }, + { + "epoch": 1.41, + "learning_rate": 1.4366107858520214e-05, + "loss": 2.0925, + "step": 6928 + }, + { + "epoch": 1.41, + "learning_rate": 1.4364553794546589e-05, + "loss": 2.1253, + "step": 6929 + }, + { + "epoch": 1.41, + "learning_rate": 1.4362999600348198e-05, + "loss": 2.1412, + "step": 6930 + }, + { + "epoch": 1.41, + "learning_rate": 1.4361445275971416e-05, + "loss": 2.1949, + "step": 6931 + }, + { + "epoch": 1.41, + "learning_rate": 1.4359890821462617e-05, + "loss": 2.091, + "step": 6932 + }, + { + "epoch": 1.41, + "learning_rate": 1.435833623686818e-05, + "loss": 2.1303, + "step": 6933 + }, + { + "epoch": 1.41, + "learning_rate": 1.4356781522234493e-05, + "loss": 2.139, + "step": 6934 + }, + { + "epoch": 1.41, + "learning_rate": 1.435522667760794e-05, + "loss": 2.0394, + "step": 6935 + }, + { + "epoch": 1.41, + "learning_rate": 1.4353671703034915e-05, + "loss": 2.1347, + "step": 6936 + }, + { + "epoch": 1.41, + "learning_rate": 1.4352116598561814e-05, + "loss": 2.1022, + "step": 6937 + }, + { + "epoch": 1.41, + "learning_rate": 1.4350561364235034e-05, + "loss": 2.1121, + "step": 6938 + }, + { + "epoch": 1.41, + "learning_rate": 1.434900600010098e-05, + "loss": 2.1705, + "step": 6939 + }, + { + "epoch": 1.41, + "learning_rate": 1.434745050620606e-05, + "loss": 2.0976, + "step": 6940 + }, + { + "epoch": 1.41, + "learning_rate": 1.4345894882596682e-05, + "loss": 2.0867, + "step": 6941 + }, + { + "epoch": 1.41, + "learning_rate": 1.4344339129319267e-05, + "loss": 2.1452, + "step": 6942 + }, + { + "epoch": 1.41, + "learning_rate": 1.4342783246420227e-05, + "loss": 2.0493, + "step": 6943 + }, + { + "epoch": 1.41, + "learning_rate": 1.4341227233945988e-05, + "loss": 2.1377, + "step": 6944 + }, + { + "epoch": 1.41, + "learning_rate": 1.4339671091942977e-05, + "loss": 2.1538, + "step": 6945 + }, + { + "epoch": 1.41, + "learning_rate": 1.4338114820457625e-05, + "loss": 2.1037, + "step": 6946 + }, + { + "epoch": 1.41, + "learning_rate": 1.4336558419536363e-05, + "loss": 2.0801, + "step": 6947 + }, + { + "epoch": 1.41, + "learning_rate": 1.433500188922563e-05, + "loss": 2.1642, + "step": 6948 + }, + { + "epoch": 1.41, + "learning_rate": 1.4333445229571874e-05, + "loss": 2.1334, + "step": 6949 + }, + { + "epoch": 1.41, + "learning_rate": 1.4331888440621533e-05, + "loss": 2.1393, + "step": 6950 + }, + { + "epoch": 1.41, + "learning_rate": 1.4330331522421062e-05, + "loss": 2.0873, + "step": 6951 + }, + { + "epoch": 1.41, + "learning_rate": 1.4328774475016908e-05, + "loss": 2.1668, + "step": 6952 + }, + { + "epoch": 1.41, + "learning_rate": 1.4327217298455537e-05, + "loss": 2.154, + "step": 6953 + }, + { + "epoch": 1.41, + "learning_rate": 1.4325659992783404e-05, + "loss": 2.1401, + "step": 6954 + }, + { + "epoch": 1.41, + "learning_rate": 1.4324102558046979e-05, + "loss": 2.1362, + "step": 6955 + }, + { + "epoch": 1.41, + "learning_rate": 1.4322544994292728e-05, + "loss": 2.0946, + "step": 6956 + }, + { + "epoch": 1.41, + "learning_rate": 1.4320987301567124e-05, + "loss": 2.1895, + "step": 6957 + }, + { + "epoch": 1.41, + "learning_rate": 1.4319429479916645e-05, + "loss": 2.087, + "step": 6958 + }, + { + "epoch": 1.41, + "learning_rate": 1.431787152938777e-05, + "loss": 2.1807, + "step": 6959 + }, + { + "epoch": 1.41, + "learning_rate": 1.4316313450026985e-05, + "loss": 2.0684, + "step": 6960 + }, + { + "epoch": 1.41, + "learning_rate": 1.4314755241880777e-05, + "loss": 2.1541, + "step": 6961 + }, + { + "epoch": 1.41, + "learning_rate": 1.4313196904995638e-05, + "loss": 2.0773, + "step": 6962 + }, + { + "epoch": 1.41, + "learning_rate": 1.4311638439418065e-05, + "loss": 2.1814, + "step": 6963 + }, + { + "epoch": 1.41, + "learning_rate": 1.4310079845194558e-05, + "loss": 2.0804, + "step": 6964 + }, + { + "epoch": 1.41, + "learning_rate": 1.4308521122371617e-05, + "loss": 2.0937, + "step": 6965 + }, + { + "epoch": 1.41, + "learning_rate": 1.4306962270995757e-05, + "loss": 2.1692, + "step": 6966 + }, + { + "epoch": 1.41, + "learning_rate": 1.4305403291113483e-05, + "loss": 2.0864, + "step": 6967 + }, + { + "epoch": 1.41, + "learning_rate": 1.430384418277131e-05, + "loss": 2.1643, + "step": 6968 + }, + { + "epoch": 1.41, + "learning_rate": 1.430228494601576e-05, + "loss": 2.0894, + "step": 6969 + }, + { + "epoch": 1.41, + "learning_rate": 1.4300725580893354e-05, + "loss": 2.1257, + "step": 6970 + }, + { + "epoch": 1.42, + "learning_rate": 1.429916608745062e-05, + "loss": 2.0666, + "step": 6971 + }, + { + "epoch": 1.42, + "learning_rate": 1.4297606465734086e-05, + "loss": 2.1882, + "step": 6972 + }, + { + "epoch": 1.42, + "learning_rate": 1.429604671579029e-05, + "loss": 2.1016, + "step": 6973 + }, + { + "epoch": 1.42, + "learning_rate": 1.4294486837665766e-05, + "loss": 2.1616, + "step": 6974 + }, + { + "epoch": 1.42, + "learning_rate": 1.429292683140706e-05, + "loss": 2.1289, + "step": 6975 + }, + { + "epoch": 1.42, + "learning_rate": 1.4291366697060713e-05, + "loss": 2.1721, + "step": 6976 + }, + { + "epoch": 1.42, + "learning_rate": 1.4289806434673281e-05, + "loss": 2.0982, + "step": 6977 + }, + { + "epoch": 1.42, + "learning_rate": 1.428824604429131e-05, + "loss": 2.0719, + "step": 6978 + }, + { + "epoch": 1.42, + "learning_rate": 1.4286685525961365e-05, + "loss": 2.1292, + "step": 6979 + }, + { + "epoch": 1.42, + "learning_rate": 1.4285124879729998e-05, + "loss": 2.1451, + "step": 6980 + }, + { + "epoch": 1.42, + "learning_rate": 1.4283564105643784e-05, + "loss": 2.1849, + "step": 6981 + }, + { + "epoch": 1.42, + "learning_rate": 1.4282003203749282e-05, + "loss": 2.2117, + "step": 6982 + }, + { + "epoch": 1.42, + "learning_rate": 1.4280442174093072e-05, + "loss": 2.0674, + "step": 6983 + }, + { + "epoch": 1.42, + "learning_rate": 1.4278881016721726e-05, + "loss": 2.1031, + "step": 6984 + }, + { + "epoch": 1.42, + "learning_rate": 1.4277319731681825e-05, + "loss": 2.0351, + "step": 6985 + }, + { + "epoch": 1.42, + "learning_rate": 1.4275758319019955e-05, + "loss": 2.186, + "step": 6986 + }, + { + "epoch": 1.42, + "learning_rate": 1.4274196778782699e-05, + "loss": 2.1344, + "step": 6987 + }, + { + "epoch": 1.42, + "learning_rate": 1.4272635111016655e-05, + "loss": 2.1212, + "step": 6988 + }, + { + "epoch": 1.42, + "learning_rate": 1.4271073315768413e-05, + "loss": 2.1409, + "step": 6989 + }, + { + "epoch": 1.42, + "learning_rate": 1.4269511393084572e-05, + "loss": 2.1371, + "step": 6990 + }, + { + "epoch": 1.42, + "learning_rate": 1.426794934301174e-05, + "loss": 2.0187, + "step": 6991 + }, + { + "epoch": 1.42, + "learning_rate": 1.4266387165596517e-05, + "loss": 2.137, + "step": 6992 + }, + { + "epoch": 1.42, + "learning_rate": 1.4264824860885522e-05, + "loss": 2.1459, + "step": 6993 + }, + { + "epoch": 1.42, + "learning_rate": 1.4263262428925362e-05, + "loss": 2.0764, + "step": 6994 + }, + { + "epoch": 1.42, + "learning_rate": 1.4261699869762655e-05, + "loss": 2.1452, + "step": 6995 + }, + { + "epoch": 1.42, + "learning_rate": 1.4260137183444026e-05, + "loss": 2.2558, + "step": 6996 + }, + { + "epoch": 1.42, + "learning_rate": 1.4258574370016106e-05, + "loss": 2.1172, + "step": 6997 + }, + { + "epoch": 1.42, + "learning_rate": 1.4257011429525514e-05, + "loss": 2.1231, + "step": 6998 + }, + { + "epoch": 1.42, + "learning_rate": 1.4255448362018889e-05, + "loss": 2.1923, + "step": 6999 + }, + { + "epoch": 1.42, + "learning_rate": 1.4253885167542864e-05, + "loss": 2.1817, + "step": 7000 + }, + { + "epoch": 1.42, + "learning_rate": 1.4252321846144088e-05, + "loss": 2.1564, + "step": 7001 + }, + { + "epoch": 1.42, + "learning_rate": 1.4250758397869198e-05, + "loss": 2.1263, + "step": 7002 + }, + { + "epoch": 1.42, + "learning_rate": 1.4249194822764846e-05, + "loss": 2.1417, + "step": 7003 + }, + { + "epoch": 1.42, + "learning_rate": 1.4247631120877685e-05, + "loss": 2.1175, + "step": 7004 + }, + { + "epoch": 1.42, + "learning_rate": 1.4246067292254367e-05, + "loss": 2.109, + "step": 7005 + }, + { + "epoch": 1.42, + "learning_rate": 1.4244503336941555e-05, + "loss": 2.091, + "step": 7006 + }, + { + "epoch": 1.42, + "learning_rate": 1.4242939254985913e-05, + "loss": 2.118, + "step": 7007 + }, + { + "epoch": 1.42, + "learning_rate": 1.4241375046434104e-05, + "loss": 2.1598, + "step": 7008 + }, + { + "epoch": 1.42, + "learning_rate": 1.4239810711332807e-05, + "loss": 2.1741, + "step": 7009 + }, + { + "epoch": 1.42, + "learning_rate": 1.4238246249728687e-05, + "loss": 2.1666, + "step": 7010 + }, + { + "epoch": 1.42, + "learning_rate": 1.4236681661668432e-05, + "loss": 2.0829, + "step": 7011 + }, + { + "epoch": 1.42, + "learning_rate": 1.4235116947198717e-05, + "loss": 2.128, + "step": 7012 + }, + { + "epoch": 1.42, + "learning_rate": 1.4233552106366233e-05, + "loss": 2.1032, + "step": 7013 + }, + { + "epoch": 1.42, + "learning_rate": 1.423198713921767e-05, + "loss": 2.0966, + "step": 7014 + }, + { + "epoch": 1.42, + "learning_rate": 1.4230422045799719e-05, + "loss": 2.2293, + "step": 7015 + }, + { + "epoch": 1.42, + "learning_rate": 1.4228856826159079e-05, + "loss": 2.1641, + "step": 7016 + }, + { + "epoch": 1.42, + "learning_rate": 1.4227291480342451e-05, + "loss": 2.2073, + "step": 7017 + }, + { + "epoch": 1.42, + "learning_rate": 1.422572600839654e-05, + "loss": 2.1067, + "step": 7018 + }, + { + "epoch": 1.42, + "learning_rate": 1.4224160410368055e-05, + "loss": 2.0811, + "step": 7019 + }, + { + "epoch": 1.42, + "learning_rate": 1.4222594686303711e-05, + "loss": 2.1345, + "step": 7020 + }, + { + "epoch": 1.43, + "learning_rate": 1.422102883625022e-05, + "loss": 2.1228, + "step": 7021 + }, + { + "epoch": 1.43, + "learning_rate": 1.4219462860254303e-05, + "loss": 2.1207, + "step": 7022 + }, + { + "epoch": 1.43, + "learning_rate": 1.4217896758362686e-05, + "loss": 2.072, + "step": 7023 + }, + { + "epoch": 1.43, + "learning_rate": 1.4216330530622096e-05, + "loss": 2.0466, + "step": 7024 + }, + { + "epoch": 1.43, + "learning_rate": 1.4214764177079266e-05, + "loss": 2.1156, + "step": 7025 + }, + { + "epoch": 1.43, + "learning_rate": 1.4213197697780927e-05, + "loss": 2.1279, + "step": 7026 + }, + { + "epoch": 1.43, + "learning_rate": 1.4211631092773819e-05, + "loss": 2.078, + "step": 7027 + }, + { + "epoch": 1.43, + "learning_rate": 1.4210064362104689e-05, + "loss": 2.1829, + "step": 7028 + }, + { + "epoch": 1.43, + "learning_rate": 1.4208497505820278e-05, + "loss": 2.1136, + "step": 7029 + }, + { + "epoch": 1.43, + "learning_rate": 1.4206930523967337e-05, + "loss": 2.0552, + "step": 7030 + }, + { + "epoch": 1.43, + "learning_rate": 1.4205363416592626e-05, + "loss": 2.1181, + "step": 7031 + }, + { + "epoch": 1.43, + "learning_rate": 1.4203796183742893e-05, + "loss": 2.1025, + "step": 7032 + }, + { + "epoch": 1.43, + "learning_rate": 1.4202228825464906e-05, + "loss": 2.0823, + "step": 7033 + }, + { + "epoch": 1.43, + "learning_rate": 1.4200661341805426e-05, + "loss": 2.1459, + "step": 7034 + }, + { + "epoch": 1.43, + "learning_rate": 1.4199093732811227e-05, + "loss": 2.1097, + "step": 7035 + }, + { + "epoch": 1.43, + "learning_rate": 1.419752599852908e-05, + "loss": 2.1545, + "step": 7036 + }, + { + "epoch": 1.43, + "learning_rate": 1.4195958139005756e-05, + "loss": 2.162, + "step": 7037 + }, + { + "epoch": 1.43, + "learning_rate": 1.4194390154288043e-05, + "loss": 2.1164, + "step": 7038 + }, + { + "epoch": 1.43, + "learning_rate": 1.4192822044422717e-05, + "loss": 2.1666, + "step": 7039 + }, + { + "epoch": 1.43, + "learning_rate": 1.419125380945657e-05, + "loss": 2.1864, + "step": 7040 + }, + { + "epoch": 1.43, + "learning_rate": 1.4189685449436395e-05, + "loss": 2.1382, + "step": 7041 + }, + { + "epoch": 1.43, + "learning_rate": 1.4188116964408984e-05, + "loss": 2.0979, + "step": 7042 + }, + { + "epoch": 1.43, + "learning_rate": 1.4186548354421135e-05, + "loss": 2.1435, + "step": 7043 + }, + { + "epoch": 1.43, + "learning_rate": 1.4184979619519655e-05, + "loss": 2.1293, + "step": 7044 + }, + { + "epoch": 1.43, + "learning_rate": 1.4183410759751343e-05, + "loss": 2.0952, + "step": 7045 + }, + { + "epoch": 1.43, + "learning_rate": 1.4181841775163017e-05, + "loss": 2.1171, + "step": 7046 + }, + { + "epoch": 1.43, + "learning_rate": 1.4180272665801484e-05, + "loss": 2.1793, + "step": 7047 + }, + { + "epoch": 1.43, + "learning_rate": 1.4178703431713566e-05, + "loss": 2.179, + "step": 7048 + }, + { + "epoch": 1.43, + "learning_rate": 1.417713407294608e-05, + "loss": 2.1134, + "step": 7049 + }, + { + "epoch": 1.43, + "learning_rate": 1.4175564589545853e-05, + "loss": 2.1218, + "step": 7050 + }, + { + "epoch": 1.43, + "learning_rate": 1.4173994981559718e-05, + "loss": 2.0571, + "step": 7051 + }, + { + "epoch": 1.43, + "learning_rate": 1.4172425249034498e-05, + "loss": 2.2032, + "step": 7052 + }, + { + "epoch": 1.43, + "learning_rate": 1.4170855392017035e-05, + "loss": 2.2226, + "step": 7053 + }, + { + "epoch": 1.43, + "learning_rate": 1.4169285410554165e-05, + "loss": 2.1547, + "step": 7054 + }, + { + "epoch": 1.43, + "learning_rate": 1.4167715304692738e-05, + "loss": 2.1763, + "step": 7055 + }, + { + "epoch": 1.43, + "learning_rate": 1.4166145074479596e-05, + "loss": 2.1722, + "step": 7056 + }, + { + "epoch": 1.43, + "learning_rate": 1.4164574719961588e-05, + "loss": 2.1179, + "step": 7057 + }, + { + "epoch": 1.43, + "learning_rate": 1.4163004241185573e-05, + "loss": 2.1961, + "step": 7058 + }, + { + "epoch": 1.43, + "learning_rate": 1.4161433638198407e-05, + "loss": 2.0682, + "step": 7059 + }, + { + "epoch": 1.43, + "learning_rate": 1.4159862911046953e-05, + "loss": 2.0904, + "step": 7060 + }, + { + "epoch": 1.43, + "learning_rate": 1.4158292059778074e-05, + "loss": 2.0978, + "step": 7061 + }, + { + "epoch": 1.43, + "learning_rate": 1.4156721084438643e-05, + "loss": 2.0744, + "step": 7062 + }, + { + "epoch": 1.43, + "learning_rate": 1.415514998507553e-05, + "loss": 2.092, + "step": 7063 + }, + { + "epoch": 1.43, + "learning_rate": 1.4153578761735614e-05, + "loss": 2.1982, + "step": 7064 + }, + { + "epoch": 1.43, + "learning_rate": 1.4152007414465771e-05, + "loss": 2.1619, + "step": 7065 + }, + { + "epoch": 1.43, + "learning_rate": 1.4150435943312895e-05, + "loss": 2.1068, + "step": 7066 + }, + { + "epoch": 1.43, + "learning_rate": 1.4148864348323862e-05, + "loss": 2.1777, + "step": 7067 + }, + { + "epoch": 1.43, + "learning_rate": 1.4147292629545573e-05, + "loss": 2.0839, + "step": 7068 + }, + { + "epoch": 1.43, + "learning_rate": 1.4145720787024916e-05, + "loss": 2.1214, + "step": 7069 + }, + { + "epoch": 1.44, + "learning_rate": 1.4144148820808794e-05, + "loss": 2.056, + "step": 7070 + }, + { + "epoch": 1.44, + "learning_rate": 1.4142576730944107e-05, + "loss": 2.16, + "step": 7071 + }, + { + "epoch": 1.44, + "learning_rate": 1.4141004517477766e-05, + "loss": 2.1854, + "step": 7072 + }, + { + "epoch": 1.44, + "learning_rate": 1.4139432180456678e-05, + "loss": 2.1781, + "step": 7073 + }, + { + "epoch": 1.44, + "learning_rate": 1.4137859719927754e-05, + "loss": 2.0953, + "step": 7074 + }, + { + "epoch": 1.44, + "learning_rate": 1.4136287135937915e-05, + "loss": 2.1075, + "step": 7075 + }, + { + "epoch": 1.44, + "learning_rate": 1.4134714428534082e-05, + "loss": 2.1029, + "step": 7076 + }, + { + "epoch": 1.44, + "learning_rate": 1.4133141597763178e-05, + "loss": 2.1137, + "step": 7077 + }, + { + "epoch": 1.44, + "learning_rate": 1.4131568643672134e-05, + "loss": 2.101, + "step": 7078 + }, + { + "epoch": 1.44, + "learning_rate": 1.4129995566307877e-05, + "loss": 2.1927, + "step": 7079 + }, + { + "epoch": 1.44, + "learning_rate": 1.4128422365717346e-05, + "loss": 2.1556, + "step": 7080 + }, + { + "epoch": 1.44, + "learning_rate": 1.4126849041947481e-05, + "loss": 2.1861, + "step": 7081 + }, + { + "epoch": 1.44, + "learning_rate": 1.4125275595045226e-05, + "loss": 2.1155, + "step": 7082 + }, + { + "epoch": 1.44, + "learning_rate": 1.4123702025057527e-05, + "loss": 2.1195, + "step": 7083 + }, + { + "epoch": 1.44, + "learning_rate": 1.4122128332031331e-05, + "loss": 2.0868, + "step": 7084 + }, + { + "epoch": 1.44, + "learning_rate": 1.4120554516013595e-05, + "loss": 2.1614, + "step": 7085 + }, + { + "epoch": 1.44, + "learning_rate": 1.411898057705128e-05, + "loss": 2.1811, + "step": 7086 + }, + { + "epoch": 1.44, + "learning_rate": 1.4117406515191339e-05, + "loss": 2.1262, + "step": 7087 + }, + { + "epoch": 1.44, + "learning_rate": 1.4115832330480747e-05, + "loss": 2.1581, + "step": 7088 + }, + { + "epoch": 1.44, + "learning_rate": 1.4114258022966465e-05, + "loss": 2.1609, + "step": 7089 + }, + { + "epoch": 1.44, + "learning_rate": 1.411268359269547e-05, + "loss": 2.1015, + "step": 7090 + }, + { + "epoch": 1.44, + "learning_rate": 1.4111109039714736e-05, + "loss": 2.0509, + "step": 7091 + }, + { + "epoch": 1.44, + "learning_rate": 1.4109534364071245e-05, + "loss": 2.1656, + "step": 7092 + }, + { + "epoch": 1.44, + "learning_rate": 1.4107959565811977e-05, + "loss": 2.1662, + "step": 7093 + }, + { + "epoch": 1.44, + "learning_rate": 1.410638464498392e-05, + "loss": 2.1469, + "step": 7094 + }, + { + "epoch": 1.44, + "learning_rate": 1.4104809601634069e-05, + "loss": 2.0513, + "step": 7095 + }, + { + "epoch": 1.44, + "learning_rate": 1.4103234435809409e-05, + "loss": 1.9734, + "step": 7096 + }, + { + "epoch": 1.44, + "learning_rate": 1.4101659147556951e-05, + "loss": 2.1254, + "step": 7097 + }, + { + "epoch": 1.44, + "learning_rate": 1.4100083736923688e-05, + "loss": 2.1476, + "step": 7098 + }, + { + "epoch": 1.44, + "learning_rate": 1.4098508203956627e-05, + "loss": 2.208, + "step": 7099 + }, + { + "epoch": 1.44, + "learning_rate": 1.4096932548702778e-05, + "loss": 2.1056, + "step": 7100 + }, + { + "epoch": 1.44, + "learning_rate": 1.409535677120915e-05, + "loss": 2.1203, + "step": 7101 + }, + { + "epoch": 1.44, + "learning_rate": 1.4093780871522764e-05, + "loss": 2.1016, + "step": 7102 + }, + { + "epoch": 1.44, + "learning_rate": 1.4092204849690636e-05, + "loss": 2.0894, + "step": 7103 + }, + { + "epoch": 1.44, + "learning_rate": 1.4090628705759797e-05, + "loss": 2.0908, + "step": 7104 + }, + { + "epoch": 1.44, + "learning_rate": 1.4089052439777264e-05, + "loss": 2.0782, + "step": 7105 + }, + { + "epoch": 1.44, + "learning_rate": 1.4087476051790076e-05, + "loss": 2.1445, + "step": 7106 + }, + { + "epoch": 1.44, + "learning_rate": 1.4085899541845263e-05, + "loss": 2.1261, + "step": 7107 + }, + { + "epoch": 1.44, + "learning_rate": 1.4084322909989867e-05, + "loss": 2.1439, + "step": 7108 + }, + { + "epoch": 1.44, + "learning_rate": 1.4082746156270925e-05, + "loss": 2.1287, + "step": 7109 + }, + { + "epoch": 1.44, + "learning_rate": 1.4081169280735488e-05, + "loss": 2.1149, + "step": 7110 + }, + { + "epoch": 1.44, + "learning_rate": 1.4079592283430598e-05, + "loss": 2.1681, + "step": 7111 + }, + { + "epoch": 1.44, + "learning_rate": 1.4078015164403316e-05, + "loss": 2.2214, + "step": 7112 + }, + { + "epoch": 1.44, + "learning_rate": 1.407643792370069e-05, + "loss": 2.1959, + "step": 7113 + }, + { + "epoch": 1.44, + "learning_rate": 1.4074860561369787e-05, + "loss": 2.1042, + "step": 7114 + }, + { + "epoch": 1.44, + "learning_rate": 1.4073283077457666e-05, + "loss": 2.1688, + "step": 7115 + }, + { + "epoch": 1.44, + "learning_rate": 1.4071705472011397e-05, + "loss": 2.1411, + "step": 7116 + }, + { + "epoch": 1.44, + "learning_rate": 1.4070127745078051e-05, + "loss": 2.1905, + "step": 7117 + }, + { + "epoch": 1.44, + "learning_rate": 1.4068549896704697e-05, + "loss": 2.1702, + "step": 7118 + }, + { + "epoch": 1.45, + "learning_rate": 1.4066971926938422e-05, + "loss": 2.1251, + "step": 7119 + }, + { + "epoch": 1.45, + "learning_rate": 1.4065393835826302e-05, + "loss": 2.1348, + "step": 7120 + }, + { + "epoch": 1.45, + "learning_rate": 1.4063815623415423e-05, + "loss": 2.1062, + "step": 7121 + }, + { + "epoch": 1.45, + "learning_rate": 1.4062237289752877e-05, + "loss": 2.1784, + "step": 7122 + }, + { + "epoch": 1.45, + "learning_rate": 1.4060658834885747e-05, + "loss": 2.0734, + "step": 7123 + }, + { + "epoch": 1.45, + "learning_rate": 1.4059080258861142e-05, + "loss": 2.1651, + "step": 7124 + }, + { + "epoch": 1.45, + "learning_rate": 1.4057501561726152e-05, + "loss": 2.1081, + "step": 7125 + }, + { + "epoch": 1.45, + "learning_rate": 1.405592274352789e-05, + "loss": 2.1069, + "step": 7126 + }, + { + "epoch": 1.45, + "learning_rate": 1.4054343804313453e-05, + "loss": 2.0689, + "step": 7127 + }, + { + "epoch": 1.45, + "learning_rate": 1.4052764744129959e-05, + "loss": 2.1927, + "step": 7128 + }, + { + "epoch": 1.45, + "learning_rate": 1.4051185563024517e-05, + "loss": 2.1347, + "step": 7129 + }, + { + "epoch": 1.45, + "learning_rate": 1.4049606261044248e-05, + "loss": 2.2162, + "step": 7130 + }, + { + "epoch": 1.45, + "learning_rate": 1.4048026838236272e-05, + "loss": 2.1152, + "step": 7131 + }, + { + "epoch": 1.45, + "learning_rate": 1.4046447294647717e-05, + "loss": 2.1345, + "step": 7132 + }, + { + "epoch": 1.45, + "learning_rate": 1.4044867630325708e-05, + "loss": 2.1072, + "step": 7133 + }, + { + "epoch": 1.45, + "learning_rate": 1.4043287845317381e-05, + "loss": 2.0228, + "step": 7134 + }, + { + "epoch": 1.45, + "learning_rate": 1.4041707939669868e-05, + "loss": 2.1423, + "step": 7135 + }, + { + "epoch": 1.45, + "learning_rate": 1.404012791343031e-05, + "loss": 2.1518, + "step": 7136 + }, + { + "epoch": 1.45, + "learning_rate": 1.4038547766645851e-05, + "loss": 2.0475, + "step": 7137 + }, + { + "epoch": 1.45, + "learning_rate": 1.4036967499363638e-05, + "loss": 2.089, + "step": 7138 + }, + { + "epoch": 1.45, + "learning_rate": 1.403538711163082e-05, + "loss": 2.1574, + "step": 7139 + }, + { + "epoch": 1.45, + "learning_rate": 1.403380660349455e-05, + "loss": 2.0684, + "step": 7140 + }, + { + "epoch": 1.45, + "learning_rate": 1.4032225975001988e-05, + "loss": 2.1604, + "step": 7141 + }, + { + "epoch": 1.45, + "learning_rate": 1.4030645226200293e-05, + "loss": 2.1529, + "step": 7142 + }, + { + "epoch": 1.45, + "learning_rate": 1.4029064357136628e-05, + "loss": 2.1148, + "step": 7143 + }, + { + "epoch": 1.45, + "learning_rate": 1.4027483367858165e-05, + "loss": 2.1597, + "step": 7144 + }, + { + "epoch": 1.45, + "learning_rate": 1.4025902258412076e-05, + "loss": 2.2058, + "step": 7145 + }, + { + "epoch": 1.45, + "learning_rate": 1.4024321028845534e-05, + "loss": 2.1665, + "step": 7146 + }, + { + "epoch": 1.45, + "learning_rate": 1.4022739679205719e-05, + "loss": 2.1033, + "step": 7147 + }, + { + "epoch": 1.45, + "learning_rate": 1.402115820953981e-05, + "loss": 2.0724, + "step": 7148 + }, + { + "epoch": 1.45, + "learning_rate": 1.4019576619895e-05, + "loss": 2.1374, + "step": 7149 + }, + { + "epoch": 1.45, + "learning_rate": 1.4017994910318476e-05, + "loss": 2.1417, + "step": 7150 + }, + { + "epoch": 1.45, + "learning_rate": 1.4016413080857427e-05, + "loss": 2.1197, + "step": 7151 + }, + { + "epoch": 1.45, + "learning_rate": 1.4014831131559057e-05, + "loss": 2.0526, + "step": 7152 + }, + { + "epoch": 1.45, + "learning_rate": 1.4013249062470563e-05, + "loss": 2.1123, + "step": 7153 + }, + { + "epoch": 1.45, + "learning_rate": 1.4011666873639147e-05, + "loss": 2.1655, + "step": 7154 + }, + { + "epoch": 1.45, + "learning_rate": 1.4010084565112018e-05, + "loss": 2.1614, + "step": 7155 + }, + { + "epoch": 1.45, + "learning_rate": 1.400850213693639e-05, + "loss": 2.1917, + "step": 7156 + }, + { + "epoch": 1.45, + "learning_rate": 1.4006919589159475e-05, + "loss": 2.1446, + "step": 7157 + }, + { + "epoch": 1.45, + "learning_rate": 1.4005336921828489e-05, + "loss": 2.168, + "step": 7158 + }, + { + "epoch": 1.45, + "learning_rate": 1.4003754134990662e-05, + "loss": 2.0987, + "step": 7159 + }, + { + "epoch": 1.45, + "learning_rate": 1.4002171228693209e-05, + "loss": 2.0878, + "step": 7160 + }, + { + "epoch": 1.45, + "learning_rate": 1.4000588202983367e-05, + "loss": 2.0454, + "step": 7161 + }, + { + "epoch": 1.45, + "learning_rate": 1.3999005057908368e-05, + "loss": 2.1412, + "step": 7162 + }, + { + "epoch": 1.45, + "learning_rate": 1.3997421793515442e-05, + "loss": 2.0986, + "step": 7163 + }, + { + "epoch": 1.45, + "learning_rate": 1.3995838409851835e-05, + "loss": 2.0885, + "step": 7164 + }, + { + "epoch": 1.45, + "learning_rate": 1.3994254906964785e-05, + "loss": 2.1088, + "step": 7165 + }, + { + "epoch": 1.45, + "learning_rate": 1.3992671284901544e-05, + "loss": 2.1038, + "step": 7166 + }, + { + "epoch": 1.45, + "learning_rate": 1.3991087543709361e-05, + "loss": 2.1277, + "step": 7167 + }, + { + "epoch": 1.45, + "learning_rate": 1.3989503683435486e-05, + "loss": 2.1984, + "step": 7168 + }, + { + "epoch": 1.46, + "learning_rate": 1.3987919704127182e-05, + "loss": 2.2055, + "step": 7169 + }, + { + "epoch": 1.46, + "learning_rate": 1.3986335605831707e-05, + "loss": 2.2048, + "step": 7170 + }, + { + "epoch": 1.46, + "learning_rate": 1.3984751388596327e-05, + "loss": 2.1638, + "step": 7171 + }, + { + "epoch": 1.46, + "learning_rate": 1.3983167052468307e-05, + "loss": 2.1651, + "step": 7172 + }, + { + "epoch": 1.46, + "learning_rate": 1.3981582597494923e-05, + "loss": 2.1152, + "step": 7173 + }, + { + "epoch": 1.46, + "learning_rate": 1.3979998023723448e-05, + "loss": 2.1745, + "step": 7174 + }, + { + "epoch": 1.46, + "learning_rate": 1.3978413331201157e-05, + "loss": 2.1762, + "step": 7175 + }, + { + "epoch": 1.46, + "learning_rate": 1.397682851997534e-05, + "loss": 2.1378, + "step": 7176 + }, + { + "epoch": 1.46, + "learning_rate": 1.3975243590093279e-05, + "loss": 2.1261, + "step": 7177 + }, + { + "epoch": 1.46, + "learning_rate": 1.3973658541602263e-05, + "loss": 2.0568, + "step": 7178 + }, + { + "epoch": 1.46, + "learning_rate": 1.3972073374549586e-05, + "loss": 2.0864, + "step": 7179 + }, + { + "epoch": 1.46, + "learning_rate": 1.3970488088982544e-05, + "loss": 2.1262, + "step": 7180 + }, + { + "epoch": 1.46, + "learning_rate": 1.3968902684948437e-05, + "loss": 2.1151, + "step": 7181 + }, + { + "epoch": 1.46, + "learning_rate": 1.396731716249457e-05, + "loss": 2.0907, + "step": 7182 + }, + { + "epoch": 1.46, + "learning_rate": 1.3965731521668245e-05, + "loss": 2.1635, + "step": 7183 + }, + { + "epoch": 1.46, + "learning_rate": 1.396414576251678e-05, + "loss": 2.1322, + "step": 7184 + }, + { + "epoch": 1.46, + "learning_rate": 1.3962559885087482e-05, + "loss": 2.131, + "step": 7185 + }, + { + "epoch": 1.46, + "learning_rate": 1.3960973889427673e-05, + "loss": 2.1709, + "step": 7186 + }, + { + "epoch": 1.46, + "learning_rate": 1.3959387775584675e-05, + "loss": 2.0184, + "step": 7187 + }, + { + "epoch": 1.46, + "learning_rate": 1.3957801543605806e-05, + "loss": 2.0757, + "step": 7188 + }, + { + "epoch": 1.46, + "learning_rate": 1.3956215193538405e-05, + "loss": 2.1346, + "step": 7189 + }, + { + "epoch": 1.46, + "learning_rate": 1.3954628725429796e-05, + "loss": 2.1636, + "step": 7190 + }, + { + "epoch": 1.46, + "learning_rate": 1.3953042139327314e-05, + "loss": 2.1093, + "step": 7191 + }, + { + "epoch": 1.46, + "learning_rate": 1.3951455435278302e-05, + "loss": 2.1386, + "step": 7192 + }, + { + "epoch": 1.46, + "learning_rate": 1.3949868613330102e-05, + "loss": 2.2479, + "step": 7193 + }, + { + "epoch": 1.46, + "learning_rate": 1.3948281673530055e-05, + "loss": 2.1245, + "step": 7194 + }, + { + "epoch": 1.46, + "learning_rate": 1.3946694615925515e-05, + "loss": 2.0949, + "step": 7195 + }, + { + "epoch": 1.46, + "learning_rate": 1.3945107440563832e-05, + "loss": 2.0834, + "step": 7196 + }, + { + "epoch": 1.46, + "learning_rate": 1.3943520147492366e-05, + "loss": 2.1433, + "step": 7197 + }, + { + "epoch": 1.46, + "learning_rate": 1.3941932736758475e-05, + "loss": 2.0796, + "step": 7198 + }, + { + "epoch": 1.46, + "learning_rate": 1.3940345208409523e-05, + "loss": 2.1721, + "step": 7199 + }, + { + "epoch": 1.46, + "learning_rate": 1.3938757562492873e-05, + "loss": 2.1431, + "step": 7200 + }, + { + "epoch": 1.46, + "learning_rate": 1.3937169799055901e-05, + "loss": 2.0896, + "step": 7201 + }, + { + "epoch": 1.46, + "learning_rate": 1.3935581918145977e-05, + "loss": 2.0891, + "step": 7202 + }, + { + "epoch": 1.46, + "learning_rate": 1.393399391981048e-05, + "loss": 2.129, + "step": 7203 + }, + { + "epoch": 1.46, + "learning_rate": 1.3932405804096794e-05, + "loss": 2.0868, + "step": 7204 + }, + { + "epoch": 1.46, + "learning_rate": 1.3930817571052295e-05, + "loss": 2.0367, + "step": 7205 + }, + { + "epoch": 1.46, + "learning_rate": 1.392922922072438e-05, + "loss": 2.1381, + "step": 7206 + }, + { + "epoch": 1.46, + "learning_rate": 1.3927640753160436e-05, + "loss": 2.1854, + "step": 7207 + }, + { + "epoch": 1.46, + "learning_rate": 1.392605216840786e-05, + "loss": 2.1238, + "step": 7208 + }, + { + "epoch": 1.46, + "learning_rate": 1.3924463466514049e-05, + "loss": 2.1099, + "step": 7209 + }, + { + "epoch": 1.46, + "learning_rate": 1.3922874647526402e-05, + "loss": 2.2004, + "step": 7210 + }, + { + "epoch": 1.46, + "learning_rate": 1.3921285711492332e-05, + "loss": 2.1204, + "step": 7211 + }, + { + "epoch": 1.46, + "learning_rate": 1.3919696658459238e-05, + "loss": 2.1937, + "step": 7212 + }, + { + "epoch": 1.46, + "learning_rate": 1.3918107488474543e-05, + "loss": 2.1339, + "step": 7213 + }, + { + "epoch": 1.46, + "learning_rate": 1.3916518201585656e-05, + "loss": 2.1602, + "step": 7214 + }, + { + "epoch": 1.46, + "learning_rate": 1.3914928797839997e-05, + "loss": 2.1472, + "step": 7215 + }, + { + "epoch": 1.46, + "learning_rate": 1.391333927728499e-05, + "loss": 2.0974, + "step": 7216 + }, + { + "epoch": 1.46, + "learning_rate": 1.3911749639968064e-05, + "loss": 2.0688, + "step": 7217 + }, + { + "epoch": 1.47, + "learning_rate": 1.3910159885936644e-05, + "loss": 2.171, + "step": 7218 + }, + { + "epoch": 1.47, + "learning_rate": 1.3908570015238167e-05, + "loss": 2.1707, + "step": 7219 + }, + { + "epoch": 1.47, + "learning_rate": 1.3906980027920067e-05, + "loss": 2.0718, + "step": 7220 + }, + { + "epoch": 1.47, + "learning_rate": 1.3905389924029788e-05, + "loss": 2.1322, + "step": 7221 + }, + { + "epoch": 1.47, + "learning_rate": 1.3903799703614768e-05, + "loss": 2.0773, + "step": 7222 + }, + { + "epoch": 1.47, + "learning_rate": 1.3902209366722464e-05, + "loss": 2.1266, + "step": 7223 + }, + { + "epoch": 1.47, + "learning_rate": 1.3900618913400316e-05, + "loss": 2.1435, + "step": 7224 + }, + { + "epoch": 1.47, + "learning_rate": 1.3899028343695782e-05, + "loss": 2.1117, + "step": 7225 + }, + { + "epoch": 1.47, + "learning_rate": 1.3897437657656322e-05, + "loss": 2.0557, + "step": 7226 + }, + { + "epoch": 1.47, + "learning_rate": 1.3895846855329395e-05, + "loss": 2.0775, + "step": 7227 + }, + { + "epoch": 1.47, + "learning_rate": 1.3894255936762468e-05, + "loss": 2.0904, + "step": 7228 + }, + { + "epoch": 1.47, + "learning_rate": 1.3892664902003005e-05, + "loss": 2.1891, + "step": 7229 + }, + { + "epoch": 1.47, + "learning_rate": 1.3891073751098481e-05, + "loss": 2.1369, + "step": 7230 + }, + { + "epoch": 1.47, + "learning_rate": 1.388948248409637e-05, + "loss": 2.0807, + "step": 7231 + }, + { + "epoch": 1.47, + "learning_rate": 1.388789110104415e-05, + "loss": 2.1603, + "step": 7232 + }, + { + "epoch": 1.47, + "learning_rate": 1.38862996019893e-05, + "loss": 2.1695, + "step": 7233 + }, + { + "epoch": 1.47, + "learning_rate": 1.3884707986979313e-05, + "loss": 2.1528, + "step": 7234 + }, + { + "epoch": 1.47, + "learning_rate": 1.3883116256061673e-05, + "loss": 2.1499, + "step": 7235 + }, + { + "epoch": 1.47, + "learning_rate": 1.388152440928387e-05, + "loss": 2.184, + "step": 7236 + }, + { + "epoch": 1.47, + "learning_rate": 1.3879932446693407e-05, + "loss": 2.1238, + "step": 7237 + }, + { + "epoch": 1.47, + "learning_rate": 1.3878340368337773e-05, + "loss": 2.1711, + "step": 7238 + }, + { + "epoch": 1.47, + "learning_rate": 1.3876748174264481e-05, + "loss": 2.2207, + "step": 7239 + }, + { + "epoch": 1.47, + "learning_rate": 1.3875155864521031e-05, + "loss": 2.1152, + "step": 7240 + }, + { + "epoch": 1.47, + "learning_rate": 1.3873563439154936e-05, + "loss": 2.048, + "step": 7241 + }, + { + "epoch": 1.47, + "learning_rate": 1.3871970898213702e-05, + "loss": 2.1233, + "step": 7242 + }, + { + "epoch": 1.47, + "learning_rate": 1.3870378241744857e-05, + "loss": 2.1182, + "step": 7243 + }, + { + "epoch": 1.47, + "learning_rate": 1.3868785469795913e-05, + "loss": 2.0327, + "step": 7244 + }, + { + "epoch": 1.47, + "learning_rate": 1.3867192582414393e-05, + "loss": 2.1905, + "step": 7245 + }, + { + "epoch": 1.47, + "learning_rate": 1.3865599579647828e-05, + "loss": 2.0652, + "step": 7246 + }, + { + "epoch": 1.47, + "learning_rate": 1.3864006461543745e-05, + "loss": 2.1494, + "step": 7247 + }, + { + "epoch": 1.47, + "learning_rate": 1.3862413228149678e-05, + "loss": 2.1828, + "step": 7248 + }, + { + "epoch": 1.47, + "learning_rate": 1.3860819879513166e-05, + "loss": 2.1225, + "step": 7249 + }, + { + "epoch": 1.47, + "learning_rate": 1.385922641568175e-05, + "loss": 2.1171, + "step": 7250 + }, + { + "epoch": 1.47, + "learning_rate": 1.3857632836702972e-05, + "loss": 2.1528, + "step": 7251 + }, + { + "epoch": 1.47, + "learning_rate": 1.3856039142624378e-05, + "loss": 2.0672, + "step": 7252 + }, + { + "epoch": 1.47, + "learning_rate": 1.3854445333493524e-05, + "loss": 2.0806, + "step": 7253 + }, + { + "epoch": 1.47, + "learning_rate": 1.3852851409357959e-05, + "loss": 2.1293, + "step": 7254 + }, + { + "epoch": 1.47, + "learning_rate": 1.3851257370265243e-05, + "loss": 2.1837, + "step": 7255 + }, + { + "epoch": 1.47, + "learning_rate": 1.3849663216262939e-05, + "loss": 2.1833, + "step": 7256 + }, + { + "epoch": 1.47, + "learning_rate": 1.384806894739861e-05, + "loss": 2.1614, + "step": 7257 + }, + { + "epoch": 1.47, + "learning_rate": 1.3846474563719821e-05, + "loss": 2.0993, + "step": 7258 + }, + { + "epoch": 1.47, + "learning_rate": 1.3844880065274148e-05, + "loss": 2.1976, + "step": 7259 + }, + { + "epoch": 1.47, + "learning_rate": 1.3843285452109166e-05, + "loss": 2.1141, + "step": 7260 + }, + { + "epoch": 1.47, + "learning_rate": 1.384169072427245e-05, + "loss": 2.1246, + "step": 7261 + }, + { + "epoch": 1.47, + "learning_rate": 1.3840095881811581e-05, + "loss": 2.163, + "step": 7262 + }, + { + "epoch": 1.47, + "learning_rate": 1.383850092477415e-05, + "loss": 2.1871, + "step": 7263 + }, + { + "epoch": 1.47, + "learning_rate": 1.3836905853207741e-05, + "loss": 2.0834, + "step": 7264 + }, + { + "epoch": 1.47, + "learning_rate": 1.3835310667159946e-05, + "loss": 2.1688, + "step": 7265 + }, + { + "epoch": 1.47, + "learning_rate": 1.3833715366678366e-05, + "loss": 2.0775, + "step": 7266 + }, + { + "epoch": 1.48, + "learning_rate": 1.383211995181059e-05, + "loss": 2.1387, + "step": 7267 + }, + { + "epoch": 1.48, + "learning_rate": 1.3830524422604227e-05, + "loss": 2.0592, + "step": 7268 + }, + { + "epoch": 1.48, + "learning_rate": 1.3828928779106882e-05, + "loss": 2.1202, + "step": 7269 + }, + { + "epoch": 1.48, + "learning_rate": 1.3827333021366165e-05, + "loss": 2.1215, + "step": 7270 + }, + { + "epoch": 1.48, + "learning_rate": 1.3825737149429685e-05, + "loss": 2.1253, + "step": 7271 + }, + { + "epoch": 1.48, + "learning_rate": 1.382414116334506e-05, + "loss": 2.1099, + "step": 7272 + }, + { + "epoch": 1.48, + "learning_rate": 1.3822545063159906e-05, + "loss": 2.0997, + "step": 7273 + }, + { + "epoch": 1.48, + "learning_rate": 1.382094884892185e-05, + "loss": 2.1696, + "step": 7274 + }, + { + "epoch": 1.48, + "learning_rate": 1.3819352520678519e-05, + "loss": 2.2013, + "step": 7275 + }, + { + "epoch": 1.48, + "learning_rate": 1.381775607847754e-05, + "loss": 2.1922, + "step": 7276 + }, + { + "epoch": 1.48, + "learning_rate": 1.3816159522366543e-05, + "loss": 2.1147, + "step": 7277 + }, + { + "epoch": 1.48, + "learning_rate": 1.3814562852393167e-05, + "loss": 2.1869, + "step": 7278 + }, + { + "epoch": 1.48, + "learning_rate": 1.3812966068605056e-05, + "loss": 2.1583, + "step": 7279 + }, + { + "epoch": 1.48, + "learning_rate": 1.3811369171049847e-05, + "loss": 2.131, + "step": 7280 + }, + { + "epoch": 1.48, + "learning_rate": 1.3809772159775187e-05, + "loss": 2.1813, + "step": 7281 + }, + { + "epoch": 1.48, + "learning_rate": 1.3808175034828729e-05, + "loss": 2.1913, + "step": 7282 + }, + { + "epoch": 1.48, + "learning_rate": 1.3806577796258124e-05, + "loss": 2.079, + "step": 7283 + }, + { + "epoch": 1.48, + "learning_rate": 1.3804980444111029e-05, + "loss": 2.1449, + "step": 7284 + }, + { + "epoch": 1.48, + "learning_rate": 1.3803382978435108e-05, + "loss": 2.0962, + "step": 7285 + }, + { + "epoch": 1.48, + "learning_rate": 1.3801785399278019e-05, + "loss": 2.1398, + "step": 7286 + }, + { + "epoch": 1.48, + "learning_rate": 1.380018770668743e-05, + "loss": 2.129, + "step": 7287 + }, + { + "epoch": 1.48, + "learning_rate": 1.3798589900711011e-05, + "loss": 2.2213, + "step": 7288 + }, + { + "epoch": 1.48, + "learning_rate": 1.3796991981396434e-05, + "loss": 2.145, + "step": 7289 + }, + { + "epoch": 1.48, + "learning_rate": 1.3795393948791382e-05, + "loss": 2.1304, + "step": 7290 + }, + { + "epoch": 1.48, + "learning_rate": 1.3793795802943532e-05, + "loss": 2.0864, + "step": 7291 + }, + { + "epoch": 1.48, + "learning_rate": 1.3792197543900567e-05, + "loss": 2.1261, + "step": 7292 + }, + { + "epoch": 1.48, + "learning_rate": 1.3790599171710176e-05, + "loss": 2.0971, + "step": 7293 + }, + { + "epoch": 1.48, + "learning_rate": 1.3789000686420045e-05, + "loss": 2.1147, + "step": 7294 + }, + { + "epoch": 1.48, + "learning_rate": 1.3787402088077872e-05, + "loss": 2.1665, + "step": 7295 + }, + { + "epoch": 1.48, + "learning_rate": 1.378580337673135e-05, + "loss": 2.1847, + "step": 7296 + }, + { + "epoch": 1.48, + "learning_rate": 1.3784204552428188e-05, + "loss": 2.0305, + "step": 7297 + }, + { + "epoch": 1.48, + "learning_rate": 1.3782605615216082e-05, + "loss": 2.0742, + "step": 7298 + }, + { + "epoch": 1.48, + "learning_rate": 1.3781006565142743e-05, + "loss": 2.1582, + "step": 7299 + }, + { + "epoch": 1.48, + "learning_rate": 1.377940740225588e-05, + "loss": 2.1708, + "step": 7300 + }, + { + "epoch": 1.48, + "learning_rate": 1.3777808126603205e-05, + "loss": 2.1056, + "step": 7301 + }, + { + "epoch": 1.48, + "learning_rate": 1.3776208738232443e-05, + "loss": 2.1308, + "step": 7302 + }, + { + "epoch": 1.48, + "learning_rate": 1.3774609237191307e-05, + "loss": 2.1383, + "step": 7303 + }, + { + "epoch": 1.48, + "learning_rate": 1.3773009623527522e-05, + "loss": 2.1553, + "step": 7304 + }, + { + "epoch": 1.48, + "learning_rate": 1.3771409897288823e-05, + "loss": 2.0844, + "step": 7305 + }, + { + "epoch": 1.48, + "learning_rate": 1.3769810058522932e-05, + "loss": 2.1217, + "step": 7306 + }, + { + "epoch": 1.48, + "learning_rate": 1.3768210107277589e-05, + "loss": 2.0774, + "step": 7307 + }, + { + "epoch": 1.48, + "learning_rate": 1.3766610043600528e-05, + "loss": 2.1352, + "step": 7308 + }, + { + "epoch": 1.48, + "learning_rate": 1.3765009867539489e-05, + "loss": 2.1335, + "step": 7309 + }, + { + "epoch": 1.48, + "learning_rate": 1.3763409579142222e-05, + "loss": 2.134, + "step": 7310 + }, + { + "epoch": 1.48, + "learning_rate": 1.376180917845647e-05, + "loss": 2.047, + "step": 7311 + }, + { + "epoch": 1.48, + "learning_rate": 1.3760208665529986e-05, + "loss": 2.1808, + "step": 7312 + }, + { + "epoch": 1.48, + "learning_rate": 1.3758608040410523e-05, + "loss": 2.236, + "step": 7313 + }, + { + "epoch": 1.48, + "learning_rate": 1.375700730314584e-05, + "loss": 2.1076, + "step": 7314 + }, + { + "epoch": 1.48, + "learning_rate": 1.3755406453783697e-05, + "loss": 2.1449, + "step": 7315 + }, + { + "epoch": 1.49, + "learning_rate": 1.3753805492371857e-05, + "loss": 2.1167, + "step": 7316 + }, + { + "epoch": 1.49, + "learning_rate": 1.375220441895809e-05, + "loss": 2.0998, + "step": 7317 + }, + { + "epoch": 1.49, + "learning_rate": 1.3750603233590169e-05, + "loss": 2.0545, + "step": 7318 + }, + { + "epoch": 1.49, + "learning_rate": 1.3749001936315862e-05, + "loss": 2.0659, + "step": 7319 + }, + { + "epoch": 1.49, + "learning_rate": 1.3747400527182955e-05, + "loss": 2.1061, + "step": 7320 + }, + { + "epoch": 1.49, + "learning_rate": 1.374579900623922e-05, + "loss": 2.1069, + "step": 7321 + }, + { + "epoch": 1.49, + "learning_rate": 1.374419737353245e-05, + "loss": 2.1644, + "step": 7322 + }, + { + "epoch": 1.49, + "learning_rate": 1.3742595629110427e-05, + "loss": 2.1553, + "step": 7323 + }, + { + "epoch": 1.49, + "learning_rate": 1.3740993773020945e-05, + "loss": 2.1768, + "step": 7324 + }, + { + "epoch": 1.49, + "learning_rate": 1.3739391805311795e-05, + "loss": 2.1076, + "step": 7325 + }, + { + "epoch": 1.49, + "learning_rate": 1.373778972603078e-05, + "loss": 2.0943, + "step": 7326 + }, + { + "epoch": 1.49, + "learning_rate": 1.3736187535225696e-05, + "loss": 2.1042, + "step": 7327 + }, + { + "epoch": 1.49, + "learning_rate": 1.3734585232944353e-05, + "loss": 2.1755, + "step": 7328 + }, + { + "epoch": 1.49, + "learning_rate": 1.373298281923455e-05, + "loss": 2.1057, + "step": 7329 + }, + { + "epoch": 1.49, + "learning_rate": 1.373138029414411e-05, + "loss": 2.1089, + "step": 7330 + }, + { + "epoch": 1.49, + "learning_rate": 1.3729777657720835e-05, + "loss": 2.1351, + "step": 7331 + }, + { + "epoch": 1.49, + "learning_rate": 1.3728174910012553e-05, + "loss": 2.093, + "step": 7332 + }, + { + "epoch": 1.49, + "learning_rate": 1.372657205106708e-05, + "loss": 2.1101, + "step": 7333 + }, + { + "epoch": 1.49, + "learning_rate": 1.3724969080932238e-05, + "loss": 2.1509, + "step": 7334 + }, + { + "epoch": 1.49, + "learning_rate": 1.3723365999655859e-05, + "loss": 2.1313, + "step": 7335 + }, + { + "epoch": 1.49, + "learning_rate": 1.3721762807285774e-05, + "loss": 2.1424, + "step": 7336 + }, + { + "epoch": 1.49, + "learning_rate": 1.3720159503869816e-05, + "loss": 2.2016, + "step": 7337 + }, + { + "epoch": 1.49, + "learning_rate": 1.3718556089455821e-05, + "loss": 2.1159, + "step": 7338 + }, + { + "epoch": 1.49, + "learning_rate": 1.3716952564091632e-05, + "loss": 2.0783, + "step": 7339 + }, + { + "epoch": 1.49, + "learning_rate": 1.3715348927825095e-05, + "loss": 2.1386, + "step": 7340 + }, + { + "epoch": 1.49, + "learning_rate": 1.371374518070405e-05, + "loss": 2.1457, + "step": 7341 + }, + { + "epoch": 1.49, + "learning_rate": 1.3712141322776358e-05, + "loss": 2.1199, + "step": 7342 + }, + { + "epoch": 1.49, + "learning_rate": 1.3710537354089863e-05, + "loss": 2.0732, + "step": 7343 + }, + { + "epoch": 1.49, + "learning_rate": 1.3708933274692431e-05, + "loss": 2.1391, + "step": 7344 + }, + { + "epoch": 1.49, + "learning_rate": 1.3707329084631918e-05, + "loss": 2.1838, + "step": 7345 + }, + { + "epoch": 1.49, + "learning_rate": 1.3705724783956191e-05, + "loss": 2.1854, + "step": 7346 + }, + { + "epoch": 1.49, + "learning_rate": 1.3704120372713114e-05, + "loss": 2.1854, + "step": 7347 + }, + { + "epoch": 1.49, + "learning_rate": 1.3702515850950562e-05, + "loss": 2.1349, + "step": 7348 + }, + { + "epoch": 1.49, + "learning_rate": 1.3700911218716404e-05, + "loss": 2.1654, + "step": 7349 + }, + { + "epoch": 1.49, + "learning_rate": 1.3699306476058523e-05, + "loss": 2.2046, + "step": 7350 + }, + { + "epoch": 1.49, + "learning_rate": 1.369770162302479e-05, + "loss": 2.0701, + "step": 7351 + }, + { + "epoch": 1.49, + "learning_rate": 1.3696096659663101e-05, + "loss": 2.1593, + "step": 7352 + }, + { + "epoch": 1.49, + "learning_rate": 1.3694491586021335e-05, + "loss": 2.1742, + "step": 7353 + }, + { + "epoch": 1.49, + "learning_rate": 1.3692886402147385e-05, + "loss": 2.0775, + "step": 7354 + }, + { + "epoch": 1.49, + "learning_rate": 1.3691281108089144e-05, + "loss": 2.1598, + "step": 7355 + }, + { + "epoch": 1.49, + "learning_rate": 1.368967570389451e-05, + "loss": 2.1274, + "step": 7356 + }, + { + "epoch": 1.49, + "learning_rate": 1.3688070189611383e-05, + "loss": 2.0743, + "step": 7357 + }, + { + "epoch": 1.49, + "learning_rate": 1.3686464565287665e-05, + "loss": 2.1015, + "step": 7358 + }, + { + "epoch": 1.49, + "learning_rate": 1.3684858830971263e-05, + "loss": 2.064, + "step": 7359 + }, + { + "epoch": 1.49, + "learning_rate": 1.3683252986710092e-05, + "loss": 2.1427, + "step": 7360 + }, + { + "epoch": 1.49, + "learning_rate": 1.3681647032552059e-05, + "loss": 2.0994, + "step": 7361 + }, + { + "epoch": 1.49, + "learning_rate": 1.3680040968545086e-05, + "loss": 2.1888, + "step": 7362 + }, + { + "epoch": 1.49, + "learning_rate": 1.3678434794737085e-05, + "loss": 2.0942, + "step": 7363 + }, + { + "epoch": 1.49, + "learning_rate": 1.3676828511175989e-05, + "loss": 2.0921, + "step": 7364 + }, + { + "epoch": 1.49, + "learning_rate": 1.3675222117909716e-05, + "loss": 2.1375, + "step": 7365 + }, + { + "epoch": 1.5, + "learning_rate": 1.3673615614986205e-05, + "loss": 2.03, + "step": 7366 + }, + { + "epoch": 1.5, + "learning_rate": 1.3672009002453379e-05, + "loss": 2.1205, + "step": 7367 + }, + { + "epoch": 1.5, + "learning_rate": 1.3670402280359184e-05, + "loss": 2.2137, + "step": 7368 + }, + { + "epoch": 1.5, + "learning_rate": 1.366879544875155e-05, + "loss": 2.0899, + "step": 7369 + }, + { + "epoch": 1.5, + "learning_rate": 1.366718850767843e-05, + "loss": 2.1345, + "step": 7370 + }, + { + "epoch": 1.5, + "learning_rate": 1.366558145718776e-05, + "loss": 2.0635, + "step": 7371 + }, + { + "epoch": 1.5, + "learning_rate": 1.3663974297327498e-05, + "loss": 2.1488, + "step": 7372 + }, + { + "epoch": 1.5, + "learning_rate": 1.3662367028145591e-05, + "loss": 2.1455, + "step": 7373 + }, + { + "epoch": 1.5, + "learning_rate": 1.366075964969e-05, + "loss": 2.0768, + "step": 7374 + }, + { + "epoch": 1.5, + "learning_rate": 1.3659152162008678e-05, + "loss": 2.1302, + "step": 7375 + }, + { + "epoch": 1.5, + "learning_rate": 1.365754456514959e-05, + "loss": 2.0753, + "step": 7376 + }, + { + "epoch": 1.5, + "learning_rate": 1.3655936859160705e-05, + "loss": 2.1988, + "step": 7377 + }, + { + "epoch": 1.5, + "learning_rate": 1.3654329044089986e-05, + "loss": 2.1563, + "step": 7378 + }, + { + "epoch": 1.5, + "learning_rate": 1.3652721119985413e-05, + "loss": 2.0901, + "step": 7379 + }, + { + "epoch": 1.5, + "learning_rate": 1.3651113086894955e-05, + "loss": 2.2141, + "step": 7380 + }, + { + "epoch": 1.5, + "learning_rate": 1.3649504944866589e-05, + "loss": 2.1227, + "step": 7381 + }, + { + "epoch": 1.5, + "learning_rate": 1.3647896693948306e-05, + "loss": 2.1006, + "step": 7382 + }, + { + "epoch": 1.5, + "learning_rate": 1.3646288334188083e-05, + "loss": 2.0853, + "step": 7383 + }, + { + "epoch": 1.5, + "learning_rate": 1.3644679865633912e-05, + "loss": 2.1652, + "step": 7384 + }, + { + "epoch": 1.5, + "learning_rate": 1.3643071288333782e-05, + "loss": 2.1205, + "step": 7385 + }, + { + "epoch": 1.5, + "learning_rate": 1.3641462602335694e-05, + "loss": 2.1704, + "step": 7386 + }, + { + "epoch": 1.5, + "learning_rate": 1.363985380768764e-05, + "loss": 2.1223, + "step": 7387 + }, + { + "epoch": 1.5, + "learning_rate": 1.3638244904437623e-05, + "loss": 2.1359, + "step": 7388 + }, + { + "epoch": 1.5, + "learning_rate": 1.363663589263365e-05, + "loss": 2.1546, + "step": 7389 + }, + { + "epoch": 1.5, + "learning_rate": 1.3635026772323727e-05, + "loss": 2.1368, + "step": 7390 + }, + { + "epoch": 1.5, + "learning_rate": 1.3633417543555862e-05, + "loss": 2.0673, + "step": 7391 + }, + { + "epoch": 1.5, + "learning_rate": 1.3631808206378079e-05, + "loss": 2.1009, + "step": 7392 + }, + { + "epoch": 1.5, + "learning_rate": 1.3630198760838385e-05, + "loss": 2.0842, + "step": 7393 + }, + { + "epoch": 1.5, + "learning_rate": 1.3628589206984808e-05, + "loss": 2.1083, + "step": 7394 + }, + { + "epoch": 1.5, + "learning_rate": 1.3626979544865366e-05, + "loss": 2.115, + "step": 7395 + }, + { + "epoch": 1.5, + "learning_rate": 1.3625369774528094e-05, + "loss": 2.1221, + "step": 7396 + }, + { + "epoch": 1.5, + "learning_rate": 1.3623759896021018e-05, + "loss": 2.192, + "step": 7397 + }, + { + "epoch": 1.5, + "learning_rate": 1.3622149909392169e-05, + "loss": 2.1228, + "step": 7398 + }, + { + "epoch": 1.5, + "learning_rate": 1.362053981468959e-05, + "loss": 2.1175, + "step": 7399 + }, + { + "epoch": 1.5, + "learning_rate": 1.3618929611961316e-05, + "loss": 2.0614, + "step": 7400 + }, + { + "epoch": 1.5, + "learning_rate": 1.3617319301255395e-05, + "loss": 2.0583, + "step": 7401 + }, + { + "epoch": 1.5, + "learning_rate": 1.3615708882619872e-05, + "loss": 2.0872, + "step": 7402 + }, + { + "epoch": 1.5, + "learning_rate": 1.3614098356102794e-05, + "loss": 2.1335, + "step": 7403 + }, + { + "epoch": 1.5, + "learning_rate": 1.3612487721752219e-05, + "loss": 2.1159, + "step": 7404 + }, + { + "epoch": 1.5, + "learning_rate": 1.3610876979616199e-05, + "loss": 2.0947, + "step": 7405 + }, + { + "epoch": 1.5, + "learning_rate": 1.3609266129742797e-05, + "loss": 2.2339, + "step": 7406 + }, + { + "epoch": 1.5, + "learning_rate": 1.3607655172180072e-05, + "loss": 2.1637, + "step": 7407 + }, + { + "epoch": 1.5, + "learning_rate": 1.3606044106976096e-05, + "loss": 2.038, + "step": 7408 + }, + { + "epoch": 1.5, + "learning_rate": 1.360443293417893e-05, + "loss": 2.039, + "step": 7409 + }, + { + "epoch": 1.5, + "learning_rate": 1.3602821653836654e-05, + "loss": 2.1455, + "step": 7410 + }, + { + "epoch": 1.5, + "learning_rate": 1.3601210265997338e-05, + "loss": 2.0766, + "step": 7411 + }, + { + "epoch": 1.5, + "learning_rate": 1.3599598770709065e-05, + "loss": 2.0861, + "step": 7412 + }, + { + "epoch": 1.5, + "learning_rate": 1.3597987168019916e-05, + "loss": 2.095, + "step": 7413 + }, + { + "epoch": 1.5, + "learning_rate": 1.3596375457977974e-05, + "loss": 2.2079, + "step": 7414 + }, + { + "epoch": 1.51, + "learning_rate": 1.359476364063133e-05, + "loss": 2.0777, + "step": 7415 + }, + { + "epoch": 1.51, + "learning_rate": 1.3593151716028072e-05, + "loss": 2.1432, + "step": 7416 + }, + { + "epoch": 1.51, + "learning_rate": 1.3591539684216301e-05, + "loss": 2.135, + "step": 7417 + }, + { + "epoch": 1.51, + "learning_rate": 1.3589927545244111e-05, + "loss": 2.1005, + "step": 7418 + }, + { + "epoch": 1.51, + "learning_rate": 1.3588315299159602e-05, + "loss": 2.1617, + "step": 7419 + }, + { + "epoch": 1.51, + "learning_rate": 1.3586702946010882e-05, + "loss": 2.1741, + "step": 7420 + }, + { + "epoch": 1.51, + "learning_rate": 1.3585090485846054e-05, + "loss": 2.1387, + "step": 7421 + }, + { + "epoch": 1.51, + "learning_rate": 1.3583477918713234e-05, + "loss": 2.0837, + "step": 7422 + }, + { + "epoch": 1.51, + "learning_rate": 1.3581865244660532e-05, + "loss": 2.0399, + "step": 7423 + }, + { + "epoch": 1.51, + "learning_rate": 1.358025246373607e-05, + "loss": 2.0591, + "step": 7424 + }, + { + "epoch": 1.51, + "learning_rate": 1.357863957598796e-05, + "loss": 2.0282, + "step": 7425 + }, + { + "epoch": 1.51, + "learning_rate": 1.3577026581464337e-05, + "loss": 2.194, + "step": 7426 + }, + { + "epoch": 1.51, + "learning_rate": 1.3575413480213318e-05, + "loss": 2.1073, + "step": 7427 + }, + { + "epoch": 1.51, + "learning_rate": 1.3573800272283032e-05, + "loss": 2.1792, + "step": 7428 + }, + { + "epoch": 1.51, + "learning_rate": 1.3572186957721624e-05, + "loss": 2.1201, + "step": 7429 + }, + { + "epoch": 1.51, + "learning_rate": 1.3570573536577218e-05, + "loss": 2.1304, + "step": 7430 + }, + { + "epoch": 1.51, + "learning_rate": 1.356896000889796e-05, + "loss": 2.1664, + "step": 7431 + }, + { + "epoch": 1.51, + "learning_rate": 1.3567346374731991e-05, + "loss": 2.0814, + "step": 7432 + }, + { + "epoch": 1.51, + "learning_rate": 1.3565732634127457e-05, + "loss": 2.0993, + "step": 7433 + }, + { + "epoch": 1.51, + "learning_rate": 1.3564118787132507e-05, + "loss": 2.2272, + "step": 7434 + }, + { + "epoch": 1.51, + "learning_rate": 1.3562504833795293e-05, + "loss": 2.0515, + "step": 7435 + }, + { + "epoch": 1.51, + "learning_rate": 1.3560890774163974e-05, + "loss": 2.092, + "step": 7436 + }, + { + "epoch": 1.51, + "learning_rate": 1.3559276608286698e-05, + "loss": 2.1619, + "step": 7437 + }, + { + "epoch": 1.51, + "learning_rate": 1.355766233621164e-05, + "loss": 2.1269, + "step": 7438 + }, + { + "epoch": 1.51, + "learning_rate": 1.3556047957986956e-05, + "loss": 2.1855, + "step": 7439 + }, + { + "epoch": 1.51, + "learning_rate": 1.3554433473660814e-05, + "loss": 2.1709, + "step": 7440 + }, + { + "epoch": 1.51, + "learning_rate": 1.3552818883281393e-05, + "loss": 2.1487, + "step": 7441 + }, + { + "epoch": 1.51, + "learning_rate": 1.3551204186896858e-05, + "loss": 2.1455, + "step": 7442 + }, + { + "epoch": 1.51, + "learning_rate": 1.3549589384555396e-05, + "loss": 2.1476, + "step": 7443 + }, + { + "epoch": 1.51, + "learning_rate": 1.3547974476305177e-05, + "loss": 2.1096, + "step": 7444 + }, + { + "epoch": 1.51, + "learning_rate": 1.3546359462194393e-05, + "loss": 2.199, + "step": 7445 + }, + { + "epoch": 1.51, + "learning_rate": 1.3544744342271228e-05, + "loss": 2.1327, + "step": 7446 + }, + { + "epoch": 1.51, + "learning_rate": 1.354312911658387e-05, + "loss": 2.05, + "step": 7447 + }, + { + "epoch": 1.51, + "learning_rate": 1.3541513785180517e-05, + "loss": 2.1174, + "step": 7448 + }, + { + "epoch": 1.51, + "learning_rate": 1.3539898348109366e-05, + "loss": 2.0723, + "step": 7449 + }, + { + "epoch": 1.51, + "learning_rate": 1.353828280541861e-05, + "loss": 2.112, + "step": 7450 + }, + { + "epoch": 1.51, + "learning_rate": 1.3536667157156459e-05, + "loss": 2.1351, + "step": 7451 + }, + { + "epoch": 1.51, + "learning_rate": 1.353505140337111e-05, + "loss": 2.1795, + "step": 7452 + }, + { + "epoch": 1.51, + "learning_rate": 1.3533435544110783e-05, + "loss": 2.114, + "step": 7453 + }, + { + "epoch": 1.51, + "learning_rate": 1.3531819579423684e-05, + "loss": 2.1014, + "step": 7454 + }, + { + "epoch": 1.51, + "learning_rate": 1.353020350935803e-05, + "loss": 2.1202, + "step": 7455 + }, + { + "epoch": 1.51, + "learning_rate": 1.3528587333962036e-05, + "loss": 2.1117, + "step": 7456 + }, + { + "epoch": 1.51, + "learning_rate": 1.3526971053283926e-05, + "loss": 2.1145, + "step": 7457 + }, + { + "epoch": 1.51, + "learning_rate": 1.352535466737193e-05, + "loss": 2.1077, + "step": 7458 + }, + { + "epoch": 1.51, + "learning_rate": 1.352373817627427e-05, + "loss": 2.1076, + "step": 7459 + }, + { + "epoch": 1.51, + "learning_rate": 1.3522121580039177e-05, + "loss": 2.0955, + "step": 7460 + }, + { + "epoch": 1.51, + "learning_rate": 1.3520504878714888e-05, + "loss": 2.1341, + "step": 7461 + }, + { + "epoch": 1.51, + "learning_rate": 1.3518888072349637e-05, + "loss": 2.1599, + "step": 7462 + }, + { + "epoch": 1.51, + "learning_rate": 1.3517271160991667e-05, + "loss": 2.1281, + "step": 7463 + }, + { + "epoch": 1.52, + "learning_rate": 1.3515654144689224e-05, + "loss": 2.1079, + "step": 7464 + }, + { + "epoch": 1.52, + "learning_rate": 1.3514037023490549e-05, + "loss": 2.1371, + "step": 7465 + }, + { + "epoch": 1.52, + "learning_rate": 1.3512419797443896e-05, + "loss": 2.1455, + "step": 7466 + }, + { + "epoch": 1.52, + "learning_rate": 1.3510802466597516e-05, + "loss": 2.139, + "step": 7467 + }, + { + "epoch": 1.52, + "learning_rate": 1.3509185030999668e-05, + "loss": 2.1757, + "step": 7468 + }, + { + "epoch": 1.52, + "learning_rate": 1.3507567490698608e-05, + "loss": 2.078, + "step": 7469 + }, + { + "epoch": 1.52, + "learning_rate": 1.3505949845742599e-05, + "loss": 2.0456, + "step": 7470 + }, + { + "epoch": 1.52, + "learning_rate": 1.3504332096179908e-05, + "loss": 2.1368, + "step": 7471 + }, + { + "epoch": 1.52, + "learning_rate": 1.3502714242058801e-05, + "loss": 2.0795, + "step": 7472 + }, + { + "epoch": 1.52, + "learning_rate": 1.3501096283427552e-05, + "loss": 2.2082, + "step": 7473 + }, + { + "epoch": 1.52, + "learning_rate": 1.3499478220334437e-05, + "loss": 2.1188, + "step": 7474 + }, + { + "epoch": 1.52, + "learning_rate": 1.3497860052827731e-05, + "loss": 2.008, + "step": 7475 + }, + { + "epoch": 1.52, + "learning_rate": 1.3496241780955717e-05, + "loss": 2.0053, + "step": 7476 + }, + { + "epoch": 1.52, + "learning_rate": 1.3494623404766678e-05, + "loss": 2.1112, + "step": 7477 + }, + { + "epoch": 1.52, + "learning_rate": 1.3493004924308902e-05, + "loss": 2.1277, + "step": 7478 + }, + { + "epoch": 1.52, + "learning_rate": 1.349138633963068e-05, + "loss": 2.116, + "step": 7479 + }, + { + "epoch": 1.52, + "learning_rate": 1.3489767650780305e-05, + "loss": 2.0778, + "step": 7480 + }, + { + "epoch": 1.52, + "learning_rate": 1.3488148857806077e-05, + "loss": 2.1975, + "step": 7481 + }, + { + "epoch": 1.52, + "learning_rate": 1.3486529960756287e-05, + "loss": 2.1542, + "step": 7482 + }, + { + "epoch": 1.52, + "learning_rate": 1.3484910959679245e-05, + "loss": 2.0782, + "step": 7483 + }, + { + "epoch": 1.52, + "learning_rate": 1.3483291854623256e-05, + "loss": 2.1534, + "step": 7484 + }, + { + "epoch": 1.52, + "learning_rate": 1.3481672645636627e-05, + "loss": 2.0891, + "step": 7485 + }, + { + "epoch": 1.52, + "learning_rate": 1.3480053332767676e-05, + "loss": 2.1007, + "step": 7486 + }, + { + "epoch": 1.52, + "learning_rate": 1.3478433916064708e-05, + "loss": 2.1364, + "step": 7487 + }, + { + "epoch": 1.52, + "learning_rate": 1.3476814395576053e-05, + "loss": 2.1546, + "step": 7488 + }, + { + "epoch": 1.52, + "learning_rate": 1.347519477135002e-05, + "loss": 2.2309, + "step": 7489 + }, + { + "epoch": 1.52, + "learning_rate": 1.3473575043434946e-05, + "loss": 2.1101, + "step": 7490 + }, + { + "epoch": 1.52, + "learning_rate": 1.347195521187915e-05, + "loss": 2.0888, + "step": 7491 + }, + { + "epoch": 1.52, + "learning_rate": 1.3470335276730966e-05, + "loss": 2.0202, + "step": 7492 + }, + { + "epoch": 1.52, + "learning_rate": 1.346871523803873e-05, + "loss": 2.1267, + "step": 7493 + }, + { + "epoch": 1.52, + "learning_rate": 1.3467095095850773e-05, + "loss": 2.1189, + "step": 7494 + }, + { + "epoch": 1.52, + "learning_rate": 1.346547485021544e-05, + "loss": 2.0776, + "step": 7495 + }, + { + "epoch": 1.52, + "learning_rate": 1.3463854501181073e-05, + "loss": 2.1467, + "step": 7496 + }, + { + "epoch": 1.52, + "learning_rate": 1.3462234048796017e-05, + "loss": 2.1754, + "step": 7497 + }, + { + "epoch": 1.52, + "learning_rate": 1.3460613493108623e-05, + "loss": 2.1845, + "step": 7498 + }, + { + "epoch": 1.52, + "learning_rate": 1.3458992834167241e-05, + "loss": 2.1241, + "step": 7499 + }, + { + "epoch": 1.52, + "learning_rate": 1.345737207202023e-05, + "loss": 2.2199, + "step": 7500 + }, + { + "epoch": 1.52, + "learning_rate": 1.3455751206715946e-05, + "loss": 2.1137, + "step": 7501 + }, + { + "epoch": 1.52, + "learning_rate": 1.345413023830275e-05, + "loss": 2.0975, + "step": 7502 + }, + { + "epoch": 1.52, + "learning_rate": 1.3452509166829007e-05, + "loss": 2.1607, + "step": 7503 + }, + { + "epoch": 1.52, + "learning_rate": 1.3450887992343084e-05, + "loss": 2.1272, + "step": 7504 + }, + { + "epoch": 1.52, + "learning_rate": 1.3449266714893358e-05, + "loss": 2.1874, + "step": 7505 + }, + { + "epoch": 1.52, + "learning_rate": 1.3447645334528196e-05, + "loss": 2.1508, + "step": 7506 + }, + { + "epoch": 1.52, + "learning_rate": 1.3446023851295977e-05, + "loss": 2.1721, + "step": 7507 + }, + { + "epoch": 1.52, + "learning_rate": 1.3444402265245083e-05, + "loss": 2.1254, + "step": 7508 + }, + { + "epoch": 1.52, + "learning_rate": 1.3442780576423891e-05, + "loss": 2.1997, + "step": 7509 + }, + { + "epoch": 1.52, + "learning_rate": 1.3441158784880795e-05, + "loss": 2.1273, + "step": 7510 + }, + { + "epoch": 1.52, + "learning_rate": 1.343953689066418e-05, + "loss": 2.1802, + "step": 7511 + }, + { + "epoch": 1.52, + "learning_rate": 1.3437914893822436e-05, + "loss": 2.0857, + "step": 7512 + }, + { + "epoch": 1.53, + "learning_rate": 1.3436292794403965e-05, + "loss": 2.1107, + "step": 7513 + }, + { + "epoch": 1.53, + "learning_rate": 1.343467059245716e-05, + "loss": 2.1486, + "step": 7514 + }, + { + "epoch": 1.53, + "learning_rate": 1.3433048288030426e-05, + "loss": 2.1009, + "step": 7515 + }, + { + "epoch": 1.53, + "learning_rate": 1.3431425881172163e-05, + "loss": 2.1315, + "step": 7516 + }, + { + "epoch": 1.53, + "learning_rate": 1.3429803371930783e-05, + "loss": 2.2202, + "step": 7517 + }, + { + "epoch": 1.53, + "learning_rate": 1.3428180760354693e-05, + "loss": 2.1809, + "step": 7518 + }, + { + "epoch": 1.53, + "learning_rate": 1.3426558046492313e-05, + "loss": 2.102, + "step": 7519 + }, + { + "epoch": 1.53, + "learning_rate": 1.3424935230392052e-05, + "loss": 2.1909, + "step": 7520 + }, + { + "epoch": 1.53, + "learning_rate": 1.3423312312102334e-05, + "loss": 2.0478, + "step": 7521 + }, + { + "epoch": 1.53, + "learning_rate": 1.342168929167158e-05, + "loss": 2.123, + "step": 7522 + }, + { + "epoch": 1.53, + "learning_rate": 1.342006616914822e-05, + "loss": 2.0378, + "step": 7523 + }, + { + "epoch": 1.53, + "learning_rate": 1.3418442944580678e-05, + "loss": 2.0975, + "step": 7524 + }, + { + "epoch": 1.53, + "learning_rate": 1.3416819618017389e-05, + "loss": 2.0951, + "step": 7525 + }, + { + "epoch": 1.53, + "learning_rate": 1.3415196189506785e-05, + "loss": 2.1296, + "step": 7526 + }, + { + "epoch": 1.53, + "learning_rate": 1.3413572659097309e-05, + "loss": 2.0872, + "step": 7527 + }, + { + "epoch": 1.53, + "learning_rate": 1.3411949026837399e-05, + "loss": 2.1842, + "step": 7528 + }, + { + "epoch": 1.53, + "learning_rate": 1.3410325292775496e-05, + "loss": 2.0623, + "step": 7529 + }, + { + "epoch": 1.53, + "learning_rate": 1.3408701456960054e-05, + "loss": 2.0757, + "step": 7530 + }, + { + "epoch": 1.53, + "learning_rate": 1.3407077519439517e-05, + "loss": 2.196, + "step": 7531 + }, + { + "epoch": 1.53, + "learning_rate": 1.3405453480262342e-05, + "loss": 2.0992, + "step": 7532 + }, + { + "epoch": 1.53, + "learning_rate": 1.3403829339476988e-05, + "loss": 2.0447, + "step": 7533 + }, + { + "epoch": 1.53, + "learning_rate": 1.3402205097131906e-05, + "loss": 2.1002, + "step": 7534 + }, + { + "epoch": 1.53, + "learning_rate": 1.3400580753275565e-05, + "loss": 2.1457, + "step": 7535 + }, + { + "epoch": 1.53, + "learning_rate": 1.3398956307956427e-05, + "loss": 2.1242, + "step": 7536 + }, + { + "epoch": 1.53, + "learning_rate": 1.3397331761222964e-05, + "loss": 2.1038, + "step": 7537 + }, + { + "epoch": 1.53, + "learning_rate": 1.3395707113123644e-05, + "loss": 2.1372, + "step": 7538 + }, + { + "epoch": 1.53, + "learning_rate": 1.3394082363706944e-05, + "loss": 2.1169, + "step": 7539 + }, + { + "epoch": 1.53, + "learning_rate": 1.3392457513021337e-05, + "loss": 2.2256, + "step": 7540 + }, + { + "epoch": 1.53, + "learning_rate": 1.339083256111531e-05, + "loss": 2.1315, + "step": 7541 + }, + { + "epoch": 1.53, + "learning_rate": 1.338920750803734e-05, + "loss": 2.0651, + "step": 7542 + }, + { + "epoch": 1.53, + "learning_rate": 1.3387582353835921e-05, + "loss": 2.1317, + "step": 7543 + }, + { + "epoch": 1.53, + "learning_rate": 1.3385957098559535e-05, + "loss": 2.131, + "step": 7544 + }, + { + "epoch": 1.53, + "learning_rate": 1.338433174225668e-05, + "loss": 2.0984, + "step": 7545 + }, + { + "epoch": 1.53, + "learning_rate": 1.3382706284975849e-05, + "loss": 2.079, + "step": 7546 + }, + { + "epoch": 1.53, + "learning_rate": 1.3381080726765543e-05, + "loss": 2.0631, + "step": 7547 + }, + { + "epoch": 1.53, + "learning_rate": 1.3379455067674258e-05, + "loss": 2.1108, + "step": 7548 + }, + { + "epoch": 1.53, + "learning_rate": 1.3377829307750506e-05, + "loss": 2.1186, + "step": 7549 + }, + { + "epoch": 1.53, + "learning_rate": 1.3376203447042789e-05, + "loss": 2.1394, + "step": 7550 + }, + { + "epoch": 1.53, + "learning_rate": 1.337457748559962e-05, + "loss": 2.1002, + "step": 7551 + }, + { + "epoch": 1.53, + "learning_rate": 1.3372951423469514e-05, + "loss": 2.0673, + "step": 7552 + }, + { + "epoch": 1.53, + "learning_rate": 1.3371325260700985e-05, + "loss": 2.1497, + "step": 7553 + }, + { + "epoch": 1.53, + "learning_rate": 1.3369698997342553e-05, + "loss": 2.1321, + "step": 7554 + }, + { + "epoch": 1.53, + "learning_rate": 1.3368072633442744e-05, + "loss": 2.231, + "step": 7555 + }, + { + "epoch": 1.53, + "learning_rate": 1.3366446169050075e-05, + "loss": 2.1999, + "step": 7556 + }, + { + "epoch": 1.53, + "learning_rate": 1.3364819604213085e-05, + "loss": 2.1182, + "step": 7557 + }, + { + "epoch": 1.53, + "learning_rate": 1.3363192938980302e-05, + "loss": 2.1901, + "step": 7558 + }, + { + "epoch": 1.53, + "learning_rate": 1.3361566173400258e-05, + "loss": 2.1904, + "step": 7559 + }, + { + "epoch": 1.53, + "learning_rate": 1.3359939307521494e-05, + "loss": 2.1336, + "step": 7560 + }, + { + "epoch": 1.53, + "learning_rate": 1.335831234139255e-05, + "loss": 2.1087, + "step": 7561 + }, + { + "epoch": 1.53, + "learning_rate": 1.3356685275061967e-05, + "loss": 2.1489, + "step": 7562 + }, + { + "epoch": 1.54, + "learning_rate": 1.3355058108578294e-05, + "loss": 2.2125, + "step": 7563 + }, + { + "epoch": 1.54, + "learning_rate": 1.3353430841990078e-05, + "loss": 2.2166, + "step": 7564 + }, + { + "epoch": 1.54, + "learning_rate": 1.3351803475345877e-05, + "loss": 2.1211, + "step": 7565 + }, + { + "epoch": 1.54, + "learning_rate": 1.3350176008694242e-05, + "loss": 2.1251, + "step": 7566 + }, + { + "epoch": 1.54, + "learning_rate": 1.3348548442083735e-05, + "loss": 2.1182, + "step": 7567 + }, + { + "epoch": 1.54, + "learning_rate": 1.3346920775562913e-05, + "loss": 2.1204, + "step": 7568 + }, + { + "epoch": 1.54, + "learning_rate": 1.3345293009180345e-05, + "loss": 2.0869, + "step": 7569 + }, + { + "epoch": 1.54, + "learning_rate": 1.3343665142984594e-05, + "loss": 2.163, + "step": 7570 + }, + { + "epoch": 1.54, + "learning_rate": 1.3342037177024234e-05, + "loss": 2.1397, + "step": 7571 + }, + { + "epoch": 1.54, + "learning_rate": 1.3340409111347839e-05, + "loss": 2.1704, + "step": 7572 + }, + { + "epoch": 1.54, + "learning_rate": 1.333878094600398e-05, + "loss": 2.1795, + "step": 7573 + }, + { + "epoch": 1.54, + "learning_rate": 1.3337152681041243e-05, + "loss": 2.0544, + "step": 7574 + }, + { + "epoch": 1.54, + "learning_rate": 1.3335524316508208e-05, + "loss": 2.1773, + "step": 7575 + }, + { + "epoch": 1.54, + "learning_rate": 1.3333895852453458e-05, + "loss": 2.1168, + "step": 7576 + }, + { + "epoch": 1.54, + "learning_rate": 1.3332267288925587e-05, + "loss": 2.1063, + "step": 7577 + }, + { + "epoch": 1.54, + "learning_rate": 1.3330638625973179e-05, + "loss": 2.0976, + "step": 7578 + }, + { + "epoch": 1.54, + "learning_rate": 1.3329009863644832e-05, + "loss": 2.0989, + "step": 7579 + }, + { + "epoch": 1.54, + "learning_rate": 1.3327381001989145e-05, + "loss": 2.1601, + "step": 7580 + }, + { + "epoch": 1.54, + "learning_rate": 1.3325752041054714e-05, + "loss": 2.136, + "step": 7581 + }, + { + "epoch": 1.54, + "learning_rate": 1.3324122980890147e-05, + "loss": 2.0323, + "step": 7582 + }, + { + "epoch": 1.54, + "learning_rate": 1.3322493821544044e-05, + "loss": 2.1301, + "step": 7583 + }, + { + "epoch": 1.54, + "learning_rate": 1.3320864563065021e-05, + "loss": 2.1493, + "step": 7584 + }, + { + "epoch": 1.54, + "learning_rate": 1.3319235205501684e-05, + "loss": 2.1259, + "step": 7585 + }, + { + "epoch": 1.54, + "learning_rate": 1.3317605748902652e-05, + "loss": 2.0572, + "step": 7586 + }, + { + "epoch": 1.54, + "learning_rate": 1.331597619331654e-05, + "loss": 2.186, + "step": 7587 + }, + { + "epoch": 1.54, + "learning_rate": 1.331434653879197e-05, + "loss": 2.1762, + "step": 7588 + }, + { + "epoch": 1.54, + "learning_rate": 1.331271678537757e-05, + "loss": 2.0869, + "step": 7589 + }, + { + "epoch": 1.54, + "learning_rate": 1.3311086933121961e-05, + "loss": 2.0361, + "step": 7590 + }, + { + "epoch": 1.54, + "learning_rate": 1.3309456982073777e-05, + "loss": 2.2293, + "step": 7591 + }, + { + "epoch": 1.54, + "learning_rate": 1.3307826932281648e-05, + "loss": 2.0864, + "step": 7592 + }, + { + "epoch": 1.54, + "learning_rate": 1.3306196783794205e-05, + "loss": 2.1192, + "step": 7593 + }, + { + "epoch": 1.54, + "learning_rate": 1.3304566536660097e-05, + "loss": 2.1916, + "step": 7594 + }, + { + "epoch": 1.54, + "learning_rate": 1.330293619092796e-05, + "loss": 2.0225, + "step": 7595 + }, + { + "epoch": 1.54, + "learning_rate": 1.3301305746646436e-05, + "loss": 2.1514, + "step": 7596 + }, + { + "epoch": 1.54, + "learning_rate": 1.3299675203864175e-05, + "loss": 2.1253, + "step": 7597 + }, + { + "epoch": 1.54, + "learning_rate": 1.3298044562629828e-05, + "loss": 2.1948, + "step": 7598 + }, + { + "epoch": 1.54, + "learning_rate": 1.3296413822992049e-05, + "loss": 2.087, + "step": 7599 + }, + { + "epoch": 1.54, + "learning_rate": 1.3294782984999494e-05, + "loss": 2.118, + "step": 7600 + }, + { + "epoch": 1.54, + "learning_rate": 1.3293152048700819e-05, + "loss": 2.0881, + "step": 7601 + }, + { + "epoch": 1.54, + "learning_rate": 1.329152101414469e-05, + "loss": 2.1365, + "step": 7602 + }, + { + "epoch": 1.54, + "learning_rate": 1.3289889881379767e-05, + "loss": 2.1097, + "step": 7603 + }, + { + "epoch": 1.54, + "learning_rate": 1.3288258650454725e-05, + "loss": 2.1268, + "step": 7604 + }, + { + "epoch": 1.54, + "learning_rate": 1.3286627321418229e-05, + "loss": 2.2114, + "step": 7605 + }, + { + "epoch": 1.54, + "learning_rate": 1.3284995894318953e-05, + "loss": 2.1519, + "step": 7606 + }, + { + "epoch": 1.54, + "learning_rate": 1.3283364369205579e-05, + "loss": 2.1989, + "step": 7607 + }, + { + "epoch": 1.54, + "learning_rate": 1.328173274612678e-05, + "loss": 2.1208, + "step": 7608 + }, + { + "epoch": 1.54, + "learning_rate": 1.3280101025131242e-05, + "loss": 2.0885, + "step": 7609 + }, + { + "epoch": 1.54, + "learning_rate": 1.3278469206267654e-05, + "loss": 2.1053, + "step": 7610 + }, + { + "epoch": 1.54, + "learning_rate": 1.3276837289584697e-05, + "loss": 2.1247, + "step": 7611 + }, + { + "epoch": 1.55, + "learning_rate": 1.3275205275131068e-05, + "loss": 2.1142, + "step": 7612 + }, + { + "epoch": 1.55, + "learning_rate": 1.327357316295546e-05, + "loss": 2.0538, + "step": 7613 + }, + { + "epoch": 1.55, + "learning_rate": 1.3271940953106568e-05, + "loss": 2.2058, + "step": 7614 + }, + { + "epoch": 1.55, + "learning_rate": 1.3270308645633092e-05, + "loss": 2.1056, + "step": 7615 + }, + { + "epoch": 1.55, + "learning_rate": 1.326867624058374e-05, + "loss": 2.1823, + "step": 7616 + }, + { + "epoch": 1.55, + "learning_rate": 1.3267043738007214e-05, + "loss": 2.1122, + "step": 7617 + }, + { + "epoch": 1.55, + "learning_rate": 1.3265411137952223e-05, + "loss": 2.1093, + "step": 7618 + }, + { + "epoch": 1.55, + "learning_rate": 1.3263778440467478e-05, + "loss": 2.0924, + "step": 7619 + }, + { + "epoch": 1.55, + "learning_rate": 1.3262145645601693e-05, + "loss": 2.1186, + "step": 7620 + }, + { + "epoch": 1.55, + "learning_rate": 1.3260512753403591e-05, + "loss": 2.1585, + "step": 7621 + }, + { + "epoch": 1.55, + "learning_rate": 1.3258879763921888e-05, + "loss": 2.0998, + "step": 7622 + }, + { + "epoch": 1.55, + "learning_rate": 1.3257246677205306e-05, + "loss": 2.1201, + "step": 7623 + }, + { + "epoch": 1.55, + "learning_rate": 1.3255613493302574e-05, + "loss": 2.0661, + "step": 7624 + }, + { + "epoch": 1.55, + "learning_rate": 1.325398021226242e-05, + "loss": 2.1283, + "step": 7625 + }, + { + "epoch": 1.55, + "learning_rate": 1.3252346834133578e-05, + "loss": 2.1, + "step": 7626 + }, + { + "epoch": 1.55, + "learning_rate": 1.325071335896478e-05, + "loss": 2.0994, + "step": 7627 + }, + { + "epoch": 1.55, + "learning_rate": 1.3249079786804765e-05, + "loss": 2.1218, + "step": 7628 + }, + { + "epoch": 1.55, + "learning_rate": 1.3247446117702274e-05, + "loss": 2.1581, + "step": 7629 + }, + { + "epoch": 1.55, + "learning_rate": 1.3245812351706053e-05, + "loss": 2.1467, + "step": 7630 + }, + { + "epoch": 1.55, + "learning_rate": 1.3244178488864843e-05, + "loss": 2.1058, + "step": 7631 + }, + { + "epoch": 1.55, + "learning_rate": 1.32425445292274e-05, + "loss": 2.1091, + "step": 7632 + }, + { + "epoch": 1.55, + "learning_rate": 1.324091047284247e-05, + "loss": 2.1404, + "step": 7633 + }, + { + "epoch": 1.55, + "learning_rate": 1.3239276319758812e-05, + "loss": 2.1557, + "step": 7634 + }, + { + "epoch": 1.55, + "learning_rate": 1.3237642070025181e-05, + "loss": 2.1433, + "step": 7635 + }, + { + "epoch": 1.55, + "learning_rate": 1.3236007723690343e-05, + "loss": 2.119, + "step": 7636 + }, + { + "epoch": 1.55, + "learning_rate": 1.323437328080306e-05, + "loss": 2.1629, + "step": 7637 + }, + { + "epoch": 1.55, + "learning_rate": 1.3232738741412093e-05, + "loss": 2.1679, + "step": 7638 + }, + { + "epoch": 1.55, + "learning_rate": 1.323110410556622e-05, + "loss": 2.1311, + "step": 7639 + }, + { + "epoch": 1.55, + "learning_rate": 1.3229469373314206e-05, + "loss": 2.1861, + "step": 7640 + }, + { + "epoch": 1.55, + "learning_rate": 1.3227834544704832e-05, + "loss": 2.0869, + "step": 7641 + }, + { + "epoch": 1.55, + "learning_rate": 1.3226199619786875e-05, + "loss": 2.0979, + "step": 7642 + }, + { + "epoch": 1.55, + "learning_rate": 1.322456459860911e-05, + "loss": 2.2223, + "step": 7643 + }, + { + "epoch": 1.55, + "learning_rate": 1.3222929481220335e-05, + "loss": 2.0776, + "step": 7644 + }, + { + "epoch": 1.55, + "learning_rate": 1.322129426766932e-05, + "loss": 2.1152, + "step": 7645 + }, + { + "epoch": 1.55, + "learning_rate": 1.3219658958004868e-05, + "loss": 2.2008, + "step": 7646 + }, + { + "epoch": 1.55, + "learning_rate": 1.3218023552275762e-05, + "loss": 2.1049, + "step": 7647 + }, + { + "epoch": 1.55, + "learning_rate": 1.3216388050530806e-05, + "loss": 2.1387, + "step": 7648 + }, + { + "epoch": 1.55, + "learning_rate": 1.3214752452818794e-05, + "loss": 2.1826, + "step": 7649 + }, + { + "epoch": 1.55, + "learning_rate": 1.3213116759188525e-05, + "loss": 2.2101, + "step": 7650 + }, + { + "epoch": 1.55, + "learning_rate": 1.3211480969688806e-05, + "loss": 2.1113, + "step": 7651 + }, + { + "epoch": 1.55, + "learning_rate": 1.3209845084368444e-05, + "loss": 2.0811, + "step": 7652 + }, + { + "epoch": 1.55, + "learning_rate": 1.3208209103276243e-05, + "loss": 2.1841, + "step": 7653 + }, + { + "epoch": 1.55, + "learning_rate": 1.3206573026461027e-05, + "loss": 2.061, + "step": 7654 + }, + { + "epoch": 1.55, + "learning_rate": 1.3204936853971604e-05, + "loss": 2.1399, + "step": 7655 + }, + { + "epoch": 1.55, + "learning_rate": 1.320330058585679e-05, + "loss": 2.1682, + "step": 7656 + }, + { + "epoch": 1.55, + "learning_rate": 1.3201664222165413e-05, + "loss": 2.0716, + "step": 7657 + }, + { + "epoch": 1.55, + "learning_rate": 1.3200027762946292e-05, + "loss": 2.1494, + "step": 7658 + }, + { + "epoch": 1.55, + "learning_rate": 1.3198391208248259e-05, + "loss": 2.2042, + "step": 7659 + }, + { + "epoch": 1.55, + "learning_rate": 1.3196754558120132e-05, + "loss": 2.1258, + "step": 7660 + }, + { + "epoch": 1.56, + "learning_rate": 1.3195117812610758e-05, + "loss": 2.0894, + "step": 7661 + }, + { + "epoch": 1.56, + "learning_rate": 1.3193480971768964e-05, + "loss": 2.1108, + "step": 7662 + }, + { + "epoch": 1.56, + "learning_rate": 1.3191844035643594e-05, + "loss": 2.1945, + "step": 7663 + }, + { + "epoch": 1.56, + "learning_rate": 1.3190207004283482e-05, + "loss": 2.1135, + "step": 7664 + }, + { + "epoch": 1.56, + "learning_rate": 1.3188569877737474e-05, + "loss": 2.16, + "step": 7665 + }, + { + "epoch": 1.56, + "learning_rate": 1.318693265605442e-05, + "loss": 2.1079, + "step": 7666 + }, + { + "epoch": 1.56, + "learning_rate": 1.3185295339283166e-05, + "loss": 2.1092, + "step": 7667 + }, + { + "epoch": 1.56, + "learning_rate": 1.318365792747257e-05, + "loss": 2.0937, + "step": 7668 + }, + { + "epoch": 1.56, + "learning_rate": 1.318202042067148e-05, + "loss": 2.1184, + "step": 7669 + }, + { + "epoch": 1.56, + "learning_rate": 1.318038281892876e-05, + "loss": 2.0756, + "step": 7670 + }, + { + "epoch": 1.56, + "learning_rate": 1.3178745122293268e-05, + "loss": 2.0872, + "step": 7671 + }, + { + "epoch": 1.56, + "learning_rate": 1.317710733081387e-05, + "loss": 2.1795, + "step": 7672 + }, + { + "epoch": 1.56, + "learning_rate": 1.3175469444539428e-05, + "loss": 2.1629, + "step": 7673 + }, + { + "epoch": 1.56, + "learning_rate": 1.3173831463518818e-05, + "loss": 2.0724, + "step": 7674 + }, + { + "epoch": 1.56, + "learning_rate": 1.3172193387800904e-05, + "loss": 2.1271, + "step": 7675 + }, + { + "epoch": 1.56, + "learning_rate": 1.3170555217434571e-05, + "loss": 2.0843, + "step": 7676 + }, + { + "epoch": 1.56, + "learning_rate": 1.3168916952468689e-05, + "loss": 2.1632, + "step": 7677 + }, + { + "epoch": 1.56, + "learning_rate": 1.3167278592952143e-05, + "loss": 2.1014, + "step": 7678 + }, + { + "epoch": 1.56, + "learning_rate": 1.3165640138933814e-05, + "loss": 2.1471, + "step": 7679 + }, + { + "epoch": 1.56, + "learning_rate": 1.3164001590462592e-05, + "loss": 2.0815, + "step": 7680 + }, + { + "epoch": 1.56, + "learning_rate": 1.3162362947587361e-05, + "loss": 2.1882, + "step": 7681 + }, + { + "epoch": 1.56, + "learning_rate": 1.3160724210357017e-05, + "loss": 2.0952, + "step": 7682 + }, + { + "epoch": 1.56, + "learning_rate": 1.3159085378820454e-05, + "loss": 2.1362, + "step": 7683 + }, + { + "epoch": 1.56, + "learning_rate": 1.315744645302657e-05, + "loss": 2.1014, + "step": 7684 + }, + { + "epoch": 1.56, + "learning_rate": 1.3155807433024262e-05, + "loss": 2.1511, + "step": 7685 + }, + { + "epoch": 1.56, + "learning_rate": 1.3154168318862439e-05, + "loss": 2.1295, + "step": 7686 + }, + { + "epoch": 1.56, + "learning_rate": 1.315252911059e-05, + "loss": 2.1405, + "step": 7687 + }, + { + "epoch": 1.56, + "learning_rate": 1.3150889808255862e-05, + "loss": 2.0771, + "step": 7688 + }, + { + "epoch": 1.56, + "learning_rate": 1.3149250411908932e-05, + "loss": 2.1395, + "step": 7689 + }, + { + "epoch": 1.56, + "learning_rate": 1.3147610921598123e-05, + "loss": 2.0793, + "step": 7690 + }, + { + "epoch": 1.56, + "learning_rate": 1.3145971337372357e-05, + "loss": 2.1729, + "step": 7691 + }, + { + "epoch": 1.56, + "learning_rate": 1.3144331659280551e-05, + "loss": 2.1271, + "step": 7692 + }, + { + "epoch": 1.56, + "learning_rate": 1.3142691887371628e-05, + "loss": 2.1636, + "step": 7693 + }, + { + "epoch": 1.56, + "learning_rate": 1.3141052021694515e-05, + "loss": 2.1795, + "step": 7694 + }, + { + "epoch": 1.56, + "learning_rate": 1.3139412062298141e-05, + "loss": 2.1258, + "step": 7695 + }, + { + "epoch": 1.56, + "learning_rate": 1.3137772009231436e-05, + "loss": 2.1139, + "step": 7696 + }, + { + "epoch": 1.56, + "learning_rate": 1.3136131862543331e-05, + "loss": 2.1159, + "step": 7697 + }, + { + "epoch": 1.56, + "learning_rate": 1.313449162228277e-05, + "loss": 2.0615, + "step": 7698 + }, + { + "epoch": 1.56, + "learning_rate": 1.3132851288498687e-05, + "loss": 2.1348, + "step": 7699 + }, + { + "epoch": 1.56, + "learning_rate": 1.3131210861240027e-05, + "loss": 2.1705, + "step": 7700 + }, + { + "epoch": 1.56, + "learning_rate": 1.3129570340555736e-05, + "loss": 2.1079, + "step": 7701 + }, + { + "epoch": 1.56, + "learning_rate": 1.3127929726494758e-05, + "loss": 2.1143, + "step": 7702 + }, + { + "epoch": 1.56, + "learning_rate": 1.3126289019106046e-05, + "loss": 2.0878, + "step": 7703 + }, + { + "epoch": 1.56, + "learning_rate": 1.3124648218438555e-05, + "loss": 2.0743, + "step": 7704 + }, + { + "epoch": 1.56, + "learning_rate": 1.3123007324541242e-05, + "loss": 2.0976, + "step": 7705 + }, + { + "epoch": 1.56, + "learning_rate": 1.3121366337463064e-05, + "loss": 2.1714, + "step": 7706 + }, + { + "epoch": 1.56, + "learning_rate": 1.3119725257252982e-05, + "loss": 2.0873, + "step": 7707 + }, + { + "epoch": 1.56, + "learning_rate": 1.3118084083959963e-05, + "loss": 2.0839, + "step": 7708 + }, + { + "epoch": 1.56, + "learning_rate": 1.3116442817632974e-05, + "loss": 2.1377, + "step": 7709 + }, + { + "epoch": 1.57, + "learning_rate": 1.3114801458320988e-05, + "loss": 2.2557, + "step": 7710 + }, + { + "epoch": 1.57, + "learning_rate": 1.3113160006072973e-05, + "loss": 2.0734, + "step": 7711 + }, + { + "epoch": 1.57, + "learning_rate": 1.3111518460937907e-05, + "loss": 2.0465, + "step": 7712 + }, + { + "epoch": 1.57, + "learning_rate": 1.3109876822964766e-05, + "loss": 2.134, + "step": 7713 + }, + { + "epoch": 1.57, + "learning_rate": 1.310823509220254e-05, + "loss": 2.0822, + "step": 7714 + }, + { + "epoch": 1.57, + "learning_rate": 1.3106593268700203e-05, + "loss": 2.1191, + "step": 7715 + }, + { + "epoch": 1.57, + "learning_rate": 1.3104951352506747e-05, + "loss": 2.1059, + "step": 7716 + }, + { + "epoch": 1.57, + "learning_rate": 1.3103309343671162e-05, + "loss": 2.1031, + "step": 7717 + }, + { + "epoch": 1.57, + "learning_rate": 1.310166724224244e-05, + "loss": 2.201, + "step": 7718 + }, + { + "epoch": 1.57, + "learning_rate": 1.3100025048269573e-05, + "loss": 2.1486, + "step": 7719 + }, + { + "epoch": 1.57, + "learning_rate": 1.3098382761801567e-05, + "loss": 2.1014, + "step": 7720 + }, + { + "epoch": 1.57, + "learning_rate": 1.3096740382887414e-05, + "loss": 2.0848, + "step": 7721 + }, + { + "epoch": 1.57, + "learning_rate": 1.3095097911576119e-05, + "loss": 2.1774, + "step": 7722 + }, + { + "epoch": 1.57, + "learning_rate": 1.3093455347916692e-05, + "loss": 2.1211, + "step": 7723 + }, + { + "epoch": 1.57, + "learning_rate": 1.3091812691958138e-05, + "loss": 2.1008, + "step": 7724 + }, + { + "epoch": 1.57, + "learning_rate": 1.3090169943749475e-05, + "loss": 2.0765, + "step": 7725 + }, + { + "epoch": 1.57, + "learning_rate": 1.3088527103339713e-05, + "loss": 2.0915, + "step": 7726 + }, + { + "epoch": 1.57, + "learning_rate": 1.308688417077787e-05, + "loss": 2.1991, + "step": 7727 + }, + { + "epoch": 1.57, + "learning_rate": 1.308524114611296e-05, + "loss": 2.1759, + "step": 7728 + }, + { + "epoch": 1.57, + "learning_rate": 1.3083598029394016e-05, + "loss": 2.1274, + "step": 7729 + }, + { + "epoch": 1.57, + "learning_rate": 1.308195482067006e-05, + "loss": 2.1594, + "step": 7730 + }, + { + "epoch": 1.57, + "learning_rate": 1.3080311519990117e-05, + "loss": 2.1676, + "step": 7731 + }, + { + "epoch": 1.57, + "learning_rate": 1.307866812740322e-05, + "loss": 2.1948, + "step": 7732 + }, + { + "epoch": 1.57, + "learning_rate": 1.3077024642958403e-05, + "loss": 2.163, + "step": 7733 + }, + { + "epoch": 1.57, + "learning_rate": 1.3075381066704705e-05, + "loss": 2.1173, + "step": 7734 + }, + { + "epoch": 1.57, + "learning_rate": 1.307373739869116e-05, + "loss": 2.2165, + "step": 7735 + }, + { + "epoch": 1.57, + "learning_rate": 1.3072093638966812e-05, + "loss": 2.2504, + "step": 7736 + }, + { + "epoch": 1.57, + "learning_rate": 1.307044978758071e-05, + "loss": 2.1141, + "step": 7737 + }, + { + "epoch": 1.57, + "learning_rate": 1.3068805844581893e-05, + "loss": 2.1448, + "step": 7738 + }, + { + "epoch": 1.57, + "learning_rate": 1.3067161810019418e-05, + "loss": 2.0031, + "step": 7739 + }, + { + "epoch": 1.57, + "learning_rate": 1.3065517683942337e-05, + "loss": 2.2619, + "step": 7740 + }, + { + "epoch": 1.57, + "learning_rate": 1.3063873466399705e-05, + "loss": 2.1424, + "step": 7741 + }, + { + "epoch": 1.57, + "learning_rate": 1.306222915744058e-05, + "loss": 2.112, + "step": 7742 + }, + { + "epoch": 1.57, + "learning_rate": 1.306058475711402e-05, + "loss": 2.1897, + "step": 7743 + }, + { + "epoch": 1.57, + "learning_rate": 1.3058940265469093e-05, + "loss": 2.1352, + "step": 7744 + }, + { + "epoch": 1.57, + "learning_rate": 1.3057295682554867e-05, + "loss": 2.1172, + "step": 7745 + }, + { + "epoch": 1.57, + "learning_rate": 1.3055651008420407e-05, + "loss": 2.1723, + "step": 7746 + }, + { + "epoch": 1.57, + "learning_rate": 1.3054006243114785e-05, + "loss": 2.0035, + "step": 7747 + }, + { + "epoch": 1.57, + "learning_rate": 1.3052361386687075e-05, + "loss": 2.0465, + "step": 7748 + }, + { + "epoch": 1.57, + "learning_rate": 1.3050716439186362e-05, + "loss": 2.1758, + "step": 7749 + }, + { + "epoch": 1.57, + "learning_rate": 1.3049071400661716e-05, + "loss": 2.119, + "step": 7750 + }, + { + "epoch": 1.57, + "learning_rate": 1.304742627116223e-05, + "loss": 2.1357, + "step": 7751 + }, + { + "epoch": 1.57, + "learning_rate": 1.3045781050736982e-05, + "loss": 2.1771, + "step": 7752 + }, + { + "epoch": 1.57, + "learning_rate": 1.304413573943506e-05, + "loss": 2.1539, + "step": 7753 + }, + { + "epoch": 1.57, + "learning_rate": 1.3042490337305557e-05, + "loss": 2.2192, + "step": 7754 + }, + { + "epoch": 1.57, + "learning_rate": 1.3040844844397573e-05, + "loss": 2.1127, + "step": 7755 + }, + { + "epoch": 1.57, + "learning_rate": 1.3039199260760193e-05, + "loss": 2.1453, + "step": 7756 + }, + { + "epoch": 1.57, + "learning_rate": 1.3037553586442524e-05, + "loss": 2.1249, + "step": 7757 + }, + { + "epoch": 1.57, + "learning_rate": 1.3035907821493665e-05, + "loss": 2.1551, + "step": 7758 + }, + { + "epoch": 1.57, + "learning_rate": 1.3034261965962722e-05, + "loss": 2.1159, + "step": 7759 + }, + { + "epoch": 1.58, + "learning_rate": 1.3032616019898798e-05, + "loss": 2.1518, + "step": 7760 + }, + { + "epoch": 1.58, + "learning_rate": 1.3030969983351012e-05, + "loss": 2.1112, + "step": 7761 + }, + { + "epoch": 1.58, + "learning_rate": 1.3029323856368467e-05, + "loss": 2.1363, + "step": 7762 + }, + { + "epoch": 1.58, + "learning_rate": 1.3027677639000281e-05, + "loss": 2.0733, + "step": 7763 + }, + { + "epoch": 1.58, + "learning_rate": 1.3026031331295574e-05, + "loss": 2.0996, + "step": 7764 + }, + { + "epoch": 1.58, + "learning_rate": 1.302438493330347e-05, + "loss": 2.1316, + "step": 7765 + }, + { + "epoch": 1.58, + "learning_rate": 1.3022738445073085e-05, + "loss": 2.0711, + "step": 7766 + }, + { + "epoch": 1.58, + "learning_rate": 1.302109186665355e-05, + "loss": 2.0698, + "step": 7767 + }, + { + "epoch": 1.58, + "learning_rate": 1.3019445198093987e-05, + "loss": 2.2169, + "step": 7768 + }, + { + "epoch": 1.58, + "learning_rate": 1.3017798439443538e-05, + "loss": 2.1636, + "step": 7769 + }, + { + "epoch": 1.58, + "learning_rate": 1.3016151590751332e-05, + "loss": 2.0908, + "step": 7770 + }, + { + "epoch": 1.58, + "learning_rate": 1.3014504652066505e-05, + "loss": 2.1584, + "step": 7771 + }, + { + "epoch": 1.58, + "learning_rate": 1.3012857623438194e-05, + "loss": 2.085, + "step": 7772 + }, + { + "epoch": 1.58, + "learning_rate": 1.3011210504915548e-05, + "loss": 2.1068, + "step": 7773 + }, + { + "epoch": 1.58, + "learning_rate": 1.3009563296547709e-05, + "loss": 2.128, + "step": 7774 + }, + { + "epoch": 1.58, + "learning_rate": 1.3007915998383824e-05, + "loss": 2.2014, + "step": 7775 + }, + { + "epoch": 1.58, + "learning_rate": 1.300626861047304e-05, + "loss": 2.0759, + "step": 7776 + }, + { + "epoch": 1.58, + "learning_rate": 1.3004621132864514e-05, + "loss": 2.0906, + "step": 7777 + }, + { + "epoch": 1.58, + "learning_rate": 1.3002973565607404e-05, + "loss": 2.1068, + "step": 7778 + }, + { + "epoch": 1.58, + "learning_rate": 1.3001325908750863e-05, + "loss": 2.1428, + "step": 7779 + }, + { + "epoch": 1.58, + "learning_rate": 1.2999678162344056e-05, + "loss": 2.2051, + "step": 7780 + }, + { + "epoch": 1.58, + "learning_rate": 1.2998030326436144e-05, + "loss": 2.1355, + "step": 7781 + }, + { + "epoch": 1.58, + "learning_rate": 1.2996382401076293e-05, + "loss": 2.1635, + "step": 7782 + }, + { + "epoch": 1.58, + "learning_rate": 1.2994734386313674e-05, + "loss": 2.1315, + "step": 7783 + }, + { + "epoch": 1.58, + "learning_rate": 1.2993086282197455e-05, + "loss": 2.0738, + "step": 7784 + }, + { + "epoch": 1.58, + "learning_rate": 1.2991438088776818e-05, + "loss": 2.0958, + "step": 7785 + }, + { + "epoch": 1.58, + "learning_rate": 1.2989789806100934e-05, + "loss": 2.1009, + "step": 7786 + }, + { + "epoch": 1.58, + "learning_rate": 1.2988141434218981e-05, + "loss": 2.1736, + "step": 7787 + }, + { + "epoch": 1.58, + "learning_rate": 1.2986492973180148e-05, + "loss": 2.2146, + "step": 7788 + }, + { + "epoch": 1.58, + "learning_rate": 1.2984844423033614e-05, + "loss": 2.1393, + "step": 7789 + }, + { + "epoch": 1.58, + "learning_rate": 1.2983195783828568e-05, + "loss": 2.1919, + "step": 7790 + }, + { + "epoch": 1.58, + "learning_rate": 1.2981547055614202e-05, + "loss": 2.1618, + "step": 7791 + }, + { + "epoch": 1.58, + "learning_rate": 1.2979898238439706e-05, + "loss": 2.1261, + "step": 7792 + }, + { + "epoch": 1.58, + "learning_rate": 1.297824933235428e-05, + "loss": 2.1412, + "step": 7793 + }, + { + "epoch": 1.58, + "learning_rate": 1.297660033740712e-05, + "loss": 2.1781, + "step": 7794 + }, + { + "epoch": 1.58, + "learning_rate": 1.2974951253647427e-05, + "loss": 2.1345, + "step": 7795 + }, + { + "epoch": 1.58, + "learning_rate": 1.2973302081124401e-05, + "loss": 2.1372, + "step": 7796 + }, + { + "epoch": 1.58, + "learning_rate": 1.2971652819887252e-05, + "loss": 2.1425, + "step": 7797 + }, + { + "epoch": 1.58, + "learning_rate": 1.297000346998519e-05, + "loss": 2.1828, + "step": 7798 + }, + { + "epoch": 1.58, + "learning_rate": 1.2968354031467425e-05, + "loss": 2.0962, + "step": 7799 + }, + { + "epoch": 1.58, + "learning_rate": 1.296670450438317e-05, + "loss": 2.0984, + "step": 7800 + }, + { + "epoch": 1.58, + "learning_rate": 1.2965054888781642e-05, + "loss": 2.1292, + "step": 7801 + }, + { + "epoch": 1.58, + "learning_rate": 1.296340518471206e-05, + "loss": 2.1495, + "step": 7802 + }, + { + "epoch": 1.58, + "learning_rate": 1.2961755392223652e-05, + "loss": 2.1909, + "step": 7803 + }, + { + "epoch": 1.58, + "learning_rate": 1.2960105511365632e-05, + "loss": 2.1236, + "step": 7804 + }, + { + "epoch": 1.58, + "learning_rate": 1.2958455542187238e-05, + "loss": 2.1316, + "step": 7805 + }, + { + "epoch": 1.58, + "learning_rate": 1.2956805484737694e-05, + "loss": 2.122, + "step": 7806 + }, + { + "epoch": 1.58, + "learning_rate": 1.2955155339066233e-05, + "loss": 2.1418, + "step": 7807 + }, + { + "epoch": 1.58, + "learning_rate": 1.2953505105222088e-05, + "loss": 2.175, + "step": 7808 + }, + { + "epoch": 1.59, + "learning_rate": 1.2951854783254505e-05, + "loss": 2.0761, + "step": 7809 + }, + { + "epoch": 1.59, + "learning_rate": 1.2950204373212718e-05, + "loss": 2.1648, + "step": 7810 + }, + { + "epoch": 1.59, + "learning_rate": 1.294855387514597e-05, + "loss": 2.1352, + "step": 7811 + }, + { + "epoch": 1.59, + "learning_rate": 1.2946903289103505e-05, + "loss": 2.1095, + "step": 7812 + }, + { + "epoch": 1.59, + "learning_rate": 1.2945252615134581e-05, + "loss": 2.0834, + "step": 7813 + }, + { + "epoch": 1.59, + "learning_rate": 1.2943601853288438e-05, + "loss": 2.1927, + "step": 7814 + }, + { + "epoch": 1.59, + "learning_rate": 1.2941951003614339e-05, + "loss": 2.122, + "step": 7815 + }, + { + "epoch": 1.59, + "learning_rate": 1.2940300066161532e-05, + "loss": 2.1108, + "step": 7816 + }, + { + "epoch": 1.59, + "learning_rate": 1.2938649040979279e-05, + "loss": 2.1009, + "step": 7817 + }, + { + "epoch": 1.59, + "learning_rate": 1.2936997928116841e-05, + "loss": 2.1856, + "step": 7818 + }, + { + "epoch": 1.59, + "learning_rate": 1.2935346727623488e-05, + "loss": 2.1395, + "step": 7819 + }, + { + "epoch": 1.59, + "learning_rate": 1.2933695439548475e-05, + "loss": 2.1072, + "step": 7820 + }, + { + "epoch": 1.59, + "learning_rate": 1.2932044063941082e-05, + "loss": 2.1479, + "step": 7821 + }, + { + "epoch": 1.59, + "learning_rate": 1.2930392600850574e-05, + "loss": 2.1368, + "step": 7822 + }, + { + "epoch": 1.59, + "learning_rate": 1.2928741050326233e-05, + "loss": 2.1769, + "step": 7823 + }, + { + "epoch": 1.59, + "learning_rate": 1.2927089412417327e-05, + "loss": 2.2169, + "step": 7824 + }, + { + "epoch": 1.59, + "learning_rate": 1.2925437687173144e-05, + "loss": 2.1058, + "step": 7825 + }, + { + "epoch": 1.59, + "learning_rate": 1.292378587464296e-05, + "loss": 2.169, + "step": 7826 + }, + { + "epoch": 1.59, + "learning_rate": 1.2922133974876063e-05, + "loss": 2.0944, + "step": 7827 + }, + { + "epoch": 1.59, + "learning_rate": 1.2920481987921737e-05, + "loss": 2.148, + "step": 7828 + }, + { + "epoch": 1.59, + "learning_rate": 1.2918829913829281e-05, + "loss": 2.1105, + "step": 7829 + }, + { + "epoch": 1.59, + "learning_rate": 1.291717775264798e-05, + "loss": 2.1045, + "step": 7830 + }, + { + "epoch": 1.59, + "learning_rate": 1.2915525504427133e-05, + "loss": 2.0594, + "step": 7831 + }, + { + "epoch": 1.59, + "learning_rate": 1.2913873169216034e-05, + "loss": 2.2204, + "step": 7832 + }, + { + "epoch": 1.59, + "learning_rate": 1.2912220747063985e-05, + "loss": 2.1132, + "step": 7833 + }, + { + "epoch": 1.59, + "learning_rate": 1.2910568238020296e-05, + "loss": 2.0681, + "step": 7834 + }, + { + "epoch": 1.59, + "learning_rate": 1.2908915642134263e-05, + "loss": 1.9575, + "step": 7835 + }, + { + "epoch": 1.59, + "learning_rate": 1.29072629594552e-05, + "loss": 2.0805, + "step": 7836 + }, + { + "epoch": 1.59, + "learning_rate": 1.2905610190032412e-05, + "loss": 2.1928, + "step": 7837 + }, + { + "epoch": 1.59, + "learning_rate": 1.290395733391522e-05, + "loss": 2.1999, + "step": 7838 + }, + { + "epoch": 1.59, + "learning_rate": 1.2902304391152938e-05, + "loss": 2.1593, + "step": 7839 + }, + { + "epoch": 1.59, + "learning_rate": 1.290065136179488e-05, + "loss": 2.104, + "step": 7840 + }, + { + "epoch": 1.59, + "learning_rate": 1.2898998245890372e-05, + "loss": 2.1324, + "step": 7841 + }, + { + "epoch": 1.59, + "learning_rate": 1.2897345043488737e-05, + "loss": 2.149, + "step": 7842 + }, + { + "epoch": 1.59, + "learning_rate": 1.2895691754639303e-05, + "loss": 2.0993, + "step": 7843 + }, + { + "epoch": 1.59, + "learning_rate": 1.2894038379391393e-05, + "loss": 2.1913, + "step": 7844 + }, + { + "epoch": 1.59, + "learning_rate": 1.2892384917794347e-05, + "loss": 2.1457, + "step": 7845 + }, + { + "epoch": 1.59, + "learning_rate": 1.2890731369897496e-05, + "loss": 2.1672, + "step": 7846 + }, + { + "epoch": 1.59, + "learning_rate": 1.288907773575017e-05, + "loss": 2.074, + "step": 7847 + }, + { + "epoch": 1.59, + "learning_rate": 1.2887424015401717e-05, + "loss": 2.1681, + "step": 7848 + }, + { + "epoch": 1.59, + "learning_rate": 1.2885770208901475e-05, + "loss": 2.2018, + "step": 7849 + }, + { + "epoch": 1.59, + "learning_rate": 1.2884116316298792e-05, + "loss": 2.1501, + "step": 7850 + }, + { + "epoch": 1.59, + "learning_rate": 1.2882462337643011e-05, + "loss": 2.1188, + "step": 7851 + }, + { + "epoch": 1.59, + "learning_rate": 1.288080827298348e-05, + "loss": 2.1187, + "step": 7852 + }, + { + "epoch": 1.59, + "learning_rate": 1.2879154122369557e-05, + "loss": 2.1316, + "step": 7853 + }, + { + "epoch": 1.59, + "learning_rate": 1.2877499885850596e-05, + "loss": 2.0754, + "step": 7854 + }, + { + "epoch": 1.59, + "learning_rate": 1.287584556347595e-05, + "loss": 2.0744, + "step": 7855 + }, + { + "epoch": 1.59, + "learning_rate": 1.2874191155294981e-05, + "loss": 2.0526, + "step": 7856 + }, + { + "epoch": 1.59, + "learning_rate": 1.2872536661357046e-05, + "loss": 2.2006, + "step": 7857 + }, + { + "epoch": 1.6, + "learning_rate": 1.2870882081711522e-05, + "loss": 2.2172, + "step": 7858 + }, + { + "epoch": 1.6, + "learning_rate": 1.2869227416407771e-05, + "loss": 2.0754, + "step": 7859 + }, + { + "epoch": 1.6, + "learning_rate": 1.286757266549516e-05, + "loss": 2.1124, + "step": 7860 + }, + { + "epoch": 1.6, + "learning_rate": 1.2865917829023063e-05, + "loss": 2.1422, + "step": 7861 + }, + { + "epoch": 1.6, + "learning_rate": 1.2864262907040855e-05, + "loss": 2.0972, + "step": 7862 + }, + { + "epoch": 1.6, + "learning_rate": 1.2862607899597915e-05, + "loss": 2.0721, + "step": 7863 + }, + { + "epoch": 1.6, + "learning_rate": 1.2860952806743623e-05, + "loss": 2.1616, + "step": 7864 + }, + { + "epoch": 1.6, + "learning_rate": 1.285929762852736e-05, + "loss": 2.1072, + "step": 7865 + }, + { + "epoch": 1.6, + "learning_rate": 1.2857642364998515e-05, + "loss": 2.1774, + "step": 7866 + }, + { + "epoch": 1.6, + "learning_rate": 1.2855987016206474e-05, + "loss": 2.0885, + "step": 7867 + }, + { + "epoch": 1.6, + "learning_rate": 1.2854331582200627e-05, + "loss": 2.1778, + "step": 7868 + }, + { + "epoch": 1.6, + "learning_rate": 1.2852676063030369e-05, + "loss": 2.187, + "step": 7869 + }, + { + "epoch": 1.6, + "learning_rate": 1.2851020458745097e-05, + "loss": 2.1232, + "step": 7870 + }, + { + "epoch": 1.6, + "learning_rate": 1.2849364769394201e-05, + "loss": 2.0524, + "step": 7871 + }, + { + "epoch": 1.6, + "learning_rate": 1.2847708995027087e-05, + "loss": 2.1352, + "step": 7872 + }, + { + "epoch": 1.6, + "learning_rate": 1.2846053135693159e-05, + "loss": 2.1099, + "step": 7873 + }, + { + "epoch": 1.6, + "learning_rate": 1.2844397191441825e-05, + "loss": 2.0908, + "step": 7874 + }, + { + "epoch": 1.6, + "learning_rate": 1.2842741162322487e-05, + "loss": 2.0724, + "step": 7875 + }, + { + "epoch": 1.6, + "learning_rate": 1.2841085048384557e-05, + "loss": 2.1108, + "step": 7876 + }, + { + "epoch": 1.6, + "learning_rate": 1.2839428849677453e-05, + "loss": 2.1127, + "step": 7877 + }, + { + "epoch": 1.6, + "learning_rate": 1.2837772566250588e-05, + "loss": 2.1112, + "step": 7878 + }, + { + "epoch": 1.6, + "learning_rate": 1.2836116198153381e-05, + "loss": 2.1512, + "step": 7879 + }, + { + "epoch": 1.6, + "learning_rate": 1.283445974543525e-05, + "loss": 2.0342, + "step": 7880 + }, + { + "epoch": 1.6, + "learning_rate": 1.283280320814562e-05, + "loss": 2.073, + "step": 7881 + }, + { + "epoch": 1.6, + "learning_rate": 1.2831146586333918e-05, + "loss": 2.1678, + "step": 7882 + }, + { + "epoch": 1.6, + "learning_rate": 1.2829489880049575e-05, + "loss": 2.1149, + "step": 7883 + }, + { + "epoch": 1.6, + "learning_rate": 1.2827833089342016e-05, + "loss": 2.1729, + "step": 7884 + }, + { + "epoch": 1.6, + "learning_rate": 1.2826176214260677e-05, + "loss": 2.0736, + "step": 7885 + }, + { + "epoch": 1.6, + "learning_rate": 1.2824519254854996e-05, + "loss": 2.164, + "step": 7886 + }, + { + "epoch": 1.6, + "learning_rate": 1.2822862211174412e-05, + "loss": 2.0595, + "step": 7887 + }, + { + "epoch": 1.6, + "learning_rate": 1.2821205083268362e-05, + "loss": 2.2282, + "step": 7888 + }, + { + "epoch": 1.6, + "learning_rate": 1.281954787118629e-05, + "loss": 2.1634, + "step": 7889 + }, + { + "epoch": 1.6, + "learning_rate": 1.2817890574977648e-05, + "loss": 2.1049, + "step": 7890 + }, + { + "epoch": 1.6, + "learning_rate": 1.2816233194691876e-05, + "loss": 2.1352, + "step": 7891 + }, + { + "epoch": 1.6, + "learning_rate": 1.2814575730378429e-05, + "loss": 2.2282, + "step": 7892 + }, + { + "epoch": 1.6, + "learning_rate": 1.2812918182086764e-05, + "loss": 2.1262, + "step": 7893 + }, + { + "epoch": 1.6, + "learning_rate": 1.2811260549866336e-05, + "loss": 2.0583, + "step": 7894 + }, + { + "epoch": 1.6, + "learning_rate": 1.28096028337666e-05, + "loss": 2.1376, + "step": 7895 + }, + { + "epoch": 1.6, + "learning_rate": 1.2807945033837013e-05, + "loss": 2.1443, + "step": 7896 + }, + { + "epoch": 1.6, + "learning_rate": 1.280628715012705e-05, + "loss": 2.1609, + "step": 7897 + }, + { + "epoch": 1.6, + "learning_rate": 1.2804629182686172e-05, + "loss": 2.0998, + "step": 7898 + }, + { + "epoch": 1.6, + "learning_rate": 1.2802971131563849e-05, + "loss": 2.0918, + "step": 7899 + }, + { + "epoch": 1.6, + "learning_rate": 1.2801312996809547e-05, + "loss": 2.0879, + "step": 7900 + }, + { + "epoch": 1.6, + "learning_rate": 1.2799654778472741e-05, + "loss": 2.1377, + "step": 7901 + }, + { + "epoch": 1.6, + "learning_rate": 1.2797996476602913e-05, + "loss": 2.1623, + "step": 7902 + }, + { + "epoch": 1.6, + "learning_rate": 1.2796338091249537e-05, + "loss": 2.0944, + "step": 7903 + }, + { + "epoch": 1.6, + "learning_rate": 1.2794679622462094e-05, + "loss": 2.1233, + "step": 7904 + }, + { + "epoch": 1.6, + "learning_rate": 1.2793021070290065e-05, + "loss": 2.114, + "step": 7905 + }, + { + "epoch": 1.6, + "learning_rate": 1.2791362434782944e-05, + "loss": 2.1321, + "step": 7906 + }, + { + "epoch": 1.6, + "learning_rate": 1.2789703715990212e-05, + "loss": 2.175, + "step": 7907 + }, + { + "epoch": 1.61, + "learning_rate": 1.2788044913961366e-05, + "loss": 2.1585, + "step": 7908 + }, + { + "epoch": 1.61, + "learning_rate": 1.2786386028745894e-05, + "loss": 2.1811, + "step": 7909 + }, + { + "epoch": 1.61, + "learning_rate": 1.2784727060393294e-05, + "loss": 2.135, + "step": 7910 + }, + { + "epoch": 1.61, + "learning_rate": 1.2783068008953066e-05, + "loss": 2.1297, + "step": 7911 + }, + { + "epoch": 1.61, + "learning_rate": 1.278140887447471e-05, + "loss": 2.0902, + "step": 7912 + }, + { + "epoch": 1.61, + "learning_rate": 1.2779749657007727e-05, + "loss": 2.1771, + "step": 7913 + }, + { + "epoch": 1.61, + "learning_rate": 1.277809035660163e-05, + "loss": 2.141, + "step": 7914 + }, + { + "epoch": 1.61, + "learning_rate": 1.2776430973305918e-05, + "loss": 2.1867, + "step": 7915 + }, + { + "epoch": 1.61, + "learning_rate": 1.2774771507170106e-05, + "loss": 2.116, + "step": 7916 + }, + { + "epoch": 1.61, + "learning_rate": 1.277311195824371e-05, + "loss": 2.1396, + "step": 7917 + }, + { + "epoch": 1.61, + "learning_rate": 1.2771452326576243e-05, + "loss": 2.0416, + "step": 7918 + }, + { + "epoch": 1.61, + "learning_rate": 1.2769792612217226e-05, + "loss": 2.0787, + "step": 7919 + }, + { + "epoch": 1.61, + "learning_rate": 1.2768132815216177e-05, + "loss": 2.1001, + "step": 7920 + }, + { + "epoch": 1.61, + "learning_rate": 1.2766472935622614e-05, + "loss": 2.088, + "step": 7921 + }, + { + "epoch": 1.61, + "learning_rate": 1.2764812973486074e-05, + "loss": 2.0949, + "step": 7922 + }, + { + "epoch": 1.61, + "learning_rate": 1.276315292885608e-05, + "loss": 2.1277, + "step": 7923 + }, + { + "epoch": 1.61, + "learning_rate": 1.276149280178216e-05, + "loss": 2.0989, + "step": 7924 + }, + { + "epoch": 1.61, + "learning_rate": 1.275983259231385e-05, + "loss": 2.2156, + "step": 7925 + }, + { + "epoch": 1.61, + "learning_rate": 1.2758172300500682e-05, + "loss": 2.1406, + "step": 7926 + }, + { + "epoch": 1.61, + "learning_rate": 1.2756511926392202e-05, + "loss": 2.121, + "step": 7927 + }, + { + "epoch": 1.61, + "learning_rate": 1.275485147003794e-05, + "loss": 2.1449, + "step": 7928 + }, + { + "epoch": 1.61, + "learning_rate": 1.2753190931487444e-05, + "loss": 2.1203, + "step": 7929 + }, + { + "epoch": 1.61, + "learning_rate": 1.2751530310790263e-05, + "loss": 2.1239, + "step": 7930 + }, + { + "epoch": 1.61, + "learning_rate": 1.274986960799594e-05, + "loss": 2.0475, + "step": 7931 + }, + { + "epoch": 1.61, + "learning_rate": 1.2748208823154024e-05, + "loss": 2.0704, + "step": 7932 + }, + { + "epoch": 1.61, + "learning_rate": 1.2746547956314071e-05, + "loss": 2.0787, + "step": 7933 + }, + { + "epoch": 1.61, + "learning_rate": 1.2744887007525637e-05, + "loss": 2.1537, + "step": 7934 + }, + { + "epoch": 1.61, + "learning_rate": 1.2743225976838277e-05, + "loss": 2.1135, + "step": 7935 + }, + { + "epoch": 1.61, + "learning_rate": 1.274156486430155e-05, + "loss": 2.0973, + "step": 7936 + }, + { + "epoch": 1.61, + "learning_rate": 1.2739903669965019e-05, + "loss": 2.1637, + "step": 7937 + }, + { + "epoch": 1.61, + "learning_rate": 1.2738242393878253e-05, + "loss": 2.1416, + "step": 7938 + }, + { + "epoch": 1.61, + "learning_rate": 1.2736581036090816e-05, + "loss": 2.0975, + "step": 7939 + }, + { + "epoch": 1.61, + "learning_rate": 1.2734919596652277e-05, + "loss": 2.0879, + "step": 7940 + }, + { + "epoch": 1.61, + "learning_rate": 1.273325807561221e-05, + "loss": 2.0918, + "step": 7941 + }, + { + "epoch": 1.61, + "learning_rate": 1.2731596473020187e-05, + "loss": 2.1493, + "step": 7942 + }, + { + "epoch": 1.61, + "learning_rate": 1.272993478892579e-05, + "loss": 2.1335, + "step": 7943 + }, + { + "epoch": 1.61, + "learning_rate": 1.2728273023378595e-05, + "loss": 2.133, + "step": 7944 + }, + { + "epoch": 1.61, + "learning_rate": 1.2726611176428185e-05, + "loss": 2.056, + "step": 7945 + }, + { + "epoch": 1.61, + "learning_rate": 1.272494924812414e-05, + "loss": 2.1021, + "step": 7946 + }, + { + "epoch": 1.61, + "learning_rate": 1.2723287238516053e-05, + "loss": 2.1561, + "step": 7947 + }, + { + "epoch": 1.61, + "learning_rate": 1.2721625147653511e-05, + "loss": 2.0499, + "step": 7948 + }, + { + "epoch": 1.61, + "learning_rate": 1.2719962975586104e-05, + "loss": 2.114, + "step": 7949 + }, + { + "epoch": 1.61, + "learning_rate": 1.2718300722363431e-05, + "loss": 2.1186, + "step": 7950 + }, + { + "epoch": 1.61, + "learning_rate": 1.271663838803508e-05, + "loss": 2.1746, + "step": 7951 + }, + { + "epoch": 1.61, + "learning_rate": 1.271497597265066e-05, + "loss": 2.0165, + "step": 7952 + }, + { + "epoch": 1.61, + "learning_rate": 1.2713313476259765e-05, + "loss": 2.1845, + "step": 7953 + }, + { + "epoch": 1.61, + "learning_rate": 1.2711650898912003e-05, + "loss": 2.162, + "step": 7954 + }, + { + "epoch": 1.61, + "learning_rate": 1.2709988240656972e-05, + "loss": 2.1268, + "step": 7955 + }, + { + "epoch": 1.61, + "learning_rate": 1.2708325501544292e-05, + "loss": 2.166, + "step": 7956 + }, + { + "epoch": 1.62, + "learning_rate": 1.2706662681623566e-05, + "loss": 2.0912, + "step": 7957 + }, + { + "epoch": 1.62, + "learning_rate": 1.2704999780944416e-05, + "loss": 2.0812, + "step": 7958 + }, + { + "epoch": 1.62, + "learning_rate": 1.2703336799556447e-05, + "loss": 2.1061, + "step": 7959 + }, + { + "epoch": 1.62, + "learning_rate": 1.2701673737509282e-05, + "loss": 2.1428, + "step": 7960 + }, + { + "epoch": 1.62, + "learning_rate": 1.270001059485254e-05, + "loss": 2.1029, + "step": 7961 + }, + { + "epoch": 1.62, + "learning_rate": 1.2698347371635852e-05, + "loss": 2.1292, + "step": 7962 + }, + { + "epoch": 1.62, + "learning_rate": 1.2696684067908833e-05, + "loss": 2.1414, + "step": 7963 + }, + { + "epoch": 1.62, + "learning_rate": 1.2695020683721118e-05, + "loss": 2.0944, + "step": 7964 + }, + { + "epoch": 1.62, + "learning_rate": 1.2693357219122326e-05, + "loss": 2.1665, + "step": 7965 + }, + { + "epoch": 1.62, + "learning_rate": 1.2691693674162107e-05, + "loss": 2.1082, + "step": 7966 + }, + { + "epoch": 1.62, + "learning_rate": 1.2690030048890085e-05, + "loss": 2.1396, + "step": 7967 + }, + { + "epoch": 1.62, + "learning_rate": 1.2688366343355899e-05, + "loss": 2.1133, + "step": 7968 + }, + { + "epoch": 1.62, + "learning_rate": 1.268670255760919e-05, + "loss": 2.1989, + "step": 7969 + }, + { + "epoch": 1.62, + "learning_rate": 1.2685038691699592e-05, + "loss": 2.1462, + "step": 7970 + }, + { + "epoch": 1.62, + "learning_rate": 1.2683374745676765e-05, + "loss": 2.1155, + "step": 7971 + }, + { + "epoch": 1.62, + "learning_rate": 1.2681710719590345e-05, + "loss": 2.1108, + "step": 7972 + }, + { + "epoch": 1.62, + "learning_rate": 1.2680046613489985e-05, + "loss": 2.1603, + "step": 7973 + }, + { + "epoch": 1.62, + "learning_rate": 1.2678382427425335e-05, + "loss": 2.2446, + "step": 7974 + }, + { + "epoch": 1.62, + "learning_rate": 1.2676718161446053e-05, + "loss": 2.1508, + "step": 7975 + }, + { + "epoch": 1.62, + "learning_rate": 1.267505381560179e-05, + "loss": 2.0979, + "step": 7976 + }, + { + "epoch": 1.62, + "learning_rate": 1.2673389389942208e-05, + "loss": 2.1311, + "step": 7977 + }, + { + "epoch": 1.62, + "learning_rate": 1.2671724884516972e-05, + "loss": 2.138, + "step": 7978 + }, + { + "epoch": 1.62, + "learning_rate": 1.267006029937574e-05, + "loss": 2.1219, + "step": 7979 + }, + { + "epoch": 1.62, + "learning_rate": 1.2668395634568175e-05, + "loss": 2.1445, + "step": 7980 + }, + { + "epoch": 1.62, + "learning_rate": 1.2666730890143955e-05, + "loss": 2.1431, + "step": 7981 + }, + { + "epoch": 1.62, + "learning_rate": 1.2665066066152745e-05, + "loss": 2.0563, + "step": 7982 + }, + { + "epoch": 1.62, + "learning_rate": 1.266340116264422e-05, + "loss": 2.1389, + "step": 7983 + }, + { + "epoch": 1.62, + "learning_rate": 1.2661736179668056e-05, + "loss": 2.0819, + "step": 7984 + }, + { + "epoch": 1.62, + "learning_rate": 1.2660071117273924e-05, + "loss": 2.1278, + "step": 7985 + }, + { + "epoch": 1.62, + "learning_rate": 1.2658405975511514e-05, + "loss": 2.1163, + "step": 7986 + }, + { + "epoch": 1.62, + "learning_rate": 1.2656740754430506e-05, + "loss": 2.1641, + "step": 7987 + }, + { + "epoch": 1.62, + "learning_rate": 1.2655075454080584e-05, + "loss": 2.1322, + "step": 7988 + }, + { + "epoch": 1.62, + "learning_rate": 1.2653410074511434e-05, + "loss": 2.1784, + "step": 7989 + }, + { + "epoch": 1.62, + "learning_rate": 1.2651744615772742e-05, + "loss": 2.1131, + "step": 7990 + }, + { + "epoch": 1.62, + "learning_rate": 1.2650079077914212e-05, + "loss": 2.0622, + "step": 7991 + }, + { + "epoch": 1.62, + "learning_rate": 1.264841346098553e-05, + "loss": 2.1703, + "step": 7992 + }, + { + "epoch": 1.62, + "learning_rate": 1.2646747765036396e-05, + "loss": 2.1359, + "step": 7993 + }, + { + "epoch": 1.62, + "learning_rate": 1.26450819901165e-05, + "loss": 2.0576, + "step": 7994 + }, + { + "epoch": 1.62, + "learning_rate": 1.2643416136275557e-05, + "loss": 2.1712, + "step": 7995 + }, + { + "epoch": 1.62, + "learning_rate": 1.2641750203563266e-05, + "loss": 2.13, + "step": 7996 + }, + { + "epoch": 1.62, + "learning_rate": 1.2640084192029329e-05, + "loss": 2.1508, + "step": 7997 + }, + { + "epoch": 1.62, + "learning_rate": 1.2638418101723461e-05, + "loss": 2.1475, + "step": 7998 + }, + { + "epoch": 1.62, + "learning_rate": 1.263675193269537e-05, + "loss": 2.1607, + "step": 7999 + }, + { + "epoch": 1.62, + "learning_rate": 1.2635085684994768e-05, + "loss": 2.1709, + "step": 8000 + }, + { + "epoch": 1.62, + "learning_rate": 1.263341935867137e-05, + "loss": 2.1857, + "step": 8001 + }, + { + "epoch": 1.62, + "learning_rate": 1.2631752953774901e-05, + "loss": 2.1349, + "step": 8002 + }, + { + "epoch": 1.62, + "learning_rate": 1.2630086470355073e-05, + "loss": 2.1594, + "step": 8003 + }, + { + "epoch": 1.62, + "learning_rate": 1.2628419908461613e-05, + "loss": 2.1024, + "step": 8004 + }, + { + "epoch": 1.62, + "learning_rate": 1.2626753268144243e-05, + "loss": 2.0389, + "step": 8005 + }, + { + "epoch": 1.63, + "learning_rate": 1.2625086549452694e-05, + "loss": 2.0645, + "step": 8006 + }, + { + "epoch": 1.63, + "learning_rate": 1.2623419752436694e-05, + "loss": 2.1904, + "step": 8007 + }, + { + "epoch": 1.63, + "learning_rate": 1.2621752877145977e-05, + "loss": 2.1388, + "step": 8008 + }, + { + "epoch": 1.63, + "learning_rate": 1.2620085923630274e-05, + "loss": 2.0864, + "step": 8009 + }, + { + "epoch": 1.63, + "learning_rate": 1.261841889193932e-05, + "loss": 2.1269, + "step": 8010 + }, + { + "epoch": 1.63, + "learning_rate": 1.2616751782122863e-05, + "loss": 2.1169, + "step": 8011 + }, + { + "epoch": 1.63, + "learning_rate": 1.2615084594230638e-05, + "loss": 2.1887, + "step": 8012 + }, + { + "epoch": 1.63, + "learning_rate": 1.261341732831239e-05, + "loss": 2.1275, + "step": 8013 + }, + { + "epoch": 1.63, + "learning_rate": 1.2611749984417863e-05, + "loss": 2.0409, + "step": 8014 + }, + { + "epoch": 1.63, + "learning_rate": 1.2610082562596804e-05, + "loss": 2.1228, + "step": 8015 + }, + { + "epoch": 1.63, + "learning_rate": 1.2608415062898971e-05, + "loss": 2.1149, + "step": 8016 + }, + { + "epoch": 1.63, + "learning_rate": 1.2606747485374112e-05, + "loss": 2.1083, + "step": 8017 + }, + { + "epoch": 1.63, + "learning_rate": 1.2605079830071983e-05, + "loss": 2.129, + "step": 8018 + }, + { + "epoch": 1.63, + "learning_rate": 1.260341209704234e-05, + "loss": 2.0977, + "step": 8019 + }, + { + "epoch": 1.63, + "learning_rate": 1.2601744286334945e-05, + "loss": 2.02, + "step": 8020 + }, + { + "epoch": 1.63, + "learning_rate": 1.2600076397999561e-05, + "loss": 2.1158, + "step": 8021 + }, + { + "epoch": 1.63, + "learning_rate": 1.2598408432085951e-05, + "loss": 2.1066, + "step": 8022 + }, + { + "epoch": 1.63, + "learning_rate": 1.2596740388643885e-05, + "loss": 2.105, + "step": 8023 + }, + { + "epoch": 1.63, + "learning_rate": 1.2595072267723127e-05, + "loss": 2.1034, + "step": 8024 + }, + { + "epoch": 1.63, + "learning_rate": 1.2593404069373452e-05, + "loss": 2.1401, + "step": 8025 + }, + { + "epoch": 1.63, + "learning_rate": 1.2591735793644633e-05, + "loss": 2.0672, + "step": 8026 + }, + { + "epoch": 1.63, + "learning_rate": 1.2590067440586449e-05, + "loss": 2.1237, + "step": 8027 + }, + { + "epoch": 1.63, + "learning_rate": 1.2588399010248674e-05, + "loss": 2.0367, + "step": 8028 + }, + { + "epoch": 1.63, + "learning_rate": 1.2586730502681092e-05, + "loss": 2.0712, + "step": 8029 + }, + { + "epoch": 1.63, + "learning_rate": 1.2585061917933482e-05, + "loss": 2.0957, + "step": 8030 + }, + { + "epoch": 1.63, + "learning_rate": 1.2583393256055632e-05, + "loss": 2.13, + "step": 8031 + }, + { + "epoch": 1.63, + "learning_rate": 1.2581724517097333e-05, + "loss": 2.1859, + "step": 8032 + }, + { + "epoch": 1.63, + "learning_rate": 1.2580055701108371e-05, + "loss": 2.1113, + "step": 8033 + }, + { + "epoch": 1.63, + "learning_rate": 1.257838680813854e-05, + "loss": 2.1205, + "step": 8034 + }, + { + "epoch": 1.63, + "learning_rate": 1.2576717838237629e-05, + "loss": 2.0755, + "step": 8035 + }, + { + "epoch": 1.63, + "learning_rate": 1.2575048791455445e-05, + "loss": 2.096, + "step": 8036 + }, + { + "epoch": 1.63, + "learning_rate": 1.257337966784178e-05, + "loss": 2.1394, + "step": 8037 + }, + { + "epoch": 1.63, + "learning_rate": 1.2571710467446436e-05, + "loss": 2.2226, + "step": 8038 + }, + { + "epoch": 1.63, + "learning_rate": 1.2570041190319222e-05, + "loss": 2.1045, + "step": 8039 + }, + { + "epoch": 1.63, + "learning_rate": 1.2568371836509936e-05, + "loss": 2.124, + "step": 8040 + }, + { + "epoch": 1.63, + "learning_rate": 1.2566702406068394e-05, + "loss": 2.0943, + "step": 8041 + }, + { + "epoch": 1.63, + "learning_rate": 1.25650328990444e-05, + "loss": 2.1615, + "step": 8042 + }, + { + "epoch": 1.63, + "learning_rate": 1.2563363315487773e-05, + "loss": 2.0393, + "step": 8043 + }, + { + "epoch": 1.63, + "learning_rate": 1.2561693655448321e-05, + "loss": 2.1622, + "step": 8044 + }, + { + "epoch": 1.63, + "learning_rate": 1.256002391897587e-05, + "loss": 2.1657, + "step": 8045 + }, + { + "epoch": 1.63, + "learning_rate": 1.2558354106120234e-05, + "loss": 2.1314, + "step": 8046 + }, + { + "epoch": 1.63, + "learning_rate": 1.2556684216931239e-05, + "loss": 2.1828, + "step": 8047 + }, + { + "epoch": 1.63, + "learning_rate": 1.2555014251458704e-05, + "loss": 2.1222, + "step": 8048 + }, + { + "epoch": 1.63, + "learning_rate": 1.2553344209752455e-05, + "loss": 2.1116, + "step": 8049 + }, + { + "epoch": 1.63, + "learning_rate": 1.2551674091862332e-05, + "loss": 2.1121, + "step": 8050 + }, + { + "epoch": 1.63, + "learning_rate": 1.2550003897838155e-05, + "loss": 2.1551, + "step": 8051 + }, + { + "epoch": 1.63, + "learning_rate": 1.2548333627729763e-05, + "loss": 2.2298, + "step": 8052 + }, + { + "epoch": 1.63, + "learning_rate": 1.2546663281586988e-05, + "loss": 2.1119, + "step": 8053 + }, + { + "epoch": 1.63, + "learning_rate": 1.2544992859459667e-05, + "loss": 2.1288, + "step": 8054 + }, + { + "epoch": 1.64, + "learning_rate": 1.2543322361397645e-05, + "loss": 2.1043, + "step": 8055 + }, + { + "epoch": 1.64, + "learning_rate": 1.2541651787450764e-05, + "loss": 2.0979, + "step": 8056 + }, + { + "epoch": 1.64, + "learning_rate": 1.2539981137668866e-05, + "loss": 2.1668, + "step": 8057 + }, + { + "epoch": 1.64, + "learning_rate": 1.2538310412101797e-05, + "loss": 2.077, + "step": 8058 + }, + { + "epoch": 1.64, + "learning_rate": 1.2536639610799408e-05, + "loss": 2.1612, + "step": 8059 + }, + { + "epoch": 1.64, + "learning_rate": 1.2534968733811554e-05, + "loss": 2.1641, + "step": 8060 + }, + { + "epoch": 1.64, + "learning_rate": 1.2533297781188085e-05, + "loss": 2.0957, + "step": 8061 + }, + { + "epoch": 1.64, + "learning_rate": 1.2531626752978856e-05, + "loss": 2.1693, + "step": 8062 + }, + { + "epoch": 1.64, + "learning_rate": 1.252995564923373e-05, + "loss": 2.1205, + "step": 8063 + }, + { + "epoch": 1.64, + "learning_rate": 1.2528284470002561e-05, + "loss": 2.0932, + "step": 8064 + }, + { + "epoch": 1.64, + "learning_rate": 1.2526613215335216e-05, + "loss": 2.1644, + "step": 8065 + }, + { + "epoch": 1.64, + "learning_rate": 1.252494188528156e-05, + "loss": 2.0963, + "step": 8066 + }, + { + "epoch": 1.64, + "learning_rate": 1.2523270479891463e-05, + "loss": 2.1383, + "step": 8067 + }, + { + "epoch": 1.64, + "learning_rate": 1.2521598999214788e-05, + "loss": 2.1342, + "step": 8068 + }, + { + "epoch": 1.64, + "learning_rate": 1.2519927443301407e-05, + "loss": 2.0462, + "step": 8069 + }, + { + "epoch": 1.64, + "learning_rate": 1.2518255812201203e-05, + "loss": 2.1084, + "step": 8070 + }, + { + "epoch": 1.64, + "learning_rate": 1.2516584105964045e-05, + "loss": 2.1032, + "step": 8071 + }, + { + "epoch": 1.64, + "learning_rate": 1.2514912324639811e-05, + "loss": 2.0959, + "step": 8072 + }, + { + "epoch": 1.64, + "learning_rate": 1.2513240468278387e-05, + "loss": 2.1432, + "step": 8073 + }, + { + "epoch": 1.64, + "learning_rate": 1.2511568536929648e-05, + "loss": 2.1322, + "step": 8074 + }, + { + "epoch": 1.64, + "learning_rate": 1.2509896530643488e-05, + "loss": 2.1043, + "step": 8075 + }, + { + "epoch": 1.64, + "learning_rate": 1.250822444946979e-05, + "loss": 2.1496, + "step": 8076 + }, + { + "epoch": 1.64, + "learning_rate": 1.2506552293458444e-05, + "loss": 2.147, + "step": 8077 + }, + { + "epoch": 1.64, + "learning_rate": 1.2504880062659342e-05, + "loss": 2.0841, + "step": 8078 + }, + { + "epoch": 1.64, + "learning_rate": 1.2503207757122375e-05, + "loss": 2.1342, + "step": 8079 + }, + { + "epoch": 1.64, + "learning_rate": 1.2501535376897448e-05, + "loss": 2.084, + "step": 8080 + }, + { + "epoch": 1.64, + "learning_rate": 1.2499862922034453e-05, + "loss": 2.1444, + "step": 8081 + }, + { + "epoch": 1.64, + "learning_rate": 1.2498190392583292e-05, + "loss": 2.1654, + "step": 8082 + }, + { + "epoch": 1.64, + "learning_rate": 1.2496517788593869e-05, + "loss": 2.1719, + "step": 8083 + }, + { + "epoch": 1.64, + "learning_rate": 1.2494845110116085e-05, + "loss": 2.1557, + "step": 8084 + }, + { + "epoch": 1.64, + "learning_rate": 1.2493172357199856e-05, + "loss": 2.0653, + "step": 8085 + }, + { + "epoch": 1.64, + "learning_rate": 1.2491499529895083e-05, + "loss": 2.1465, + "step": 8086 + }, + { + "epoch": 1.64, + "learning_rate": 1.2489826628251687e-05, + "loss": 2.1459, + "step": 8087 + }, + { + "epoch": 1.64, + "learning_rate": 1.2488153652319575e-05, + "loss": 2.1023, + "step": 8088 + }, + { + "epoch": 1.64, + "learning_rate": 1.2486480602148665e-05, + "loss": 2.1553, + "step": 8089 + }, + { + "epoch": 1.64, + "learning_rate": 1.2484807477788877e-05, + "loss": 2.1085, + "step": 8090 + }, + { + "epoch": 1.64, + "learning_rate": 1.248313427929013e-05, + "loss": 2.0755, + "step": 8091 + }, + { + "epoch": 1.64, + "learning_rate": 1.2481461006702349e-05, + "loss": 2.1455, + "step": 8092 + }, + { + "epoch": 1.64, + "learning_rate": 1.247978766007546e-05, + "loss": 2.0879, + "step": 8093 + }, + { + "epoch": 1.64, + "learning_rate": 1.2478114239459384e-05, + "loss": 2.0979, + "step": 8094 + }, + { + "epoch": 1.64, + "learning_rate": 1.247644074490406e-05, + "loss": 2.0646, + "step": 8095 + }, + { + "epoch": 1.64, + "learning_rate": 1.2474767176459415e-05, + "loss": 2.1184, + "step": 8096 + }, + { + "epoch": 1.64, + "learning_rate": 1.2473093534175384e-05, + "loss": 2.1413, + "step": 8097 + }, + { + "epoch": 1.64, + "learning_rate": 1.2471419818101904e-05, + "loss": 2.115, + "step": 8098 + }, + { + "epoch": 1.64, + "learning_rate": 1.2469746028288908e-05, + "loss": 2.1807, + "step": 8099 + }, + { + "epoch": 1.64, + "learning_rate": 1.2468072164786342e-05, + "loss": 2.171, + "step": 8100 + }, + { + "epoch": 1.64, + "learning_rate": 1.2466398227644153e-05, + "loss": 2.1571, + "step": 8101 + }, + { + "epoch": 1.64, + "learning_rate": 1.2464724216912277e-05, + "loss": 2.0939, + "step": 8102 + }, + { + "epoch": 1.64, + "learning_rate": 1.2463050132640666e-05, + "loss": 2.1516, + "step": 8103 + }, + { + "epoch": 1.64, + "learning_rate": 1.2461375974879268e-05, + "loss": 2.2197, + "step": 8104 + }, + { + "epoch": 1.65, + "learning_rate": 1.245970174367804e-05, + "loss": 2.1057, + "step": 8105 + }, + { + "epoch": 1.65, + "learning_rate": 1.2458027439086927e-05, + "loss": 2.142, + "step": 8106 + }, + { + "epoch": 1.65, + "learning_rate": 1.245635306115589e-05, + "loss": 2.123, + "step": 8107 + }, + { + "epoch": 1.65, + "learning_rate": 1.2454678609934889e-05, + "loss": 2.1232, + "step": 8108 + }, + { + "epoch": 1.65, + "learning_rate": 1.2453004085473878e-05, + "loss": 2.1466, + "step": 8109 + }, + { + "epoch": 1.65, + "learning_rate": 1.2451329487822827e-05, + "loss": 2.0892, + "step": 8110 + }, + { + "epoch": 1.65, + "learning_rate": 1.2449654817031697e-05, + "loss": 2.1468, + "step": 8111 + }, + { + "epoch": 1.65, + "learning_rate": 1.2447980073150459e-05, + "loss": 2.133, + "step": 8112 + }, + { + "epoch": 1.65, + "learning_rate": 1.2446305256229076e-05, + "loss": 2.1171, + "step": 8113 + }, + { + "epoch": 1.65, + "learning_rate": 1.2444630366317518e-05, + "loss": 2.1774, + "step": 8114 + }, + { + "epoch": 1.65, + "learning_rate": 1.2442955403465768e-05, + "loss": 2.057, + "step": 8115 + }, + { + "epoch": 1.65, + "learning_rate": 1.2441280367723797e-05, + "loss": 2.246, + "step": 8116 + }, + { + "epoch": 1.65, + "learning_rate": 1.243960525914158e-05, + "loss": 2.117, + "step": 8117 + }, + { + "epoch": 1.65, + "learning_rate": 1.2437930077769102e-05, + "loss": 2.167, + "step": 8118 + }, + { + "epoch": 1.65, + "learning_rate": 1.2436254823656338e-05, + "loss": 2.1321, + "step": 8119 + }, + { + "epoch": 1.65, + "learning_rate": 1.2434579496853278e-05, + "loss": 2.1347, + "step": 8120 + }, + { + "epoch": 1.65, + "learning_rate": 1.243290409740991e-05, + "loss": 2.1471, + "step": 8121 + }, + { + "epoch": 1.65, + "learning_rate": 1.2431228625376221e-05, + "loss": 2.0923, + "step": 8122 + }, + { + "epoch": 1.65, + "learning_rate": 1.2429553080802197e-05, + "loss": 2.1018, + "step": 8123 + }, + { + "epoch": 1.65, + "learning_rate": 1.2427877463737837e-05, + "loss": 2.0647, + "step": 8124 + }, + { + "epoch": 1.65, + "learning_rate": 1.2426201774233136e-05, + "loss": 2.1619, + "step": 8125 + }, + { + "epoch": 1.65, + "learning_rate": 1.2424526012338087e-05, + "loss": 2.0452, + "step": 8126 + }, + { + "epoch": 1.65, + "learning_rate": 1.2422850178102695e-05, + "loss": 2.1524, + "step": 8127 + }, + { + "epoch": 1.65, + "learning_rate": 1.2421174271576958e-05, + "loss": 2.1002, + "step": 8128 + }, + { + "epoch": 1.65, + "learning_rate": 1.241949829281088e-05, + "loss": 2.1193, + "step": 8129 + }, + { + "epoch": 1.65, + "learning_rate": 1.2417822241854466e-05, + "loss": 2.2114, + "step": 8130 + }, + { + "epoch": 1.65, + "learning_rate": 1.241614611875773e-05, + "loss": 2.1845, + "step": 8131 + }, + { + "epoch": 1.65, + "learning_rate": 1.241446992357068e-05, + "loss": 2.0889, + "step": 8132 + }, + { + "epoch": 1.65, + "learning_rate": 1.2412793656343325e-05, + "loss": 2.1957, + "step": 8133 + }, + { + "epoch": 1.65, + "learning_rate": 1.2411117317125676e-05, + "loss": 2.1328, + "step": 8134 + }, + { + "epoch": 1.65, + "learning_rate": 1.2409440905967763e-05, + "loss": 2.0961, + "step": 8135 + }, + { + "epoch": 1.65, + "learning_rate": 1.2407764422919595e-05, + "loss": 2.0745, + "step": 8136 + }, + { + "epoch": 1.65, + "learning_rate": 1.2406087868031193e-05, + "loss": 2.0584, + "step": 8137 + }, + { + "epoch": 1.65, + "learning_rate": 1.2404411241352581e-05, + "loss": 2.1278, + "step": 8138 + }, + { + "epoch": 1.65, + "learning_rate": 1.240273454293379e-05, + "loss": 2.1566, + "step": 8139 + }, + { + "epoch": 1.65, + "learning_rate": 1.2401057772824843e-05, + "loss": 2.0674, + "step": 8140 + }, + { + "epoch": 1.65, + "learning_rate": 1.2399380931075769e-05, + "loss": 2.1623, + "step": 8141 + }, + { + "epoch": 1.65, + "learning_rate": 1.2397704017736603e-05, + "loss": 2.083, + "step": 8142 + }, + { + "epoch": 1.65, + "learning_rate": 1.239602703285737e-05, + "loss": 2.1763, + "step": 8143 + }, + { + "epoch": 1.65, + "learning_rate": 1.2394349976488116e-05, + "loss": 2.0923, + "step": 8144 + }, + { + "epoch": 1.65, + "learning_rate": 1.2392672848678877e-05, + "loss": 2.0927, + "step": 8145 + }, + { + "epoch": 1.65, + "learning_rate": 1.2390995649479694e-05, + "loss": 2.1602, + "step": 8146 + }, + { + "epoch": 1.65, + "learning_rate": 1.2389318378940602e-05, + "loss": 2.1511, + "step": 8147 + }, + { + "epoch": 1.65, + "learning_rate": 1.2387641037111653e-05, + "loss": 2.0186, + "step": 8148 + }, + { + "epoch": 1.65, + "learning_rate": 1.2385963624042894e-05, + "loss": 2.1414, + "step": 8149 + }, + { + "epoch": 1.65, + "learning_rate": 1.238428613978437e-05, + "loss": 2.0677, + "step": 8150 + }, + { + "epoch": 1.65, + "learning_rate": 1.2382608584386136e-05, + "loss": 2.1596, + "step": 8151 + }, + { + "epoch": 1.65, + "learning_rate": 1.2380930957898238e-05, + "loss": 2.044, + "step": 8152 + }, + { + "epoch": 1.65, + "learning_rate": 1.2379253260370738e-05, + "loss": 2.1541, + "step": 8153 + }, + { + "epoch": 1.66, + "learning_rate": 1.237757549185369e-05, + "loss": 2.1519, + "step": 8154 + }, + { + "epoch": 1.66, + "learning_rate": 1.2375897652397158e-05, + "loss": 2.1461, + "step": 8155 + }, + { + "epoch": 1.66, + "learning_rate": 1.2374219742051197e-05, + "loss": 2.0988, + "step": 8156 + }, + { + "epoch": 1.66, + "learning_rate": 1.2372541760865876e-05, + "loss": 2.0843, + "step": 8157 + }, + { + "epoch": 1.66, + "learning_rate": 1.2370863708891253e-05, + "loss": 2.2101, + "step": 8158 + }, + { + "epoch": 1.66, + "learning_rate": 1.2369185586177404e-05, + "loss": 2.1031, + "step": 8159 + }, + { + "epoch": 1.66, + "learning_rate": 1.2367507392774398e-05, + "loss": 2.1207, + "step": 8160 + }, + { + "epoch": 1.66, + "learning_rate": 1.2365829128732305e-05, + "loss": 2.1295, + "step": 8161 + }, + { + "epoch": 1.66, + "learning_rate": 1.23641507941012e-05, + "loss": 2.1537, + "step": 8162 + }, + { + "epoch": 1.66, + "learning_rate": 1.2362472388931155e-05, + "loss": 2.0483, + "step": 8163 + }, + { + "epoch": 1.66, + "learning_rate": 1.2360793913272255e-05, + "loss": 2.2263, + "step": 8164 + }, + { + "epoch": 1.66, + "learning_rate": 1.2359115367174579e-05, + "loss": 2.1609, + "step": 8165 + }, + { + "epoch": 1.66, + "learning_rate": 1.2357436750688208e-05, + "loss": 2.0753, + "step": 8166 + }, + { + "epoch": 1.66, + "learning_rate": 1.2355758063863224e-05, + "loss": 2.0484, + "step": 8167 + }, + { + "epoch": 1.66, + "learning_rate": 1.2354079306749717e-05, + "loss": 2.1554, + "step": 8168 + }, + { + "epoch": 1.66, + "learning_rate": 1.2352400479397779e-05, + "loss": 2.1051, + "step": 8169 + }, + { + "epoch": 1.66, + "learning_rate": 1.2350721581857497e-05, + "loss": 2.1096, + "step": 8170 + }, + { + "epoch": 1.66, + "learning_rate": 1.2349042614178963e-05, + "loss": 2.0444, + "step": 8171 + }, + { + "epoch": 1.66, + "learning_rate": 1.2347363576412277e-05, + "loss": 2.0829, + "step": 8172 + }, + { + "epoch": 1.66, + "learning_rate": 1.2345684468607531e-05, + "loss": 2.1226, + "step": 8173 + }, + { + "epoch": 1.66, + "learning_rate": 1.2344005290814826e-05, + "loss": 2.0669, + "step": 8174 + }, + { + "epoch": 1.66, + "learning_rate": 1.2342326043084268e-05, + "loss": 2.0609, + "step": 8175 + }, + { + "epoch": 1.66, + "learning_rate": 1.2340646725465955e-05, + "loss": 2.1403, + "step": 8176 + }, + { + "epoch": 1.66, + "learning_rate": 1.2338967338009995e-05, + "loss": 2.0866, + "step": 8177 + }, + { + "epoch": 1.66, + "learning_rate": 1.233728788076649e-05, + "loss": 2.1149, + "step": 8178 + }, + { + "epoch": 1.66, + "learning_rate": 1.2335608353785561e-05, + "loss": 2.1223, + "step": 8179 + }, + { + "epoch": 1.66, + "learning_rate": 1.2333928757117312e-05, + "loss": 2.0676, + "step": 8180 + }, + { + "epoch": 1.66, + "learning_rate": 1.233224909081186e-05, + "loss": 2.1414, + "step": 8181 + }, + { + "epoch": 1.66, + "learning_rate": 1.2330569354919321e-05, + "loss": 2.1956, + "step": 8182 + }, + { + "epoch": 1.66, + "learning_rate": 1.2328889549489807e-05, + "loss": 2.0268, + "step": 8183 + }, + { + "epoch": 1.66, + "learning_rate": 1.2327209674573445e-05, + "loss": 2.1267, + "step": 8184 + }, + { + "epoch": 1.66, + "learning_rate": 1.2325529730220358e-05, + "loss": 2.1375, + "step": 8185 + }, + { + "epoch": 1.66, + "learning_rate": 1.2323849716480667e-05, + "loss": 2.0864, + "step": 8186 + }, + { + "epoch": 1.66, + "learning_rate": 1.2322169633404498e-05, + "loss": 2.1418, + "step": 8187 + }, + { + "epoch": 1.66, + "learning_rate": 1.2320489481041978e-05, + "loss": 2.1116, + "step": 8188 + }, + { + "epoch": 1.66, + "learning_rate": 1.2318809259443243e-05, + "loss": 2.0487, + "step": 8189 + }, + { + "epoch": 1.66, + "learning_rate": 1.2317128968658424e-05, + "loss": 2.0086, + "step": 8190 + }, + { + "epoch": 1.66, + "learning_rate": 1.2315448608737653e-05, + "loss": 2.1386, + "step": 8191 + }, + { + "epoch": 1.66, + "learning_rate": 1.2313768179731067e-05, + "loss": 2.1029, + "step": 8192 + }, + { + "epoch": 1.66, + "learning_rate": 1.2312087681688807e-05, + "loss": 2.1787, + "step": 8193 + }, + { + "epoch": 1.66, + "learning_rate": 1.2310407114661014e-05, + "loss": 2.1608, + "step": 8194 + }, + { + "epoch": 1.66, + "learning_rate": 1.2308726478697828e-05, + "loss": 2.1573, + "step": 8195 + }, + { + "epoch": 1.66, + "learning_rate": 1.2307045773849398e-05, + "loss": 2.062, + "step": 8196 + }, + { + "epoch": 1.66, + "learning_rate": 1.2305365000165868e-05, + "loss": 2.024, + "step": 8197 + }, + { + "epoch": 1.66, + "learning_rate": 1.2303684157697382e-05, + "loss": 2.1233, + "step": 8198 + }, + { + "epoch": 1.66, + "learning_rate": 1.2302003246494103e-05, + "loss": 2.0284, + "step": 8199 + }, + { + "epoch": 1.66, + "learning_rate": 1.2300322266606176e-05, + "loss": 2.1503, + "step": 8200 + }, + { + "epoch": 1.66, + "learning_rate": 1.2298641218083762e-05, + "loss": 2.2017, + "step": 8201 + }, + { + "epoch": 1.66, + "learning_rate": 1.2296960100977013e-05, + "loss": 2.2318, + "step": 8202 + }, + { + "epoch": 1.67, + "learning_rate": 1.2295278915336087e-05, + "loss": 2.1124, + "step": 8203 + }, + { + "epoch": 1.67, + "learning_rate": 1.229359766121115e-05, + "loss": 2.1465, + "step": 8204 + }, + { + "epoch": 1.67, + "learning_rate": 1.2291916338652365e-05, + "loss": 2.1191, + "step": 8205 + }, + { + "epoch": 1.67, + "learning_rate": 1.2290234947709895e-05, + "loss": 2.1159, + "step": 8206 + }, + { + "epoch": 1.67, + "learning_rate": 1.228855348843391e-05, + "loss": 2.1245, + "step": 8207 + }, + { + "epoch": 1.67, + "learning_rate": 1.2286871960874575e-05, + "loss": 2.1173, + "step": 8208 + }, + { + "epoch": 1.67, + "learning_rate": 1.228519036508207e-05, + "loss": 2.1866, + "step": 8209 + }, + { + "epoch": 1.67, + "learning_rate": 1.2283508701106559e-05, + "loss": 2.1779, + "step": 8210 + }, + { + "epoch": 1.67, + "learning_rate": 1.2281826968998224e-05, + "loss": 2.1007, + "step": 8211 + }, + { + "epoch": 1.67, + "learning_rate": 1.228014516880724e-05, + "loss": 2.202, + "step": 8212 + }, + { + "epoch": 1.67, + "learning_rate": 1.2278463300583787e-05, + "loss": 2.1917, + "step": 8213 + }, + { + "epoch": 1.67, + "learning_rate": 1.227678136437805e-05, + "loss": 2.1106, + "step": 8214 + }, + { + "epoch": 1.67, + "learning_rate": 1.2275099360240206e-05, + "loss": 2.1463, + "step": 8215 + }, + { + "epoch": 1.67, + "learning_rate": 1.2273417288220448e-05, + "loss": 2.1325, + "step": 8216 + }, + { + "epoch": 1.67, + "learning_rate": 1.227173514836896e-05, + "loss": 2.1678, + "step": 8217 + }, + { + "epoch": 1.67, + "learning_rate": 1.227005294073593e-05, + "loss": 2.1292, + "step": 8218 + }, + { + "epoch": 1.67, + "learning_rate": 1.2268370665371552e-05, + "loss": 2.1519, + "step": 8219 + }, + { + "epoch": 1.67, + "learning_rate": 1.2266688322326024e-05, + "loss": 2.0388, + "step": 8220 + }, + { + "epoch": 1.67, + "learning_rate": 1.2265005911649537e-05, + "loss": 2.1103, + "step": 8221 + }, + { + "epoch": 1.67, + "learning_rate": 1.226332343339229e-05, + "loss": 2.1489, + "step": 8222 + }, + { + "epoch": 1.67, + "learning_rate": 1.2261640887604478e-05, + "loss": 2.1643, + "step": 8223 + }, + { + "epoch": 1.67, + "learning_rate": 1.2259958274336314e-05, + "loss": 2.1552, + "step": 8224 + }, + { + "epoch": 1.67, + "learning_rate": 1.2258275593637994e-05, + "loss": 2.1396, + "step": 8225 + }, + { + "epoch": 1.67, + "learning_rate": 1.2256592845559727e-05, + "loss": 2.0997, + "step": 8226 + }, + { + "epoch": 1.67, + "learning_rate": 1.2254910030151716e-05, + "loss": 2.1618, + "step": 8227 + }, + { + "epoch": 1.67, + "learning_rate": 1.2253227147464178e-05, + "loss": 2.171, + "step": 8228 + }, + { + "epoch": 1.67, + "learning_rate": 1.2251544197547323e-05, + "loss": 2.1078, + "step": 8229 + }, + { + "epoch": 1.67, + "learning_rate": 1.2249861180451361e-05, + "loss": 2.1056, + "step": 8230 + }, + { + "epoch": 1.67, + "learning_rate": 1.2248178096226511e-05, + "loss": 2.1373, + "step": 8231 + }, + { + "epoch": 1.67, + "learning_rate": 1.224649494492299e-05, + "loss": 2.1702, + "step": 8232 + }, + { + "epoch": 1.67, + "learning_rate": 1.2244811726591022e-05, + "loss": 2.0539, + "step": 8233 + }, + { + "epoch": 1.67, + "learning_rate": 1.2243128441280824e-05, + "loss": 2.0517, + "step": 8234 + }, + { + "epoch": 1.67, + "learning_rate": 1.2241445089042623e-05, + "loss": 2.1042, + "step": 8235 + }, + { + "epoch": 1.67, + "learning_rate": 1.2239761669926645e-05, + "loss": 2.0809, + "step": 8236 + }, + { + "epoch": 1.67, + "learning_rate": 1.2238078183983114e-05, + "loss": 2.1485, + "step": 8237 + }, + { + "epoch": 1.67, + "learning_rate": 1.2236394631262265e-05, + "loss": 2.1318, + "step": 8238 + }, + { + "epoch": 1.67, + "learning_rate": 1.2234711011814326e-05, + "loss": 2.0968, + "step": 8239 + }, + { + "epoch": 1.67, + "learning_rate": 1.2233027325689535e-05, + "loss": 2.083, + "step": 8240 + }, + { + "epoch": 1.67, + "learning_rate": 1.2231343572938126e-05, + "loss": 2.0388, + "step": 8241 + }, + { + "epoch": 1.67, + "learning_rate": 1.2229659753610336e-05, + "loss": 2.1084, + "step": 8242 + }, + { + "epoch": 1.67, + "learning_rate": 1.2227975867756405e-05, + "loss": 2.1175, + "step": 8243 + }, + { + "epoch": 1.67, + "learning_rate": 1.2226291915426577e-05, + "loss": 2.0834, + "step": 8244 + }, + { + "epoch": 1.67, + "learning_rate": 1.2224607896671095e-05, + "loss": 2.1217, + "step": 8245 + }, + { + "epoch": 1.67, + "learning_rate": 1.2222923811540206e-05, + "loss": 2.1876, + "step": 8246 + }, + { + "epoch": 1.67, + "learning_rate": 1.222123966008415e-05, + "loss": 2.0961, + "step": 8247 + }, + { + "epoch": 1.67, + "learning_rate": 1.221955544235319e-05, + "loss": 2.1621, + "step": 8248 + }, + { + "epoch": 1.67, + "learning_rate": 1.221787115839757e-05, + "loss": 2.1092, + "step": 8249 + }, + { + "epoch": 1.67, + "learning_rate": 1.2216186808267544e-05, + "loss": 2.0806, + "step": 8250 + }, + { + "epoch": 1.67, + "learning_rate": 1.221450239201337e-05, + "loss": 2.1969, + "step": 8251 + }, + { + "epoch": 1.68, + "learning_rate": 1.2212817909685302e-05, + "loss": 2.2153, + "step": 8252 + }, + { + "epoch": 1.68, + "learning_rate": 1.2211133361333604e-05, + "loss": 2.0404, + "step": 8253 + }, + { + "epoch": 1.68, + "learning_rate": 1.2209448747008536e-05, + "loss": 2.0818, + "step": 8254 + }, + { + "epoch": 1.68, + "learning_rate": 1.2207764066760362e-05, + "loss": 2.138, + "step": 8255 + }, + { + "epoch": 1.68, + "learning_rate": 1.2206079320639348e-05, + "loss": 2.1481, + "step": 8256 + }, + { + "epoch": 1.68, + "learning_rate": 1.2204394508695758e-05, + "loss": 2.166, + "step": 8257 + }, + { + "epoch": 1.68, + "learning_rate": 1.2202709630979868e-05, + "loss": 2.0897, + "step": 8258 + }, + { + "epoch": 1.68, + "learning_rate": 1.2201024687541942e-05, + "loss": 2.1482, + "step": 8259 + }, + { + "epoch": 1.68, + "learning_rate": 1.219933967843226e-05, + "loss": 2.1894, + "step": 8260 + }, + { + "epoch": 1.68, + "learning_rate": 1.2197654603701098e-05, + "loss": 2.1405, + "step": 8261 + }, + { + "epoch": 1.68, + "learning_rate": 1.2195969463398725e-05, + "loss": 2.115, + "step": 8262 + }, + { + "epoch": 1.68, + "learning_rate": 1.2194284257575425e-05, + "loss": 1.9935, + "step": 8263 + }, + { + "epoch": 1.68, + "learning_rate": 1.2192598986281485e-05, + "loss": 2.0789, + "step": 8264 + }, + { + "epoch": 1.68, + "learning_rate": 1.2190913649567185e-05, + "loss": 2.1094, + "step": 8265 + }, + { + "epoch": 1.68, + "learning_rate": 1.2189228247482805e-05, + "loss": 2.1145, + "step": 8266 + }, + { + "epoch": 1.68, + "learning_rate": 1.2187542780078631e-05, + "loss": 2.168, + "step": 8267 + }, + { + "epoch": 1.68, + "learning_rate": 1.2185857247404961e-05, + "loss": 2.1093, + "step": 8268 + }, + { + "epoch": 1.68, + "learning_rate": 1.2184171649512084e-05, + "loss": 2.153, + "step": 8269 + }, + { + "epoch": 1.68, + "learning_rate": 1.2182485986450292e-05, + "loss": 2.0999, + "step": 8270 + }, + { + "epoch": 1.68, + "learning_rate": 1.2180800258269878e-05, + "loss": 2.1385, + "step": 8271 + }, + { + "epoch": 1.68, + "learning_rate": 1.2179114465021134e-05, + "loss": 2.1547, + "step": 8272 + }, + { + "epoch": 1.68, + "learning_rate": 1.217742860675437e-05, + "loss": 2.1697, + "step": 8273 + }, + { + "epoch": 1.68, + "learning_rate": 1.2175742683519882e-05, + "loss": 2.1375, + "step": 8274 + }, + { + "epoch": 1.68, + "learning_rate": 1.217405669536797e-05, + "loss": 2.1093, + "step": 8275 + }, + { + "epoch": 1.68, + "learning_rate": 1.2172370642348943e-05, + "loss": 2.065, + "step": 8276 + }, + { + "epoch": 1.68, + "learning_rate": 1.21706845245131e-05, + "loss": 2.2003, + "step": 8277 + }, + { + "epoch": 1.68, + "learning_rate": 1.216899834191076e-05, + "loss": 2.1163, + "step": 8278 + }, + { + "epoch": 1.68, + "learning_rate": 1.2167312094592227e-05, + "loss": 2.1316, + "step": 8279 + }, + { + "epoch": 1.68, + "learning_rate": 1.2165625782607817e-05, + "loss": 2.1418, + "step": 8280 + }, + { + "epoch": 1.68, + "learning_rate": 1.216393940600784e-05, + "loss": 2.1101, + "step": 8281 + }, + { + "epoch": 1.68, + "learning_rate": 1.2162252964842615e-05, + "loss": 2.1332, + "step": 8282 + }, + { + "epoch": 1.68, + "learning_rate": 1.216056645916246e-05, + "loss": 2.1661, + "step": 8283 + }, + { + "epoch": 1.68, + "learning_rate": 1.2158879889017694e-05, + "loss": 2.1526, + "step": 8284 + }, + { + "epoch": 1.68, + "learning_rate": 1.2157193254458642e-05, + "loss": 2.0694, + "step": 8285 + }, + { + "epoch": 1.68, + "learning_rate": 1.2155506555535624e-05, + "loss": 2.1716, + "step": 8286 + }, + { + "epoch": 1.68, + "learning_rate": 1.2153819792298965e-05, + "loss": 2.1345, + "step": 8287 + }, + { + "epoch": 1.68, + "learning_rate": 1.2152132964798997e-05, + "loss": 2.1785, + "step": 8288 + }, + { + "epoch": 1.68, + "learning_rate": 1.215044607308605e-05, + "loss": 2.1308, + "step": 8289 + }, + { + "epoch": 1.68, + "learning_rate": 1.2148759117210453e-05, + "loss": 2.1288, + "step": 8290 + }, + { + "epoch": 1.68, + "learning_rate": 1.2147072097222541e-05, + "loss": 2.0828, + "step": 8291 + }, + { + "epoch": 1.68, + "learning_rate": 1.2145385013172646e-05, + "loss": 2.0344, + "step": 8292 + }, + { + "epoch": 1.68, + "learning_rate": 1.2143697865111111e-05, + "loss": 2.0246, + "step": 8293 + }, + { + "epoch": 1.68, + "learning_rate": 1.214201065308827e-05, + "loss": 2.0964, + "step": 8294 + }, + { + "epoch": 1.68, + "learning_rate": 1.2140323377154468e-05, + "loss": 2.0451, + "step": 8295 + }, + { + "epoch": 1.68, + "learning_rate": 1.2138636037360046e-05, + "loss": 2.1217, + "step": 8296 + }, + { + "epoch": 1.68, + "learning_rate": 1.2136948633755347e-05, + "loss": 2.055, + "step": 8297 + }, + { + "epoch": 1.68, + "learning_rate": 1.2135261166390724e-05, + "loss": 2.1098, + "step": 8298 + }, + { + "epoch": 1.68, + "learning_rate": 1.2133573635316523e-05, + "loss": 2.0656, + "step": 8299 + }, + { + "epoch": 1.68, + "learning_rate": 1.213188604058309e-05, + "loss": 2.1495, + "step": 8300 + }, + { + "epoch": 1.68, + "learning_rate": 1.2130198382240786e-05, + "loss": 2.1548, + "step": 8301 + }, + { + "epoch": 1.69, + "learning_rate": 1.2128510660339959e-05, + "loss": 2.1135, + "step": 8302 + }, + { + "epoch": 1.69, + "learning_rate": 1.2126822874930964e-05, + "loss": 2.0972, + "step": 8303 + }, + { + "epoch": 1.69, + "learning_rate": 1.2125135026064167e-05, + "loss": 2.1118, + "step": 8304 + }, + { + "epoch": 1.69, + "learning_rate": 1.2123447113789925e-05, + "loss": 2.199, + "step": 8305 + }, + { + "epoch": 1.69, + "learning_rate": 1.2121759138158598e-05, + "loss": 2.0947, + "step": 8306 + }, + { + "epoch": 1.69, + "learning_rate": 1.2120071099220545e-05, + "loss": 2.1406, + "step": 8307 + }, + { + "epoch": 1.69, + "learning_rate": 1.2118382997026146e-05, + "loss": 2.0511, + "step": 8308 + }, + { + "epoch": 1.69, + "learning_rate": 1.2116694831625757e-05, + "loss": 2.1134, + "step": 8309 + }, + { + "epoch": 1.69, + "learning_rate": 1.2115006603069752e-05, + "loss": 2.2137, + "step": 8310 + }, + { + "epoch": 1.69, + "learning_rate": 1.2113318311408504e-05, + "loss": 2.0209, + "step": 8311 + }, + { + "epoch": 1.69, + "learning_rate": 1.2111629956692377e-05, + "loss": 2.1476, + "step": 8312 + }, + { + "epoch": 1.69, + "learning_rate": 1.2109941538971758e-05, + "loss": 2.0665, + "step": 8313 + }, + { + "epoch": 1.69, + "learning_rate": 1.2108253058297021e-05, + "loss": 2.1155, + "step": 8314 + }, + { + "epoch": 1.69, + "learning_rate": 1.2106564514718544e-05, + "loss": 2.0447, + "step": 8315 + }, + { + "epoch": 1.69, + "learning_rate": 1.2104875908286702e-05, + "loss": 2.1134, + "step": 8316 + }, + { + "epoch": 1.69, + "learning_rate": 1.2103187239051885e-05, + "loss": 2.1432, + "step": 8317 + }, + { + "epoch": 1.69, + "learning_rate": 1.210149850706448e-05, + "loss": 2.0721, + "step": 8318 + }, + { + "epoch": 1.69, + "learning_rate": 1.2099809712374868e-05, + "loss": 2.2054, + "step": 8319 + }, + { + "epoch": 1.69, + "learning_rate": 1.2098120855033434e-05, + "loss": 2.2025, + "step": 8320 + }, + { + "epoch": 1.69, + "learning_rate": 1.2096431935090579e-05, + "loss": 2.1356, + "step": 8321 + }, + { + "epoch": 1.69, + "learning_rate": 1.2094742952596688e-05, + "loss": 2.0682, + "step": 8322 + }, + { + "epoch": 1.69, + "learning_rate": 1.2093053907602155e-05, + "loss": 2.1145, + "step": 8323 + }, + { + "epoch": 1.69, + "learning_rate": 1.2091364800157377e-05, + "loss": 2.1618, + "step": 8324 + }, + { + "epoch": 1.69, + "learning_rate": 1.2089675630312755e-05, + "loss": 2.0864, + "step": 8325 + }, + { + "epoch": 1.69, + "learning_rate": 1.2087986398118679e-05, + "loss": 2.123, + "step": 8326 + }, + { + "epoch": 1.69, + "learning_rate": 1.208629710362556e-05, + "loss": 2.1717, + "step": 8327 + }, + { + "epoch": 1.69, + "learning_rate": 1.20846077468838e-05, + "loss": 2.1059, + "step": 8328 + }, + { + "epoch": 1.69, + "learning_rate": 1.2082918327943801e-05, + "loss": 2.1384, + "step": 8329 + }, + { + "epoch": 1.69, + "learning_rate": 1.2081228846855975e-05, + "loss": 2.0643, + "step": 8330 + }, + { + "epoch": 1.69, + "learning_rate": 1.207953930367072e-05, + "loss": 2.0539, + "step": 8331 + }, + { + "epoch": 1.69, + "learning_rate": 1.207784969843846e-05, + "loss": 2.1647, + "step": 8332 + }, + { + "epoch": 1.69, + "learning_rate": 1.20761600312096e-05, + "loss": 2.1283, + "step": 8333 + }, + { + "epoch": 1.69, + "learning_rate": 1.2074470302034558e-05, + "loss": 2.1344, + "step": 8334 + }, + { + "epoch": 1.69, + "learning_rate": 1.2072780510963746e-05, + "loss": 2.0588, + "step": 8335 + }, + { + "epoch": 1.69, + "learning_rate": 1.2071090658047582e-05, + "loss": 2.161, + "step": 8336 + }, + { + "epoch": 1.69, + "learning_rate": 1.2069400743336492e-05, + "loss": 2.1302, + "step": 8337 + }, + { + "epoch": 1.69, + "learning_rate": 1.2067710766880893e-05, + "loss": 2.1903, + "step": 8338 + }, + { + "epoch": 1.69, + "learning_rate": 1.2066020728731212e-05, + "loss": 2.0678, + "step": 8339 + }, + { + "epoch": 1.69, + "learning_rate": 1.206433062893787e-05, + "loss": 2.1494, + "step": 8340 + }, + { + "epoch": 1.69, + "learning_rate": 1.2062640467551297e-05, + "loss": 2.0474, + "step": 8341 + }, + { + "epoch": 1.69, + "learning_rate": 1.2060950244621923e-05, + "loss": 2.1006, + "step": 8342 + }, + { + "epoch": 1.69, + "learning_rate": 1.2059259960200179e-05, + "loss": 2.1577, + "step": 8343 + }, + { + "epoch": 1.69, + "learning_rate": 1.2057569614336493e-05, + "loss": 2.1581, + "step": 8344 + }, + { + "epoch": 1.69, + "learning_rate": 1.2055879207081307e-05, + "loss": 2.0979, + "step": 8345 + }, + { + "epoch": 1.69, + "learning_rate": 1.2054188738485053e-05, + "loss": 2.159, + "step": 8346 + }, + { + "epoch": 1.69, + "learning_rate": 1.2052498208598172e-05, + "loss": 2.1223, + "step": 8347 + }, + { + "epoch": 1.69, + "learning_rate": 1.2050807617471098e-05, + "loss": 2.1174, + "step": 8348 + }, + { + "epoch": 1.69, + "learning_rate": 1.2049116965154282e-05, + "loss": 2.0937, + "step": 8349 + }, + { + "epoch": 1.69, + "learning_rate": 1.2047426251698161e-05, + "loss": 2.1814, + "step": 8350 + }, + { + "epoch": 1.7, + "learning_rate": 1.2045735477153181e-05, + "loss": 2.1041, + "step": 8351 + }, + { + "epoch": 1.7, + "learning_rate": 1.2044044641569797e-05, + "loss": 2.1355, + "step": 8352 + }, + { + "epoch": 1.7, + "learning_rate": 1.2042353744998452e-05, + "loss": 2.0957, + "step": 8353 + }, + { + "epoch": 1.7, + "learning_rate": 1.2040662787489596e-05, + "loss": 2.0945, + "step": 8354 + }, + { + "epoch": 1.7, + "learning_rate": 1.2038971769093685e-05, + "loss": 2.1099, + "step": 8355 + }, + { + "epoch": 1.7, + "learning_rate": 1.2037280689861169e-05, + "loss": 2.1596, + "step": 8356 + }, + { + "epoch": 1.7, + "learning_rate": 1.2035589549842512e-05, + "loss": 2.1902, + "step": 8357 + }, + { + "epoch": 1.7, + "learning_rate": 1.203389834908817e-05, + "loss": 2.0834, + "step": 8358 + }, + { + "epoch": 1.7, + "learning_rate": 1.2032207087648598e-05, + "loss": 2.1029, + "step": 8359 + }, + { + "epoch": 1.7, + "learning_rate": 1.2030515765574262e-05, + "loss": 2.1074, + "step": 8360 + }, + { + "epoch": 1.7, + "learning_rate": 1.2028824382915624e-05, + "loss": 2.1256, + "step": 8361 + }, + { + "epoch": 1.7, + "learning_rate": 1.2027132939723156e-05, + "loss": 2.0895, + "step": 8362 + }, + { + "epoch": 1.7, + "learning_rate": 1.2025441436047319e-05, + "loss": 2.1661, + "step": 8363 + }, + { + "epoch": 1.7, + "learning_rate": 1.2023749871938581e-05, + "loss": 2.1758, + "step": 8364 + }, + { + "epoch": 1.7, + "learning_rate": 1.202205824744742e-05, + "loss": 2.1306, + "step": 8365 + }, + { + "epoch": 1.7, + "learning_rate": 1.2020366562624301e-05, + "loss": 2.1171, + "step": 8366 + }, + { + "epoch": 1.7, + "learning_rate": 1.2018674817519705e-05, + "loss": 2.0826, + "step": 8367 + }, + { + "epoch": 1.7, + "learning_rate": 1.2016983012184102e-05, + "loss": 2.1013, + "step": 8368 + }, + { + "epoch": 1.7, + "learning_rate": 1.201529114666798e-05, + "loss": 2.1017, + "step": 8369 + }, + { + "epoch": 1.7, + "learning_rate": 1.201359922102181e-05, + "loss": 2.0462, + "step": 8370 + }, + { + "epoch": 1.7, + "learning_rate": 1.2011907235296075e-05, + "loss": 2.1044, + "step": 8371 + }, + { + "epoch": 1.7, + "learning_rate": 1.201021518954126e-05, + "loss": 2.1129, + "step": 8372 + }, + { + "epoch": 1.7, + "learning_rate": 1.2008523083807855e-05, + "loss": 2.2012, + "step": 8373 + }, + { + "epoch": 1.7, + "learning_rate": 1.2006830918146342e-05, + "loss": 2.1189, + "step": 8374 + }, + { + "epoch": 1.7, + "learning_rate": 1.2005138692607211e-05, + "loss": 2.0145, + "step": 8375 + }, + { + "epoch": 1.7, + "learning_rate": 1.2003446407240949e-05, + "loss": 2.1394, + "step": 8376 + }, + { + "epoch": 1.7, + "learning_rate": 1.2001754062098057e-05, + "loss": 2.067, + "step": 8377 + }, + { + "epoch": 1.7, + "learning_rate": 1.2000061657229022e-05, + "loss": 2.1317, + "step": 8378 + }, + { + "epoch": 1.7, + "learning_rate": 1.1998369192684344e-05, + "loss": 2.0464, + "step": 8379 + }, + { + "epoch": 1.7, + "learning_rate": 1.1996676668514522e-05, + "loss": 2.1195, + "step": 8380 + }, + { + "epoch": 1.7, + "learning_rate": 1.1994984084770047e-05, + "loss": 2.0762, + "step": 8381 + }, + { + "epoch": 1.7, + "learning_rate": 1.199329144150143e-05, + "loss": 2.0966, + "step": 8382 + }, + { + "epoch": 1.7, + "learning_rate": 1.1991598738759175e-05, + "loss": 2.1243, + "step": 8383 + }, + { + "epoch": 1.7, + "learning_rate": 1.198990597659378e-05, + "loss": 2.1533, + "step": 8384 + }, + { + "epoch": 1.7, + "learning_rate": 1.1988213155055754e-05, + "loss": 2.139, + "step": 8385 + }, + { + "epoch": 1.7, + "learning_rate": 1.1986520274195606e-05, + "loss": 2.1614, + "step": 8386 + }, + { + "epoch": 1.7, + "learning_rate": 1.198482733406385e-05, + "loss": 2.1033, + "step": 8387 + }, + { + "epoch": 1.7, + "learning_rate": 1.198313433471099e-05, + "loss": 2.1816, + "step": 8388 + }, + { + "epoch": 1.7, + "learning_rate": 1.1981441276187548e-05, + "loss": 2.1332, + "step": 8389 + }, + { + "epoch": 1.7, + "learning_rate": 1.1979748158544039e-05, + "loss": 2.0552, + "step": 8390 + }, + { + "epoch": 1.7, + "learning_rate": 1.197805498183097e-05, + "loss": 2.0839, + "step": 8391 + }, + { + "epoch": 1.7, + "learning_rate": 1.1976361746098873e-05, + "loss": 2.0779, + "step": 8392 + }, + { + "epoch": 1.7, + "learning_rate": 1.1974668451398264e-05, + "loss": 2.1581, + "step": 8393 + }, + { + "epoch": 1.7, + "learning_rate": 1.1972975097779664e-05, + "loss": 2.1024, + "step": 8394 + }, + { + "epoch": 1.7, + "learning_rate": 1.1971281685293602e-05, + "loss": 2.1841, + "step": 8395 + }, + { + "epoch": 1.7, + "learning_rate": 1.1969588213990596e-05, + "loss": 2.1382, + "step": 8396 + }, + { + "epoch": 1.7, + "learning_rate": 1.196789468392118e-05, + "loss": 2.2116, + "step": 8397 + }, + { + "epoch": 1.7, + "learning_rate": 1.1966201095135885e-05, + "loss": 2.1202, + "step": 8398 + }, + { + "epoch": 1.7, + "learning_rate": 1.196450744768524e-05, + "loss": 2.1278, + "step": 8399 + }, + { + "epoch": 1.71, + "learning_rate": 1.1962813741619779e-05, + "loss": 2.0855, + "step": 8400 + }, + { + "epoch": 1.71, + "learning_rate": 1.1961119976990033e-05, + "loss": 2.1479, + "step": 8401 + }, + { + "epoch": 1.71, + "learning_rate": 1.1959426153846542e-05, + "loss": 2.1668, + "step": 8402 + }, + { + "epoch": 1.71, + "learning_rate": 1.1957732272239848e-05, + "loss": 2.0873, + "step": 8403 + }, + { + "epoch": 1.71, + "learning_rate": 1.1956038332220485e-05, + "loss": 2.138, + "step": 8404 + }, + { + "epoch": 1.71, + "learning_rate": 1.1954344333838997e-05, + "loss": 2.0875, + "step": 8405 + }, + { + "epoch": 1.71, + "learning_rate": 1.1952650277145928e-05, + "loss": 2.1422, + "step": 8406 + }, + { + "epoch": 1.71, + "learning_rate": 1.1950956162191825e-05, + "loss": 2.1263, + "step": 8407 + }, + { + "epoch": 1.71, + "learning_rate": 1.1949261989027232e-05, + "loss": 2.1664, + "step": 8408 + }, + { + "epoch": 1.71, + "learning_rate": 1.1947567757702703e-05, + "loss": 2.1843, + "step": 8409 + }, + { + "epoch": 1.71, + "learning_rate": 1.194587346826878e-05, + "loss": 2.1837, + "step": 8410 + }, + { + "epoch": 1.71, + "learning_rate": 1.1944179120776025e-05, + "loss": 2.129, + "step": 8411 + }, + { + "epoch": 1.71, + "learning_rate": 1.1942484715274984e-05, + "loss": 2.1326, + "step": 8412 + }, + { + "epoch": 1.71, + "learning_rate": 1.194079025181622e-05, + "loss": 2.1112, + "step": 8413 + }, + { + "epoch": 1.71, + "learning_rate": 1.1939095730450285e-05, + "loss": 2.1205, + "step": 8414 + }, + { + "epoch": 1.71, + "learning_rate": 1.193740115122774e-05, + "loss": 2.0777, + "step": 8415 + }, + { + "epoch": 1.71, + "learning_rate": 1.1935706514199146e-05, + "loss": 2.1616, + "step": 8416 + }, + { + "epoch": 1.71, + "learning_rate": 1.1934011819415068e-05, + "loss": 2.0797, + "step": 8417 + }, + { + "epoch": 1.71, + "learning_rate": 1.1932317066926066e-05, + "loss": 2.0462, + "step": 8418 + }, + { + "epoch": 1.71, + "learning_rate": 1.1930622256782711e-05, + "loss": 2.1382, + "step": 8419 + }, + { + "epoch": 1.71, + "learning_rate": 1.1928927389035566e-05, + "loss": 2.1487, + "step": 8420 + }, + { + "epoch": 1.71, + "learning_rate": 1.1927232463735203e-05, + "loss": 2.136, + "step": 8421 + }, + { + "epoch": 1.71, + "learning_rate": 1.1925537480932196e-05, + "loss": 2.1234, + "step": 8422 + }, + { + "epoch": 1.71, + "learning_rate": 1.1923842440677116e-05, + "loss": 2.247, + "step": 8423 + }, + { + "epoch": 1.71, + "learning_rate": 1.1922147343020536e-05, + "loss": 2.0888, + "step": 8424 + }, + { + "epoch": 1.71, + "learning_rate": 1.1920452188013028e-05, + "loss": 2.1837, + "step": 8425 + }, + { + "epoch": 1.71, + "learning_rate": 1.1918756975705182e-05, + "loss": 2.0525, + "step": 8426 + }, + { + "epoch": 1.71, + "learning_rate": 1.191706170614757e-05, + "loss": 2.0231, + "step": 8427 + }, + { + "epoch": 1.71, + "learning_rate": 1.1915366379390777e-05, + "loss": 2.0805, + "step": 8428 + }, + { + "epoch": 1.71, + "learning_rate": 1.191367099548538e-05, + "loss": 2.1934, + "step": 8429 + }, + { + "epoch": 1.71, + "learning_rate": 1.191197555448197e-05, + "loss": 2.1789, + "step": 8430 + }, + { + "epoch": 1.71, + "learning_rate": 1.1910280056431136e-05, + "loss": 2.2356, + "step": 8431 + }, + { + "epoch": 1.71, + "learning_rate": 1.1908584501383458e-05, + "loss": 2.1362, + "step": 8432 + }, + { + "epoch": 1.71, + "learning_rate": 1.1906888889389534e-05, + "loss": 2.1228, + "step": 8433 + }, + { + "epoch": 1.71, + "learning_rate": 1.1905193220499954e-05, + "loss": 2.0802, + "step": 8434 + }, + { + "epoch": 1.71, + "learning_rate": 1.1903497494765306e-05, + "loss": 2.1565, + "step": 8435 + }, + { + "epoch": 1.71, + "learning_rate": 1.190180171223619e-05, + "loss": 2.1292, + "step": 8436 + }, + { + "epoch": 1.71, + "learning_rate": 1.1900105872963204e-05, + "loss": 2.1473, + "step": 8437 + }, + { + "epoch": 1.71, + "learning_rate": 1.1898409976996943e-05, + "loss": 2.1645, + "step": 8438 + }, + { + "epoch": 1.71, + "learning_rate": 1.1896714024388012e-05, + "loss": 2.1218, + "step": 8439 + }, + { + "epoch": 1.71, + "learning_rate": 1.1895018015187004e-05, + "loss": 2.1643, + "step": 8440 + }, + { + "epoch": 1.71, + "learning_rate": 1.1893321949444534e-05, + "loss": 2.1114, + "step": 8441 + }, + { + "epoch": 1.71, + "learning_rate": 1.1891625827211202e-05, + "loss": 2.1482, + "step": 8442 + }, + { + "epoch": 1.71, + "learning_rate": 1.1889929648537615e-05, + "loss": 2.156, + "step": 8443 + }, + { + "epoch": 1.71, + "learning_rate": 1.188823341347438e-05, + "loss": 2.1457, + "step": 8444 + }, + { + "epoch": 1.71, + "learning_rate": 1.1886537122072106e-05, + "loss": 2.1381, + "step": 8445 + }, + { + "epoch": 1.71, + "learning_rate": 1.1884840774381415e-05, + "loss": 2.1836, + "step": 8446 + }, + { + "epoch": 1.71, + "learning_rate": 1.188314437045291e-05, + "loss": 2.1138, + "step": 8447 + }, + { + "epoch": 1.71, + "learning_rate": 1.1881447910337215e-05, + "loss": 2.0969, + "step": 8448 + }, + { + "epoch": 1.72, + "learning_rate": 1.1879751394084937e-05, + "loss": 2.1947, + "step": 8449 + }, + { + "epoch": 1.72, + "learning_rate": 1.1878054821746701e-05, + "loss": 2.178, + "step": 8450 + }, + { + "epoch": 1.72, + "learning_rate": 1.187635819337313e-05, + "loss": 2.1946, + "step": 8451 + }, + { + "epoch": 1.72, + "learning_rate": 1.1874661509014843e-05, + "loss": 2.104, + "step": 8452 + }, + { + "epoch": 1.72, + "learning_rate": 1.1872964768722459e-05, + "loss": 2.2376, + "step": 8453 + }, + { + "epoch": 1.72, + "learning_rate": 1.1871267972546614e-05, + "loss": 2.0932, + "step": 8454 + }, + { + "epoch": 1.72, + "learning_rate": 1.1869571120537924e-05, + "loss": 2.0871, + "step": 8455 + }, + { + "epoch": 1.72, + "learning_rate": 1.1867874212747027e-05, + "loss": 2.13, + "step": 8456 + }, + { + "epoch": 1.72, + "learning_rate": 1.186617724922455e-05, + "loss": 2.1537, + "step": 8457 + }, + { + "epoch": 1.72, + "learning_rate": 1.1864480230021123e-05, + "loss": 2.0848, + "step": 8458 + }, + { + "epoch": 1.72, + "learning_rate": 1.1862783155187382e-05, + "loss": 2.03, + "step": 8459 + }, + { + "epoch": 1.72, + "learning_rate": 1.186108602477396e-05, + "loss": 2.1135, + "step": 8460 + }, + { + "epoch": 1.72, + "learning_rate": 1.1859388838831497e-05, + "loss": 2.0645, + "step": 8461 + }, + { + "epoch": 1.72, + "learning_rate": 1.1857691597410634e-05, + "loss": 2.1476, + "step": 8462 + }, + { + "epoch": 1.72, + "learning_rate": 1.1855994300562008e-05, + "loss": 2.1618, + "step": 8463 + }, + { + "epoch": 1.72, + "learning_rate": 1.1854296948336262e-05, + "loss": 2.1757, + "step": 8464 + }, + { + "epoch": 1.72, + "learning_rate": 1.1852599540784033e-05, + "loss": 2.1577, + "step": 8465 + }, + { + "epoch": 1.72, + "learning_rate": 1.1850902077955978e-05, + "loss": 2.102, + "step": 8466 + }, + { + "epoch": 1.72, + "learning_rate": 1.1849204559902738e-05, + "loss": 2.099, + "step": 8467 + }, + { + "epoch": 1.72, + "learning_rate": 1.1847506986674964e-05, + "loss": 2.0006, + "step": 8468 + }, + { + "epoch": 1.72, + "learning_rate": 1.1845809358323301e-05, + "loss": 2.1199, + "step": 8469 + }, + { + "epoch": 1.72, + "learning_rate": 1.1844111674898404e-05, + "loss": 2.139, + "step": 8470 + }, + { + "epoch": 1.72, + "learning_rate": 1.1842413936450932e-05, + "loss": 2.0861, + "step": 8471 + }, + { + "epoch": 1.72, + "learning_rate": 1.1840716143031532e-05, + "loss": 2.0315, + "step": 8472 + }, + { + "epoch": 1.72, + "learning_rate": 1.1839018294690866e-05, + "loss": 2.18, + "step": 8473 + }, + { + "epoch": 1.72, + "learning_rate": 1.183732039147959e-05, + "loss": 2.0981, + "step": 8474 + }, + { + "epoch": 1.72, + "learning_rate": 1.1835622433448361e-05, + "loss": 2.1141, + "step": 8475 + }, + { + "epoch": 1.72, + "learning_rate": 1.183392442064785e-05, + "loss": 2.1408, + "step": 8476 + }, + { + "epoch": 1.72, + "learning_rate": 1.1832226353128714e-05, + "loss": 2.1827, + "step": 8477 + }, + { + "epoch": 1.72, + "learning_rate": 1.183052823094162e-05, + "loss": 2.098, + "step": 8478 + }, + { + "epoch": 1.72, + "learning_rate": 1.1828830054137235e-05, + "loss": 2.1095, + "step": 8479 + }, + { + "epoch": 1.72, + "learning_rate": 1.1827131822766223e-05, + "loss": 2.094, + "step": 8480 + }, + { + "epoch": 1.72, + "learning_rate": 1.182543353687926e-05, + "loss": 2.0583, + "step": 8481 + }, + { + "epoch": 1.72, + "learning_rate": 1.1823735196527015e-05, + "loss": 2.135, + "step": 8482 + }, + { + "epoch": 1.72, + "learning_rate": 1.1822036801760161e-05, + "loss": 2.1418, + "step": 8483 + }, + { + "epoch": 1.72, + "learning_rate": 1.1820338352629372e-05, + "loss": 2.1373, + "step": 8484 + }, + { + "epoch": 1.72, + "learning_rate": 1.1818639849185325e-05, + "loss": 2.0889, + "step": 8485 + }, + { + "epoch": 1.72, + "learning_rate": 1.18169412914787e-05, + "loss": 2.1244, + "step": 8486 + }, + { + "epoch": 1.72, + "learning_rate": 1.1815242679560175e-05, + "loss": 2.1629, + "step": 8487 + }, + { + "epoch": 1.72, + "learning_rate": 1.1813544013480433e-05, + "loss": 2.1007, + "step": 8488 + }, + { + "epoch": 1.72, + "learning_rate": 1.1811845293290155e-05, + "loss": 2.0773, + "step": 8489 + }, + { + "epoch": 1.72, + "learning_rate": 1.1810146519040023e-05, + "loss": 2.0848, + "step": 8490 + }, + { + "epoch": 1.72, + "learning_rate": 1.180844769078073e-05, + "loss": 2.0631, + "step": 8491 + }, + { + "epoch": 1.72, + "learning_rate": 1.1806748808562958e-05, + "loss": 2.0723, + "step": 8492 + }, + { + "epoch": 1.72, + "learning_rate": 1.1805049872437397e-05, + "loss": 2.0928, + "step": 8493 + }, + { + "epoch": 1.72, + "learning_rate": 1.1803350882454743e-05, + "loss": 2.1112, + "step": 8494 + }, + { + "epoch": 1.72, + "learning_rate": 1.180165183866568e-05, + "loss": 2.1937, + "step": 8495 + }, + { + "epoch": 1.72, + "learning_rate": 1.1799952741120912e-05, + "loss": 2.1722, + "step": 8496 + }, + { + "epoch": 1.72, + "learning_rate": 1.1798253589871127e-05, + "loss": 2.1438, + "step": 8497 + }, + { + "epoch": 1.72, + "learning_rate": 1.1796554384967027e-05, + "loss": 2.0603, + "step": 8498 + }, + { + "epoch": 1.73, + "learning_rate": 1.1794855126459308e-05, + "loss": 2.1586, + "step": 8499 + }, + { + "epoch": 1.73, + "learning_rate": 1.1793155814398673e-05, + "loss": 2.1264, + "step": 8500 + }, + { + "epoch": 1.73, + "learning_rate": 1.1791456448835823e-05, + "loss": 2.179, + "step": 8501 + }, + { + "epoch": 1.73, + "learning_rate": 1.1789757029821463e-05, + "loss": 2.1068, + "step": 8502 + }, + { + "epoch": 1.73, + "learning_rate": 1.1788057557406298e-05, + "loss": 2.1823, + "step": 8503 + }, + { + "epoch": 1.73, + "learning_rate": 1.178635803164103e-05, + "loss": 2.1468, + "step": 8504 + }, + { + "epoch": 1.73, + "learning_rate": 1.1784658452576376e-05, + "loss": 2.1709, + "step": 8505 + }, + { + "epoch": 1.73, + "learning_rate": 1.1782958820263042e-05, + "loss": 2.1416, + "step": 8506 + }, + { + "epoch": 1.73, + "learning_rate": 1.1781259134751742e-05, + "loss": 2.1086, + "step": 8507 + }, + { + "epoch": 1.73, + "learning_rate": 1.1779559396093185e-05, + "loss": 2.0793, + "step": 8508 + }, + { + "epoch": 1.73, + "learning_rate": 1.1777859604338085e-05, + "loss": 2.0628, + "step": 8509 + }, + { + "epoch": 1.73, + "learning_rate": 1.1776159759537165e-05, + "loss": 2.0777, + "step": 8510 + }, + { + "epoch": 1.73, + "learning_rate": 1.177445986174114e-05, + "loss": 2.1803, + "step": 8511 + }, + { + "epoch": 1.73, + "learning_rate": 1.1772759911000731e-05, + "loss": 2.1315, + "step": 8512 + }, + { + "epoch": 1.73, + "learning_rate": 1.1771059907366653e-05, + "loss": 2.1924, + "step": 8513 + }, + { + "epoch": 1.73, + "learning_rate": 1.1769359850889635e-05, + "loss": 2.1126, + "step": 8514 + }, + { + "epoch": 1.73, + "learning_rate": 1.1767659741620404e-05, + "loss": 2.2166, + "step": 8515 + }, + { + "epoch": 1.73, + "learning_rate": 1.1765959579609679e-05, + "loss": 2.1866, + "step": 8516 + }, + { + "epoch": 1.73, + "learning_rate": 1.176425936490819e-05, + "loss": 2.0998, + "step": 8517 + }, + { + "epoch": 1.73, + "learning_rate": 1.1762559097566668e-05, + "loss": 2.0715, + "step": 8518 + }, + { + "epoch": 1.73, + "learning_rate": 1.1760858777635842e-05, + "loss": 2.1101, + "step": 8519 + }, + { + "epoch": 1.73, + "learning_rate": 1.1759158405166446e-05, + "loss": 2.0806, + "step": 8520 + }, + { + "epoch": 1.73, + "learning_rate": 1.175745798020921e-05, + "loss": 2.1651, + "step": 8521 + }, + { + "epoch": 1.73, + "learning_rate": 1.1755757502814876e-05, + "loss": 2.0831, + "step": 8522 + }, + { + "epoch": 1.73, + "learning_rate": 1.1754056973034176e-05, + "loss": 2.1691, + "step": 8523 + }, + { + "epoch": 1.73, + "learning_rate": 1.1752356390917847e-05, + "loss": 2.2086, + "step": 8524 + }, + { + "epoch": 1.73, + "learning_rate": 1.175065575651663e-05, + "loss": 2.1257, + "step": 8525 + }, + { + "epoch": 1.73, + "learning_rate": 1.1748955069881274e-05, + "loss": 2.1076, + "step": 8526 + }, + { + "epoch": 1.73, + "learning_rate": 1.1747254331062515e-05, + "loss": 2.2017, + "step": 8527 + }, + { + "epoch": 1.73, + "learning_rate": 1.1745553540111099e-05, + "loss": 2.1714, + "step": 8528 + }, + { + "epoch": 1.73, + "learning_rate": 1.1743852697077766e-05, + "loss": 2.1567, + "step": 8529 + }, + { + "epoch": 1.73, + "learning_rate": 1.174215180201328e-05, + "loss": 2.0942, + "step": 8530 + }, + { + "epoch": 1.73, + "learning_rate": 1.1740450854968378e-05, + "loss": 2.1209, + "step": 8531 + }, + { + "epoch": 1.73, + "learning_rate": 1.1738749855993814e-05, + "loss": 2.1415, + "step": 8532 + }, + { + "epoch": 1.73, + "learning_rate": 1.1737048805140338e-05, + "loss": 2.0914, + "step": 8533 + }, + { + "epoch": 1.73, + "learning_rate": 1.1735347702458705e-05, + "loss": 2.1866, + "step": 8534 + }, + { + "epoch": 1.73, + "learning_rate": 1.1733646547999678e-05, + "loss": 2.1333, + "step": 8535 + }, + { + "epoch": 1.73, + "learning_rate": 1.1731945341814007e-05, + "loss": 2.1515, + "step": 8536 + }, + { + "epoch": 1.73, + "learning_rate": 1.173024408395245e-05, + "loss": 2.0964, + "step": 8537 + }, + { + "epoch": 1.73, + "learning_rate": 1.1728542774465769e-05, + "loss": 2.1349, + "step": 8538 + }, + { + "epoch": 1.73, + "learning_rate": 1.1726841413404728e-05, + "loss": 2.0753, + "step": 8539 + }, + { + "epoch": 1.73, + "learning_rate": 1.1725140000820087e-05, + "loss": 2.1337, + "step": 8540 + }, + { + "epoch": 1.73, + "learning_rate": 1.1723438536762615e-05, + "loss": 2.0884, + "step": 8541 + }, + { + "epoch": 1.73, + "learning_rate": 1.1721737021283074e-05, + "loss": 2.165, + "step": 8542 + }, + { + "epoch": 1.73, + "learning_rate": 1.1720035454432235e-05, + "loss": 2.1974, + "step": 8543 + }, + { + "epoch": 1.73, + "learning_rate": 1.1718333836260865e-05, + "loss": 2.0736, + "step": 8544 + }, + { + "epoch": 1.73, + "learning_rate": 1.1716632166819735e-05, + "loss": 2.0909, + "step": 8545 + }, + { + "epoch": 1.73, + "learning_rate": 1.171493044615962e-05, + "loss": 2.1302, + "step": 8546 + }, + { + "epoch": 1.73, + "learning_rate": 1.1713228674331294e-05, + "loss": 2.1635, + "step": 8547 + }, + { + "epoch": 1.74, + "learning_rate": 1.1711526851385534e-05, + "loss": 2.1827, + "step": 8548 + }, + { + "epoch": 1.74, + "learning_rate": 1.1709824977373105e-05, + "loss": 2.0692, + "step": 8549 + }, + { + "epoch": 1.74, + "learning_rate": 1.1708123052344803e-05, + "loss": 2.1364, + "step": 8550 + }, + { + "epoch": 1.74, + "learning_rate": 1.1706421076351402e-05, + "loss": 2.1096, + "step": 8551 + }, + { + "epoch": 1.74, + "learning_rate": 1.170471904944368e-05, + "loss": 2.1562, + "step": 8552 + }, + { + "epoch": 1.74, + "learning_rate": 1.170301697167242e-05, + "loss": 2.1267, + "step": 8553 + }, + { + "epoch": 1.74, + "learning_rate": 1.1701314843088409e-05, + "loss": 2.0986, + "step": 8554 + }, + { + "epoch": 1.74, + "learning_rate": 1.1699612663742434e-05, + "loss": 2.0648, + "step": 8555 + }, + { + "epoch": 1.74, + "learning_rate": 1.1697910433685285e-05, + "loss": 2.0782, + "step": 8556 + }, + { + "epoch": 1.74, + "learning_rate": 1.1696208152967747e-05, + "loss": 2.0622, + "step": 8557 + }, + { + "epoch": 1.74, + "learning_rate": 1.1694505821640612e-05, + "loss": 2.1352, + "step": 8558 + }, + { + "epoch": 1.74, + "learning_rate": 1.169280343975467e-05, + "loss": 2.1341, + "step": 8559 + }, + { + "epoch": 1.74, + "learning_rate": 1.169110100736072e-05, + "loss": 2.0812, + "step": 8560 + }, + { + "epoch": 1.74, + "learning_rate": 1.1689398524509552e-05, + "loss": 2.0183, + "step": 8561 + }, + { + "epoch": 1.74, + "learning_rate": 1.1687695991251968e-05, + "loss": 2.1265, + "step": 8562 + }, + { + "epoch": 1.74, + "learning_rate": 1.1685993407638762e-05, + "loss": 2.1045, + "step": 8563 + }, + { + "epoch": 1.74, + "learning_rate": 1.1684290773720736e-05, + "loss": 2.1367, + "step": 8564 + }, + { + "epoch": 1.74, + "learning_rate": 1.168258808954869e-05, + "loss": 2.1745, + "step": 8565 + }, + { + "epoch": 1.74, + "learning_rate": 1.168088535517343e-05, + "loss": 2.1333, + "step": 8566 + }, + { + "epoch": 1.74, + "learning_rate": 1.1679182570645757e-05, + "loss": 2.1904, + "step": 8567 + }, + { + "epoch": 1.74, + "learning_rate": 1.167747973601648e-05, + "loss": 2.1731, + "step": 8568 + }, + { + "epoch": 1.74, + "learning_rate": 1.1675776851336395e-05, + "loss": 2.1397, + "step": 8569 + }, + { + "epoch": 1.74, + "learning_rate": 1.1674073916656328e-05, + "loss": 2.1089, + "step": 8570 + }, + { + "epoch": 1.74, + "learning_rate": 1.1672370932027082e-05, + "loss": 2.1057, + "step": 8571 + }, + { + "epoch": 1.74, + "learning_rate": 1.1670667897499466e-05, + "loss": 2.1112, + "step": 8572 + }, + { + "epoch": 1.74, + "learning_rate": 1.1668964813124295e-05, + "loss": 2.1637, + "step": 8573 + }, + { + "epoch": 1.74, + "learning_rate": 1.166726167895238e-05, + "loss": 2.0899, + "step": 8574 + }, + { + "epoch": 1.74, + "learning_rate": 1.1665558495034546e-05, + "loss": 2.0968, + "step": 8575 + }, + { + "epoch": 1.74, + "learning_rate": 1.1663855261421607e-05, + "loss": 2.1196, + "step": 8576 + }, + { + "epoch": 1.74, + "learning_rate": 1.1662151978164377e-05, + "loss": 2.1845, + "step": 8577 + }, + { + "epoch": 1.74, + "learning_rate": 1.1660448645313683e-05, + "loss": 2.1556, + "step": 8578 + }, + { + "epoch": 1.74, + "learning_rate": 1.1658745262920344e-05, + "loss": 2.1076, + "step": 8579 + }, + { + "epoch": 1.74, + "learning_rate": 1.1657041831035186e-05, + "loss": 2.0853, + "step": 8580 + }, + { + "epoch": 1.74, + "learning_rate": 1.1655338349709034e-05, + "loss": 2.1493, + "step": 8581 + }, + { + "epoch": 1.74, + "learning_rate": 1.165363481899271e-05, + "loss": 2.1517, + "step": 8582 + }, + { + "epoch": 1.74, + "learning_rate": 1.1651931238937048e-05, + "loss": 2.1517, + "step": 8583 + }, + { + "epoch": 1.74, + "learning_rate": 1.1650227609592873e-05, + "loss": 2.0937, + "step": 8584 + }, + { + "epoch": 1.74, + "learning_rate": 1.1648523931011019e-05, + "loss": 2.1368, + "step": 8585 + }, + { + "epoch": 1.74, + "learning_rate": 1.164682020324232e-05, + "loss": 2.1622, + "step": 8586 + }, + { + "epoch": 1.74, + "learning_rate": 1.1645116426337607e-05, + "loss": 2.0474, + "step": 8587 + }, + { + "epoch": 1.74, + "learning_rate": 1.1643412600347714e-05, + "loss": 2.1273, + "step": 8588 + }, + { + "epoch": 1.74, + "learning_rate": 1.1641708725323477e-05, + "loss": 2.1518, + "step": 8589 + }, + { + "epoch": 1.74, + "learning_rate": 1.1640004801315743e-05, + "loss": 2.1659, + "step": 8590 + }, + { + "epoch": 1.74, + "learning_rate": 1.1638300828375344e-05, + "loss": 2.1459, + "step": 8591 + }, + { + "epoch": 1.74, + "learning_rate": 1.1636596806553125e-05, + "loss": 2.147, + "step": 8592 + }, + { + "epoch": 1.74, + "learning_rate": 1.1634892735899921e-05, + "loss": 2.0736, + "step": 8593 + }, + { + "epoch": 1.74, + "learning_rate": 1.1633188616466589e-05, + "loss": 2.0959, + "step": 8594 + }, + { + "epoch": 1.74, + "learning_rate": 1.1631484448303964e-05, + "loss": 2.1125, + "step": 8595 + }, + { + "epoch": 1.74, + "learning_rate": 1.16297802314629e-05, + "loss": 2.0661, + "step": 8596 + }, + { + "epoch": 1.75, + "learning_rate": 1.1628075965994242e-05, + "loss": 2.1632, + "step": 8597 + }, + { + "epoch": 1.75, + "learning_rate": 1.1626371651948835e-05, + "loss": 2.0983, + "step": 8598 + }, + { + "epoch": 1.75, + "learning_rate": 1.1624667289377543e-05, + "loss": 2.0882, + "step": 8599 + }, + { + "epoch": 1.75, + "learning_rate": 1.1622962878331209e-05, + "loss": 2.1621, + "step": 8600 + }, + { + "epoch": 1.75, + "learning_rate": 1.1621258418860693e-05, + "loss": 2.0934, + "step": 8601 + }, + { + "epoch": 1.75, + "learning_rate": 1.1619553911016843e-05, + "loss": 2.1225, + "step": 8602 + }, + { + "epoch": 1.75, + "learning_rate": 1.1617849354850523e-05, + "loss": 2.0826, + "step": 8603 + }, + { + "epoch": 1.75, + "learning_rate": 1.1616144750412591e-05, + "loss": 2.0535, + "step": 8604 + }, + { + "epoch": 1.75, + "learning_rate": 1.1614440097753907e-05, + "loss": 2.1994, + "step": 8605 + }, + { + "epoch": 1.75, + "learning_rate": 1.161273539692533e-05, + "loss": 2.1006, + "step": 8606 + }, + { + "epoch": 1.75, + "learning_rate": 1.1611030647977723e-05, + "loss": 2.0635, + "step": 8607 + }, + { + "epoch": 1.75, + "learning_rate": 1.1609325850961954e-05, + "loss": 2.1184, + "step": 8608 + }, + { + "epoch": 1.75, + "learning_rate": 1.1607621005928886e-05, + "loss": 2.1191, + "step": 8609 + }, + { + "epoch": 1.75, + "learning_rate": 1.1605916112929388e-05, + "loss": 2.1421, + "step": 8610 + }, + { + "epoch": 1.75, + "learning_rate": 1.1604211172014331e-05, + "loss": 2.0456, + "step": 8611 + }, + { + "epoch": 1.75, + "learning_rate": 1.160250618323458e-05, + "loss": 2.1616, + "step": 8612 + }, + { + "epoch": 1.75, + "learning_rate": 1.1600801146641004e-05, + "loss": 2.0904, + "step": 8613 + }, + { + "epoch": 1.75, + "learning_rate": 1.1599096062284484e-05, + "loss": 2.0776, + "step": 8614 + }, + { + "epoch": 1.75, + "learning_rate": 1.1597390930215893e-05, + "loss": 2.1499, + "step": 8615 + }, + { + "epoch": 1.75, + "learning_rate": 1.1595685750486103e-05, + "loss": 2.0832, + "step": 8616 + }, + { + "epoch": 1.75, + "learning_rate": 1.1593980523145993e-05, + "loss": 2.1628, + "step": 8617 + }, + { + "epoch": 1.75, + "learning_rate": 1.159227524824644e-05, + "loss": 2.1351, + "step": 8618 + }, + { + "epoch": 1.75, + "learning_rate": 1.1590569925838329e-05, + "loss": 2.1434, + "step": 8619 + }, + { + "epoch": 1.75, + "learning_rate": 1.1588864555972538e-05, + "loss": 2.1241, + "step": 8620 + }, + { + "epoch": 1.75, + "learning_rate": 1.1587159138699952e-05, + "loss": 2.1447, + "step": 8621 + }, + { + "epoch": 1.75, + "learning_rate": 1.1585453674071451e-05, + "loss": 2.1131, + "step": 8622 + }, + { + "epoch": 1.75, + "learning_rate": 1.1583748162137924e-05, + "loss": 2.2713, + "step": 8623 + }, + { + "epoch": 1.75, + "learning_rate": 1.1582042602950261e-05, + "loss": 2.158, + "step": 8624 + }, + { + "epoch": 1.75, + "learning_rate": 1.1580336996559347e-05, + "loss": 2.0785, + "step": 8625 + }, + { + "epoch": 1.75, + "learning_rate": 1.157863134301607e-05, + "loss": 2.1425, + "step": 8626 + }, + { + "epoch": 1.75, + "learning_rate": 1.1576925642371325e-05, + "loss": 2.1509, + "step": 8627 + }, + { + "epoch": 1.75, + "learning_rate": 1.1575219894676002e-05, + "loss": 2.1715, + "step": 8628 + }, + { + "epoch": 1.75, + "learning_rate": 1.1573514099980998e-05, + "loss": 2.1117, + "step": 8629 + }, + { + "epoch": 1.75, + "learning_rate": 1.1571808258337209e-05, + "loss": 2.1663, + "step": 8630 + }, + { + "epoch": 1.75, + "learning_rate": 1.157010236979553e-05, + "loss": 2.131, + "step": 8631 + }, + { + "epoch": 1.75, + "learning_rate": 1.1568396434406861e-05, + "loss": 2.1339, + "step": 8632 + }, + { + "epoch": 1.75, + "learning_rate": 1.1566690452222097e-05, + "loss": 2.0997, + "step": 8633 + }, + { + "epoch": 1.75, + "learning_rate": 1.1564984423292145e-05, + "loss": 2.1743, + "step": 8634 + }, + { + "epoch": 1.75, + "learning_rate": 1.1563278347667909e-05, + "loss": 2.0137, + "step": 8635 + }, + { + "epoch": 1.75, + "learning_rate": 1.1561572225400288e-05, + "loss": 2.1868, + "step": 8636 + }, + { + "epoch": 1.75, + "learning_rate": 1.1559866056540189e-05, + "loss": 2.0913, + "step": 8637 + }, + { + "epoch": 1.75, + "learning_rate": 1.1558159841138515e-05, + "loss": 2.0844, + "step": 8638 + }, + { + "epoch": 1.75, + "learning_rate": 1.1556453579246183e-05, + "loss": 2.078, + "step": 8639 + }, + { + "epoch": 1.75, + "learning_rate": 1.1554747270914098e-05, + "loss": 2.1128, + "step": 8640 + }, + { + "epoch": 1.75, + "learning_rate": 1.1553040916193168e-05, + "loss": 2.1961, + "step": 8641 + }, + { + "epoch": 1.75, + "learning_rate": 1.1551334515134307e-05, + "loss": 2.1721, + "step": 8642 + }, + { + "epoch": 1.75, + "learning_rate": 1.154962806778843e-05, + "loss": 2.1784, + "step": 8643 + }, + { + "epoch": 1.75, + "learning_rate": 1.1547921574206455e-05, + "loss": 2.1172, + "step": 8644 + }, + { + "epoch": 1.75, + "learning_rate": 1.1546215034439295e-05, + "loss": 2.1148, + "step": 8645 + }, + { + "epoch": 1.75, + "learning_rate": 1.1544508448537864e-05, + "loss": 2.1797, + "step": 8646 + }, + { + "epoch": 1.76, + "learning_rate": 1.1542801816553088e-05, + "loss": 2.1521, + "step": 8647 + }, + { + "epoch": 1.76, + "learning_rate": 1.1541095138535884e-05, + "loss": 2.1435, + "step": 8648 + }, + { + "epoch": 1.76, + "learning_rate": 1.1539388414537178e-05, + "loss": 2.1246, + "step": 8649 + }, + { + "epoch": 1.76, + "learning_rate": 1.1537681644607888e-05, + "loss": 2.185, + "step": 8650 + }, + { + "epoch": 1.76, + "learning_rate": 1.1535974828798942e-05, + "loss": 2.2, + "step": 8651 + }, + { + "epoch": 1.76, + "learning_rate": 1.1534267967161266e-05, + "loss": 2.1538, + "step": 8652 + }, + { + "epoch": 1.76, + "learning_rate": 1.1532561059745784e-05, + "loss": 2.1458, + "step": 8653 + }, + { + "epoch": 1.76, + "learning_rate": 1.1530854106603426e-05, + "loss": 2.163, + "step": 8654 + }, + { + "epoch": 1.76, + "learning_rate": 1.1529147107785129e-05, + "loss": 2.1581, + "step": 8655 + }, + { + "epoch": 1.76, + "learning_rate": 1.1527440063341817e-05, + "loss": 2.1749, + "step": 8656 + }, + { + "epoch": 1.76, + "learning_rate": 1.1525732973324424e-05, + "loss": 2.1481, + "step": 8657 + }, + { + "epoch": 1.76, + "learning_rate": 1.1524025837783883e-05, + "loss": 2.1692, + "step": 8658 + }, + { + "epoch": 1.76, + "learning_rate": 1.1522318656771137e-05, + "loss": 2.0819, + "step": 8659 + }, + { + "epoch": 1.76, + "learning_rate": 1.1520611430337116e-05, + "loss": 2.0327, + "step": 8660 + }, + { + "epoch": 1.76, + "learning_rate": 1.1518904158532762e-05, + "loss": 2.1722, + "step": 8661 + }, + { + "epoch": 1.76, + "learning_rate": 1.1517196841409012e-05, + "loss": 2.0778, + "step": 8662 + }, + { + "epoch": 1.76, + "learning_rate": 1.1515489479016803e-05, + "loss": 2.1922, + "step": 8663 + }, + { + "epoch": 1.76, + "learning_rate": 1.1513782071407089e-05, + "loss": 2.0984, + "step": 8664 + }, + { + "epoch": 1.76, + "learning_rate": 1.1512074618630806e-05, + "loss": 2.1743, + "step": 8665 + }, + { + "epoch": 1.76, + "learning_rate": 1.1510367120738899e-05, + "loss": 2.1236, + "step": 8666 + }, + { + "epoch": 1.76, + "learning_rate": 1.1508659577782317e-05, + "loss": 2.069, + "step": 8667 + }, + { + "epoch": 1.76, + "learning_rate": 1.1506951989812005e-05, + "loss": 2.0309, + "step": 8668 + }, + { + "epoch": 1.76, + "learning_rate": 1.1505244356878918e-05, + "loss": 2.1166, + "step": 8669 + }, + { + "epoch": 1.76, + "learning_rate": 1.1503536679033997e-05, + "loss": 2.0764, + "step": 8670 + }, + { + "epoch": 1.76, + "learning_rate": 1.1501828956328203e-05, + "loss": 2.1588, + "step": 8671 + }, + { + "epoch": 1.76, + "learning_rate": 1.1500121188812483e-05, + "loss": 2.1269, + "step": 8672 + }, + { + "epoch": 1.76, + "learning_rate": 1.1498413376537794e-05, + "loss": 2.1326, + "step": 8673 + }, + { + "epoch": 1.76, + "learning_rate": 1.1496705519555092e-05, + "loss": 2.1923, + "step": 8674 + }, + { + "epoch": 1.76, + "learning_rate": 1.1494997617915334e-05, + "loss": 2.0679, + "step": 8675 + }, + { + "epoch": 1.76, + "learning_rate": 1.1493289671669479e-05, + "loss": 2.2199, + "step": 8676 + }, + { + "epoch": 1.76, + "learning_rate": 1.1491581680868485e-05, + "loss": 2.093, + "step": 8677 + }, + { + "epoch": 1.76, + "learning_rate": 1.1489873645563309e-05, + "loss": 2.1284, + "step": 8678 + }, + { + "epoch": 1.76, + "learning_rate": 1.1488165565804926e-05, + "loss": 2.1607, + "step": 8679 + }, + { + "epoch": 1.76, + "learning_rate": 1.148645744164429e-05, + "loss": 2.0678, + "step": 8680 + }, + { + "epoch": 1.76, + "learning_rate": 1.1484749273132372e-05, + "loss": 2.1845, + "step": 8681 + }, + { + "epoch": 1.76, + "learning_rate": 1.1483041060320125e-05, + "loss": 2.1114, + "step": 8682 + }, + { + "epoch": 1.76, + "learning_rate": 1.1481332803258536e-05, + "loss": 2.1003, + "step": 8683 + }, + { + "epoch": 1.76, + "learning_rate": 1.1479624501998564e-05, + "loss": 2.0962, + "step": 8684 + }, + { + "epoch": 1.76, + "learning_rate": 1.147791615659118e-05, + "loss": 2.1042, + "step": 8685 + }, + { + "epoch": 1.76, + "learning_rate": 1.1476207767087355e-05, + "loss": 2.1783, + "step": 8686 + }, + { + "epoch": 1.76, + "learning_rate": 1.147449933353806e-05, + "loss": 2.0584, + "step": 8687 + }, + { + "epoch": 1.76, + "learning_rate": 1.1472790855994275e-05, + "loss": 2.2069, + "step": 8688 + }, + { + "epoch": 1.76, + "learning_rate": 1.1471082334506975e-05, + "loss": 2.1243, + "step": 8689 + }, + { + "epoch": 1.76, + "learning_rate": 1.1469373769127134e-05, + "loss": 2.186, + "step": 8690 + }, + { + "epoch": 1.76, + "learning_rate": 1.1467665159905732e-05, + "loss": 2.1178, + "step": 8691 + }, + { + "epoch": 1.76, + "learning_rate": 1.1465956506893748e-05, + "loss": 2.1024, + "step": 8692 + }, + { + "epoch": 1.76, + "learning_rate": 1.1464247810142162e-05, + "loss": 2.1441, + "step": 8693 + }, + { + "epoch": 1.76, + "learning_rate": 1.1462539069701957e-05, + "loss": 2.0915, + "step": 8694 + }, + { + "epoch": 1.76, + "learning_rate": 1.1460830285624119e-05, + "loss": 2.0179, + "step": 8695 + }, + { + "epoch": 1.77, + "learning_rate": 1.1459121457959628e-05, + "loss": 2.0966, + "step": 8696 + }, + { + "epoch": 1.77, + "learning_rate": 1.1457412586759473e-05, + "loss": 2.1817, + "step": 8697 + }, + { + "epoch": 1.77, + "learning_rate": 1.145570367207464e-05, + "loss": 2.1358, + "step": 8698 + }, + { + "epoch": 1.77, + "learning_rate": 1.1453994713956122e-05, + "loss": 2.1635, + "step": 8699 + }, + { + "epoch": 1.77, + "learning_rate": 1.1452285712454905e-05, + "loss": 2.1747, + "step": 8700 + }, + { + "epoch": 1.77, + "learning_rate": 1.1450576667621982e-05, + "loss": 2.1594, + "step": 8701 + }, + { + "epoch": 1.77, + "learning_rate": 1.1448867579508338e-05, + "loss": 2.1602, + "step": 8702 + }, + { + "epoch": 1.77, + "learning_rate": 1.1447158448164981e-05, + "loss": 2.1089, + "step": 8703 + }, + { + "epoch": 1.77, + "learning_rate": 1.1445449273642897e-05, + "loss": 2.1378, + "step": 8704 + }, + { + "epoch": 1.77, + "learning_rate": 1.1443740055993084e-05, + "loss": 2.153, + "step": 8705 + }, + { + "epoch": 1.77, + "learning_rate": 1.1442030795266543e-05, + "loss": 2.0346, + "step": 8706 + }, + { + "epoch": 1.77, + "learning_rate": 1.1440321491514263e-05, + "loss": 2.0662, + "step": 8707 + }, + { + "epoch": 1.77, + "learning_rate": 1.1438612144787255e-05, + "loss": 2.2388, + "step": 8708 + }, + { + "epoch": 1.77, + "learning_rate": 1.1436902755136521e-05, + "loss": 2.1196, + "step": 8709 + }, + { + "epoch": 1.77, + "learning_rate": 1.1435193322613057e-05, + "loss": 2.1039, + "step": 8710 + }, + { + "epoch": 1.77, + "learning_rate": 1.1433483847267869e-05, + "loss": 2.1538, + "step": 8711 + }, + { + "epoch": 1.77, + "learning_rate": 1.1431774329151962e-05, + "loss": 2.0876, + "step": 8712 + }, + { + "epoch": 1.77, + "learning_rate": 1.1430064768316347e-05, + "loss": 2.1029, + "step": 8713 + }, + { + "epoch": 1.77, + "learning_rate": 1.1428355164812028e-05, + "loss": 2.076, + "step": 8714 + }, + { + "epoch": 1.77, + "learning_rate": 1.1426645518690015e-05, + "loss": 2.1984, + "step": 8715 + }, + { + "epoch": 1.77, + "learning_rate": 1.1424935830001322e-05, + "loss": 2.0966, + "step": 8716 + }, + { + "epoch": 1.77, + "learning_rate": 1.1423226098796956e-05, + "loss": 2.1424, + "step": 8717 + }, + { + "epoch": 1.77, + "learning_rate": 1.142151632512793e-05, + "loss": 2.1285, + "step": 8718 + }, + { + "epoch": 1.77, + "learning_rate": 1.1419806509045266e-05, + "loss": 2.1809, + "step": 8719 + }, + { + "epoch": 1.77, + "learning_rate": 1.141809665059997e-05, + "loss": 2.1502, + "step": 8720 + }, + { + "epoch": 1.77, + "learning_rate": 1.1416386749843065e-05, + "loss": 2.1144, + "step": 8721 + }, + { + "epoch": 1.77, + "learning_rate": 1.1414676806825562e-05, + "loss": 2.1461, + "step": 8722 + }, + { + "epoch": 1.77, + "learning_rate": 1.141296682159849e-05, + "loss": 2.1991, + "step": 8723 + }, + { + "epoch": 1.77, + "learning_rate": 1.1411256794212864e-05, + "loss": 2.1436, + "step": 8724 + }, + { + "epoch": 1.77, + "learning_rate": 1.1409546724719708e-05, + "loss": 2.159, + "step": 8725 + }, + { + "epoch": 1.77, + "learning_rate": 1.1407836613170045e-05, + "loss": 2.1384, + "step": 8726 + }, + { + "epoch": 1.77, + "learning_rate": 1.1406126459614892e-05, + "loss": 2.1017, + "step": 8727 + }, + { + "epoch": 1.77, + "learning_rate": 1.1404416264105287e-05, + "loss": 2.1328, + "step": 8728 + }, + { + "epoch": 1.77, + "learning_rate": 1.1402706026692251e-05, + "loss": 2.1045, + "step": 8729 + }, + { + "epoch": 1.77, + "learning_rate": 1.1400995747426814e-05, + "loss": 2.1468, + "step": 8730 + }, + { + "epoch": 1.77, + "learning_rate": 1.1399285426359999e-05, + "loss": 2.1058, + "step": 8731 + }, + { + "epoch": 1.77, + "learning_rate": 1.1397575063542842e-05, + "loss": 2.1344, + "step": 8732 + }, + { + "epoch": 1.77, + "learning_rate": 1.1395864659026377e-05, + "loss": 2.1534, + "step": 8733 + }, + { + "epoch": 1.77, + "learning_rate": 1.1394154212861634e-05, + "loss": 2.1658, + "step": 8734 + }, + { + "epoch": 1.77, + "learning_rate": 1.1392443725099646e-05, + "loss": 2.1174, + "step": 8735 + }, + { + "epoch": 1.77, + "learning_rate": 1.1390733195791455e-05, + "loss": 2.13, + "step": 8736 + }, + { + "epoch": 1.77, + "learning_rate": 1.1389022624988089e-05, + "loss": 2.1479, + "step": 8737 + }, + { + "epoch": 1.77, + "learning_rate": 1.1387312012740592e-05, + "loss": 2.1695, + "step": 8738 + }, + { + "epoch": 1.77, + "learning_rate": 1.1385601359100002e-05, + "loss": 2.0976, + "step": 8739 + }, + { + "epoch": 1.77, + "learning_rate": 1.1383890664117364e-05, + "loss": 2.1233, + "step": 8740 + }, + { + "epoch": 1.77, + "learning_rate": 1.138217992784371e-05, + "loss": 2.0961, + "step": 8741 + }, + { + "epoch": 1.77, + "learning_rate": 1.1380469150330089e-05, + "loss": 2.1164, + "step": 8742 + }, + { + "epoch": 1.77, + "learning_rate": 1.1378758331627546e-05, + "loss": 2.1459, + "step": 8743 + }, + { + "epoch": 1.77, + "learning_rate": 1.1377047471787128e-05, + "loss": 2.1215, + "step": 8744 + }, + { + "epoch": 1.78, + "learning_rate": 1.1375336570859879e-05, + "loss": 2.1477, + "step": 8745 + }, + { + "epoch": 1.78, + "learning_rate": 1.1373625628896844e-05, + "loss": 2.1471, + "step": 8746 + }, + { + "epoch": 1.78, + "learning_rate": 1.1371914645949074e-05, + "loss": 2.1705, + "step": 8747 + }, + { + "epoch": 1.78, + "learning_rate": 1.1370203622067624e-05, + "loss": 2.1394, + "step": 8748 + }, + { + "epoch": 1.78, + "learning_rate": 1.1368492557303543e-05, + "loss": 2.1682, + "step": 8749 + }, + { + "epoch": 1.78, + "learning_rate": 1.1366781451707882e-05, + "loss": 2.0892, + "step": 8750 + }, + { + "epoch": 1.78, + "learning_rate": 1.1365070305331696e-05, + "loss": 2.1027, + "step": 8751 + }, + { + "epoch": 1.78, + "learning_rate": 1.1363359118226041e-05, + "loss": 2.167, + "step": 8752 + }, + { + "epoch": 1.78, + "learning_rate": 1.1361647890441973e-05, + "loss": 2.0311, + "step": 8753 + }, + { + "epoch": 1.78, + "learning_rate": 1.1359936622030552e-05, + "loss": 2.1297, + "step": 8754 + }, + { + "epoch": 1.78, + "learning_rate": 1.1358225313042833e-05, + "loss": 2.1625, + "step": 8755 + }, + { + "epoch": 1.78, + "learning_rate": 1.1356513963529879e-05, + "loss": 2.1798, + "step": 8756 + }, + { + "epoch": 1.78, + "learning_rate": 1.1354802573542747e-05, + "loss": 2.0827, + "step": 8757 + }, + { + "epoch": 1.78, + "learning_rate": 1.1353091143132509e-05, + "loss": 2.0565, + "step": 8758 + }, + { + "epoch": 1.78, + "learning_rate": 1.1351379672350219e-05, + "loss": 2.2157, + "step": 8759 + }, + { + "epoch": 1.78, + "learning_rate": 1.1349668161246945e-05, + "loss": 2.0595, + "step": 8760 + }, + { + "epoch": 1.78, + "learning_rate": 1.1347956609873758e-05, + "loss": 2.138, + "step": 8761 + }, + { + "epoch": 1.78, + "learning_rate": 1.1346245018281713e-05, + "loss": 2.1519, + "step": 8762 + }, + { + "epoch": 1.78, + "learning_rate": 1.1344533386521896e-05, + "loss": 2.2431, + "step": 8763 + }, + { + "epoch": 1.78, + "learning_rate": 1.1342821714645365e-05, + "loss": 2.121, + "step": 8764 + }, + { + "epoch": 1.78, + "learning_rate": 1.1341110002703195e-05, + "loss": 2.0693, + "step": 8765 + }, + { + "epoch": 1.78, + "learning_rate": 1.1339398250746457e-05, + "loss": 2.1339, + "step": 8766 + }, + { + "epoch": 1.78, + "learning_rate": 1.1337686458826218e-05, + "loss": 2.2489, + "step": 8767 + }, + { + "epoch": 1.78, + "learning_rate": 1.1335974626993564e-05, + "loss": 2.1377, + "step": 8768 + }, + { + "epoch": 1.78, + "learning_rate": 1.1334262755299567e-05, + "loss": 2.0958, + "step": 8769 + }, + { + "epoch": 1.78, + "learning_rate": 1.13325508437953e-05, + "loss": 2.1049, + "step": 8770 + }, + { + "epoch": 1.78, + "learning_rate": 1.1330838892531841e-05, + "loss": 2.127, + "step": 8771 + }, + { + "epoch": 1.78, + "learning_rate": 1.1329126901560277e-05, + "loss": 2.0616, + "step": 8772 + }, + { + "epoch": 1.78, + "learning_rate": 1.1327414870931683e-05, + "loss": 2.218, + "step": 8773 + }, + { + "epoch": 1.78, + "learning_rate": 1.1325702800697143e-05, + "loss": 2.1111, + "step": 8774 + }, + { + "epoch": 1.78, + "learning_rate": 1.1323990690907734e-05, + "loss": 2.16, + "step": 8775 + }, + { + "epoch": 1.78, + "learning_rate": 1.1322278541614544e-05, + "loss": 2.1518, + "step": 8776 + }, + { + "epoch": 1.78, + "learning_rate": 1.132056635286866e-05, + "loss": 2.1659, + "step": 8777 + }, + { + "epoch": 1.78, + "learning_rate": 1.1318854124721168e-05, + "loss": 2.1236, + "step": 8778 + }, + { + "epoch": 1.78, + "learning_rate": 1.1317141857223151e-05, + "loss": 2.1565, + "step": 8779 + }, + { + "epoch": 1.78, + "learning_rate": 1.1315429550425705e-05, + "loss": 2.0713, + "step": 8780 + }, + { + "epoch": 1.78, + "learning_rate": 1.1313717204379912e-05, + "loss": 2.1103, + "step": 8781 + }, + { + "epoch": 1.78, + "learning_rate": 1.131200481913687e-05, + "loss": 2.1187, + "step": 8782 + }, + { + "epoch": 1.78, + "learning_rate": 1.1310292394747668e-05, + "loss": 2.0868, + "step": 8783 + }, + { + "epoch": 1.78, + "learning_rate": 1.13085799312634e-05, + "loss": 2.11, + "step": 8784 + }, + { + "epoch": 1.78, + "learning_rate": 1.1306867428735162e-05, + "loss": 2.1806, + "step": 8785 + }, + { + "epoch": 1.78, + "learning_rate": 1.1305154887214043e-05, + "loss": 2.1787, + "step": 8786 + }, + { + "epoch": 1.78, + "learning_rate": 1.130344230675115e-05, + "loss": 2.1409, + "step": 8787 + }, + { + "epoch": 1.78, + "learning_rate": 1.1301729687397576e-05, + "loss": 2.0969, + "step": 8788 + }, + { + "epoch": 1.78, + "learning_rate": 1.130001702920442e-05, + "loss": 2.1459, + "step": 8789 + }, + { + "epoch": 1.78, + "learning_rate": 1.1298304332222782e-05, + "loss": 2.0504, + "step": 8790 + }, + { + "epoch": 1.78, + "learning_rate": 1.1296591596503763e-05, + "loss": 2.1067, + "step": 8791 + }, + { + "epoch": 1.78, + "learning_rate": 1.129487882209847e-05, + "loss": 2.1552, + "step": 8792 + }, + { + "epoch": 1.78, + "learning_rate": 1.1293166009058005e-05, + "loss": 2.0736, + "step": 8793 + }, + { + "epoch": 1.79, + "learning_rate": 1.1291453157433472e-05, + "loss": 2.1355, + "step": 8794 + }, + { + "epoch": 1.79, + "learning_rate": 1.1289740267275975e-05, + "loss": 2.0195, + "step": 8795 + }, + { + "epoch": 1.79, + "learning_rate": 1.1288027338636622e-05, + "loss": 2.1284, + "step": 8796 + }, + { + "epoch": 1.79, + "learning_rate": 1.1286314371566527e-05, + "loss": 2.1335, + "step": 8797 + }, + { + "epoch": 1.79, + "learning_rate": 1.1284601366116796e-05, + "loss": 2.2074, + "step": 8798 + }, + { + "epoch": 1.79, + "learning_rate": 1.1282888322338536e-05, + "loss": 2.1799, + "step": 8799 + }, + { + "epoch": 1.79, + "learning_rate": 1.1281175240282867e-05, + "loss": 2.0983, + "step": 8800 + }, + { + "epoch": 1.79, + "learning_rate": 1.1279462120000894e-05, + "loss": 2.2098, + "step": 8801 + }, + { + "epoch": 1.79, + "learning_rate": 1.1277748961543738e-05, + "loss": 2.1456, + "step": 8802 + }, + { + "epoch": 1.79, + "learning_rate": 1.1276035764962508e-05, + "loss": 2.1226, + "step": 8803 + }, + { + "epoch": 1.79, + "learning_rate": 1.1274322530308326e-05, + "loss": 2.1348, + "step": 8804 + }, + { + "epoch": 1.79, + "learning_rate": 1.1272609257632305e-05, + "loss": 2.1152, + "step": 8805 + }, + { + "epoch": 1.79, + "learning_rate": 1.1270895946985568e-05, + "loss": 2.1736, + "step": 8806 + }, + { + "epoch": 1.79, + "learning_rate": 1.126918259841923e-05, + "loss": 2.0425, + "step": 8807 + }, + { + "epoch": 1.79, + "learning_rate": 1.1267469211984419e-05, + "loss": 2.1746, + "step": 8808 + }, + { + "epoch": 1.79, + "learning_rate": 1.126575578773225e-05, + "loss": 2.1098, + "step": 8809 + }, + { + "epoch": 1.79, + "learning_rate": 1.126404232571385e-05, + "loss": 2.0952, + "step": 8810 + }, + { + "epoch": 1.79, + "learning_rate": 1.126232882598034e-05, + "loss": 2.1786, + "step": 8811 + }, + { + "epoch": 1.79, + "learning_rate": 1.1260615288582852e-05, + "loss": 2.0245, + "step": 8812 + }, + { + "epoch": 1.79, + "learning_rate": 1.125890171357251e-05, + "loss": 2.0935, + "step": 8813 + }, + { + "epoch": 1.79, + "learning_rate": 1.1257188101000437e-05, + "loss": 2.1331, + "step": 8814 + }, + { + "epoch": 1.79, + "learning_rate": 1.1255474450917769e-05, + "loss": 2.1578, + "step": 8815 + }, + { + "epoch": 1.79, + "learning_rate": 1.1253760763375628e-05, + "loss": 2.0572, + "step": 8816 + }, + { + "epoch": 1.79, + "learning_rate": 1.1252047038425152e-05, + "loss": 1.9943, + "step": 8817 + }, + { + "epoch": 1.79, + "learning_rate": 1.1250333276117474e-05, + "loss": 2.1476, + "step": 8818 + }, + { + "epoch": 1.79, + "learning_rate": 1.1248619476503717e-05, + "loss": 2.0751, + "step": 8819 + }, + { + "epoch": 1.79, + "learning_rate": 1.1246905639635029e-05, + "loss": 2.147, + "step": 8820 + }, + { + "epoch": 1.79, + "learning_rate": 1.1245191765562536e-05, + "loss": 2.1183, + "step": 8821 + }, + { + "epoch": 1.79, + "learning_rate": 1.1243477854337382e-05, + "loss": 2.0819, + "step": 8822 + }, + { + "epoch": 1.79, + "learning_rate": 1.1241763906010694e-05, + "loss": 2.062, + "step": 8823 + }, + { + "epoch": 1.79, + "learning_rate": 1.1240049920633624e-05, + "loss": 2.1434, + "step": 8824 + }, + { + "epoch": 1.79, + "learning_rate": 1.1238335898257305e-05, + "loss": 2.0648, + "step": 8825 + }, + { + "epoch": 1.79, + "learning_rate": 1.1236621838932875e-05, + "loss": 2.0788, + "step": 8826 + }, + { + "epoch": 1.79, + "learning_rate": 1.123490774271148e-05, + "loss": 2.104, + "step": 8827 + }, + { + "epoch": 1.79, + "learning_rate": 1.1233193609644268e-05, + "loss": 2.1927, + "step": 8828 + }, + { + "epoch": 1.79, + "learning_rate": 1.1231479439782378e-05, + "loss": 2.1064, + "step": 8829 + }, + { + "epoch": 1.79, + "learning_rate": 1.1229765233176955e-05, + "loss": 2.131, + "step": 8830 + }, + { + "epoch": 1.79, + "learning_rate": 1.1228050989879145e-05, + "loss": 2.1408, + "step": 8831 + }, + { + "epoch": 1.79, + "learning_rate": 1.1226336709940097e-05, + "loss": 2.1749, + "step": 8832 + }, + { + "epoch": 1.79, + "learning_rate": 1.1224622393410964e-05, + "loss": 2.1059, + "step": 8833 + }, + { + "epoch": 1.79, + "learning_rate": 1.1222908040342894e-05, + "loss": 2.0858, + "step": 8834 + }, + { + "epoch": 1.79, + "learning_rate": 1.1221193650787034e-05, + "loss": 2.0104, + "step": 8835 + }, + { + "epoch": 1.79, + "learning_rate": 1.1219479224794534e-05, + "loss": 2.0496, + "step": 8836 + }, + { + "epoch": 1.79, + "learning_rate": 1.1217764762416556e-05, + "loss": 2.1681, + "step": 8837 + }, + { + "epoch": 1.79, + "learning_rate": 1.1216050263704253e-05, + "loss": 2.0596, + "step": 8838 + }, + { + "epoch": 1.79, + "learning_rate": 1.1214335728708771e-05, + "loss": 2.1087, + "step": 8839 + }, + { + "epoch": 1.79, + "learning_rate": 1.1212621157481276e-05, + "loss": 2.118, + "step": 8840 + }, + { + "epoch": 1.79, + "learning_rate": 1.121090655007292e-05, + "loss": 2.1865, + "step": 8841 + }, + { + "epoch": 1.79, + "learning_rate": 1.1209191906534863e-05, + "loss": 2.1147, + "step": 8842 + }, + { + "epoch": 1.79, + "learning_rate": 1.1207477226918266e-05, + "loss": 2.1215, + "step": 8843 + }, + { + "epoch": 1.8, + "learning_rate": 1.1205762511274293e-05, + "loss": 2.0799, + "step": 8844 + }, + { + "epoch": 1.8, + "learning_rate": 1.1204047759654099e-05, + "loss": 2.0514, + "step": 8845 + }, + { + "epoch": 1.8, + "learning_rate": 1.1202332972108846e-05, + "loss": 2.204, + "step": 8846 + }, + { + "epoch": 1.8, + "learning_rate": 1.1200618148689705e-05, + "loss": 2.1313, + "step": 8847 + }, + { + "epoch": 1.8, + "learning_rate": 1.119890328944784e-05, + "loss": 2.071, + "step": 8848 + }, + { + "epoch": 1.8, + "learning_rate": 1.1197188394434413e-05, + "loss": 2.0921, + "step": 8849 + }, + { + "epoch": 1.8, + "learning_rate": 1.1195473463700592e-05, + "loss": 2.1315, + "step": 8850 + }, + { + "epoch": 1.8, + "learning_rate": 1.1193758497297544e-05, + "loss": 2.1553, + "step": 8851 + }, + { + "epoch": 1.8, + "learning_rate": 1.1192043495276444e-05, + "loss": 2.0614, + "step": 8852 + }, + { + "epoch": 1.8, + "learning_rate": 1.1190328457688458e-05, + "loss": 2.0995, + "step": 8853 + }, + { + "epoch": 1.8, + "learning_rate": 1.118861338458476e-05, + "loss": 2.1137, + "step": 8854 + }, + { + "epoch": 1.8, + "learning_rate": 1.1186898276016519e-05, + "loss": 2.0119, + "step": 8855 + }, + { + "epoch": 1.8, + "learning_rate": 1.1185183132034904e-05, + "loss": 2.0928, + "step": 8856 + }, + { + "epoch": 1.8, + "learning_rate": 1.1183467952691102e-05, + "loss": 2.225, + "step": 8857 + }, + { + "epoch": 1.8, + "learning_rate": 1.1181752738036282e-05, + "loss": 2.1055, + "step": 8858 + }, + { + "epoch": 1.8, + "learning_rate": 1.1180037488121622e-05, + "loss": 2.1128, + "step": 8859 + }, + { + "epoch": 1.8, + "learning_rate": 1.1178322202998298e-05, + "loss": 2.1695, + "step": 8860 + }, + { + "epoch": 1.8, + "learning_rate": 1.1176606882717487e-05, + "loss": 2.1552, + "step": 8861 + }, + { + "epoch": 1.8, + "learning_rate": 1.1174891527330374e-05, + "loss": 2.113, + "step": 8862 + }, + { + "epoch": 1.8, + "learning_rate": 1.1173176136888137e-05, + "loss": 2.1055, + "step": 8863 + }, + { + "epoch": 1.8, + "learning_rate": 1.1171460711441956e-05, + "loss": 2.2173, + "step": 8864 + }, + { + "epoch": 1.8, + "learning_rate": 1.1169745251043019e-05, + "loss": 2.0498, + "step": 8865 + }, + { + "epoch": 1.8, + "learning_rate": 1.1168029755742506e-05, + "loss": 2.1339, + "step": 8866 + }, + { + "epoch": 1.8, + "learning_rate": 1.1166314225591603e-05, + "loss": 2.1452, + "step": 8867 + }, + { + "epoch": 1.8, + "learning_rate": 1.1164598660641498e-05, + "loss": 2.1236, + "step": 8868 + }, + { + "epoch": 1.8, + "learning_rate": 1.1162883060943376e-05, + "loss": 2.1022, + "step": 8869 + }, + { + "epoch": 1.8, + "learning_rate": 1.1161167426548425e-05, + "loss": 2.1229, + "step": 8870 + }, + { + "epoch": 1.8, + "learning_rate": 1.1159451757507836e-05, + "loss": 2.0545, + "step": 8871 + }, + { + "epoch": 1.8, + "learning_rate": 1.1157736053872798e-05, + "loss": 2.1459, + "step": 8872 + }, + { + "epoch": 1.8, + "learning_rate": 1.1156020315694505e-05, + "loss": 2.2165, + "step": 8873 + }, + { + "epoch": 1.8, + "learning_rate": 1.1154304543024144e-05, + "loss": 2.1135, + "step": 8874 + }, + { + "epoch": 1.8, + "learning_rate": 1.1152588735912908e-05, + "loss": 2.1245, + "step": 8875 + }, + { + "epoch": 1.8, + "learning_rate": 1.1150872894412e-05, + "loss": 2.0895, + "step": 8876 + }, + { + "epoch": 1.8, + "learning_rate": 1.1149157018572607e-05, + "loss": 2.1595, + "step": 8877 + }, + { + "epoch": 1.8, + "learning_rate": 1.1147441108445931e-05, + "loss": 2.0601, + "step": 8878 + }, + { + "epoch": 1.8, + "learning_rate": 1.1145725164083166e-05, + "loss": 2.1618, + "step": 8879 + }, + { + "epoch": 1.8, + "learning_rate": 1.1144009185535506e-05, + "loss": 2.189, + "step": 8880 + }, + { + "epoch": 1.8, + "learning_rate": 1.1142293172854162e-05, + "loss": 2.097, + "step": 8881 + }, + { + "epoch": 1.8, + "learning_rate": 1.1140577126090326e-05, + "loss": 2.1028, + "step": 8882 + }, + { + "epoch": 1.8, + "learning_rate": 1.1138861045295205e-05, + "loss": 2.0542, + "step": 8883 + }, + { + "epoch": 1.8, + "learning_rate": 1.1137144930519992e-05, + "loss": 2.168, + "step": 8884 + }, + { + "epoch": 1.8, + "learning_rate": 1.11354287818159e-05, + "loss": 2.076, + "step": 8885 + }, + { + "epoch": 1.8, + "learning_rate": 1.1133712599234132e-05, + "loss": 2.1403, + "step": 8886 + }, + { + "epoch": 1.8, + "learning_rate": 1.1131996382825892e-05, + "loss": 2.1383, + "step": 8887 + }, + { + "epoch": 1.8, + "learning_rate": 1.1130280132642384e-05, + "loss": 2.1538, + "step": 8888 + }, + { + "epoch": 1.8, + "learning_rate": 1.1128563848734819e-05, + "loss": 2.0994, + "step": 8889 + }, + { + "epoch": 1.8, + "learning_rate": 1.1126847531154404e-05, + "loss": 2.1141, + "step": 8890 + }, + { + "epoch": 1.8, + "learning_rate": 1.1125131179952349e-05, + "loss": 2.231, + "step": 8891 + }, + { + "epoch": 1.8, + "learning_rate": 1.1123414795179868e-05, + "loss": 2.1002, + "step": 8892 + }, + { + "epoch": 1.81, + "learning_rate": 1.1121698376888169e-05, + "loss": 2.0481, + "step": 8893 + }, + { + "epoch": 1.81, + "learning_rate": 1.1119981925128465e-05, + "loss": 2.1709, + "step": 8894 + }, + { + "epoch": 1.81, + "learning_rate": 1.1118265439951965e-05, + "loss": 2.1118, + "step": 8895 + }, + { + "epoch": 1.81, + "learning_rate": 1.1116548921409893e-05, + "loss": 2.1855, + "step": 8896 + }, + { + "epoch": 1.81, + "learning_rate": 1.1114832369553461e-05, + "loss": 2.0783, + "step": 8897 + }, + { + "epoch": 1.81, + "learning_rate": 1.1113115784433883e-05, + "loss": 2.1799, + "step": 8898 + }, + { + "epoch": 1.81, + "learning_rate": 1.1111399166102379e-05, + "loss": 2.1328, + "step": 8899 + }, + { + "epoch": 1.81, + "learning_rate": 1.110968251461016e-05, + "loss": 2.1098, + "step": 8900 + }, + { + "epoch": 1.81, + "learning_rate": 1.110796583000846e-05, + "loss": 2.0548, + "step": 8901 + }, + { + "epoch": 1.81, + "learning_rate": 1.1106249112348491e-05, + "loss": 2.1916, + "step": 8902 + }, + { + "epoch": 1.81, + "learning_rate": 1.1104532361681475e-05, + "loss": 2.0544, + "step": 8903 + }, + { + "epoch": 1.81, + "learning_rate": 1.1102815578058632e-05, + "loss": 2.1567, + "step": 8904 + }, + { + "epoch": 1.81, + "learning_rate": 1.110109876153119e-05, + "loss": 2.1201, + "step": 8905 + }, + { + "epoch": 1.81, + "learning_rate": 1.1099381912150373e-05, + "loss": 2.136, + "step": 8906 + }, + { + "epoch": 1.81, + "learning_rate": 1.1097665029967405e-05, + "loss": 2.0952, + "step": 8907 + }, + { + "epoch": 1.81, + "learning_rate": 1.1095948115033513e-05, + "loss": 2.1205, + "step": 8908 + }, + { + "epoch": 1.81, + "learning_rate": 1.1094231167399922e-05, + "loss": 2.0803, + "step": 8909 + }, + { + "epoch": 1.81, + "learning_rate": 1.1092514187117865e-05, + "loss": 2.1607, + "step": 8910 + }, + { + "epoch": 1.81, + "learning_rate": 1.1090797174238568e-05, + "loss": 1.9888, + "step": 8911 + }, + { + "epoch": 1.81, + "learning_rate": 1.1089080128813261e-05, + "loss": 2.0989, + "step": 8912 + }, + { + "epoch": 1.81, + "learning_rate": 1.1087363050893179e-05, + "loss": 2.2005, + "step": 8913 + }, + { + "epoch": 1.81, + "learning_rate": 1.1085645940529553e-05, + "loss": 2.1809, + "step": 8914 + }, + { + "epoch": 1.81, + "learning_rate": 1.1083928797773608e-05, + "loss": 2.1073, + "step": 8915 + }, + { + "epoch": 1.81, + "learning_rate": 1.1082211622676593e-05, + "loss": 2.0996, + "step": 8916 + }, + { + "epoch": 1.81, + "learning_rate": 1.1080494415289732e-05, + "loss": 2.1707, + "step": 8917 + }, + { + "epoch": 1.81, + "learning_rate": 1.1078777175664267e-05, + "loss": 2.1136, + "step": 8918 + }, + { + "epoch": 1.81, + "learning_rate": 1.1077059903851433e-05, + "loss": 2.1431, + "step": 8919 + }, + { + "epoch": 1.81, + "learning_rate": 1.1075342599902462e-05, + "loss": 2.1712, + "step": 8920 + }, + { + "epoch": 1.81, + "learning_rate": 1.1073625263868605e-05, + "loss": 2.1891, + "step": 8921 + }, + { + "epoch": 1.81, + "learning_rate": 1.1071907895801097e-05, + "loss": 2.1704, + "step": 8922 + }, + { + "epoch": 1.81, + "learning_rate": 1.1070190495751176e-05, + "loss": 2.07, + "step": 8923 + }, + { + "epoch": 1.81, + "learning_rate": 1.1068473063770084e-05, + "loss": 2.1173, + "step": 8924 + }, + { + "epoch": 1.81, + "learning_rate": 1.1066755599909065e-05, + "loss": 2.1156, + "step": 8925 + }, + { + "epoch": 1.81, + "learning_rate": 1.1065038104219368e-05, + "loss": 2.1769, + "step": 8926 + }, + { + "epoch": 1.81, + "learning_rate": 1.106332057675223e-05, + "loss": 2.1388, + "step": 8927 + }, + { + "epoch": 1.81, + "learning_rate": 1.1061603017558899e-05, + "loss": 2.0738, + "step": 8928 + }, + { + "epoch": 1.81, + "learning_rate": 1.1059885426690624e-05, + "loss": 2.178, + "step": 8929 + }, + { + "epoch": 1.81, + "learning_rate": 1.1058167804198652e-05, + "loss": 2.1808, + "step": 8930 + }, + { + "epoch": 1.81, + "learning_rate": 1.105645015013423e-05, + "loss": 2.1245, + "step": 8931 + }, + { + "epoch": 1.81, + "learning_rate": 1.1054732464548605e-05, + "loss": 2.0862, + "step": 8932 + }, + { + "epoch": 1.81, + "learning_rate": 1.1053014747493036e-05, + "loss": 2.0796, + "step": 8933 + }, + { + "epoch": 1.81, + "learning_rate": 1.1051296999018765e-05, + "loss": 2.1799, + "step": 8934 + }, + { + "epoch": 1.81, + "learning_rate": 1.1049579219177049e-05, + "loss": 2.2272, + "step": 8935 + }, + { + "epoch": 1.81, + "learning_rate": 1.104786140801914e-05, + "loss": 2.195, + "step": 8936 + }, + { + "epoch": 1.81, + "learning_rate": 1.1046143565596295e-05, + "loss": 2.1272, + "step": 8937 + }, + { + "epoch": 1.81, + "learning_rate": 1.1044425691959765e-05, + "loss": 2.0999, + "step": 8938 + }, + { + "epoch": 1.81, + "learning_rate": 1.1042707787160809e-05, + "loss": 2.0694, + "step": 8939 + }, + { + "epoch": 1.81, + "learning_rate": 1.1040989851250678e-05, + "loss": 1.9994, + "step": 8940 + }, + { + "epoch": 1.81, + "learning_rate": 1.1039271884280639e-05, + "loss": 2.0327, + "step": 8941 + }, + { + "epoch": 1.82, + "learning_rate": 1.1037553886301946e-05, + "loss": 2.0685, + "step": 8942 + }, + { + "epoch": 1.82, + "learning_rate": 1.103583585736586e-05, + "loss": 2.0937, + "step": 8943 + }, + { + "epoch": 1.82, + "learning_rate": 1.1034117797523641e-05, + "loss": 2.0468, + "step": 8944 + }, + { + "epoch": 1.82, + "learning_rate": 1.1032399706826545e-05, + "loss": 2.1339, + "step": 8945 + }, + { + "epoch": 1.82, + "learning_rate": 1.1030681585325844e-05, + "loss": 2.05, + "step": 8946 + }, + { + "epoch": 1.82, + "learning_rate": 1.1028963433072797e-05, + "loss": 2.122, + "step": 8947 + }, + { + "epoch": 1.82, + "learning_rate": 1.1027245250118668e-05, + "loss": 2.1475, + "step": 8948 + }, + { + "epoch": 1.82, + "learning_rate": 1.1025527036514723e-05, + "loss": 2.1044, + "step": 8949 + }, + { + "epoch": 1.82, + "learning_rate": 1.1023808792312226e-05, + "loss": 2.1837, + "step": 8950 + }, + { + "epoch": 1.82, + "learning_rate": 1.1022090517562449e-05, + "loss": 2.1076, + "step": 8951 + }, + { + "epoch": 1.82, + "learning_rate": 1.1020372212316657e-05, + "loss": 2.1645, + "step": 8952 + }, + { + "epoch": 1.82, + "learning_rate": 1.101865387662612e-05, + "loss": 2.164, + "step": 8953 + }, + { + "epoch": 1.82, + "learning_rate": 1.1016935510542104e-05, + "loss": 2.1107, + "step": 8954 + }, + { + "epoch": 1.82, + "learning_rate": 1.1015217114115884e-05, + "loss": 2.1474, + "step": 8955 + }, + { + "epoch": 1.82, + "learning_rate": 1.101349868739873e-05, + "loss": 2.1576, + "step": 8956 + }, + { + "epoch": 1.82, + "learning_rate": 1.1011780230441916e-05, + "loss": 2.1299, + "step": 8957 + }, + { + "epoch": 1.82, + "learning_rate": 1.1010061743296712e-05, + "loss": 2.1169, + "step": 8958 + }, + { + "epoch": 1.82, + "learning_rate": 1.1008343226014394e-05, + "loss": 2.1068, + "step": 8959 + }, + { + "epoch": 1.82, + "learning_rate": 1.1006624678646238e-05, + "loss": 2.0746, + "step": 8960 + }, + { + "epoch": 1.82, + "learning_rate": 1.1004906101243522e-05, + "loss": 2.1355, + "step": 8961 + }, + { + "epoch": 1.82, + "learning_rate": 1.1003187493857521e-05, + "loss": 2.1679, + "step": 8962 + }, + { + "epoch": 1.82, + "learning_rate": 1.1001468856539512e-05, + "loss": 2.0187, + "step": 8963 + }, + { + "epoch": 1.82, + "learning_rate": 1.0999750189340772e-05, + "loss": 2.1102, + "step": 8964 + }, + { + "epoch": 1.82, + "learning_rate": 1.0998031492312586e-05, + "loss": 2.0381, + "step": 8965 + }, + { + "epoch": 1.82, + "learning_rate": 1.0996312765506234e-05, + "loss": 2.0912, + "step": 8966 + }, + { + "epoch": 1.82, + "learning_rate": 1.0994594008972994e-05, + "loss": 2.1624, + "step": 8967 + }, + { + "epoch": 1.82, + "learning_rate": 1.0992875222764149e-05, + "loss": 2.1987, + "step": 8968 + }, + { + "epoch": 1.82, + "learning_rate": 1.0991156406930978e-05, + "loss": 2.036, + "step": 8969 + }, + { + "epoch": 1.82, + "learning_rate": 1.0989437561524776e-05, + "loss": 2.1705, + "step": 8970 + }, + { + "epoch": 1.82, + "learning_rate": 1.0987718686596822e-05, + "loss": 2.1949, + "step": 8971 + }, + { + "epoch": 1.82, + "learning_rate": 1.09859997821984e-05, + "loss": 2.212, + "step": 8972 + }, + { + "epoch": 1.82, + "learning_rate": 1.0984280848380801e-05, + "loss": 2.0532, + "step": 8973 + }, + { + "epoch": 1.82, + "learning_rate": 1.0982561885195309e-05, + "loss": 2.0578, + "step": 8974 + }, + { + "epoch": 1.82, + "learning_rate": 1.0980842892693215e-05, + "loss": 2.0622, + "step": 8975 + }, + { + "epoch": 1.82, + "learning_rate": 1.0979123870925808e-05, + "loss": 2.142, + "step": 8976 + }, + { + "epoch": 1.82, + "learning_rate": 1.0977404819944376e-05, + "loss": 2.1582, + "step": 8977 + }, + { + "epoch": 1.82, + "learning_rate": 1.0975685739800216e-05, + "loss": 2.148, + "step": 8978 + }, + { + "epoch": 1.82, + "learning_rate": 1.097396663054461e-05, + "loss": 2.1458, + "step": 8979 + }, + { + "epoch": 1.82, + "learning_rate": 1.0972247492228859e-05, + "loss": 2.1265, + "step": 8980 + }, + { + "epoch": 1.82, + "learning_rate": 1.0970528324904255e-05, + "loss": 2.0946, + "step": 8981 + }, + { + "epoch": 1.82, + "learning_rate": 1.0968809128622094e-05, + "loss": 2.1706, + "step": 8982 + }, + { + "epoch": 1.82, + "learning_rate": 1.0967089903433668e-05, + "loss": 2.143, + "step": 8983 + }, + { + "epoch": 1.82, + "learning_rate": 1.0965370649390272e-05, + "loss": 2.0309, + "step": 8984 + }, + { + "epoch": 1.82, + "learning_rate": 1.096365136654321e-05, + "loss": 2.101, + "step": 8985 + }, + { + "epoch": 1.82, + "learning_rate": 1.0961932054943778e-05, + "loss": 2.067, + "step": 8986 + }, + { + "epoch": 1.82, + "learning_rate": 1.0960212714643273e-05, + "loss": 2.138, + "step": 8987 + }, + { + "epoch": 1.82, + "learning_rate": 1.0958493345692993e-05, + "loss": 2.0886, + "step": 8988 + }, + { + "epoch": 1.82, + "learning_rate": 1.0956773948144237e-05, + "loss": 2.0461, + "step": 8989 + }, + { + "epoch": 1.82, + "learning_rate": 1.0955054522048317e-05, + "loss": 2.0551, + "step": 8990 + }, + { + "epoch": 1.83, + "learning_rate": 1.0953335067456525e-05, + "loss": 2.0439, + "step": 8991 + }, + { + "epoch": 1.83, + "learning_rate": 1.095161558442017e-05, + "loss": 2.1472, + "step": 8992 + }, + { + "epoch": 1.83, + "learning_rate": 1.0949896072990551e-05, + "loss": 2.1636, + "step": 8993 + }, + { + "epoch": 1.83, + "learning_rate": 1.0948176533218974e-05, + "loss": 2.1368, + "step": 8994 + }, + { + "epoch": 1.83, + "learning_rate": 1.0946456965156751e-05, + "loss": 2.0302, + "step": 8995 + }, + { + "epoch": 1.83, + "learning_rate": 1.0944737368855183e-05, + "loss": 2.1644, + "step": 8996 + }, + { + "epoch": 1.83, + "learning_rate": 1.0943017744365578e-05, + "loss": 2.0878, + "step": 8997 + }, + { + "epoch": 1.83, + "learning_rate": 1.0941298091739246e-05, + "loss": 2.0344, + "step": 8998 + }, + { + "epoch": 1.83, + "learning_rate": 1.0939578411027492e-05, + "loss": 2.1391, + "step": 8999 + }, + { + "epoch": 1.83, + "learning_rate": 1.093785870228163e-05, + "loss": 2.1248, + "step": 9000 + }, + { + "epoch": 1.83, + "learning_rate": 1.093613896555297e-05, + "loss": 2.1906, + "step": 9001 + }, + { + "epoch": 1.83, + "learning_rate": 1.0934419200892824e-05, + "loss": 2.165, + "step": 9002 + }, + { + "epoch": 1.83, + "learning_rate": 1.0932699408352504e-05, + "loss": 2.1204, + "step": 9003 + }, + { + "epoch": 1.83, + "learning_rate": 1.0930979587983316e-05, + "loss": 2.0951, + "step": 9004 + }, + { + "epoch": 1.83, + "learning_rate": 1.092925973983659e-05, + "loss": 2.1936, + "step": 9005 + }, + { + "epoch": 1.83, + "learning_rate": 1.092753986396363e-05, + "loss": 2.0741, + "step": 9006 + }, + { + "epoch": 1.83, + "learning_rate": 1.0925819960415751e-05, + "loss": 2.1582, + "step": 9007 + }, + { + "epoch": 1.83, + "learning_rate": 1.0924100029244276e-05, + "loss": 2.0515, + "step": 9008 + }, + { + "epoch": 1.83, + "learning_rate": 1.0922380070500513e-05, + "loss": 2.1319, + "step": 9009 + }, + { + "epoch": 1.83, + "learning_rate": 1.092066008423579e-05, + "loss": 2.0528, + "step": 9010 + }, + { + "epoch": 1.83, + "learning_rate": 1.0918940070501423e-05, + "loss": 2.0939, + "step": 9011 + }, + { + "epoch": 1.83, + "learning_rate": 1.091722002934873e-05, + "loss": 2.1343, + "step": 9012 + }, + { + "epoch": 1.83, + "learning_rate": 1.091549996082903e-05, + "loss": 2.0724, + "step": 9013 + }, + { + "epoch": 1.83, + "learning_rate": 1.0913779864993647e-05, + "loss": 2.1433, + "step": 9014 + }, + { + "epoch": 1.83, + "learning_rate": 1.0912059741893908e-05, + "loss": 2.0996, + "step": 9015 + }, + { + "epoch": 1.83, + "learning_rate": 1.0910339591581129e-05, + "loss": 2.0698, + "step": 9016 + }, + { + "epoch": 1.83, + "learning_rate": 1.0908619414106637e-05, + "loss": 2.1702, + "step": 9017 + }, + { + "epoch": 1.83, + "learning_rate": 1.0906899209521758e-05, + "loss": 2.0518, + "step": 9018 + }, + { + "epoch": 1.83, + "learning_rate": 1.0905178977877813e-05, + "loss": 2.1944, + "step": 9019 + }, + { + "epoch": 1.83, + "learning_rate": 1.090345871922613e-05, + "loss": 2.1486, + "step": 9020 + }, + { + "epoch": 1.83, + "learning_rate": 1.0901738433618041e-05, + "loss": 2.1422, + "step": 9021 + }, + { + "epoch": 1.83, + "learning_rate": 1.0900018121104871e-05, + "loss": 2.165, + "step": 9022 + }, + { + "epoch": 1.83, + "learning_rate": 1.0898297781737948e-05, + "loss": 2.1191, + "step": 9023 + }, + { + "epoch": 1.83, + "learning_rate": 1.0896577415568596e-05, + "loss": 2.2097, + "step": 9024 + }, + { + "epoch": 1.83, + "learning_rate": 1.0894857022648157e-05, + "loss": 2.0897, + "step": 9025 + }, + { + "epoch": 1.83, + "learning_rate": 1.0893136603027957e-05, + "loss": 2.1694, + "step": 9026 + }, + { + "epoch": 1.83, + "learning_rate": 1.0891416156759326e-05, + "loss": 2.1562, + "step": 9027 + }, + { + "epoch": 1.83, + "learning_rate": 1.08896956838936e-05, + "loss": 2.0827, + "step": 9028 + }, + { + "epoch": 1.83, + "learning_rate": 1.0887975184482105e-05, + "loss": 2.0805, + "step": 9029 + }, + { + "epoch": 1.83, + "learning_rate": 1.0886254658576186e-05, + "loss": 2.1089, + "step": 9030 + }, + { + "epoch": 1.83, + "learning_rate": 1.0884534106227173e-05, + "loss": 2.1276, + "step": 9031 + }, + { + "epoch": 1.83, + "learning_rate": 1.0882813527486404e-05, + "loss": 2.1352, + "step": 9032 + }, + { + "epoch": 1.83, + "learning_rate": 1.088109292240521e-05, + "loss": 2.1328, + "step": 9033 + }, + { + "epoch": 1.83, + "learning_rate": 1.0879372291034933e-05, + "loss": 2.1364, + "step": 9034 + }, + { + "epoch": 1.83, + "learning_rate": 1.0877651633426913e-05, + "loss": 2.1694, + "step": 9035 + }, + { + "epoch": 1.83, + "learning_rate": 1.0875930949632487e-05, + "loss": 2.1618, + "step": 9036 + }, + { + "epoch": 1.83, + "learning_rate": 1.0874210239702993e-05, + "loss": 2.1427, + "step": 9037 + }, + { + "epoch": 1.83, + "learning_rate": 1.0872489503689776e-05, + "loss": 2.1368, + "step": 9038 + }, + { + "epoch": 1.83, + "learning_rate": 1.0870768741644173e-05, + "loss": 2.1044, + "step": 9039 + }, + { + "epoch": 1.83, + "learning_rate": 1.0869047953617531e-05, + "loss": 2.1345, + "step": 9040 + }, + { + "epoch": 1.84, + "learning_rate": 1.0867327139661185e-05, + "loss": 2.1227, + "step": 9041 + }, + { + "epoch": 1.84, + "learning_rate": 1.0865606299826488e-05, + "loss": 2.162, + "step": 9042 + }, + { + "epoch": 1.84, + "learning_rate": 1.086388543416478e-05, + "loss": 2.0986, + "step": 9043 + }, + { + "epoch": 1.84, + "learning_rate": 1.0862164542727405e-05, + "loss": 2.0999, + "step": 9044 + }, + { + "epoch": 1.84, + "learning_rate": 1.0860443625565712e-05, + "loss": 2.0827, + "step": 9045 + }, + { + "epoch": 1.84, + "learning_rate": 1.0858722682731046e-05, + "loss": 2.1906, + "step": 9046 + }, + { + "epoch": 1.84, + "learning_rate": 1.0857001714274757e-05, + "loss": 2.1074, + "step": 9047 + }, + { + "epoch": 1.84, + "learning_rate": 1.0855280720248186e-05, + "loss": 2.1186, + "step": 9048 + }, + { + "epoch": 1.84, + "learning_rate": 1.0853559700702696e-05, + "loss": 2.1339, + "step": 9049 + }, + { + "epoch": 1.84, + "learning_rate": 1.0851838655689626e-05, + "loss": 2.096, + "step": 9050 + }, + { + "epoch": 1.84, + "learning_rate": 1.0850117585260327e-05, + "loss": 2.1671, + "step": 9051 + }, + { + "epoch": 1.84, + "learning_rate": 1.0848396489466156e-05, + "loss": 2.1343, + "step": 9052 + }, + { + "epoch": 1.84, + "learning_rate": 1.0846675368358456e-05, + "loss": 2.1663, + "step": 9053 + }, + { + "epoch": 1.84, + "learning_rate": 1.084495422198859e-05, + "loss": 2.0567, + "step": 9054 + }, + { + "epoch": 1.84, + "learning_rate": 1.0843233050407908e-05, + "loss": 2.1017, + "step": 9055 + }, + { + "epoch": 1.84, + "learning_rate": 1.0841511853667763e-05, + "loss": 2.1395, + "step": 9056 + }, + { + "epoch": 1.84, + "learning_rate": 1.083979063181951e-05, + "loss": 2.0298, + "step": 9057 + }, + { + "epoch": 1.84, + "learning_rate": 1.0838069384914506e-05, + "loss": 2.1087, + "step": 9058 + }, + { + "epoch": 1.84, + "learning_rate": 1.083634811300411e-05, + "loss": 2.1245, + "step": 9059 + }, + { + "epoch": 1.84, + "learning_rate": 1.0834626816139678e-05, + "loss": 2.2202, + "step": 9060 + }, + { + "epoch": 1.84, + "learning_rate": 1.0832905494372561e-05, + "loss": 2.1465, + "step": 9061 + }, + { + "epoch": 1.84, + "learning_rate": 1.083118414775413e-05, + "loss": 2.011, + "step": 9062 + }, + { + "epoch": 1.84, + "learning_rate": 1.0829462776335735e-05, + "loss": 2.1277, + "step": 9063 + }, + { + "epoch": 1.84, + "learning_rate": 1.0827741380168745e-05, + "loss": 2.0903, + "step": 9064 + }, + { + "epoch": 1.84, + "learning_rate": 1.082601995930451e-05, + "loss": 2.0681, + "step": 9065 + }, + { + "epoch": 1.84, + "learning_rate": 1.0824298513794402e-05, + "loss": 1.9992, + "step": 9066 + }, + { + "epoch": 1.84, + "learning_rate": 1.082257704368978e-05, + "loss": 2.2066, + "step": 9067 + }, + { + "epoch": 1.84, + "learning_rate": 1.0820855549042005e-05, + "loss": 2.1171, + "step": 9068 + }, + { + "epoch": 1.84, + "learning_rate": 1.0819134029902446e-05, + "loss": 2.1452, + "step": 9069 + }, + { + "epoch": 1.84, + "learning_rate": 1.0817412486322464e-05, + "loss": 2.1071, + "step": 9070 + }, + { + "epoch": 1.84, + "learning_rate": 1.0815690918353428e-05, + "loss": 2.0891, + "step": 9071 + }, + { + "epoch": 1.84, + "learning_rate": 1.0813969326046699e-05, + "loss": 2.1571, + "step": 9072 + }, + { + "epoch": 1.84, + "learning_rate": 1.0812247709453643e-05, + "loss": 2.1759, + "step": 9073 + }, + { + "epoch": 1.84, + "learning_rate": 1.0810526068625637e-05, + "loss": 2.135, + "step": 9074 + }, + { + "epoch": 1.84, + "learning_rate": 1.0808804403614044e-05, + "loss": 2.1826, + "step": 9075 + }, + { + "epoch": 1.84, + "learning_rate": 1.0807082714470232e-05, + "loss": 2.2076, + "step": 9076 + }, + { + "epoch": 1.84, + "learning_rate": 1.0805361001245569e-05, + "loss": 2.1365, + "step": 9077 + }, + { + "epoch": 1.84, + "learning_rate": 1.080363926399143e-05, + "loss": 2.0913, + "step": 9078 + }, + { + "epoch": 1.84, + "learning_rate": 1.0801917502759186e-05, + "loss": 2.1341, + "step": 9079 + }, + { + "epoch": 1.84, + "learning_rate": 1.0800195717600211e-05, + "loss": 2.1582, + "step": 9080 + }, + { + "epoch": 1.84, + "learning_rate": 1.0798473908565867e-05, + "loss": 2.1055, + "step": 9081 + }, + { + "epoch": 1.84, + "learning_rate": 1.0796752075707542e-05, + "loss": 2.0963, + "step": 9082 + }, + { + "epoch": 1.84, + "learning_rate": 1.07950302190766e-05, + "loss": 2.221, + "step": 9083 + }, + { + "epoch": 1.84, + "learning_rate": 1.0793308338724419e-05, + "loss": 2.1669, + "step": 9084 + }, + { + "epoch": 1.84, + "learning_rate": 1.0791586434702372e-05, + "loss": 2.1275, + "step": 9085 + }, + { + "epoch": 1.84, + "learning_rate": 1.0789864507061845e-05, + "loss": 2.0908, + "step": 9086 + }, + { + "epoch": 1.84, + "learning_rate": 1.0788142555854203e-05, + "loss": 2.1881, + "step": 9087 + }, + { + "epoch": 1.84, + "learning_rate": 1.0786420581130828e-05, + "loss": 2.0445, + "step": 9088 + }, + { + "epoch": 1.84, + "learning_rate": 1.07846985829431e-05, + "loss": 2.1089, + "step": 9089 + }, + { + "epoch": 1.85, + "learning_rate": 1.0782976561342398e-05, + "loss": 2.1097, + "step": 9090 + }, + { + "epoch": 1.85, + "learning_rate": 1.0781254516380101e-05, + "loss": 2.1422, + "step": 9091 + }, + { + "epoch": 1.85, + "learning_rate": 1.077953244810759e-05, + "loss": 2.1294, + "step": 9092 + }, + { + "epoch": 1.85, + "learning_rate": 1.0777810356576242e-05, + "loss": 2.2276, + "step": 9093 + }, + { + "epoch": 1.85, + "learning_rate": 1.0776088241837443e-05, + "loss": 2.0856, + "step": 9094 + }, + { + "epoch": 1.85, + "learning_rate": 1.077436610394258e-05, + "loss": 2.1864, + "step": 9095 + }, + { + "epoch": 1.85, + "learning_rate": 1.0772643942943028e-05, + "loss": 2.1102, + "step": 9096 + }, + { + "epoch": 1.85, + "learning_rate": 1.0770921758890177e-05, + "loss": 2.185, + "step": 9097 + }, + { + "epoch": 1.85, + "learning_rate": 1.0769199551835401e-05, + "loss": 2.171, + "step": 9098 + }, + { + "epoch": 1.85, + "learning_rate": 1.0767477321830097e-05, + "loss": 1.9937, + "step": 9099 + }, + { + "epoch": 1.85, + "learning_rate": 1.0765755068925649e-05, + "loss": 2.1191, + "step": 9100 + }, + { + "epoch": 1.85, + "learning_rate": 1.0764032793173442e-05, + "loss": 2.0577, + "step": 9101 + }, + { + "epoch": 1.85, + "learning_rate": 1.0762310494624863e-05, + "loss": 2.0554, + "step": 9102 + }, + { + "epoch": 1.85, + "learning_rate": 1.0760588173331298e-05, + "loss": 2.0945, + "step": 9103 + }, + { + "epoch": 1.85, + "learning_rate": 1.075886582934414e-05, + "loss": 2.114, + "step": 9104 + }, + { + "epoch": 1.85, + "learning_rate": 1.0757143462714777e-05, + "loss": 2.0811, + "step": 9105 + }, + { + "epoch": 1.85, + "learning_rate": 1.0755421073494597e-05, + "loss": 2.0907, + "step": 9106 + }, + { + "epoch": 1.85, + "learning_rate": 1.0753698661734993e-05, + "loss": 2.1362, + "step": 9107 + }, + { + "epoch": 1.85, + "learning_rate": 1.0751976227487355e-05, + "loss": 2.1508, + "step": 9108 + }, + { + "epoch": 1.85, + "learning_rate": 1.0750253770803075e-05, + "loss": 2.1523, + "step": 9109 + }, + { + "epoch": 1.85, + "learning_rate": 1.0748531291733548e-05, + "loss": 2.0523, + "step": 9110 + }, + { + "epoch": 1.85, + "learning_rate": 1.0746808790330168e-05, + "loss": 2.128, + "step": 9111 + }, + { + "epoch": 1.85, + "learning_rate": 1.0745086266644325e-05, + "loss": 2.1152, + "step": 9112 + }, + { + "epoch": 1.85, + "learning_rate": 1.0743363720727414e-05, + "loss": 2.1374, + "step": 9113 + }, + { + "epoch": 1.85, + "learning_rate": 1.0741641152630832e-05, + "loss": 2.1599, + "step": 9114 + }, + { + "epoch": 1.85, + "learning_rate": 1.073991856240598e-05, + "loss": 2.0847, + "step": 9115 + }, + { + "epoch": 1.85, + "learning_rate": 1.073819595010425e-05, + "loss": 2.1555, + "step": 9116 + }, + { + "epoch": 1.85, + "learning_rate": 1.0736473315777038e-05, + "loss": 2.1791, + "step": 9117 + }, + { + "epoch": 1.85, + "learning_rate": 1.0734750659475739e-05, + "loss": 2.1486, + "step": 9118 + }, + { + "epoch": 1.85, + "learning_rate": 1.0733027981251763e-05, + "loss": 2.1537, + "step": 9119 + }, + { + "epoch": 1.85, + "learning_rate": 1.0731305281156499e-05, + "loss": 2.0728, + "step": 9120 + }, + { + "epoch": 1.85, + "learning_rate": 1.0729582559241354e-05, + "loss": 2.1731, + "step": 9121 + }, + { + "epoch": 1.85, + "learning_rate": 1.0727859815557722e-05, + "loss": 2.1522, + "step": 9122 + }, + { + "epoch": 1.85, + "learning_rate": 1.0726137050157009e-05, + "loss": 2.0908, + "step": 9123 + }, + { + "epoch": 1.85, + "learning_rate": 1.0724414263090614e-05, + "loss": 2.2238, + "step": 9124 + }, + { + "epoch": 1.85, + "learning_rate": 1.0722691454409944e-05, + "loss": 2.0514, + "step": 9125 + }, + { + "epoch": 1.85, + "learning_rate": 1.07209686241664e-05, + "loss": 2.0981, + "step": 9126 + }, + { + "epoch": 1.85, + "learning_rate": 1.0719245772411386e-05, + "loss": 2.1954, + "step": 9127 + }, + { + "epoch": 1.85, + "learning_rate": 1.0717522899196305e-05, + "loss": 2.1546, + "step": 9128 + }, + { + "epoch": 1.85, + "learning_rate": 1.071580000457256e-05, + "loss": 2.1322, + "step": 9129 + }, + { + "epoch": 1.85, + "learning_rate": 1.0714077088591566e-05, + "loss": 2.091, + "step": 9130 + }, + { + "epoch": 1.85, + "learning_rate": 1.0712354151304723e-05, + "loss": 2.1374, + "step": 9131 + }, + { + "epoch": 1.85, + "learning_rate": 1.0710631192763437e-05, + "loss": 2.0727, + "step": 9132 + }, + { + "epoch": 1.85, + "learning_rate": 1.0708908213019113e-05, + "loss": 2.0696, + "step": 9133 + }, + { + "epoch": 1.85, + "learning_rate": 1.070718521212317e-05, + "loss": 2.1477, + "step": 9134 + }, + { + "epoch": 1.85, + "learning_rate": 1.0705462190127011e-05, + "loss": 2.1251, + "step": 9135 + }, + { + "epoch": 1.85, + "learning_rate": 1.0703739147082046e-05, + "loss": 2.1959, + "step": 9136 + }, + { + "epoch": 1.85, + "learning_rate": 1.0702016083039679e-05, + "loss": 2.1913, + "step": 9137 + }, + { + "epoch": 1.85, + "learning_rate": 1.0700292998051332e-05, + "loss": 2.1171, + "step": 9138 + }, + { + "epoch": 1.86, + "learning_rate": 1.069856989216841e-05, + "loss": 2.1085, + "step": 9139 + }, + { + "epoch": 1.86, + "learning_rate": 1.0696846765442327e-05, + "loss": 2.0924, + "step": 9140 + }, + { + "epoch": 1.86, + "learning_rate": 1.0695123617924497e-05, + "loss": 2.1294, + "step": 9141 + }, + { + "epoch": 1.86, + "learning_rate": 1.0693400449666325e-05, + "loss": 2.1482, + "step": 9142 + }, + { + "epoch": 1.86, + "learning_rate": 1.0691677260719238e-05, + "loss": 2.1276, + "step": 9143 + }, + { + "epoch": 1.86, + "learning_rate": 1.068995405113464e-05, + "loss": 2.1323, + "step": 9144 + }, + { + "epoch": 1.86, + "learning_rate": 1.0688230820963956e-05, + "loss": 2.1302, + "step": 9145 + }, + { + "epoch": 1.86, + "learning_rate": 1.068650757025859e-05, + "loss": 2.1147, + "step": 9146 + }, + { + "epoch": 1.86, + "learning_rate": 1.0684784299069965e-05, + "loss": 2.1037, + "step": 9147 + }, + { + "epoch": 1.86, + "learning_rate": 1.0683061007449502e-05, + "loss": 2.1457, + "step": 9148 + }, + { + "epoch": 1.86, + "learning_rate": 1.0681337695448612e-05, + "loss": 2.0869, + "step": 9149 + }, + { + "epoch": 1.86, + "learning_rate": 1.0679614363118718e-05, + "loss": 2.1369, + "step": 9150 + }, + { + "epoch": 1.86, + "learning_rate": 1.0677891010511234e-05, + "loss": 2.1777, + "step": 9151 + }, + { + "epoch": 1.86, + "learning_rate": 1.0676167637677583e-05, + "loss": 2.1258, + "step": 9152 + }, + { + "epoch": 1.86, + "learning_rate": 1.0674444244669185e-05, + "loss": 2.1869, + "step": 9153 + }, + { + "epoch": 1.86, + "learning_rate": 1.067272083153746e-05, + "loss": 2.1525, + "step": 9154 + }, + { + "epoch": 1.86, + "learning_rate": 1.0670997398333828e-05, + "loss": 2.1313, + "step": 9155 + }, + { + "epoch": 1.86, + "learning_rate": 1.0669273945109716e-05, + "loss": 2.0906, + "step": 9156 + }, + { + "epoch": 1.86, + "learning_rate": 1.0667550471916535e-05, + "loss": 2.0872, + "step": 9157 + }, + { + "epoch": 1.86, + "learning_rate": 1.0665826978805722e-05, + "loss": 2.1608, + "step": 9158 + }, + { + "epoch": 1.86, + "learning_rate": 1.0664103465828695e-05, + "loss": 2.1781, + "step": 9159 + }, + { + "epoch": 1.86, + "learning_rate": 1.0662379933036875e-05, + "loss": 2.0692, + "step": 9160 + }, + { + "epoch": 1.86, + "learning_rate": 1.0660656380481692e-05, + "loss": 2.1593, + "step": 9161 + }, + { + "epoch": 1.86, + "learning_rate": 1.0658932808214564e-05, + "loss": 2.1264, + "step": 9162 + }, + { + "epoch": 1.86, + "learning_rate": 1.0657209216286927e-05, + "loss": 2.1346, + "step": 9163 + }, + { + "epoch": 1.86, + "learning_rate": 1.0655485604750202e-05, + "loss": 2.1418, + "step": 9164 + }, + { + "epoch": 1.86, + "learning_rate": 1.0653761973655819e-05, + "loss": 2.1352, + "step": 9165 + }, + { + "epoch": 1.86, + "learning_rate": 1.0652038323055201e-05, + "loss": 2.1981, + "step": 9166 + }, + { + "epoch": 1.86, + "learning_rate": 1.0650314652999778e-05, + "loss": 2.0988, + "step": 9167 + }, + { + "epoch": 1.86, + "learning_rate": 1.0648590963540983e-05, + "loss": 2.1097, + "step": 9168 + }, + { + "epoch": 1.86, + "learning_rate": 1.0646867254730242e-05, + "loss": 2.0409, + "step": 9169 + }, + { + "epoch": 1.86, + "learning_rate": 1.0645143526618984e-05, + "loss": 2.1592, + "step": 9170 + }, + { + "epoch": 1.86, + "learning_rate": 1.0643419779258645e-05, + "loss": 2.1172, + "step": 9171 + }, + { + "epoch": 1.86, + "learning_rate": 1.064169601270065e-05, + "loss": 2.1519, + "step": 9172 + }, + { + "epoch": 1.86, + "learning_rate": 1.0639972226996436e-05, + "loss": 2.1182, + "step": 9173 + }, + { + "epoch": 1.86, + "learning_rate": 1.0638248422197432e-05, + "loss": 2.1099, + "step": 9174 + }, + { + "epoch": 1.86, + "learning_rate": 1.0636524598355073e-05, + "loss": 2.1912, + "step": 9175 + }, + { + "epoch": 1.86, + "learning_rate": 1.0634800755520794e-05, + "loss": 2.1968, + "step": 9176 + }, + { + "epoch": 1.86, + "learning_rate": 1.0633076893746022e-05, + "loss": 2.1134, + "step": 9177 + }, + { + "epoch": 1.86, + "learning_rate": 1.06313530130822e-05, + "loss": 2.1129, + "step": 9178 + }, + { + "epoch": 1.86, + "learning_rate": 1.0629629113580759e-05, + "loss": 2.1057, + "step": 9179 + }, + { + "epoch": 1.86, + "learning_rate": 1.0627905195293135e-05, + "loss": 2.0725, + "step": 9180 + }, + { + "epoch": 1.86, + "learning_rate": 1.0626181258270767e-05, + "loss": 2.1912, + "step": 9181 + }, + { + "epoch": 1.86, + "learning_rate": 1.0624457302565085e-05, + "loss": 2.0566, + "step": 9182 + }, + { + "epoch": 1.86, + "learning_rate": 1.0622733328227536e-05, + "loss": 2.123, + "step": 9183 + }, + { + "epoch": 1.86, + "learning_rate": 1.0621009335309556e-05, + "loss": 2.1862, + "step": 9184 + }, + { + "epoch": 1.86, + "learning_rate": 1.0619285323862579e-05, + "loss": 2.1406, + "step": 9185 + }, + { + "epoch": 1.86, + "learning_rate": 1.061756129393804e-05, + "loss": 2.1641, + "step": 9186 + }, + { + "epoch": 1.86, + "learning_rate": 1.061583724558739e-05, + "loss": 2.0954, + "step": 9187 + }, + { + "epoch": 1.87, + "learning_rate": 1.0614113178862067e-05, + "loss": 2.1094, + "step": 9188 + }, + { + "epoch": 1.87, + "learning_rate": 1.0612389093813507e-05, + "loss": 2.1375, + "step": 9189 + }, + { + "epoch": 1.87, + "learning_rate": 1.0610664990493152e-05, + "loss": 2.1674, + "step": 9190 + }, + { + "epoch": 1.87, + "learning_rate": 1.0608940868952446e-05, + "loss": 2.1534, + "step": 9191 + }, + { + "epoch": 1.87, + "learning_rate": 1.060721672924283e-05, + "loss": 2.2007, + "step": 9192 + }, + { + "epoch": 1.87, + "learning_rate": 1.0605492571415751e-05, + "loss": 2.1464, + "step": 9193 + }, + { + "epoch": 1.87, + "learning_rate": 1.0603768395522647e-05, + "loss": 2.1546, + "step": 9194 + }, + { + "epoch": 1.87, + "learning_rate": 1.0602044201614965e-05, + "loss": 2.0852, + "step": 9195 + }, + { + "epoch": 1.87, + "learning_rate": 1.060031998974415e-05, + "loss": 2.121, + "step": 9196 + }, + { + "epoch": 1.87, + "learning_rate": 1.059859575996164e-05, + "loss": 2.2112, + "step": 9197 + }, + { + "epoch": 1.87, + "learning_rate": 1.0596871512318894e-05, + "loss": 2.0769, + "step": 9198 + }, + { + "epoch": 1.87, + "learning_rate": 1.059514724686735e-05, + "loss": 2.0751, + "step": 9199 + }, + { + "epoch": 1.87, + "learning_rate": 1.0593422963658455e-05, + "loss": 2.1303, + "step": 9200 + }, + { + "epoch": 1.87, + "learning_rate": 1.0591698662743657e-05, + "loss": 2.1771, + "step": 9201 + }, + { + "epoch": 1.87, + "learning_rate": 1.05899743441744e-05, + "loss": 2.0478, + "step": 9202 + }, + { + "epoch": 1.87, + "learning_rate": 1.058825000800214e-05, + "loss": 2.0256, + "step": 9203 + }, + { + "epoch": 1.87, + "learning_rate": 1.0586525654278323e-05, + "loss": 2.1113, + "step": 9204 + }, + { + "epoch": 1.87, + "learning_rate": 1.0584801283054398e-05, + "loss": 2.1309, + "step": 9205 + }, + { + "epoch": 1.87, + "learning_rate": 1.058307689438181e-05, + "loss": 2.0593, + "step": 9206 + }, + { + "epoch": 1.87, + "learning_rate": 1.0581352488312016e-05, + "loss": 2.0754, + "step": 9207 + }, + { + "epoch": 1.87, + "learning_rate": 1.0579628064896464e-05, + "loss": 2.1589, + "step": 9208 + }, + { + "epoch": 1.87, + "learning_rate": 1.0577903624186608e-05, + "loss": 2.1302, + "step": 9209 + }, + { + "epoch": 1.87, + "learning_rate": 1.0576179166233895e-05, + "loss": 2.1495, + "step": 9210 + }, + { + "epoch": 1.87, + "learning_rate": 1.0574454691089783e-05, + "loss": 2.1328, + "step": 9211 + }, + { + "epoch": 1.87, + "learning_rate": 1.057273019880572e-05, + "loss": 2.1793, + "step": 9212 + }, + { + "epoch": 1.87, + "learning_rate": 1.0571005689433165e-05, + "loss": 2.1062, + "step": 9213 + }, + { + "epoch": 1.87, + "learning_rate": 1.0569281163023566e-05, + "loss": 2.1147, + "step": 9214 + }, + { + "epoch": 1.87, + "learning_rate": 1.0567556619628384e-05, + "loss": 2.1294, + "step": 9215 + }, + { + "epoch": 1.87, + "learning_rate": 1.056583205929907e-05, + "loss": 2.1276, + "step": 9216 + }, + { + "epoch": 1.87, + "learning_rate": 1.0564107482087078e-05, + "loss": 2.1602, + "step": 9217 + }, + { + "epoch": 1.87, + "learning_rate": 1.0562382888043865e-05, + "loss": 2.1606, + "step": 9218 + }, + { + "epoch": 1.87, + "learning_rate": 1.0560658277220892e-05, + "loss": 2.1218, + "step": 9219 + }, + { + "epoch": 1.87, + "learning_rate": 1.0558933649669614e-05, + "loss": 2.1533, + "step": 9220 + }, + { + "epoch": 1.87, + "learning_rate": 1.0557209005441486e-05, + "loss": 2.1082, + "step": 9221 + }, + { + "epoch": 1.87, + "learning_rate": 1.0555484344587964e-05, + "loss": 2.117, + "step": 9222 + }, + { + "epoch": 1.87, + "learning_rate": 1.0553759667160512e-05, + "loss": 2.0639, + "step": 9223 + }, + { + "epoch": 1.87, + "learning_rate": 1.0552034973210588e-05, + "loss": 2.1054, + "step": 9224 + }, + { + "epoch": 1.87, + "learning_rate": 1.0550310262789651e-05, + "loss": 2.1477, + "step": 9225 + }, + { + "epoch": 1.87, + "learning_rate": 1.0548585535949157e-05, + "loss": 2.1602, + "step": 9226 + }, + { + "epoch": 1.87, + "learning_rate": 1.0546860792740573e-05, + "loss": 2.1253, + "step": 9227 + }, + { + "epoch": 1.87, + "learning_rate": 1.0545136033215355e-05, + "loss": 2.1593, + "step": 9228 + }, + { + "epoch": 1.87, + "learning_rate": 1.054341125742497e-05, + "loss": 2.1015, + "step": 9229 + }, + { + "epoch": 1.87, + "learning_rate": 1.0541686465420873e-05, + "loss": 2.1503, + "step": 9230 + }, + { + "epoch": 1.87, + "learning_rate": 1.053996165725453e-05, + "loss": 2.1287, + "step": 9231 + }, + { + "epoch": 1.87, + "learning_rate": 1.0538236832977405e-05, + "loss": 2.1208, + "step": 9232 + }, + { + "epoch": 1.87, + "learning_rate": 1.053651199264096e-05, + "loss": 2.1382, + "step": 9233 + }, + { + "epoch": 1.87, + "learning_rate": 1.0534787136296658e-05, + "loss": 2.1113, + "step": 9234 + }, + { + "epoch": 1.87, + "learning_rate": 1.0533062263995968e-05, + "loss": 2.1543, + "step": 9235 + }, + { + "epoch": 1.87, + "learning_rate": 1.0531337375790346e-05, + "loss": 2.1564, + "step": 9236 + }, + { + "epoch": 1.87, + "learning_rate": 1.0529612471731267e-05, + "loss": 2.0803, + "step": 9237 + }, + { + "epoch": 1.88, + "learning_rate": 1.052788755187019e-05, + "loss": 2.0894, + "step": 9238 + }, + { + "epoch": 1.88, + "learning_rate": 1.0526162616258586e-05, + "loss": 2.1388, + "step": 9239 + }, + { + "epoch": 1.88, + "learning_rate": 1.052443766494792e-05, + "loss": 2.1499, + "step": 9240 + }, + { + "epoch": 1.88, + "learning_rate": 1.0522712697989655e-05, + "loss": 2.2078, + "step": 9241 + }, + { + "epoch": 1.88, + "learning_rate": 1.0520987715435262e-05, + "loss": 2.087, + "step": 9242 + }, + { + "epoch": 1.88, + "learning_rate": 1.0519262717336213e-05, + "loss": 2.1014, + "step": 9243 + }, + { + "epoch": 1.88, + "learning_rate": 1.0517537703743973e-05, + "loss": 2.1611, + "step": 9244 + }, + { + "epoch": 1.88, + "learning_rate": 1.0515812674710011e-05, + "loss": 2.121, + "step": 9245 + }, + { + "epoch": 1.88, + "learning_rate": 1.0514087630285792e-05, + "loss": 2.168, + "step": 9246 + }, + { + "epoch": 1.88, + "learning_rate": 1.0512362570522796e-05, + "loss": 2.1114, + "step": 9247 + }, + { + "epoch": 1.88, + "learning_rate": 1.0510637495472487e-05, + "loss": 2.0769, + "step": 9248 + }, + { + "epoch": 1.88, + "learning_rate": 1.0508912405186335e-05, + "loss": 2.0789, + "step": 9249 + }, + { + "epoch": 1.88, + "learning_rate": 1.0507187299715818e-05, + "loss": 2.1632, + "step": 9250 + }, + { + "epoch": 1.88, + "learning_rate": 1.0505462179112395e-05, + "loss": 2.1534, + "step": 9251 + }, + { + "epoch": 1.88, + "learning_rate": 1.050373704342755e-05, + "loss": 2.1788, + "step": 9252 + }, + { + "epoch": 1.88, + "learning_rate": 1.0502011892712755e-05, + "loss": 2.1677, + "step": 9253 + }, + { + "epoch": 1.88, + "learning_rate": 1.0500286727019478e-05, + "loss": 2.1192, + "step": 9254 + }, + { + "epoch": 1.88, + "learning_rate": 1.0498561546399195e-05, + "loss": 2.197, + "step": 9255 + }, + { + "epoch": 1.88, + "learning_rate": 1.049683635090338e-05, + "loss": 2.1097, + "step": 9256 + }, + { + "epoch": 1.88, + "learning_rate": 1.0495111140583507e-05, + "loss": 2.1592, + "step": 9257 + }, + { + "epoch": 1.88, + "learning_rate": 1.049338591549105e-05, + "loss": 2.1436, + "step": 9258 + }, + { + "epoch": 1.88, + "learning_rate": 1.0491660675677488e-05, + "loss": 2.1753, + "step": 9259 + }, + { + "epoch": 1.88, + "learning_rate": 1.0489935421194296e-05, + "loss": 2.1579, + "step": 9260 + }, + { + "epoch": 1.88, + "learning_rate": 1.0488210152092944e-05, + "loss": 2.1487, + "step": 9261 + }, + { + "epoch": 1.88, + "learning_rate": 1.0486484868424913e-05, + "loss": 2.1618, + "step": 9262 + }, + { + "epoch": 1.88, + "learning_rate": 1.0484759570241685e-05, + "loss": 2.1654, + "step": 9263 + }, + { + "epoch": 1.88, + "learning_rate": 1.0483034257594733e-05, + "loss": 2.0865, + "step": 9264 + }, + { + "epoch": 1.88, + "learning_rate": 1.0481308930535535e-05, + "loss": 2.0616, + "step": 9265 + }, + { + "epoch": 1.88, + "learning_rate": 1.0479583589115562e-05, + "loss": 2.1328, + "step": 9266 + }, + { + "epoch": 1.88, + "learning_rate": 1.0477858233386309e-05, + "loss": 2.1445, + "step": 9267 + }, + { + "epoch": 1.88, + "learning_rate": 1.0476132863399244e-05, + "loss": 2.0822, + "step": 9268 + }, + { + "epoch": 1.88, + "learning_rate": 1.047440747920585e-05, + "loss": 2.0202, + "step": 9269 + }, + { + "epoch": 1.88, + "learning_rate": 1.0472682080857607e-05, + "loss": 2.1938, + "step": 9270 + }, + { + "epoch": 1.88, + "learning_rate": 1.0470956668405989e-05, + "loss": 2.143, + "step": 9271 + }, + { + "epoch": 1.88, + "learning_rate": 1.0469231241902489e-05, + "loss": 2.1141, + "step": 9272 + }, + { + "epoch": 1.88, + "learning_rate": 1.0467505801398583e-05, + "loss": 2.1682, + "step": 9273 + }, + { + "epoch": 1.88, + "learning_rate": 1.0465780346945752e-05, + "loss": 2.1551, + "step": 9274 + }, + { + "epoch": 1.88, + "learning_rate": 1.0464054878595473e-05, + "loss": 2.1672, + "step": 9275 + }, + { + "epoch": 1.88, + "learning_rate": 1.0462329396399236e-05, + "loss": 2.1359, + "step": 9276 + }, + { + "epoch": 1.88, + "learning_rate": 1.0460603900408523e-05, + "loss": 2.0828, + "step": 9277 + }, + { + "epoch": 1.88, + "learning_rate": 1.0458878390674816e-05, + "loss": 2.1074, + "step": 9278 + }, + { + "epoch": 1.88, + "learning_rate": 1.04571528672496e-05, + "loss": 2.1545, + "step": 9279 + }, + { + "epoch": 1.88, + "learning_rate": 1.045542733018436e-05, + "loss": 2.1711, + "step": 9280 + }, + { + "epoch": 1.88, + "learning_rate": 1.0453701779530577e-05, + "loss": 2.1411, + "step": 9281 + }, + { + "epoch": 1.88, + "learning_rate": 1.0451976215339736e-05, + "loss": 2.1149, + "step": 9282 + }, + { + "epoch": 1.88, + "learning_rate": 1.045025063766333e-05, + "loss": 2.0793, + "step": 9283 + }, + { + "epoch": 1.88, + "learning_rate": 1.0448525046552838e-05, + "loss": 2.033, + "step": 9284 + }, + { + "epoch": 1.88, + "learning_rate": 1.044679944205975e-05, + "loss": 2.1746, + "step": 9285 + }, + { + "epoch": 1.88, + "learning_rate": 1.0445073824235544e-05, + "loss": 2.1224, + "step": 9286 + }, + { + "epoch": 1.89, + "learning_rate": 1.044334819313172e-05, + "loss": 2.1749, + "step": 9287 + }, + { + "epoch": 1.89, + "learning_rate": 1.0441622548799757e-05, + "loss": 2.1839, + "step": 9288 + }, + { + "epoch": 1.89, + "learning_rate": 1.0439896891291146e-05, + "loss": 2.1576, + "step": 9289 + }, + { + "epoch": 1.89, + "learning_rate": 1.0438171220657375e-05, + "loss": 2.0697, + "step": 9290 + }, + { + "epoch": 1.89, + "learning_rate": 1.0436445536949929e-05, + "loss": 2.1871, + "step": 9291 + }, + { + "epoch": 1.89, + "learning_rate": 1.0434719840220302e-05, + "loss": 2.1358, + "step": 9292 + }, + { + "epoch": 1.89, + "learning_rate": 1.0432994130519985e-05, + "loss": 2.174, + "step": 9293 + }, + { + "epoch": 1.89, + "learning_rate": 1.0431268407900461e-05, + "loss": 2.1751, + "step": 9294 + }, + { + "epoch": 1.89, + "learning_rate": 1.0429542672413221e-05, + "loss": 2.1546, + "step": 9295 + }, + { + "epoch": 1.89, + "learning_rate": 1.0427816924109761e-05, + "loss": 2.1805, + "step": 9296 + }, + { + "epoch": 1.89, + "learning_rate": 1.0426091163041571e-05, + "loss": 2.111, + "step": 9297 + }, + { + "epoch": 1.89, + "learning_rate": 1.0424365389260141e-05, + "loss": 2.0599, + "step": 9298 + }, + { + "epoch": 1.89, + "learning_rate": 1.042263960281696e-05, + "loss": 2.1307, + "step": 9299 + }, + { + "epoch": 1.89, + "learning_rate": 1.0420913803763522e-05, + "loss": 2.1244, + "step": 9300 + }, + { + "epoch": 1.89, + "learning_rate": 1.041918799215132e-05, + "loss": 2.1578, + "step": 9301 + }, + { + "epoch": 1.89, + "learning_rate": 1.0417462168031847e-05, + "loss": 2.0701, + "step": 9302 + }, + { + "epoch": 1.89, + "learning_rate": 1.0415736331456599e-05, + "loss": 2.0898, + "step": 9303 + }, + { + "epoch": 1.89, + "learning_rate": 1.0414010482477064e-05, + "loss": 2.1335, + "step": 9304 + }, + { + "epoch": 1.89, + "learning_rate": 1.0412284621144742e-05, + "loss": 2.1108, + "step": 9305 + }, + { + "epoch": 1.89, + "learning_rate": 1.0410558747511118e-05, + "loss": 2.115, + "step": 9306 + }, + { + "epoch": 1.89, + "learning_rate": 1.0408832861627696e-05, + "loss": 2.1406, + "step": 9307 + }, + { + "epoch": 1.89, + "learning_rate": 1.040710696354597e-05, + "loss": 2.1157, + "step": 9308 + }, + { + "epoch": 1.89, + "learning_rate": 1.040538105331743e-05, + "loss": 2.065, + "step": 9309 + }, + { + "epoch": 1.89, + "learning_rate": 1.0403655130993578e-05, + "loss": 2.1255, + "step": 9310 + }, + { + "epoch": 1.89, + "learning_rate": 1.0401929196625904e-05, + "loss": 2.1726, + "step": 9311 + }, + { + "epoch": 1.89, + "learning_rate": 1.040020325026591e-05, + "loss": 2.1523, + "step": 9312 + }, + { + "epoch": 1.89, + "learning_rate": 1.039847729196509e-05, + "loss": 2.102, + "step": 9313 + }, + { + "epoch": 1.89, + "learning_rate": 1.0396751321774945e-05, + "loss": 2.158, + "step": 9314 + }, + { + "epoch": 1.89, + "learning_rate": 1.0395025339746965e-05, + "loss": 2.1401, + "step": 9315 + }, + { + "epoch": 1.89, + "learning_rate": 1.0393299345932653e-05, + "loss": 2.179, + "step": 9316 + }, + { + "epoch": 1.89, + "learning_rate": 1.039157334038351e-05, + "loss": 2.1227, + "step": 9317 + }, + { + "epoch": 1.89, + "learning_rate": 1.038984732315103e-05, + "loss": 2.1261, + "step": 9318 + }, + { + "epoch": 1.89, + "learning_rate": 1.0388121294286712e-05, + "loss": 2.1595, + "step": 9319 + }, + { + "epoch": 1.89, + "learning_rate": 1.0386395253842056e-05, + "loss": 2.0844, + "step": 9320 + }, + { + "epoch": 1.89, + "learning_rate": 1.0384669201868567e-05, + "loss": 2.1604, + "step": 9321 + }, + { + "epoch": 1.89, + "learning_rate": 1.0382943138417741e-05, + "loss": 2.2029, + "step": 9322 + }, + { + "epoch": 1.89, + "learning_rate": 1.0381217063541074e-05, + "loss": 2.047, + "step": 9323 + }, + { + "epoch": 1.89, + "learning_rate": 1.0379490977290072e-05, + "loss": 2.184, + "step": 9324 + }, + { + "epoch": 1.89, + "learning_rate": 1.0377764879716234e-05, + "loss": 2.1732, + "step": 9325 + }, + { + "epoch": 1.89, + "learning_rate": 1.0376038770871063e-05, + "loss": 2.1764, + "step": 9326 + }, + { + "epoch": 1.89, + "learning_rate": 1.037431265080606e-05, + "loss": 2.0733, + "step": 9327 + }, + { + "epoch": 1.89, + "learning_rate": 1.0372586519572727e-05, + "loss": 2.136, + "step": 9328 + }, + { + "epoch": 1.89, + "learning_rate": 1.0370860377222569e-05, + "loss": 2.1169, + "step": 9329 + }, + { + "epoch": 1.89, + "learning_rate": 1.036913422380708e-05, + "loss": 2.1376, + "step": 9330 + }, + { + "epoch": 1.89, + "learning_rate": 1.0367408059377776e-05, + "loss": 2.1654, + "step": 9331 + }, + { + "epoch": 1.89, + "learning_rate": 1.036568188398615e-05, + "loss": 2.0857, + "step": 9332 + }, + { + "epoch": 1.89, + "learning_rate": 1.036395569768371e-05, + "loss": 2.1515, + "step": 9333 + }, + { + "epoch": 1.89, + "learning_rate": 1.0362229500521962e-05, + "loss": 2.1515, + "step": 9334 + }, + { + "epoch": 1.89, + "learning_rate": 1.03605032925524e-05, + "loss": 2.0866, + "step": 9335 + }, + { + "epoch": 1.9, + "learning_rate": 1.0358777073826543e-05, + "loss": 2.169, + "step": 9336 + }, + { + "epoch": 1.9, + "learning_rate": 1.035705084439589e-05, + "loss": 2.1816, + "step": 9337 + }, + { + "epoch": 1.9, + "learning_rate": 1.0355324604311944e-05, + "loss": 2.1285, + "step": 9338 + }, + { + "epoch": 1.9, + "learning_rate": 1.0353598353626213e-05, + "loss": 2.0783, + "step": 9339 + }, + { + "epoch": 1.9, + "learning_rate": 1.03518720923902e-05, + "loss": 2.1592, + "step": 9340 + }, + { + "epoch": 1.9, + "learning_rate": 1.0350145820655417e-05, + "loss": 2.1153, + "step": 9341 + }, + { + "epoch": 1.9, + "learning_rate": 1.0348419538473367e-05, + "loss": 2.1583, + "step": 9342 + }, + { + "epoch": 1.9, + "learning_rate": 1.0346693245895556e-05, + "loss": 2.1387, + "step": 9343 + }, + { + "epoch": 1.9, + "learning_rate": 1.0344966942973493e-05, + "loss": 2.069, + "step": 9344 + }, + { + "epoch": 1.9, + "learning_rate": 1.0343240629758683e-05, + "loss": 2.1654, + "step": 9345 + }, + { + "epoch": 1.9, + "learning_rate": 1.034151430630264e-05, + "loss": 2.1613, + "step": 9346 + }, + { + "epoch": 1.9, + "learning_rate": 1.0339787972656864e-05, + "loss": 2.1053, + "step": 9347 + }, + { + "epoch": 1.9, + "learning_rate": 1.0338061628872871e-05, + "loss": 2.1412, + "step": 9348 + }, + { + "epoch": 1.9, + "learning_rate": 1.0336335275002166e-05, + "loss": 2.15, + "step": 9349 + }, + { + "epoch": 1.9, + "learning_rate": 1.0334608911096253e-05, + "loss": 2.0964, + "step": 9350 + }, + { + "epoch": 1.9, + "learning_rate": 1.0332882537206652e-05, + "loss": 2.2089, + "step": 9351 + }, + { + "epoch": 1.9, + "learning_rate": 1.0331156153384866e-05, + "loss": 2.0931, + "step": 9352 + }, + { + "epoch": 1.9, + "learning_rate": 1.0329429759682408e-05, + "loss": 2.1593, + "step": 9353 + }, + { + "epoch": 1.9, + "learning_rate": 1.0327703356150784e-05, + "loss": 2.12, + "step": 9354 + }, + { + "epoch": 1.9, + "learning_rate": 1.0325976942841505e-05, + "loss": 2.1557, + "step": 9355 + }, + { + "epoch": 1.9, + "learning_rate": 1.0324250519806087e-05, + "loss": 2.1383, + "step": 9356 + }, + { + "epoch": 1.9, + "learning_rate": 1.032252408709604e-05, + "loss": 2.1357, + "step": 9357 + }, + { + "epoch": 1.9, + "learning_rate": 1.032079764476287e-05, + "loss": 2.1592, + "step": 9358 + }, + { + "epoch": 1.9, + "learning_rate": 1.0319071192858092e-05, + "loss": 2.1343, + "step": 9359 + }, + { + "epoch": 1.9, + "learning_rate": 1.0317344731433217e-05, + "loss": 2.1397, + "step": 9360 + }, + { + "epoch": 1.9, + "learning_rate": 1.0315618260539761e-05, + "loss": 2.1486, + "step": 9361 + }, + { + "epoch": 1.9, + "learning_rate": 1.0313891780229234e-05, + "loss": 2.1494, + "step": 9362 + }, + { + "epoch": 1.9, + "learning_rate": 1.0312165290553147e-05, + "loss": 2.1651, + "step": 9363 + }, + { + "epoch": 1.9, + "learning_rate": 1.0310438791563015e-05, + "loss": 2.1934, + "step": 9364 + }, + { + "epoch": 1.9, + "learning_rate": 1.0308712283310348e-05, + "loss": 2.1566, + "step": 9365 + }, + { + "epoch": 1.9, + "learning_rate": 1.030698576584667e-05, + "loss": 2.1329, + "step": 9366 + }, + { + "epoch": 1.9, + "learning_rate": 1.0305259239223482e-05, + "loss": 2.0973, + "step": 9367 + }, + { + "epoch": 1.9, + "learning_rate": 1.0303532703492304e-05, + "loss": 2.1459, + "step": 9368 + }, + { + "epoch": 1.9, + "learning_rate": 1.0301806158704655e-05, + "loss": 2.1016, + "step": 9369 + }, + { + "epoch": 1.9, + "learning_rate": 1.0300079604912041e-05, + "loss": 2.2153, + "step": 9370 + }, + { + "epoch": 1.9, + "learning_rate": 1.029835304216598e-05, + "loss": 2.1321, + "step": 9371 + }, + { + "epoch": 1.9, + "learning_rate": 1.0296626470517991e-05, + "loss": 2.1311, + "step": 9372 + }, + { + "epoch": 1.9, + "learning_rate": 1.0294899890019589e-05, + "loss": 2.1367, + "step": 9373 + }, + { + "epoch": 1.9, + "learning_rate": 1.0293173300722286e-05, + "loss": 2.0838, + "step": 9374 + }, + { + "epoch": 1.9, + "learning_rate": 1.0291446702677596e-05, + "loss": 2.1893, + "step": 9375 + }, + { + "epoch": 1.9, + "learning_rate": 1.0289720095937044e-05, + "loss": 2.1894, + "step": 9376 + }, + { + "epoch": 1.9, + "learning_rate": 1.0287993480552141e-05, + "loss": 2.1908, + "step": 9377 + }, + { + "epoch": 1.9, + "learning_rate": 1.0286266856574405e-05, + "loss": 2.0896, + "step": 9378 + }, + { + "epoch": 1.9, + "learning_rate": 1.0284540224055353e-05, + "loss": 2.1534, + "step": 9379 + }, + { + "epoch": 1.9, + "learning_rate": 1.0282813583046498e-05, + "loss": 2.1395, + "step": 9380 + }, + { + "epoch": 1.9, + "learning_rate": 1.0281086933599366e-05, + "loss": 2.1711, + "step": 9381 + }, + { + "epoch": 1.9, + "learning_rate": 1.0279360275765472e-05, + "loss": 2.0732, + "step": 9382 + }, + { + "epoch": 1.9, + "learning_rate": 1.0277633609596327e-05, + "loss": 2.1325, + "step": 9383 + }, + { + "epoch": 1.9, + "learning_rate": 1.027590693514346e-05, + "loss": 2.0943, + "step": 9384 + }, + { + "epoch": 1.91, + "learning_rate": 1.0274180252458381e-05, + "loss": 2.1207, + "step": 9385 + }, + { + "epoch": 1.91, + "learning_rate": 1.0272453561592614e-05, + "loss": 2.2249, + "step": 9386 + }, + { + "epoch": 1.91, + "learning_rate": 1.0270726862597678e-05, + "loss": 2.0768, + "step": 9387 + }, + { + "epoch": 1.91, + "learning_rate": 1.0269000155525091e-05, + "loss": 2.1298, + "step": 9388 + }, + { + "epoch": 1.91, + "learning_rate": 1.0267273440426374e-05, + "loss": 2.0726, + "step": 9389 + }, + { + "epoch": 1.91, + "learning_rate": 1.0265546717353041e-05, + "loss": 2.158, + "step": 9390 + }, + { + "epoch": 1.91, + "learning_rate": 1.0263819986356618e-05, + "loss": 2.2373, + "step": 9391 + }, + { + "epoch": 1.91, + "learning_rate": 1.0262093247488626e-05, + "loss": 2.1293, + "step": 9392 + }, + { + "epoch": 1.91, + "learning_rate": 1.0260366500800584e-05, + "loss": 2.0989, + "step": 9393 + }, + { + "epoch": 1.91, + "learning_rate": 1.025863974634401e-05, + "loss": 2.1529, + "step": 9394 + }, + { + "epoch": 1.91, + "learning_rate": 1.0256912984170427e-05, + "loss": 2.1568, + "step": 9395 + }, + { + "epoch": 1.91, + "learning_rate": 1.0255186214331357e-05, + "loss": 2.1888, + "step": 9396 + }, + { + "epoch": 1.91, + "learning_rate": 1.025345943687832e-05, + "loss": 2.0741, + "step": 9397 + }, + { + "epoch": 1.91, + "learning_rate": 1.0251732651862842e-05, + "loss": 2.1428, + "step": 9398 + }, + { + "epoch": 1.91, + "learning_rate": 1.025000585933644e-05, + "loss": 2.0718, + "step": 9399 + }, + { + "epoch": 1.91, + "learning_rate": 1.0248279059350632e-05, + "loss": 2.0395, + "step": 9400 + }, + { + "epoch": 1.91, + "learning_rate": 1.0246552251956952e-05, + "loss": 2.1317, + "step": 9401 + }, + { + "epoch": 1.91, + "learning_rate": 1.0244825437206915e-05, + "loss": 2.1224, + "step": 9402 + }, + { + "epoch": 1.91, + "learning_rate": 1.0243098615152047e-05, + "loss": 2.1804, + "step": 9403 + }, + { + "epoch": 1.91, + "learning_rate": 1.0241371785843867e-05, + "loss": 2.0909, + "step": 9404 + }, + { + "epoch": 1.91, + "learning_rate": 1.02396449493339e-05, + "loss": 2.073, + "step": 9405 + }, + { + "epoch": 1.91, + "learning_rate": 1.0237918105673674e-05, + "loss": 2.1727, + "step": 9406 + }, + { + "epoch": 1.91, + "learning_rate": 1.0236191254914705e-05, + "loss": 2.1861, + "step": 9407 + }, + { + "epoch": 1.91, + "learning_rate": 1.0234464397108524e-05, + "loss": 2.1637, + "step": 9408 + }, + { + "epoch": 1.91, + "learning_rate": 1.0232737532306647e-05, + "loss": 2.1543, + "step": 9409 + }, + { + "epoch": 1.91, + "learning_rate": 1.0231010660560605e-05, + "loss": 2.0608, + "step": 9410 + }, + { + "epoch": 1.91, + "learning_rate": 1.0229283781921922e-05, + "loss": 2.1416, + "step": 9411 + }, + { + "epoch": 1.91, + "learning_rate": 1.0227556896442122e-05, + "loss": 2.1134, + "step": 9412 + }, + { + "epoch": 1.91, + "learning_rate": 1.0225830004172728e-05, + "loss": 2.1627, + "step": 9413 + }, + { + "epoch": 1.91, + "learning_rate": 1.0224103105165264e-05, + "loss": 2.0863, + "step": 9414 + }, + { + "epoch": 1.91, + "learning_rate": 1.0222376199471258e-05, + "loss": 2.0329, + "step": 9415 + }, + { + "epoch": 1.91, + "learning_rate": 1.0220649287142237e-05, + "loss": 2.0863, + "step": 9416 + }, + { + "epoch": 1.91, + "learning_rate": 1.0218922368229725e-05, + "loss": 2.1087, + "step": 9417 + }, + { + "epoch": 1.91, + "learning_rate": 1.0217195442785246e-05, + "loss": 2.064, + "step": 9418 + }, + { + "epoch": 1.91, + "learning_rate": 1.0215468510860327e-05, + "loss": 2.1282, + "step": 9419 + }, + { + "epoch": 1.91, + "learning_rate": 1.0213741572506497e-05, + "loss": 2.0344, + "step": 9420 + }, + { + "epoch": 1.91, + "learning_rate": 1.0212014627775282e-05, + "loss": 2.1689, + "step": 9421 + }, + { + "epoch": 1.91, + "learning_rate": 1.0210287676718205e-05, + "loss": 2.1213, + "step": 9422 + }, + { + "epoch": 1.91, + "learning_rate": 1.0208560719386798e-05, + "loss": 2.0663, + "step": 9423 + }, + { + "epoch": 1.91, + "learning_rate": 1.020683375583258e-05, + "loss": 2.1233, + "step": 9424 + }, + { + "epoch": 1.91, + "learning_rate": 1.0205106786107087e-05, + "loss": 2.1404, + "step": 9425 + }, + { + "epoch": 1.91, + "learning_rate": 1.0203379810261841e-05, + "loss": 2.0993, + "step": 9426 + }, + { + "epoch": 1.91, + "learning_rate": 1.0201652828348374e-05, + "loss": 2.1794, + "step": 9427 + }, + { + "epoch": 1.91, + "learning_rate": 1.019992584041821e-05, + "loss": 2.0503, + "step": 9428 + }, + { + "epoch": 1.91, + "learning_rate": 1.0198198846522875e-05, + "loss": 2.2272, + "step": 9429 + }, + { + "epoch": 1.91, + "learning_rate": 1.0196471846713908e-05, + "loss": 2.1622, + "step": 9430 + }, + { + "epoch": 1.91, + "learning_rate": 1.0194744841042821e-05, + "loss": 2.0279, + "step": 9431 + }, + { + "epoch": 1.91, + "learning_rate": 1.0193017829561158e-05, + "loss": 2.1712, + "step": 9432 + }, + { + "epoch": 1.91, + "learning_rate": 1.0191290812320439e-05, + "loss": 2.1078, + "step": 9433 + }, + { + "epoch": 1.91, + "learning_rate": 1.0189563789372193e-05, + "loss": 2.1208, + "step": 9434 + }, + { + "epoch": 1.92, + "learning_rate": 1.018783676076795e-05, + "loss": 2.134, + "step": 9435 + }, + { + "epoch": 1.92, + "learning_rate": 1.0186109726559243e-05, + "loss": 2.0513, + "step": 9436 + }, + { + "epoch": 1.92, + "learning_rate": 1.0184382686797598e-05, + "loss": 2.1345, + "step": 9437 + }, + { + "epoch": 1.92, + "learning_rate": 1.0182655641534545e-05, + "loss": 2.1285, + "step": 9438 + }, + { + "epoch": 1.92, + "learning_rate": 1.0180928590821608e-05, + "loss": 2.1679, + "step": 9439 + }, + { + "epoch": 1.92, + "learning_rate": 1.0179201534710327e-05, + "loss": 2.1345, + "step": 9440 + }, + { + "epoch": 1.92, + "learning_rate": 1.0177474473252228e-05, + "loss": 2.1069, + "step": 9441 + }, + { + "epoch": 1.92, + "learning_rate": 1.0175747406498841e-05, + "loss": 2.0529, + "step": 9442 + }, + { + "epoch": 1.92, + "learning_rate": 1.0174020334501696e-05, + "loss": 2.111, + "step": 9443 + }, + { + "epoch": 1.92, + "learning_rate": 1.0172293257312318e-05, + "loss": 2.0612, + "step": 9444 + }, + { + "epoch": 1.92, + "learning_rate": 1.0170566174982247e-05, + "loss": 2.152, + "step": 9445 + }, + { + "epoch": 1.92, + "learning_rate": 1.016883908756301e-05, + "loss": 2.1679, + "step": 9446 + }, + { + "epoch": 1.92, + "learning_rate": 1.0167111995106137e-05, + "loss": 2.1742, + "step": 9447 + }, + { + "epoch": 1.92, + "learning_rate": 1.016538489766316e-05, + "loss": 2.0844, + "step": 9448 + }, + { + "epoch": 1.92, + "learning_rate": 1.0163657795285609e-05, + "loss": 2.1489, + "step": 9449 + }, + { + "epoch": 1.92, + "learning_rate": 1.0161930688025018e-05, + "loss": 2.1571, + "step": 9450 + }, + { + "epoch": 1.92, + "learning_rate": 1.0160203575932915e-05, + "loss": 2.0546, + "step": 9451 + }, + { + "epoch": 1.92, + "learning_rate": 1.0158476459060833e-05, + "loss": 2.1493, + "step": 9452 + }, + { + "epoch": 1.92, + "learning_rate": 1.0156749337460308e-05, + "loss": 2.1136, + "step": 9453 + }, + { + "epoch": 1.92, + "learning_rate": 1.0155022211182863e-05, + "loss": 2.157, + "step": 9454 + }, + { + "epoch": 1.92, + "learning_rate": 1.0153295080280039e-05, + "loss": 2.0734, + "step": 9455 + }, + { + "epoch": 1.92, + "learning_rate": 1.0151567944803364e-05, + "loss": 2.1217, + "step": 9456 + }, + { + "epoch": 1.92, + "learning_rate": 1.0149840804804372e-05, + "loss": 2.1542, + "step": 9457 + }, + { + "epoch": 1.92, + "learning_rate": 1.0148113660334596e-05, + "loss": 2.1764, + "step": 9458 + }, + { + "epoch": 1.92, + "learning_rate": 1.0146386511445561e-05, + "loss": 2.0888, + "step": 9459 + }, + { + "epoch": 1.92, + "learning_rate": 1.0144659358188811e-05, + "loss": 2.161, + "step": 9460 + }, + { + "epoch": 1.92, + "learning_rate": 1.0142932200615874e-05, + "loss": 2.1504, + "step": 9461 + }, + { + "epoch": 1.92, + "learning_rate": 1.014120503877828e-05, + "loss": 2.1364, + "step": 9462 + }, + { + "epoch": 1.92, + "learning_rate": 1.013947787272757e-05, + "loss": 2.1063, + "step": 9463 + }, + { + "epoch": 1.92, + "learning_rate": 1.0137750702515265e-05, + "loss": 2.1677, + "step": 9464 + }, + { + "epoch": 1.92, + "learning_rate": 1.013602352819291e-05, + "loss": 2.1522, + "step": 9465 + }, + { + "epoch": 1.92, + "learning_rate": 1.0134296349812037e-05, + "loss": 2.0533, + "step": 9466 + }, + { + "epoch": 1.92, + "learning_rate": 1.0132569167424174e-05, + "loss": 2.1046, + "step": 9467 + }, + { + "epoch": 1.92, + "learning_rate": 1.0130841981080856e-05, + "loss": 2.0875, + "step": 9468 + }, + { + "epoch": 1.92, + "learning_rate": 1.012911479083362e-05, + "loss": 2.0291, + "step": 9469 + }, + { + "epoch": 1.92, + "learning_rate": 1.0127387596734e-05, + "loss": 2.1348, + "step": 9470 + }, + { + "epoch": 1.92, + "learning_rate": 1.0125660398833528e-05, + "loss": 2.1391, + "step": 9471 + }, + { + "epoch": 1.92, + "learning_rate": 1.0123933197183737e-05, + "loss": 2.1042, + "step": 9472 + }, + { + "epoch": 1.92, + "learning_rate": 1.0122205991836167e-05, + "loss": 2.193, + "step": 9473 + }, + { + "epoch": 1.92, + "learning_rate": 1.0120478782842345e-05, + "loss": 2.0572, + "step": 9474 + }, + { + "epoch": 1.92, + "learning_rate": 1.0118751570253813e-05, + "loss": 2.1226, + "step": 9475 + }, + { + "epoch": 1.92, + "learning_rate": 1.0117024354122099e-05, + "loss": 2.1128, + "step": 9476 + }, + { + "epoch": 1.92, + "learning_rate": 1.0115297134498744e-05, + "loss": 2.1864, + "step": 9477 + }, + { + "epoch": 1.92, + "learning_rate": 1.0113569911435277e-05, + "loss": 2.129, + "step": 9478 + }, + { + "epoch": 1.92, + "learning_rate": 1.0111842684983232e-05, + "loss": 2.1467, + "step": 9479 + }, + { + "epoch": 1.92, + "learning_rate": 1.0110115455194153e-05, + "loss": 2.0699, + "step": 9480 + }, + { + "epoch": 1.92, + "learning_rate": 1.010838822211957e-05, + "loss": 2.061, + "step": 9481 + }, + { + "epoch": 1.92, + "learning_rate": 1.0106660985811018e-05, + "loss": 2.1637, + "step": 9482 + }, + { + "epoch": 1.92, + "learning_rate": 1.0104933746320034e-05, + "loss": 2.1098, + "step": 9483 + }, + { + "epoch": 1.93, + "learning_rate": 1.0103206503698144e-05, + "loss": 2.0444, + "step": 9484 + }, + { + "epoch": 1.93, + "learning_rate": 1.01014792579969e-05, + "loss": 2.1948, + "step": 9485 + }, + { + "epoch": 1.93, + "learning_rate": 1.0099752009267828e-05, + "loss": 2.1022, + "step": 9486 + }, + { + "epoch": 1.93, + "learning_rate": 1.0098024757562464e-05, + "loss": 2.1916, + "step": 9487 + }, + { + "epoch": 1.93, + "learning_rate": 1.0096297502932343e-05, + "loss": 2.2488, + "step": 9488 + }, + { + "epoch": 1.93, + "learning_rate": 1.0094570245429002e-05, + "loss": 2.2469, + "step": 9489 + }, + { + "epoch": 1.93, + "learning_rate": 1.0092842985103984e-05, + "loss": 2.1767, + "step": 9490 + }, + { + "epoch": 1.93, + "learning_rate": 1.0091115722008816e-05, + "loss": 2.1402, + "step": 9491 + }, + { + "epoch": 1.93, + "learning_rate": 1.0089388456195033e-05, + "loss": 2.0979, + "step": 9492 + }, + { + "epoch": 1.93, + "learning_rate": 1.008766118771418e-05, + "loss": 2.1972, + "step": 9493 + }, + { + "epoch": 1.93, + "learning_rate": 1.0085933916617788e-05, + "loss": 2.0692, + "step": 9494 + }, + { + "epoch": 1.93, + "learning_rate": 1.0084206642957393e-05, + "loss": 2.2002, + "step": 9495 + }, + { + "epoch": 1.93, + "learning_rate": 1.0082479366784532e-05, + "loss": 2.1685, + "step": 9496 + }, + { + "epoch": 1.93, + "learning_rate": 1.0080752088150745e-05, + "loss": 2.1477, + "step": 9497 + }, + { + "epoch": 1.93, + "learning_rate": 1.0079024807107565e-05, + "loss": 2.1556, + "step": 9498 + }, + { + "epoch": 1.93, + "learning_rate": 1.0077297523706527e-05, + "loss": 2.1447, + "step": 9499 + }, + { + "epoch": 1.93, + "learning_rate": 1.007557023799917e-05, + "loss": 2.0269, + "step": 9500 + }, + { + "epoch": 1.93, + "learning_rate": 1.0073842950037034e-05, + "loss": 2.1233, + "step": 9501 + }, + { + "epoch": 1.93, + "learning_rate": 1.0072115659871653e-05, + "loss": 2.1773, + "step": 9502 + }, + { + "epoch": 1.93, + "learning_rate": 1.007038836755456e-05, + "loss": 2.0945, + "step": 9503 + }, + { + "epoch": 1.93, + "learning_rate": 1.0068661073137301e-05, + "loss": 2.2065, + "step": 9504 + }, + { + "epoch": 1.93, + "learning_rate": 1.0066933776671408e-05, + "loss": 2.0857, + "step": 9505 + }, + { + "epoch": 1.93, + "learning_rate": 1.0065206478208418e-05, + "loss": 2.1588, + "step": 9506 + }, + { + "epoch": 1.93, + "learning_rate": 1.0063479177799868e-05, + "loss": 2.1519, + "step": 9507 + }, + { + "epoch": 1.93, + "learning_rate": 1.0061751875497295e-05, + "loss": 2.1473, + "step": 9508 + }, + { + "epoch": 1.93, + "learning_rate": 1.0060024571352238e-05, + "loss": 2.0944, + "step": 9509 + }, + { + "epoch": 1.93, + "learning_rate": 1.0058297265416234e-05, + "loss": 2.1304, + "step": 9510 + }, + { + "epoch": 1.93, + "learning_rate": 1.0056569957740822e-05, + "loss": 2.1459, + "step": 9511 + }, + { + "epoch": 1.93, + "learning_rate": 1.0054842648377535e-05, + "loss": 2.1626, + "step": 9512 + }, + { + "epoch": 1.93, + "learning_rate": 1.0053115337377912e-05, + "loss": 2.112, + "step": 9513 + }, + { + "epoch": 1.93, + "learning_rate": 1.0051388024793495e-05, + "loss": 2.2014, + "step": 9514 + }, + { + "epoch": 1.93, + "learning_rate": 1.0049660710675818e-05, + "loss": 2.0744, + "step": 9515 + }, + { + "epoch": 1.93, + "learning_rate": 1.0047933395076419e-05, + "loss": 2.0725, + "step": 9516 + }, + { + "epoch": 1.93, + "learning_rate": 1.0046206078046836e-05, + "loss": 2.1168, + "step": 9517 + }, + { + "epoch": 1.93, + "learning_rate": 1.0044478759638607e-05, + "loss": 2.1715, + "step": 9518 + }, + { + "epoch": 1.93, + "learning_rate": 1.0042751439903269e-05, + "loss": 2.1798, + "step": 9519 + }, + { + "epoch": 1.93, + "learning_rate": 1.004102411889236e-05, + "loss": 2.0757, + "step": 9520 + }, + { + "epoch": 1.93, + "learning_rate": 1.003929679665742e-05, + "loss": 2.1484, + "step": 9521 + }, + { + "epoch": 1.93, + "learning_rate": 1.0037569473249986e-05, + "loss": 2.1473, + "step": 9522 + }, + { + "epoch": 1.93, + "learning_rate": 1.003584214872159e-05, + "loss": 2.2178, + "step": 9523 + }, + { + "epoch": 1.93, + "learning_rate": 1.003411482312378e-05, + "loss": 2.1549, + "step": 9524 + }, + { + "epoch": 1.93, + "learning_rate": 1.003238749650809e-05, + "loss": 2.1992, + "step": 9525 + }, + { + "epoch": 1.93, + "learning_rate": 1.0030660168926053e-05, + "loss": 2.1794, + "step": 9526 + }, + { + "epoch": 1.93, + "learning_rate": 1.0028932840429218e-05, + "loss": 2.1076, + "step": 9527 + }, + { + "epoch": 1.93, + "learning_rate": 1.0027205511069109e-05, + "loss": 2.1781, + "step": 9528 + }, + { + "epoch": 1.93, + "learning_rate": 1.0025478180897276e-05, + "loss": 2.0905, + "step": 9529 + }, + { + "epoch": 1.93, + "learning_rate": 1.0023750849965255e-05, + "loss": 2.0073, + "step": 9530 + }, + { + "epoch": 1.93, + "learning_rate": 1.002202351832458e-05, + "loss": 2.1352, + "step": 9531 + }, + { + "epoch": 1.93, + "learning_rate": 1.0020296186026792e-05, + "loss": 2.0948, + "step": 9532 + }, + { + "epoch": 1.94, + "learning_rate": 1.0018568853123424e-05, + "loss": 2.1464, + "step": 9533 + }, + { + "epoch": 1.94, + "learning_rate": 1.0016841519666025e-05, + "loss": 2.0907, + "step": 9534 + }, + { + "epoch": 1.94, + "learning_rate": 1.0015114185706127e-05, + "loss": 2.1213, + "step": 9535 + }, + { + "epoch": 1.94, + "learning_rate": 1.0013386851295266e-05, + "loss": 2.0764, + "step": 9536 + }, + { + "epoch": 1.94, + "learning_rate": 1.0011659516484984e-05, + "loss": 2.0427, + "step": 9537 + }, + { + "epoch": 1.94, + "learning_rate": 1.0009932181326816e-05, + "loss": 2.1608, + "step": 9538 + }, + { + "epoch": 1.94, + "learning_rate": 1.0008204845872305e-05, + "loss": 2.0294, + "step": 9539 + }, + { + "epoch": 1.94, + "learning_rate": 1.0006477510172984e-05, + "loss": 2.1573, + "step": 9540 + }, + { + "epoch": 1.94, + "learning_rate": 1.0004750174280398e-05, + "loss": 2.2015, + "step": 9541 + }, + { + "epoch": 1.94, + "learning_rate": 1.000302283824608e-05, + "loss": 2.1071, + "step": 9542 + }, + { + "epoch": 1.94, + "learning_rate": 1.0001295502121568e-05, + "loss": 2.0937, + "step": 9543 + }, + { + "epoch": 1.94, + "learning_rate": 9.999568165958404e-06, + "loss": 2.1708, + "step": 9544 + }, + { + "epoch": 1.94, + "learning_rate": 9.997840829808124e-06, + "loss": 2.1852, + "step": 9545 + }, + { + "epoch": 1.94, + "learning_rate": 9.996113493722269e-06, + "loss": 2.1058, + "step": 9546 + }, + { + "epoch": 1.94, + "learning_rate": 9.994386157752375e-06, + "loss": 2.1037, + "step": 9547 + }, + { + "epoch": 1.94, + "learning_rate": 9.992658821949976e-06, + "loss": 2.0842, + "step": 9548 + }, + { + "epoch": 1.94, + "learning_rate": 9.99093148636662e-06, + "loss": 2.1037, + "step": 9549 + }, + { + "epoch": 1.94, + "learning_rate": 9.989204151053839e-06, + "loss": 2.1673, + "step": 9550 + }, + { + "epoch": 1.94, + "learning_rate": 9.987476816063173e-06, + "loss": 2.103, + "step": 9551 + }, + { + "epoch": 1.94, + "learning_rate": 9.98574948144616e-06, + "loss": 2.1295, + "step": 9552 + }, + { + "epoch": 1.94, + "learning_rate": 9.984022147254335e-06, + "loss": 2.1201, + "step": 9553 + }, + { + "epoch": 1.94, + "learning_rate": 9.982294813539243e-06, + "loss": 2.1408, + "step": 9554 + }, + { + "epoch": 1.94, + "learning_rate": 9.980567480352419e-06, + "loss": 2.1215, + "step": 9555 + }, + { + "epoch": 1.94, + "learning_rate": 9.978840147745402e-06, + "loss": 2.0684, + "step": 9556 + }, + { + "epoch": 1.94, + "learning_rate": 9.977112815769726e-06, + "loss": 2.0933, + "step": 9557 + }, + { + "epoch": 1.94, + "learning_rate": 9.975385484476933e-06, + "loss": 2.1354, + "step": 9558 + }, + { + "epoch": 1.94, + "learning_rate": 9.973658153918564e-06, + "loss": 2.1211, + "step": 9559 + }, + { + "epoch": 1.94, + "learning_rate": 9.971930824146151e-06, + "loss": 1.9992, + "step": 9560 + }, + { + "epoch": 1.94, + "learning_rate": 9.970203495211238e-06, + "loss": 2.0815, + "step": 9561 + }, + { + "epoch": 1.94, + "learning_rate": 9.968476167165361e-06, + "loss": 2.1838, + "step": 9562 + }, + { + "epoch": 1.94, + "learning_rate": 9.966748840060054e-06, + "loss": 2.1705, + "step": 9563 + }, + { + "epoch": 1.94, + "learning_rate": 9.965021513946859e-06, + "loss": 2.1053, + "step": 9564 + }, + { + "epoch": 1.94, + "learning_rate": 9.963294188877316e-06, + "loss": 2.1261, + "step": 9565 + }, + { + "epoch": 1.94, + "learning_rate": 9.96156686490296e-06, + "loss": 2.0669, + "step": 9566 + }, + { + "epoch": 1.94, + "learning_rate": 9.959839542075332e-06, + "loss": 2.1816, + "step": 9567 + }, + { + "epoch": 1.94, + "learning_rate": 9.95811222044596e-06, + "loss": 2.1562, + "step": 9568 + }, + { + "epoch": 1.94, + "learning_rate": 9.956384900066396e-06, + "loss": 2.0673, + "step": 9569 + }, + { + "epoch": 1.94, + "learning_rate": 9.954657580988171e-06, + "loss": 2.135, + "step": 9570 + }, + { + "epoch": 1.94, + "learning_rate": 9.952930263262825e-06, + "loss": 2.0863, + "step": 9571 + }, + { + "epoch": 1.94, + "learning_rate": 9.951202946941892e-06, + "loss": 2.0787, + "step": 9572 + }, + { + "epoch": 1.94, + "learning_rate": 9.949475632076907e-06, + "loss": 2.1421, + "step": 9573 + }, + { + "epoch": 1.94, + "learning_rate": 9.947748318719419e-06, + "loss": 2.142, + "step": 9574 + }, + { + "epoch": 1.94, + "learning_rate": 9.946021006920959e-06, + "loss": 2.1749, + "step": 9575 + }, + { + "epoch": 1.94, + "learning_rate": 9.944293696733064e-06, + "loss": 2.1299, + "step": 9576 + }, + { + "epoch": 1.94, + "learning_rate": 9.94256638820727e-06, + "loss": 2.1504, + "step": 9577 + }, + { + "epoch": 1.94, + "learning_rate": 9.940839081395118e-06, + "loss": 2.1658, + "step": 9578 + }, + { + "epoch": 1.94, + "learning_rate": 9.939111776348146e-06, + "loss": 2.2293, + "step": 9579 + }, + { + "epoch": 1.94, + "learning_rate": 9.93738447311789e-06, + "loss": 2.1882, + "step": 9580 + }, + { + "epoch": 1.94, + "learning_rate": 9.935657171755886e-06, + "loss": 2.0823, + "step": 9581 + }, + { + "epoch": 1.94, + "learning_rate": 9.933929872313675e-06, + "loss": 2.2143, + "step": 9582 + }, + { + "epoch": 1.95, + "learning_rate": 9.932202574842789e-06, + "loss": 2.0516, + "step": 9583 + }, + { + "epoch": 1.95, + "learning_rate": 9.930475279394767e-06, + "loss": 2.1814, + "step": 9584 + }, + { + "epoch": 1.95, + "learning_rate": 9.928747986021153e-06, + "loss": 2.0734, + "step": 9585 + }, + { + "epoch": 1.95, + "learning_rate": 9.927020694773476e-06, + "loss": 2.1181, + "step": 9586 + }, + { + "epoch": 1.95, + "learning_rate": 9.925293405703275e-06, + "loss": 2.1399, + "step": 9587 + }, + { + "epoch": 1.95, + "learning_rate": 9.923566118862083e-06, + "loss": 2.0062, + "step": 9588 + }, + { + "epoch": 1.95, + "learning_rate": 9.921838834301447e-06, + "loss": 2.1513, + "step": 9589 + }, + { + "epoch": 1.95, + "learning_rate": 9.920111552072897e-06, + "loss": 2.1252, + "step": 9590 + }, + { + "epoch": 1.95, + "learning_rate": 9.91838427222797e-06, + "loss": 2.156, + "step": 9591 + }, + { + "epoch": 1.95, + "learning_rate": 9.916656994818202e-06, + "loss": 2.1106, + "step": 9592 + }, + { + "epoch": 1.95, + "learning_rate": 9.914929719895133e-06, + "loss": 2.0968, + "step": 9593 + }, + { + "epoch": 1.95, + "learning_rate": 9.913202447510299e-06, + "loss": 2.1212, + "step": 9594 + }, + { + "epoch": 1.95, + "learning_rate": 9.911475177715234e-06, + "loss": 2.1742, + "step": 9595 + }, + { + "epoch": 1.95, + "learning_rate": 9.909747910561475e-06, + "loss": 2.051, + "step": 9596 + }, + { + "epoch": 1.95, + "learning_rate": 9.908020646100556e-06, + "loss": 2.12, + "step": 9597 + }, + { + "epoch": 1.95, + "learning_rate": 9.906293384384019e-06, + "loss": 2.1961, + "step": 9598 + }, + { + "epoch": 1.95, + "learning_rate": 9.904566125463398e-06, + "loss": 2.1838, + "step": 9599 + }, + { + "epoch": 1.95, + "learning_rate": 9.90283886939023e-06, + "loss": 2.0989, + "step": 9600 + }, + { + "epoch": 1.95, + "learning_rate": 9.901111616216043e-06, + "loss": 2.1086, + "step": 9601 + }, + { + "epoch": 1.95, + "learning_rate": 9.89938436599238e-06, + "loss": 2.0582, + "step": 9602 + }, + { + "epoch": 1.95, + "learning_rate": 9.897657118770781e-06, + "loss": 2.1956, + "step": 9603 + }, + { + "epoch": 1.95, + "learning_rate": 9.895929874602774e-06, + "loss": 2.1069, + "step": 9604 + }, + { + "epoch": 1.95, + "learning_rate": 9.894202633539895e-06, + "loss": 2.1408, + "step": 9605 + }, + { + "epoch": 1.95, + "learning_rate": 9.892475395633684e-06, + "loss": 2.1049, + "step": 9606 + }, + { + "epoch": 1.95, + "learning_rate": 9.890748160935672e-06, + "loss": 2.0698, + "step": 9607 + }, + { + "epoch": 1.95, + "learning_rate": 9.889020929497398e-06, + "loss": 2.1033, + "step": 9608 + }, + { + "epoch": 1.95, + "learning_rate": 9.887293701370396e-06, + "loss": 2.0475, + "step": 9609 + }, + { + "epoch": 1.95, + "learning_rate": 9.885566476606203e-06, + "loss": 2.0609, + "step": 9610 + }, + { + "epoch": 1.95, + "learning_rate": 9.883839255256349e-06, + "loss": 2.1121, + "step": 9611 + }, + { + "epoch": 1.95, + "learning_rate": 9.882112037372369e-06, + "loss": 2.112, + "step": 9612 + }, + { + "epoch": 1.95, + "learning_rate": 9.880384823005805e-06, + "loss": 2.0185, + "step": 9613 + }, + { + "epoch": 1.95, + "learning_rate": 9.878657612208187e-06, + "loss": 2.0787, + "step": 9614 + }, + { + "epoch": 1.95, + "learning_rate": 9.87693040503105e-06, + "loss": 2.1838, + "step": 9615 + }, + { + "epoch": 1.95, + "learning_rate": 9.875203201525928e-06, + "loss": 2.0499, + "step": 9616 + }, + { + "epoch": 1.95, + "learning_rate": 9.873476001744351e-06, + "loss": 2.1197, + "step": 9617 + }, + { + "epoch": 1.95, + "learning_rate": 9.871748805737864e-06, + "loss": 2.0937, + "step": 9618 + }, + { + "epoch": 1.95, + "learning_rate": 9.870021613557995e-06, + "loss": 2.0644, + "step": 9619 + }, + { + "epoch": 1.95, + "learning_rate": 9.868294425256278e-06, + "loss": 2.1621, + "step": 9620 + }, + { + "epoch": 1.95, + "learning_rate": 9.866567240884247e-06, + "loss": 2.1683, + "step": 9621 + }, + { + "epoch": 1.95, + "learning_rate": 9.864840060493433e-06, + "loss": 2.1267, + "step": 9622 + }, + { + "epoch": 1.95, + "learning_rate": 9.863112884135377e-06, + "loss": 2.174, + "step": 9623 + }, + { + "epoch": 1.95, + "learning_rate": 9.86138571186161e-06, + "loss": 2.1124, + "step": 9624 + }, + { + "epoch": 1.95, + "learning_rate": 9.859658543723659e-06, + "loss": 2.0216, + "step": 9625 + }, + { + "epoch": 1.95, + "learning_rate": 9.857931379773065e-06, + "loss": 2.1454, + "step": 9626 + }, + { + "epoch": 1.95, + "learning_rate": 9.856204220061357e-06, + "loss": 2.1376, + "step": 9627 + }, + { + "epoch": 1.95, + "learning_rate": 9.854477064640072e-06, + "loss": 2.1235, + "step": 9628 + }, + { + "epoch": 1.95, + "learning_rate": 9.852749913560738e-06, + "loss": 2.1218, + "step": 9629 + }, + { + "epoch": 1.95, + "learning_rate": 9.851022766874892e-06, + "loss": 2.1557, + "step": 9630 + }, + { + "epoch": 1.95, + "learning_rate": 9.849295624634066e-06, + "loss": 2.1289, + "step": 9631 + }, + { + "epoch": 1.96, + "learning_rate": 9.847568486889788e-06, + "loss": 2.1105, + "step": 9632 + }, + { + "epoch": 1.96, + "learning_rate": 9.845841353693597e-06, + "loss": 2.1144, + "step": 9633 + }, + { + "epoch": 1.96, + "learning_rate": 9.844114225097023e-06, + "loss": 2.1124, + "step": 9634 + }, + { + "epoch": 1.96, + "learning_rate": 9.842387101151599e-06, + "loss": 2.1644, + "step": 9635 + }, + { + "epoch": 1.96, + "learning_rate": 9.840659981908855e-06, + "loss": 2.1407, + "step": 9636 + }, + { + "epoch": 1.96, + "learning_rate": 9.838932867420318e-06, + "loss": 2.1047, + "step": 9637 + }, + { + "epoch": 1.96, + "learning_rate": 9.83720575773753e-06, + "loss": 2.0943, + "step": 9638 + }, + { + "epoch": 1.96, + "learning_rate": 9.835478652912017e-06, + "loss": 2.1455, + "step": 9639 + }, + { + "epoch": 1.96, + "learning_rate": 9.833751552995314e-06, + "loss": 2.1164, + "step": 9640 + }, + { + "epoch": 1.96, + "learning_rate": 9.832024458038945e-06, + "loss": 2.088, + "step": 9641 + }, + { + "epoch": 1.96, + "learning_rate": 9.830297368094449e-06, + "loss": 2.0882, + "step": 9642 + }, + { + "epoch": 1.96, + "learning_rate": 9.828570283213354e-06, + "loss": 2.1332, + "step": 9643 + }, + { + "epoch": 1.96, + "learning_rate": 9.82684320344719e-06, + "loss": 2.1213, + "step": 9644 + }, + { + "epoch": 1.96, + "learning_rate": 9.825116128847486e-06, + "loss": 2.0509, + "step": 9645 + }, + { + "epoch": 1.96, + "learning_rate": 9.823389059465778e-06, + "loss": 2.1303, + "step": 9646 + }, + { + "epoch": 1.96, + "learning_rate": 9.821661995353591e-06, + "loss": 2.1354, + "step": 9647 + }, + { + "epoch": 1.96, + "learning_rate": 9.819934936562462e-06, + "loss": 2.0748, + "step": 9648 + }, + { + "epoch": 1.96, + "learning_rate": 9.818207883143911e-06, + "loss": 2.1243, + "step": 9649 + }, + { + "epoch": 1.96, + "learning_rate": 9.816480835149478e-06, + "loss": 2.0057, + "step": 9650 + }, + { + "epoch": 1.96, + "learning_rate": 9.814753792630687e-06, + "loss": 2.1517, + "step": 9651 + }, + { + "epoch": 1.96, + "learning_rate": 9.813026755639068e-06, + "loss": 2.0635, + "step": 9652 + }, + { + "epoch": 1.96, + "learning_rate": 9.811299724226151e-06, + "loss": 2.0748, + "step": 9653 + }, + { + "epoch": 1.96, + "learning_rate": 9.809572698443467e-06, + "loss": 2.1275, + "step": 9654 + }, + { + "epoch": 1.96, + "learning_rate": 9.807845678342545e-06, + "loss": 2.1364, + "step": 9655 + }, + { + "epoch": 1.96, + "learning_rate": 9.806118663974913e-06, + "loss": 2.1505, + "step": 9656 + }, + { + "epoch": 1.96, + "learning_rate": 9.804391655392095e-06, + "loss": 2.1674, + "step": 9657 + }, + { + "epoch": 1.96, + "learning_rate": 9.802664652645627e-06, + "loss": 2.1016, + "step": 9658 + }, + { + "epoch": 1.96, + "learning_rate": 9.800937655787035e-06, + "loss": 2.137, + "step": 9659 + }, + { + "epoch": 1.96, + "learning_rate": 9.799210664867846e-06, + "loss": 2.1913, + "step": 9660 + }, + { + "epoch": 1.96, + "learning_rate": 9.79748367993959e-06, + "loss": 2.1159, + "step": 9661 + }, + { + "epoch": 1.96, + "learning_rate": 9.79575670105379e-06, + "loss": 2.1218, + "step": 9662 + }, + { + "epoch": 1.96, + "learning_rate": 9.79402972826198e-06, + "loss": 2.1442, + "step": 9663 + }, + { + "epoch": 1.96, + "learning_rate": 9.792302761615687e-06, + "loss": 2.1586, + "step": 9664 + }, + { + "epoch": 1.96, + "learning_rate": 9.790575801166432e-06, + "loss": 2.0409, + "step": 9665 + }, + { + "epoch": 1.96, + "learning_rate": 9.788848846965749e-06, + "loss": 2.2298, + "step": 9666 + }, + { + "epoch": 1.96, + "learning_rate": 9.78712189906516e-06, + "loss": 2.0858, + "step": 9667 + }, + { + "epoch": 1.96, + "learning_rate": 9.785394957516196e-06, + "loss": 2.1193, + "step": 9668 + }, + { + "epoch": 1.96, + "learning_rate": 9.78366802237038e-06, + "loss": 2.0714, + "step": 9669 + }, + { + "epoch": 1.96, + "learning_rate": 9.781941093679244e-06, + "loss": 2.2282, + "step": 9670 + }, + { + "epoch": 1.96, + "learning_rate": 9.780214171494307e-06, + "loss": 2.0838, + "step": 9671 + }, + { + "epoch": 1.96, + "learning_rate": 9.778487255867099e-06, + "loss": 2.045, + "step": 9672 + }, + { + "epoch": 1.96, + "learning_rate": 9.776760346849143e-06, + "loss": 2.0749, + "step": 9673 + }, + { + "epoch": 1.96, + "learning_rate": 9.77503344449197e-06, + "loss": 2.0776, + "step": 9674 + }, + { + "epoch": 1.96, + "learning_rate": 9.773306548847102e-06, + "loss": 2.2365, + "step": 9675 + }, + { + "epoch": 1.96, + "learning_rate": 9.771579659966064e-06, + "loss": 2.1229, + "step": 9676 + }, + { + "epoch": 1.96, + "learning_rate": 9.769852777900378e-06, + "loss": 2.0528, + "step": 9677 + }, + { + "epoch": 1.96, + "learning_rate": 9.768125902701574e-06, + "loss": 2.1357, + "step": 9678 + }, + { + "epoch": 1.96, + "learning_rate": 9.766399034421177e-06, + "loss": 2.1237, + "step": 9679 + }, + { + "epoch": 1.96, + "learning_rate": 9.764672173110709e-06, + "loss": 2.0638, + "step": 9680 + }, + { + "epoch": 1.97, + "learning_rate": 9.76294531882169e-06, + "loss": 2.1566, + "step": 9681 + }, + { + "epoch": 1.97, + "learning_rate": 9.761218471605653e-06, + "loss": 2.1426, + "step": 9682 + }, + { + "epoch": 1.97, + "learning_rate": 9.759491631514115e-06, + "loss": 2.1117, + "step": 9683 + }, + { + "epoch": 1.97, + "learning_rate": 9.757764798598604e-06, + "loss": 2.1371, + "step": 9684 + }, + { + "epoch": 1.97, + "learning_rate": 9.75603797291064e-06, + "loss": 2.2297, + "step": 9685 + }, + { + "epoch": 1.97, + "learning_rate": 9.754311154501743e-06, + "loss": 2.0541, + "step": 9686 + }, + { + "epoch": 1.97, + "learning_rate": 9.752584343423445e-06, + "loss": 2.1064, + "step": 9687 + }, + { + "epoch": 1.97, + "learning_rate": 9.750857539727264e-06, + "loss": 2.0949, + "step": 9688 + }, + { + "epoch": 1.97, + "learning_rate": 9.749130743464718e-06, + "loss": 2.197, + "step": 9689 + }, + { + "epoch": 1.97, + "learning_rate": 9.747403954687337e-06, + "loss": 2.109, + "step": 9690 + }, + { + "epoch": 1.97, + "learning_rate": 9.745677173446638e-06, + "loss": 2.0883, + "step": 9691 + }, + { + "epoch": 1.97, + "learning_rate": 9.743950399794145e-06, + "loss": 2.1326, + "step": 9692 + }, + { + "epoch": 1.97, + "learning_rate": 9.742223633781377e-06, + "loss": 2.1041, + "step": 9693 + }, + { + "epoch": 1.97, + "learning_rate": 9.740496875459861e-06, + "loss": 2.1303, + "step": 9694 + }, + { + "epoch": 1.97, + "learning_rate": 9.738770124881114e-06, + "loss": 2.1627, + "step": 9695 + }, + { + "epoch": 1.97, + "learning_rate": 9.737043382096653e-06, + "loss": 2.1859, + "step": 9696 + }, + { + "epoch": 1.97, + "learning_rate": 9.735316647158004e-06, + "loss": 2.1063, + "step": 9697 + }, + { + "epoch": 1.97, + "learning_rate": 9.733589920116689e-06, + "loss": 2.147, + "step": 9698 + }, + { + "epoch": 1.97, + "learning_rate": 9.731863201024225e-06, + "loss": 2.1406, + "step": 9699 + }, + { + "epoch": 1.97, + "learning_rate": 9.730136489932133e-06, + "loss": 2.1033, + "step": 9700 + }, + { + "epoch": 1.97, + "learning_rate": 9.728409786891929e-06, + "loss": 2.0917, + "step": 9701 + }, + { + "epoch": 1.97, + "learning_rate": 9.726683091955137e-06, + "loss": 2.1697, + "step": 9702 + }, + { + "epoch": 1.97, + "learning_rate": 9.724956405173278e-06, + "loss": 2.0439, + "step": 9703 + }, + { + "epoch": 1.97, + "learning_rate": 9.723229726597865e-06, + "loss": 2.1295, + "step": 9704 + }, + { + "epoch": 1.97, + "learning_rate": 9.72150305628042e-06, + "loss": 2.1376, + "step": 9705 + }, + { + "epoch": 1.97, + "learning_rate": 9.719776394272459e-06, + "loss": 2.1561, + "step": 9706 + }, + { + "epoch": 1.97, + "learning_rate": 9.718049740625505e-06, + "loss": 2.1083, + "step": 9707 + }, + { + "epoch": 1.97, + "learning_rate": 9.716323095391073e-06, + "loss": 2.1899, + "step": 9708 + }, + { + "epoch": 1.97, + "learning_rate": 9.714596458620683e-06, + "loss": 2.1304, + "step": 9709 + }, + { + "epoch": 1.97, + "learning_rate": 9.712869830365846e-06, + "loss": 2.0601, + "step": 9710 + }, + { + "epoch": 1.97, + "learning_rate": 9.711143210678085e-06, + "loss": 2.1107, + "step": 9711 + }, + { + "epoch": 1.97, + "learning_rate": 9.709416599608919e-06, + "loss": 2.2012, + "step": 9712 + }, + { + "epoch": 1.97, + "learning_rate": 9.707689997209858e-06, + "loss": 2.0964, + "step": 9713 + }, + { + "epoch": 1.97, + "learning_rate": 9.705963403532424e-06, + "loss": 2.1139, + "step": 9714 + }, + { + "epoch": 1.97, + "learning_rate": 9.70423681862813e-06, + "loss": 2.1796, + "step": 9715 + }, + { + "epoch": 1.97, + "learning_rate": 9.702510242548494e-06, + "loss": 2.2041, + "step": 9716 + }, + { + "epoch": 1.97, + "learning_rate": 9.70078367534503e-06, + "loss": 2.137, + "step": 9717 + }, + { + "epoch": 1.97, + "learning_rate": 9.699057117069254e-06, + "loss": 2.0797, + "step": 9718 + }, + { + "epoch": 1.97, + "learning_rate": 9.697330567772683e-06, + "loss": 2.0707, + "step": 9719 + }, + { + "epoch": 1.97, + "learning_rate": 9.695604027506831e-06, + "loss": 2.2128, + "step": 9720 + }, + { + "epoch": 1.97, + "learning_rate": 9.693877496323207e-06, + "loss": 2.1477, + "step": 9721 + }, + { + "epoch": 1.97, + "learning_rate": 9.692150974273334e-06, + "loss": 2.175, + "step": 9722 + }, + { + "epoch": 1.97, + "learning_rate": 9.690424461408723e-06, + "loss": 2.0686, + "step": 9723 + }, + { + "epoch": 1.97, + "learning_rate": 9.688697957780887e-06, + "loss": 2.1195, + "step": 9724 + }, + { + "epoch": 1.97, + "learning_rate": 9.68697146344134e-06, + "loss": 2.0877, + "step": 9725 + }, + { + "epoch": 1.97, + "learning_rate": 9.68524497844159e-06, + "loss": 2.1361, + "step": 9726 + }, + { + "epoch": 1.97, + "learning_rate": 9.68351850283316e-06, + "loss": 2.0723, + "step": 9727 + }, + { + "epoch": 1.97, + "learning_rate": 9.681792036667556e-06, + "loss": 2.1734, + "step": 9728 + }, + { + "epoch": 1.97, + "learning_rate": 9.680065579996293e-06, + "loss": 2.1412, + "step": 9729 + }, + { + "epoch": 1.98, + "learning_rate": 9.678339132870879e-06, + "loss": 2.1442, + "step": 9730 + }, + { + "epoch": 1.98, + "learning_rate": 9.676612695342829e-06, + "loss": 2.1999, + "step": 9731 + }, + { + "epoch": 1.98, + "learning_rate": 9.674886267463656e-06, + "loss": 2.1586, + "step": 9732 + }, + { + "epoch": 1.98, + "learning_rate": 9.673159849284871e-06, + "loss": 2.1694, + "step": 9733 + }, + { + "epoch": 1.98, + "learning_rate": 9.671433440857981e-06, + "loss": 2.138, + "step": 9734 + }, + { + "epoch": 1.98, + "learning_rate": 9.669707042234502e-06, + "loss": 2.1338, + "step": 9735 + }, + { + "epoch": 1.98, + "learning_rate": 9.667980653465939e-06, + "loss": 2.1441, + "step": 9736 + }, + { + "epoch": 1.98, + "learning_rate": 9.666254274603804e-06, + "loss": 2.138, + "step": 9737 + }, + { + "epoch": 1.98, + "learning_rate": 9.66452790569961e-06, + "loss": 2.0851, + "step": 9738 + }, + { + "epoch": 1.98, + "learning_rate": 9.662801546804865e-06, + "loss": 2.0447, + "step": 9739 + }, + { + "epoch": 1.98, + "learning_rate": 9.661075197971078e-06, + "loss": 2.0672, + "step": 9740 + }, + { + "epoch": 1.98, + "learning_rate": 9.659348859249751e-06, + "loss": 2.1218, + "step": 9741 + }, + { + "epoch": 1.98, + "learning_rate": 9.657622530692404e-06, + "loss": 2.1178, + "step": 9742 + }, + { + "epoch": 1.98, + "learning_rate": 9.65589621235054e-06, + "loss": 2.0793, + "step": 9743 + }, + { + "epoch": 1.98, + "learning_rate": 9.654169904275668e-06, + "loss": 2.1548, + "step": 9744 + }, + { + "epoch": 1.98, + "learning_rate": 9.652443606519292e-06, + "loss": 2.019, + "step": 9745 + }, + { + "epoch": 1.98, + "learning_rate": 9.650717319132921e-06, + "loss": 2.126, + "step": 9746 + }, + { + "epoch": 1.98, + "learning_rate": 9.648991042168065e-06, + "loss": 2.0627, + "step": 9747 + }, + { + "epoch": 1.98, + "learning_rate": 9.64726477567623e-06, + "loss": 2.1613, + "step": 9748 + }, + { + "epoch": 1.98, + "learning_rate": 9.645538519708922e-06, + "loss": 2.2172, + "step": 9749 + }, + { + "epoch": 1.98, + "learning_rate": 9.643812274317644e-06, + "loss": 2.1113, + "step": 9750 + }, + { + "epoch": 1.98, + "learning_rate": 9.642086039553905e-06, + "loss": 2.1441, + "step": 9751 + }, + { + "epoch": 1.98, + "learning_rate": 9.640359815469211e-06, + "loss": 2.159, + "step": 9752 + }, + { + "epoch": 1.98, + "learning_rate": 9.638633602115066e-06, + "loss": 2.195, + "step": 9753 + }, + { + "epoch": 1.98, + "learning_rate": 9.636907399542973e-06, + "loss": 2.0949, + "step": 9754 + }, + { + "epoch": 1.98, + "learning_rate": 9.635181207804441e-06, + "loss": 2.1763, + "step": 9755 + }, + { + "epoch": 1.98, + "learning_rate": 9.63345502695097e-06, + "loss": 2.1444, + "step": 9756 + }, + { + "epoch": 1.98, + "learning_rate": 9.631728857034066e-06, + "loss": 2.1591, + "step": 9757 + }, + { + "epoch": 1.98, + "learning_rate": 9.63000269810523e-06, + "loss": 2.0688, + "step": 9758 + }, + { + "epoch": 1.98, + "learning_rate": 9.628276550215972e-06, + "loss": 2.0685, + "step": 9759 + }, + { + "epoch": 1.98, + "learning_rate": 9.626550413417788e-06, + "loss": 2.0618, + "step": 9760 + }, + { + "epoch": 1.98, + "learning_rate": 9.624824287762178e-06, + "loss": 2.1369, + "step": 9761 + }, + { + "epoch": 1.98, + "learning_rate": 9.623098173300655e-06, + "loss": 2.088, + "step": 9762 + }, + { + "epoch": 1.98, + "learning_rate": 9.621372070084715e-06, + "loss": 2.0663, + "step": 9763 + }, + { + "epoch": 1.98, + "learning_rate": 9.619645978165858e-06, + "loss": 2.1009, + "step": 9764 + }, + { + "epoch": 1.98, + "learning_rate": 9.617919897595588e-06, + "loss": 2.2066, + "step": 9765 + }, + { + "epoch": 1.98, + "learning_rate": 9.616193828425399e-06, + "loss": 2.0742, + "step": 9766 + }, + { + "epoch": 1.98, + "learning_rate": 9.614467770706804e-06, + "loss": 2.1976, + "step": 9767 + }, + { + "epoch": 1.98, + "learning_rate": 9.612741724491294e-06, + "loss": 2.0562, + "step": 9768 + }, + { + "epoch": 1.98, + "learning_rate": 9.611015689830372e-06, + "loss": 2.0521, + "step": 9769 + }, + { + "epoch": 1.98, + "learning_rate": 9.609289666775537e-06, + "loss": 2.0774, + "step": 9770 + }, + { + "epoch": 1.98, + "learning_rate": 9.607563655378285e-06, + "loss": 2.1567, + "step": 9771 + }, + { + "epoch": 1.98, + "learning_rate": 9.60583765569012e-06, + "loss": 2.1032, + "step": 9772 + }, + { + "epoch": 1.98, + "learning_rate": 9.60411166776254e-06, + "loss": 2.1687, + "step": 9773 + }, + { + "epoch": 1.98, + "learning_rate": 9.602385691647039e-06, + "loss": 2.1347, + "step": 9774 + }, + { + "epoch": 1.98, + "learning_rate": 9.600659727395118e-06, + "loss": 2.0728, + "step": 9775 + }, + { + "epoch": 1.98, + "learning_rate": 9.598933775058273e-06, + "loss": 2.1358, + "step": 9776 + }, + { + "epoch": 1.98, + "learning_rate": 9.597207834688006e-06, + "loss": 2.0947, + "step": 9777 + }, + { + "epoch": 1.98, + "learning_rate": 9.595481906335802e-06, + "loss": 2.1289, + "step": 9778 + }, + { + "epoch": 1.98, + "learning_rate": 9.593755990053171e-06, + "loss": 2.0871, + "step": 9779 + }, + { + "epoch": 1.99, + "learning_rate": 9.5920300858916e-06, + "loss": 2.2045, + "step": 9780 + }, + { + "epoch": 1.99, + "learning_rate": 9.590304193902589e-06, + "loss": 2.1646, + "step": 9781 + }, + { + "epoch": 1.99, + "learning_rate": 9.58857831413763e-06, + "loss": 2.1164, + "step": 9782 + }, + { + "epoch": 1.99, + "learning_rate": 9.58685244664822e-06, + "loss": 2.1374, + "step": 9783 + }, + { + "epoch": 1.99, + "learning_rate": 9.585126591485857e-06, + "loss": 2.178, + "step": 9784 + }, + { + "epoch": 1.99, + "learning_rate": 9.583400748702023e-06, + "loss": 2.1595, + "step": 9785 + }, + { + "epoch": 1.99, + "learning_rate": 9.581674918348226e-06, + "loss": 2.1892, + "step": 9786 + }, + { + "epoch": 1.99, + "learning_rate": 9.579949100475953e-06, + "loss": 2.1672, + "step": 9787 + }, + { + "epoch": 1.99, + "learning_rate": 9.5782232951367e-06, + "loss": 2.1435, + "step": 9788 + }, + { + "epoch": 1.99, + "learning_rate": 9.576497502381955e-06, + "loss": 2.1503, + "step": 9789 + }, + { + "epoch": 1.99, + "learning_rate": 9.574771722263208e-06, + "loss": 2.189, + "step": 9790 + }, + { + "epoch": 1.99, + "learning_rate": 9.573045954831962e-06, + "loss": 2.1609, + "step": 9791 + }, + { + "epoch": 1.99, + "learning_rate": 9.571320200139702e-06, + "loss": 2.0517, + "step": 9792 + }, + { + "epoch": 1.99, + "learning_rate": 9.569594458237917e-06, + "loss": 2.1769, + "step": 9793 + }, + { + "epoch": 1.99, + "learning_rate": 9.567868729178099e-06, + "loss": 2.1193, + "step": 9794 + }, + { + "epoch": 1.99, + "learning_rate": 9.56614301301174e-06, + "loss": 2.1355, + "step": 9795 + }, + { + "epoch": 1.99, + "learning_rate": 9.564417309790331e-06, + "loss": 2.1634, + "step": 9796 + }, + { + "epoch": 1.99, + "learning_rate": 9.56269161956536e-06, + "loss": 2.1684, + "step": 9797 + }, + { + "epoch": 1.99, + "learning_rate": 9.560965942388316e-06, + "loss": 2.1234, + "step": 9798 + }, + { + "epoch": 1.99, + "learning_rate": 9.559240278310689e-06, + "loss": 2.0324, + "step": 9799 + }, + { + "epoch": 1.99, + "learning_rate": 9.557514627383964e-06, + "loss": 2.1499, + "step": 9800 + }, + { + "epoch": 1.99, + "learning_rate": 9.555788989659636e-06, + "loss": 2.0829, + "step": 9801 + }, + { + "epoch": 1.99, + "learning_rate": 9.554063365189185e-06, + "loss": 2.1364, + "step": 9802 + }, + { + "epoch": 1.99, + "learning_rate": 9.552337754024104e-06, + "loss": 2.0107, + "step": 9803 + }, + { + "epoch": 1.99, + "learning_rate": 9.550612156215877e-06, + "loss": 2.1486, + "step": 9804 + }, + { + "epoch": 1.99, + "learning_rate": 9.548886571815989e-06, + "loss": 2.1344, + "step": 9805 + }, + { + "epoch": 1.99, + "learning_rate": 9.54716100087593e-06, + "loss": 2.1204, + "step": 9806 + }, + { + "epoch": 1.99, + "learning_rate": 9.545435443447185e-06, + "loss": 2.1618, + "step": 9807 + }, + { + "epoch": 1.99, + "learning_rate": 9.543709899581238e-06, + "loss": 2.1491, + "step": 9808 + }, + { + "epoch": 1.99, + "learning_rate": 9.541984369329574e-06, + "loss": 2.1129, + "step": 9809 + }, + { + "epoch": 1.99, + "learning_rate": 9.540258852743675e-06, + "loss": 2.0859, + "step": 9810 + }, + { + "epoch": 1.99, + "learning_rate": 9.53853334987503e-06, + "loss": 2.0276, + "step": 9811 + }, + { + "epoch": 1.99, + "learning_rate": 9.53680786077512e-06, + "loss": 2.1469, + "step": 9812 + }, + { + "epoch": 1.99, + "learning_rate": 9.535082385495432e-06, + "loss": 2.0828, + "step": 9813 + }, + { + "epoch": 1.99, + "learning_rate": 9.533356924087442e-06, + "loss": 2.101, + "step": 9814 + }, + { + "epoch": 1.99, + "learning_rate": 9.531631476602632e-06, + "loss": 2.1485, + "step": 9815 + }, + { + "epoch": 1.99, + "learning_rate": 9.529906043092494e-06, + "loss": 2.0801, + "step": 9816 + }, + { + "epoch": 1.99, + "learning_rate": 9.528180623608502e-06, + "loss": 2.1156, + "step": 9817 + }, + { + "epoch": 1.99, + "learning_rate": 9.526455218202136e-06, + "loss": 2.1599, + "step": 9818 + }, + { + "epoch": 1.99, + "learning_rate": 9.524729826924883e-06, + "loss": 2.1141, + "step": 9819 + }, + { + "epoch": 1.99, + "learning_rate": 9.523004449828217e-06, + "loss": 2.1015, + "step": 9820 + }, + { + "epoch": 1.99, + "learning_rate": 9.521279086963621e-06, + "loss": 2.1662, + "step": 9821 + }, + { + "epoch": 1.99, + "learning_rate": 9.519553738382573e-06, + "loss": 2.171, + "step": 9822 + }, + { + "epoch": 1.99, + "learning_rate": 9.517828404136557e-06, + "loss": 2.1642, + "step": 9823 + }, + { + "epoch": 1.99, + "learning_rate": 9.516103084277046e-06, + "loss": 2.2599, + "step": 9824 + }, + { + "epoch": 1.99, + "learning_rate": 9.51437777885552e-06, + "loss": 2.127, + "step": 9825 + }, + { + "epoch": 1.99, + "learning_rate": 9.512652487923454e-06, + "loss": 2.1747, + "step": 9826 + }, + { + "epoch": 1.99, + "learning_rate": 9.510927211532331e-06, + "loss": 2.0702, + "step": 9827 + }, + { + "epoch": 1.99, + "learning_rate": 9.509201949733627e-06, + "loss": 2.1519, + "step": 9828 + }, + { + "epoch": 2.0, + "learning_rate": 9.507476702578814e-06, + "loss": 2.054, + "step": 9829 + }, + { + "epoch": 2.0, + "learning_rate": 9.505751470119366e-06, + "loss": 2.0976, + "step": 9830 + }, + { + "epoch": 2.0, + "learning_rate": 9.504026252406769e-06, + "loss": 2.1289, + "step": 9831 + }, + { + "epoch": 2.0, + "learning_rate": 9.502301049492492e-06, + "loss": 2.1323, + "step": 9832 + }, + { + "epoch": 2.0, + "learning_rate": 9.50057586142801e-06, + "loss": 2.0331, + "step": 9833 + }, + { + "epoch": 2.0, + "learning_rate": 9.498850688264797e-06, + "loss": 2.1187, + "step": 9834 + }, + { + "epoch": 2.0, + "learning_rate": 9.497125530054322e-06, + "loss": 2.0834, + "step": 9835 + }, + { + "epoch": 2.0, + "learning_rate": 9.495400386848069e-06, + "loss": 2.234, + "step": 9836 + }, + { + "epoch": 2.0, + "learning_rate": 9.493675258697503e-06, + "loss": 2.1169, + "step": 9837 + }, + { + "epoch": 2.0, + "learning_rate": 9.491950145654102e-06, + "loss": 2.1304, + "step": 9838 + }, + { + "epoch": 2.0, + "learning_rate": 9.490225047769329e-06, + "loss": 2.2184, + "step": 9839 + }, + { + "epoch": 2.0, + "learning_rate": 9.488499965094664e-06, + "loss": 2.1465, + "step": 9840 + }, + { + "epoch": 2.0, + "learning_rate": 9.486774897681578e-06, + "loss": 2.0601, + "step": 9841 + }, + { + "epoch": 2.0, + "learning_rate": 9.485049845581535e-06, + "loss": 2.0934, + "step": 9842 + }, + { + "epoch": 2.0, + "learning_rate": 9.483324808846014e-06, + "loss": 2.0884, + "step": 9843 + }, + { + "epoch": 2.0, + "learning_rate": 9.481599787526479e-06, + "loss": 2.0513, + "step": 9844 + }, + { + "epoch": 2.0, + "learning_rate": 9.479874781674397e-06, + "loss": 2.1207, + "step": 9845 + }, + { + "epoch": 2.0, + "learning_rate": 9.478149791341243e-06, + "loss": 2.1264, + "step": 9846 + }, + { + "epoch": 2.0, + "learning_rate": 9.476424816578483e-06, + "loss": 2.1068, + "step": 9847 + }, + { + "epoch": 2.0, + "learning_rate": 9.474699857437585e-06, + "loss": 2.085, + "step": 9848 + }, + { + "epoch": 2.0, + "learning_rate": 9.472974913970015e-06, + "loss": 2.1747, + "step": 9849 + }, + { + "epoch": 2.0, + "learning_rate": 9.47124998622724e-06, + "loss": 2.0568, + "step": 9850 + }, + { + "epoch": 2.0, + "learning_rate": 9.469525074260728e-06, + "loss": 2.0466, + "step": 9851 + }, + { + "epoch": 2.0, + "learning_rate": 9.467800178121946e-06, + "loss": 2.1907, + "step": 9852 + }, + { + "epoch": 2.0, + "learning_rate": 9.466075297862359e-06, + "loss": 2.1555, + "step": 9853 + }, + { + "epoch": 2.0, + "learning_rate": 9.46435043353343e-06, + "loss": 2.0673, + "step": 9854 + }, + { + "epoch": 2.0, + "learning_rate": 9.46262558518662e-06, + "loss": 2.0435, + "step": 9855 + }, + { + "epoch": 2.0, + "learning_rate": 9.460900752873403e-06, + "loss": 2.0327, + "step": 9856 + }, + { + "epoch": 2.0, + "learning_rate": 9.459175936645237e-06, + "loss": 2.137, + "step": 9857 + }, + { + "epoch": 2.0, + "learning_rate": 9.457451136553586e-06, + "loss": 2.0634, + "step": 9858 + }, + { + "epoch": 2.0, + "learning_rate": 9.45572635264991e-06, + "loss": 2.094, + "step": 9859 + }, + { + "epoch": 2.0, + "learning_rate": 9.454001584985676e-06, + "loss": 2.1116, + "step": 9860 + }, + { + "epoch": 2.0, + "learning_rate": 9.452276833612342e-06, + "loss": 2.2046, + "step": 9861 + }, + { + "epoch": 2.0, + "learning_rate": 9.450552098581373e-06, + "loss": 2.1351, + "step": 9862 + }, + { + "epoch": 2.0, + "learning_rate": 9.448827379944224e-06, + "loss": 2.1365, + "step": 9863 + }, + { + "epoch": 2.0, + "learning_rate": 9.44710267775236e-06, + "loss": 2.1414, + "step": 9864 + }, + { + "epoch": 2.0, + "learning_rate": 9.445377992057239e-06, + "loss": 2.1336, + "step": 9865 + }, + { + "epoch": 2.0, + "learning_rate": 9.44365332291032e-06, + "loss": 2.1289, + "step": 9866 + }, + { + "epoch": 2.0, + "learning_rate": 9.441928670363064e-06, + "loss": 2.2009, + "step": 9867 + }, + { + "epoch": 2.0, + "learning_rate": 9.440204034466928e-06, + "loss": 2.0706, + "step": 9868 + }, + { + "epoch": 2.0, + "learning_rate": 9.438479415273368e-06, + "loss": 2.0968, + "step": 9869 + }, + { + "epoch": 2.0, + "learning_rate": 9.436754812833843e-06, + "loss": 2.0725, + "step": 9870 + }, + { + "epoch": 2.0, + "learning_rate": 9.43503022719981e-06, + "loss": 2.1617, + "step": 9871 + }, + { + "epoch": 2.0, + "learning_rate": 9.433305658422727e-06, + "loss": 2.1063, + "step": 9872 + }, + { + "epoch": 2.0, + "learning_rate": 9.431581106554047e-06, + "loss": 2.0672, + "step": 9873 + }, + { + "epoch": 2.0, + "learning_rate": 9.42985657164522e-06, + "loss": 2.1319, + "step": 9874 + }, + { + "epoch": 2.0, + "learning_rate": 9.428132053747713e-06, + "loss": 2.1422, + "step": 9875 + }, + { + "epoch": 2.0, + "learning_rate": 9.426407552912973e-06, + "loss": 2.0536, + "step": 9876 + }, + { + "epoch": 2.0, + "learning_rate": 9.424683069192454e-06, + "loss": 2.1511, + "step": 9877 + }, + { + "epoch": 2.01, + "learning_rate": 9.422958602637612e-06, + "loss": 2.1873, + "step": 9878 + }, + { + "epoch": 2.01, + "learning_rate": 9.421234153299892e-06, + "loss": 2.1231, + "step": 9879 + }, + { + "epoch": 2.01, + "learning_rate": 9.419509721230758e-06, + "loss": 2.192, + "step": 9880 + }, + { + "epoch": 2.01, + "learning_rate": 9.417785306481656e-06, + "loss": 2.0972, + "step": 9881 + }, + { + "epoch": 2.01, + "learning_rate": 9.416060909104035e-06, + "loss": 2.1491, + "step": 9882 + }, + { + "epoch": 2.01, + "learning_rate": 9.414336529149347e-06, + "loss": 2.1253, + "step": 9883 + }, + { + "epoch": 2.01, + "learning_rate": 9.412612166669041e-06, + "loss": 2.0376, + "step": 9884 + }, + { + "epoch": 2.01, + "learning_rate": 9.410887821714571e-06, + "loss": 2.0798, + "step": 9885 + }, + { + "epoch": 2.01, + "learning_rate": 9.409163494337383e-06, + "loss": 2.1647, + "step": 9886 + }, + { + "epoch": 2.01, + "learning_rate": 9.407439184588926e-06, + "loss": 2.0622, + "step": 9887 + }, + { + "epoch": 2.01, + "learning_rate": 9.405714892520648e-06, + "loss": 2.143, + "step": 9888 + }, + { + "epoch": 2.01, + "learning_rate": 9.403990618183996e-06, + "loss": 2.0942, + "step": 9889 + }, + { + "epoch": 2.01, + "learning_rate": 9.402266361630417e-06, + "loss": 2.1478, + "step": 9890 + }, + { + "epoch": 2.01, + "learning_rate": 9.400542122911361e-06, + "loss": 2.1155, + "step": 9891 + }, + { + "epoch": 2.01, + "learning_rate": 9.39881790207827e-06, + "loss": 2.1311, + "step": 9892 + }, + { + "epoch": 2.01, + "learning_rate": 9.397093699182592e-06, + "loss": 2.1261, + "step": 9893 + }, + { + "epoch": 2.01, + "learning_rate": 9.395369514275764e-06, + "loss": 2.0848, + "step": 9894 + }, + { + "epoch": 2.01, + "learning_rate": 9.393645347409243e-06, + "loss": 2.1701, + "step": 9895 + }, + { + "epoch": 2.01, + "learning_rate": 9.391921198634466e-06, + "loss": 2.1639, + "step": 9896 + }, + { + "epoch": 2.01, + "learning_rate": 9.390197068002875e-06, + "loss": 2.1236, + "step": 9897 + }, + { + "epoch": 2.01, + "learning_rate": 9.388472955565917e-06, + "loss": 2.1477, + "step": 9898 + }, + { + "epoch": 2.01, + "learning_rate": 9.386748861375025e-06, + "loss": 2.0636, + "step": 9899 + }, + { + "epoch": 2.01, + "learning_rate": 9.385024785481653e-06, + "loss": 2.152, + "step": 9900 + }, + { + "epoch": 2.01, + "learning_rate": 9.383300727937237e-06, + "loss": 2.0308, + "step": 9901 + }, + { + "epoch": 2.01, + "learning_rate": 9.381576688793216e-06, + "loss": 2.086, + "step": 9902 + }, + { + "epoch": 2.01, + "learning_rate": 9.379852668101029e-06, + "loss": 2.2052, + "step": 9903 + }, + { + "epoch": 2.01, + "learning_rate": 9.378128665912117e-06, + "loss": 2.1399, + "step": 9904 + }, + { + "epoch": 2.01, + "learning_rate": 9.37640468227792e-06, + "loss": 2.107, + "step": 9905 + }, + { + "epoch": 2.01, + "learning_rate": 9.374680717249879e-06, + "loss": 2.0766, + "step": 9906 + }, + { + "epoch": 2.01, + "learning_rate": 9.372956770879423e-06, + "loss": 2.1121, + "step": 9907 + }, + { + "epoch": 2.01, + "learning_rate": 9.371232843217999e-06, + "loss": 2.1223, + "step": 9908 + }, + { + "epoch": 2.01, + "learning_rate": 9.369508934317035e-06, + "loss": 2.1998, + "step": 9909 + }, + { + "epoch": 2.01, + "learning_rate": 9.367785044227975e-06, + "loss": 2.0967, + "step": 9910 + }, + { + "epoch": 2.01, + "learning_rate": 9.366061173002248e-06, + "loss": 2.1142, + "step": 9911 + }, + { + "epoch": 2.01, + "learning_rate": 9.364337320691294e-06, + "loss": 2.1863, + "step": 9912 + }, + { + "epoch": 2.01, + "learning_rate": 9.362613487346545e-06, + "loss": 2.1437, + "step": 9913 + }, + { + "epoch": 2.01, + "learning_rate": 9.360889673019432e-06, + "loss": 2.1014, + "step": 9914 + }, + { + "epoch": 2.01, + "learning_rate": 9.359165877761394e-06, + "loss": 2.063, + "step": 9915 + }, + { + "epoch": 2.01, + "learning_rate": 9.357442101623863e-06, + "loss": 2.1955, + "step": 9916 + }, + { + "epoch": 2.01, + "learning_rate": 9.355718344658267e-06, + "loss": 2.1455, + "step": 9917 + }, + { + "epoch": 2.01, + "learning_rate": 9.353994606916041e-06, + "loss": 2.1388, + "step": 9918 + }, + { + "epoch": 2.01, + "learning_rate": 9.35227088844861e-06, + "loss": 2.072, + "step": 9919 + }, + { + "epoch": 2.01, + "learning_rate": 9.350547189307413e-06, + "loss": 2.1018, + "step": 9920 + }, + { + "epoch": 2.01, + "learning_rate": 9.348823509543878e-06, + "loss": 2.1351, + "step": 9921 + }, + { + "epoch": 2.01, + "learning_rate": 9.347099849209432e-06, + "loss": 2.1278, + "step": 9922 + }, + { + "epoch": 2.01, + "learning_rate": 9.345376208355502e-06, + "loss": 2.0868, + "step": 9923 + }, + { + "epoch": 2.01, + "learning_rate": 9.343652587033516e-06, + "loss": 2.1114, + "step": 9924 + }, + { + "epoch": 2.01, + "learning_rate": 9.341928985294907e-06, + "loss": 2.1872, + "step": 9925 + }, + { + "epoch": 2.01, + "learning_rate": 9.340205403191098e-06, + "loss": 2.0828, + "step": 9926 + }, + { + "epoch": 2.02, + "learning_rate": 9.338481840773515e-06, + "loss": 2.1094, + "step": 9927 + }, + { + "epoch": 2.02, + "learning_rate": 9.336758298093585e-06, + "loss": 2.0667, + "step": 9928 + }, + { + "epoch": 2.02, + "learning_rate": 9.335034775202731e-06, + "loss": 2.1756, + "step": 9929 + }, + { + "epoch": 2.02, + "learning_rate": 9.333311272152383e-06, + "loss": 2.1172, + "step": 9930 + }, + { + "epoch": 2.02, + "learning_rate": 9.331587788993958e-06, + "loss": 2.1363, + "step": 9931 + }, + { + "epoch": 2.02, + "learning_rate": 9.329864325778885e-06, + "loss": 2.1347, + "step": 9932 + }, + { + "epoch": 2.02, + "learning_rate": 9.328140882558586e-06, + "loss": 2.1144, + "step": 9933 + }, + { + "epoch": 2.02, + "learning_rate": 9.326417459384478e-06, + "loss": 2.0776, + "step": 9934 + }, + { + "epoch": 2.02, + "learning_rate": 9.324694056307988e-06, + "loss": 2.1569, + "step": 9935 + }, + { + "epoch": 2.02, + "learning_rate": 9.322970673380536e-06, + "loss": 2.0505, + "step": 9936 + }, + { + "epoch": 2.02, + "learning_rate": 9.321247310653543e-06, + "loss": 2.0833, + "step": 9937 + }, + { + "epoch": 2.02, + "learning_rate": 9.319523968178426e-06, + "loss": 2.132, + "step": 9938 + }, + { + "epoch": 2.02, + "learning_rate": 9.317800646006603e-06, + "loss": 2.159, + "step": 9939 + }, + { + "epoch": 2.02, + "learning_rate": 9.3160773441895e-06, + "loss": 2.1104, + "step": 9940 + }, + { + "epoch": 2.02, + "learning_rate": 9.31435406277853e-06, + "loss": 2.0516, + "step": 9941 + }, + { + "epoch": 2.02, + "learning_rate": 9.312630801825112e-06, + "loss": 2.1265, + "step": 9942 + }, + { + "epoch": 2.02, + "learning_rate": 9.31090756138066e-06, + "loss": 2.1296, + "step": 9943 + }, + { + "epoch": 2.02, + "learning_rate": 9.309184341496586e-06, + "loss": 2.1505, + "step": 9944 + }, + { + "epoch": 2.02, + "learning_rate": 9.307461142224318e-06, + "loss": 2.0428, + "step": 9945 + }, + { + "epoch": 2.02, + "learning_rate": 9.305737963615265e-06, + "loss": 1.9991, + "step": 9946 + }, + { + "epoch": 2.02, + "learning_rate": 9.304014805720836e-06, + "loss": 2.1922, + "step": 9947 + }, + { + "epoch": 2.02, + "learning_rate": 9.302291668592453e-06, + "loss": 2.1143, + "step": 9948 + }, + { + "epoch": 2.02, + "learning_rate": 9.300568552281522e-06, + "loss": 2.1819, + "step": 9949 + }, + { + "epoch": 2.02, + "learning_rate": 9.29884545683946e-06, + "loss": 2.1914, + "step": 9950 + }, + { + "epoch": 2.02, + "learning_rate": 9.297122382317678e-06, + "loss": 2.1321, + "step": 9951 + }, + { + "epoch": 2.02, + "learning_rate": 9.295399328767588e-06, + "loss": 2.1833, + "step": 9952 + }, + { + "epoch": 2.02, + "learning_rate": 9.293676296240596e-06, + "loss": 2.0111, + "step": 9953 + }, + { + "epoch": 2.02, + "learning_rate": 9.291953284788118e-06, + "loss": 2.1574, + "step": 9954 + }, + { + "epoch": 2.02, + "learning_rate": 9.29023029446156e-06, + "loss": 2.1936, + "step": 9955 + }, + { + "epoch": 2.02, + "learning_rate": 9.288507325312333e-06, + "loss": 2.1051, + "step": 9956 + }, + { + "epoch": 2.02, + "learning_rate": 9.286784377391842e-06, + "loss": 2.1233, + "step": 9957 + }, + { + "epoch": 2.02, + "learning_rate": 9.285061450751495e-06, + "loss": 2.149, + "step": 9958 + }, + { + "epoch": 2.02, + "learning_rate": 9.283338545442698e-06, + "loss": 2.0298, + "step": 9959 + }, + { + "epoch": 2.02, + "learning_rate": 9.281615661516862e-06, + "loss": 2.2014, + "step": 9960 + }, + { + "epoch": 2.02, + "learning_rate": 9.27989279902539e-06, + "loss": 2.0959, + "step": 9961 + }, + { + "epoch": 2.02, + "learning_rate": 9.278169958019685e-06, + "loss": 2.1756, + "step": 9962 + }, + { + "epoch": 2.02, + "learning_rate": 9.27644713855115e-06, + "loss": 2.1379, + "step": 9963 + }, + { + "epoch": 2.02, + "learning_rate": 9.274724340671193e-06, + "loss": 2.1687, + "step": 9964 + }, + { + "epoch": 2.02, + "learning_rate": 9.273001564431216e-06, + "loss": 2.1261, + "step": 9965 + }, + { + "epoch": 2.02, + "learning_rate": 9.27127880988262e-06, + "loss": 2.2033, + "step": 9966 + }, + { + "epoch": 2.02, + "learning_rate": 9.269556077076806e-06, + "loss": 2.187, + "step": 9967 + }, + { + "epoch": 2.02, + "learning_rate": 9.267833366065173e-06, + "loss": 1.9963, + "step": 9968 + }, + { + "epoch": 2.02, + "learning_rate": 9.266110676899127e-06, + "loss": 2.1449, + "step": 9969 + }, + { + "epoch": 2.02, + "learning_rate": 9.264388009630068e-06, + "loss": 2.0792, + "step": 9970 + }, + { + "epoch": 2.02, + "learning_rate": 9.262665364309389e-06, + "loss": 2.1643, + "step": 9971 + }, + { + "epoch": 2.02, + "learning_rate": 9.260942740988493e-06, + "loss": 2.0823, + "step": 9972 + }, + { + "epoch": 2.02, + "learning_rate": 9.259220139718772e-06, + "loss": 2.1089, + "step": 9973 + }, + { + "epoch": 2.02, + "learning_rate": 9.257497560551631e-06, + "loss": 2.1502, + "step": 9974 + }, + { + "epoch": 2.02, + "learning_rate": 9.255775003538462e-06, + "loss": 2.0592, + "step": 9975 + }, + { + "epoch": 2.02, + "learning_rate": 9.254052468730662e-06, + "loss": 2.1688, + "step": 9976 + }, + { + "epoch": 2.03, + "learning_rate": 9.252329956179626e-06, + "loss": 2.0524, + "step": 9977 + }, + { + "epoch": 2.03, + "learning_rate": 9.250607465936746e-06, + "loss": 2.1294, + "step": 9978 + }, + { + "epoch": 2.03, + "learning_rate": 9.248884998053418e-06, + "loss": 2.0976, + "step": 9979 + }, + { + "epoch": 2.03, + "learning_rate": 9.247162552581036e-06, + "loss": 2.1212, + "step": 9980 + }, + { + "epoch": 2.03, + "learning_rate": 9.245440129570993e-06, + "loss": 2.1953, + "step": 9981 + }, + { + "epoch": 2.03, + "learning_rate": 9.243717729074679e-06, + "loss": 2.042, + "step": 9982 + }, + { + "epoch": 2.03, + "learning_rate": 9.241995351143478e-06, + "loss": 2.137, + "step": 9983 + }, + { + "epoch": 2.03, + "learning_rate": 9.240272995828794e-06, + "loss": 2.1674, + "step": 9984 + }, + { + "epoch": 2.03, + "learning_rate": 9.238550663182012e-06, + "loss": 2.1075, + "step": 9985 + }, + { + "epoch": 2.03, + "learning_rate": 9.236828353254517e-06, + "loss": 2.1441, + "step": 9986 + }, + { + "epoch": 2.03, + "learning_rate": 9.235106066097702e-06, + "loss": 2.1064, + "step": 9987 + }, + { + "epoch": 2.03, + "learning_rate": 9.233383801762945e-06, + "loss": 2.0943, + "step": 9988 + }, + { + "epoch": 2.03, + "learning_rate": 9.231661560301647e-06, + "loss": 2.1185, + "step": 9989 + }, + { + "epoch": 2.03, + "learning_rate": 9.229939341765188e-06, + "loss": 2.1852, + "step": 9990 + }, + { + "epoch": 2.03, + "learning_rate": 9.228217146204953e-06, + "loss": 2.0703, + "step": 9991 + }, + { + "epoch": 2.03, + "learning_rate": 9.226494973672326e-06, + "loss": 2.1256, + "step": 9992 + }, + { + "epoch": 2.03, + "learning_rate": 9.224772824218693e-06, + "loss": 2.0942, + "step": 9993 + }, + { + "epoch": 2.03, + "learning_rate": 9.22305069789544e-06, + "loss": 2.1423, + "step": 9994 + }, + { + "epoch": 2.03, + "learning_rate": 9.221328594753943e-06, + "loss": 2.1285, + "step": 9995 + }, + { + "epoch": 2.03, + "learning_rate": 9.219606514845592e-06, + "loss": 2.0433, + "step": 9996 + }, + { + "epoch": 2.03, + "learning_rate": 9.217884458221766e-06, + "loss": 2.0788, + "step": 9997 + }, + { + "epoch": 2.03, + "learning_rate": 9.216162424933842e-06, + "loss": 2.0946, + "step": 9998 + }, + { + "epoch": 2.03, + "learning_rate": 9.214440415033204e-06, + "loss": 2.0919, + "step": 9999 + }, + { + "epoch": 2.03, + "learning_rate": 9.21271842857123e-06, + "loss": 2.0614, + "step": 10000 + }, + { + "epoch": 2.03, + "learning_rate": 9.210996465599303e-06, + "loss": 2.0705, + "step": 10001 + }, + { + "epoch": 2.03, + "learning_rate": 9.209274526168795e-06, + "loss": 2.1692, + "step": 10002 + }, + { + "epoch": 2.03, + "learning_rate": 9.207552610331083e-06, + "loss": 2.1751, + "step": 10003 + }, + { + "epoch": 2.03, + "learning_rate": 9.20583071813755e-06, + "loss": 2.1024, + "step": 10004 + }, + { + "epoch": 2.03, + "learning_rate": 9.204108849639567e-06, + "loss": 2.086, + "step": 10005 + }, + { + "epoch": 2.03, + "learning_rate": 9.202387004888511e-06, + "loss": 2.0887, + "step": 10006 + }, + { + "epoch": 2.03, + "learning_rate": 9.200665183935758e-06, + "loss": 2.1029, + "step": 10007 + }, + { + "epoch": 2.03, + "learning_rate": 9.198943386832673e-06, + "loss": 2.1126, + "step": 10008 + }, + { + "epoch": 2.03, + "learning_rate": 9.19722161363064e-06, + "loss": 2.1332, + "step": 10009 + }, + { + "epoch": 2.03, + "learning_rate": 9.195499864381028e-06, + "loss": 2.1161, + "step": 10010 + }, + { + "epoch": 2.03, + "learning_rate": 9.193778139135209e-06, + "loss": 2.0568, + "step": 10011 + }, + { + "epoch": 2.03, + "learning_rate": 9.19205643794455e-06, + "loss": 2.0511, + "step": 10012 + }, + { + "epoch": 2.03, + "learning_rate": 9.190334760860423e-06, + "loss": 2.1385, + "step": 10013 + }, + { + "epoch": 2.03, + "learning_rate": 9.188613107934203e-06, + "loss": 2.1766, + "step": 10014 + }, + { + "epoch": 2.03, + "learning_rate": 9.186891479217252e-06, + "loss": 2.1651, + "step": 10015 + }, + { + "epoch": 2.03, + "learning_rate": 9.18516987476094e-06, + "loss": 2.1335, + "step": 10016 + }, + { + "epoch": 2.03, + "learning_rate": 9.183448294616637e-06, + "loss": 2.1183, + "step": 10017 + }, + { + "epoch": 2.03, + "learning_rate": 9.181726738835705e-06, + "loss": 2.0677, + "step": 10018 + }, + { + "epoch": 2.03, + "learning_rate": 9.180005207469511e-06, + "loss": 2.1168, + "step": 10019 + }, + { + "epoch": 2.03, + "learning_rate": 9.178283700569424e-06, + "loss": 2.1196, + "step": 10020 + }, + { + "epoch": 2.03, + "learning_rate": 9.176562218186808e-06, + "loss": 2.1343, + "step": 10021 + }, + { + "epoch": 2.03, + "learning_rate": 9.174840760373023e-06, + "loss": 2.1627, + "step": 10022 + }, + { + "epoch": 2.03, + "learning_rate": 9.173119327179428e-06, + "loss": 2.1538, + "step": 10023 + }, + { + "epoch": 2.03, + "learning_rate": 9.171397918657398e-06, + "loss": 2.0907, + "step": 10024 + }, + { + "epoch": 2.03, + "learning_rate": 9.169676534858284e-06, + "loss": 2.0972, + "step": 10025 + }, + { + "epoch": 2.04, + "learning_rate": 9.167955175833453e-06, + "loss": 2.1443, + "step": 10026 + }, + { + "epoch": 2.04, + "learning_rate": 9.166233841634262e-06, + "loss": 2.071, + "step": 10027 + }, + { + "epoch": 2.04, + "learning_rate": 9.164512532312065e-06, + "loss": 2.1678, + "step": 10028 + }, + { + "epoch": 2.04, + "learning_rate": 9.16279124791823e-06, + "loss": 2.1321, + "step": 10029 + }, + { + "epoch": 2.04, + "learning_rate": 9.16106998850411e-06, + "loss": 2.1414, + "step": 10030 + }, + { + "epoch": 2.04, + "learning_rate": 9.159348754121065e-06, + "loss": 2.1105, + "step": 10031 + }, + { + "epoch": 2.04, + "learning_rate": 9.157627544820444e-06, + "loss": 2.0424, + "step": 10032 + }, + { + "epoch": 2.04, + "learning_rate": 9.15590636065361e-06, + "loss": 2.1036, + "step": 10033 + }, + { + "epoch": 2.04, + "learning_rate": 9.154185201671916e-06, + "loss": 2.0763, + "step": 10034 + }, + { + "epoch": 2.04, + "learning_rate": 9.152464067926717e-06, + "loss": 2.0932, + "step": 10035 + }, + { + "epoch": 2.04, + "learning_rate": 9.150742959469361e-06, + "loss": 2.1027, + "step": 10036 + }, + { + "epoch": 2.04, + "learning_rate": 9.149021876351208e-06, + "loss": 2.2568, + "step": 10037 + }, + { + "epoch": 2.04, + "learning_rate": 9.147300818623604e-06, + "loss": 2.1277, + "step": 10038 + }, + { + "epoch": 2.04, + "learning_rate": 9.145579786337904e-06, + "loss": 2.0802, + "step": 10039 + }, + { + "epoch": 2.04, + "learning_rate": 9.143858779545454e-06, + "loss": 2.0719, + "step": 10040 + }, + { + "epoch": 2.04, + "learning_rate": 9.142137798297609e-06, + "loss": 2.0787, + "step": 10041 + }, + { + "epoch": 2.04, + "learning_rate": 9.140416842645712e-06, + "loss": 2.0857, + "step": 10042 + }, + { + "epoch": 2.04, + "learning_rate": 9.138695912641111e-06, + "loss": 2.1381, + "step": 10043 + }, + { + "epoch": 2.04, + "learning_rate": 9.13697500833516e-06, + "loss": 2.089, + "step": 10044 + }, + { + "epoch": 2.04, + "learning_rate": 9.135254129779201e-06, + "loss": 2.0701, + "step": 10045 + }, + { + "epoch": 2.04, + "learning_rate": 9.13353327702458e-06, + "loss": 2.1579, + "step": 10046 + }, + { + "epoch": 2.04, + "learning_rate": 9.13181245012264e-06, + "loss": 2.055, + "step": 10047 + }, + { + "epoch": 2.04, + "learning_rate": 9.130091649124729e-06, + "loss": 2.1448, + "step": 10048 + }, + { + "epoch": 2.04, + "learning_rate": 9.128370874082189e-06, + "loss": 2.0821, + "step": 10049 + }, + { + "epoch": 2.04, + "learning_rate": 9.126650125046361e-06, + "loss": 2.0903, + "step": 10050 + }, + { + "epoch": 2.04, + "learning_rate": 9.12492940206859e-06, + "loss": 2.0579, + "step": 10051 + }, + { + "epoch": 2.04, + "learning_rate": 9.123208705200209e-06, + "loss": 2.052, + "step": 10052 + }, + { + "epoch": 2.04, + "learning_rate": 9.121488034492569e-06, + "loss": 2.0721, + "step": 10053 + }, + { + "epoch": 2.04, + "learning_rate": 9.119767389997005e-06, + "loss": 2.1468, + "step": 10054 + }, + { + "epoch": 2.04, + "learning_rate": 9.118046771764855e-06, + "loss": 2.0148, + "step": 10055 + }, + { + "epoch": 2.04, + "learning_rate": 9.116326179847455e-06, + "loss": 2.0856, + "step": 10056 + }, + { + "epoch": 2.04, + "learning_rate": 9.114605614296143e-06, + "loss": 2.122, + "step": 10057 + }, + { + "epoch": 2.04, + "learning_rate": 9.11288507516226e-06, + "loss": 2.1487, + "step": 10058 + }, + { + "epoch": 2.04, + "learning_rate": 9.11116456249714e-06, + "loss": 2.0744, + "step": 10059 + }, + { + "epoch": 2.04, + "learning_rate": 9.109444076352112e-06, + "loss": 2.1445, + "step": 10060 + }, + { + "epoch": 2.04, + "learning_rate": 9.107723616778517e-06, + "loss": 2.0973, + "step": 10061 + }, + { + "epoch": 2.04, + "learning_rate": 9.106003183827683e-06, + "loss": 2.0192, + "step": 10062 + }, + { + "epoch": 2.04, + "learning_rate": 9.104282777550947e-06, + "loss": 2.0836, + "step": 10063 + }, + { + "epoch": 2.04, + "learning_rate": 9.102562397999634e-06, + "loss": 2.0576, + "step": 10064 + }, + { + "epoch": 2.04, + "learning_rate": 9.100842045225084e-06, + "loss": 2.1348, + "step": 10065 + }, + { + "epoch": 2.04, + "learning_rate": 9.09912171927862e-06, + "loss": 2.1345, + "step": 10066 + }, + { + "epoch": 2.04, + "learning_rate": 9.09740142021157e-06, + "loss": 2.0975, + "step": 10067 + }, + { + "epoch": 2.04, + "learning_rate": 9.09568114807527e-06, + "loss": 2.1005, + "step": 10068 + }, + { + "epoch": 2.04, + "learning_rate": 9.093960902921044e-06, + "loss": 2.1271, + "step": 10069 + }, + { + "epoch": 2.04, + "learning_rate": 9.092240684800217e-06, + "loss": 2.1347, + "step": 10070 + }, + { + "epoch": 2.04, + "learning_rate": 9.090520493764115e-06, + "loss": 2.1706, + "step": 10071 + }, + { + "epoch": 2.04, + "learning_rate": 9.08880032986406e-06, + "loss": 2.1071, + "step": 10072 + }, + { + "epoch": 2.04, + "learning_rate": 9.087080193151386e-06, + "loss": 2.1381, + "step": 10073 + }, + { + "epoch": 2.04, + "learning_rate": 9.08536008367741e-06, + "loss": 2.1378, + "step": 10074 + }, + { + "epoch": 2.05, + "learning_rate": 9.083640001493456e-06, + "loss": 2.1436, + "step": 10075 + }, + { + "epoch": 2.05, + "learning_rate": 9.081919946650844e-06, + "loss": 2.0582, + "step": 10076 + }, + { + "epoch": 2.05, + "learning_rate": 9.080199919200897e-06, + "loss": 2.0708, + "step": 10077 + }, + { + "epoch": 2.05, + "learning_rate": 9.078479919194937e-06, + "loss": 2.157, + "step": 10078 + }, + { + "epoch": 2.05, + "learning_rate": 9.076759946684282e-06, + "loss": 2.082, + "step": 10079 + }, + { + "epoch": 2.05, + "learning_rate": 9.075040001720247e-06, + "loss": 2.1782, + "step": 10080 + }, + { + "epoch": 2.05, + "learning_rate": 9.073320084354156e-06, + "loss": 2.1209, + "step": 10081 + }, + { + "epoch": 2.05, + "learning_rate": 9.07160019463732e-06, + "loss": 2.1712, + "step": 10082 + }, + { + "epoch": 2.05, + "learning_rate": 9.069880332621061e-06, + "loss": 2.168, + "step": 10083 + }, + { + "epoch": 2.05, + "learning_rate": 9.068160498356691e-06, + "loss": 2.1297, + "step": 10084 + }, + { + "epoch": 2.05, + "learning_rate": 9.066440691895527e-06, + "loss": 2.0779, + "step": 10085 + }, + { + "epoch": 2.05, + "learning_rate": 9.064720913288879e-06, + "loss": 2.1426, + "step": 10086 + }, + { + "epoch": 2.05, + "learning_rate": 9.06300116258806e-06, + "loss": 2.1687, + "step": 10087 + }, + { + "epoch": 2.05, + "learning_rate": 9.061281439844384e-06, + "loss": 2.1623, + "step": 10088 + }, + { + "epoch": 2.05, + "learning_rate": 9.059561745109163e-06, + "loss": 2.1252, + "step": 10089 + }, + { + "epoch": 2.05, + "learning_rate": 9.057842078433709e-06, + "loss": 2.0423, + "step": 10090 + }, + { + "epoch": 2.05, + "learning_rate": 9.056122439869325e-06, + "loss": 2.1027, + "step": 10091 + }, + { + "epoch": 2.05, + "learning_rate": 9.054402829467322e-06, + "loss": 1.9589, + "step": 10092 + }, + { + "epoch": 2.05, + "learning_rate": 9.052683247279013e-06, + "loss": 2.168, + "step": 10093 + }, + { + "epoch": 2.05, + "learning_rate": 9.050963693355703e-06, + "loss": 2.0463, + "step": 10094 + }, + { + "epoch": 2.05, + "learning_rate": 9.049244167748694e-06, + "loss": 2.1068, + "step": 10095 + }, + { + "epoch": 2.05, + "learning_rate": 9.047524670509294e-06, + "loss": 2.254, + "step": 10096 + }, + { + "epoch": 2.05, + "learning_rate": 9.045805201688805e-06, + "loss": 2.1549, + "step": 10097 + }, + { + "epoch": 2.05, + "learning_rate": 9.044085761338535e-06, + "loss": 2.1377, + "step": 10098 + }, + { + "epoch": 2.05, + "learning_rate": 9.042366349509787e-06, + "loss": 2.1014, + "step": 10099 + }, + { + "epoch": 2.05, + "learning_rate": 9.040646966253856e-06, + "loss": 2.1268, + "step": 10100 + }, + { + "epoch": 2.05, + "learning_rate": 9.038927611622052e-06, + "loss": 2.1386, + "step": 10101 + }, + { + "epoch": 2.05, + "learning_rate": 9.037208285665667e-06, + "loss": 2.1506, + "step": 10102 + }, + { + "epoch": 2.05, + "learning_rate": 9.035488988436008e-06, + "loss": 2.1555, + "step": 10103 + }, + { + "epoch": 2.05, + "learning_rate": 9.033769719984366e-06, + "loss": 2.0877, + "step": 10104 + }, + { + "epoch": 2.05, + "learning_rate": 9.032050480362045e-06, + "loss": 2.1045, + "step": 10105 + }, + { + "epoch": 2.05, + "learning_rate": 9.03033126962034e-06, + "loss": 2.1574, + "step": 10106 + }, + { + "epoch": 2.05, + "learning_rate": 9.028612087810545e-06, + "loss": 2.1659, + "step": 10107 + }, + { + "epoch": 2.05, + "learning_rate": 9.026892934983954e-06, + "loss": 2.047, + "step": 10108 + }, + { + "epoch": 2.05, + "learning_rate": 9.025173811191867e-06, + "loss": 2.0932, + "step": 10109 + }, + { + "epoch": 2.05, + "learning_rate": 9.023454716485573e-06, + "loss": 2.2138, + "step": 10110 + }, + { + "epoch": 2.05, + "learning_rate": 9.021735650916366e-06, + "loss": 2.192, + "step": 10111 + }, + { + "epoch": 2.05, + "learning_rate": 9.020016614535531e-06, + "loss": 2.1009, + "step": 10112 + }, + { + "epoch": 2.05, + "learning_rate": 9.01829760739437e-06, + "loss": 2.1235, + "step": 10113 + }, + { + "epoch": 2.05, + "learning_rate": 9.016578629544166e-06, + "loss": 2.1462, + "step": 10114 + }, + { + "epoch": 2.05, + "learning_rate": 9.014859681036211e-06, + "loss": 2.1227, + "step": 10115 + }, + { + "epoch": 2.05, + "learning_rate": 9.01314076192179e-06, + "loss": 2.1058, + "step": 10116 + }, + { + "epoch": 2.05, + "learning_rate": 9.011421872252188e-06, + "loss": 2.0725, + "step": 10117 + }, + { + "epoch": 2.05, + "learning_rate": 9.0097030120787e-06, + "loss": 2.1403, + "step": 10118 + }, + { + "epoch": 2.05, + "learning_rate": 9.007984181452605e-06, + "loss": 2.0994, + "step": 10119 + }, + { + "epoch": 2.05, + "learning_rate": 9.00626538042519e-06, + "loss": 2.1657, + "step": 10120 + }, + { + "epoch": 2.05, + "learning_rate": 9.004546609047735e-06, + "loss": 2.0811, + "step": 10121 + }, + { + "epoch": 2.05, + "learning_rate": 9.002827867371526e-06, + "loss": 2.1535, + "step": 10122 + }, + { + "epoch": 2.05, + "learning_rate": 9.001109155447846e-06, + "loss": 2.0915, + "step": 10123 + }, + { + "epoch": 2.06, + "learning_rate": 8.999390473327974e-06, + "loss": 2.1275, + "step": 10124 + }, + { + "epoch": 2.06, + "learning_rate": 8.99767182106319e-06, + "loss": 2.1672, + "step": 10125 + }, + { + "epoch": 2.06, + "learning_rate": 8.995953198704776e-06, + "loss": 2.096, + "step": 10126 + }, + { + "epoch": 2.06, + "learning_rate": 8.994234606304006e-06, + "loss": 2.1003, + "step": 10127 + }, + { + "epoch": 2.06, + "learning_rate": 8.992516043912158e-06, + "loss": 2.1037, + "step": 10128 + }, + { + "epoch": 2.06, + "learning_rate": 8.990797511580514e-06, + "loss": 2.1223, + "step": 10129 + }, + { + "epoch": 2.06, + "learning_rate": 8.989079009360347e-06, + "loss": 2.1325, + "step": 10130 + }, + { + "epoch": 2.06, + "learning_rate": 8.98736053730293e-06, + "loss": 2.0873, + "step": 10131 + }, + { + "epoch": 2.06, + "learning_rate": 8.985642095459533e-06, + "loss": 2.1008, + "step": 10132 + }, + { + "epoch": 2.06, + "learning_rate": 8.983923683881437e-06, + "loss": 2.0605, + "step": 10133 + }, + { + "epoch": 2.06, + "learning_rate": 8.982205302619912e-06, + "loss": 2.0881, + "step": 10134 + }, + { + "epoch": 2.06, + "learning_rate": 8.980486951726228e-06, + "loss": 2.1235, + "step": 10135 + }, + { + "epoch": 2.06, + "learning_rate": 8.97876863125165e-06, + "loss": 2.0286, + "step": 10136 + }, + { + "epoch": 2.06, + "learning_rate": 8.977050341247457e-06, + "loss": 2.1319, + "step": 10137 + }, + { + "epoch": 2.06, + "learning_rate": 8.975332081764913e-06, + "loss": 2.0718, + "step": 10138 + }, + { + "epoch": 2.06, + "learning_rate": 8.973613852855286e-06, + "loss": 2.2022, + "step": 10139 + }, + { + "epoch": 2.06, + "learning_rate": 8.971895654569842e-06, + "loss": 2.0826, + "step": 10140 + }, + { + "epoch": 2.06, + "learning_rate": 8.970177486959842e-06, + "loss": 2.137, + "step": 10141 + }, + { + "epoch": 2.06, + "learning_rate": 8.968459350076562e-06, + "loss": 2.0769, + "step": 10142 + }, + { + "epoch": 2.06, + "learning_rate": 8.966741243971258e-06, + "loss": 2.1219, + "step": 10143 + }, + { + "epoch": 2.06, + "learning_rate": 8.965023168695195e-06, + "loss": 2.1589, + "step": 10144 + }, + { + "epoch": 2.06, + "learning_rate": 8.96330512429963e-06, + "loss": 2.0702, + "step": 10145 + }, + { + "epoch": 2.06, + "learning_rate": 8.961587110835832e-06, + "loss": 2.1715, + "step": 10146 + }, + { + "epoch": 2.06, + "learning_rate": 8.959869128355057e-06, + "loss": 2.1446, + "step": 10147 + }, + { + "epoch": 2.06, + "learning_rate": 8.958151176908566e-06, + "loss": 2.2039, + "step": 10148 + }, + { + "epoch": 2.06, + "learning_rate": 8.956433256547618e-06, + "loss": 2.103, + "step": 10149 + }, + { + "epoch": 2.06, + "learning_rate": 8.954715367323468e-06, + "loss": 2.1112, + "step": 10150 + }, + { + "epoch": 2.06, + "learning_rate": 8.952997509287371e-06, + "loss": 1.9617, + "step": 10151 + }, + { + "epoch": 2.06, + "learning_rate": 8.951279682490586e-06, + "loss": 2.1345, + "step": 10152 + }, + { + "epoch": 2.06, + "learning_rate": 8.949561886984367e-06, + "loss": 2.1035, + "step": 10153 + }, + { + "epoch": 2.06, + "learning_rate": 8.94784412281997e-06, + "loss": 2.0581, + "step": 10154 + }, + { + "epoch": 2.06, + "learning_rate": 8.946126390048643e-06, + "loss": 2.0377, + "step": 10155 + }, + { + "epoch": 2.06, + "learning_rate": 8.944408688721636e-06, + "loss": 2.1671, + "step": 10156 + }, + { + "epoch": 2.06, + "learning_rate": 8.942691018890209e-06, + "loss": 2.1756, + "step": 10157 + }, + { + "epoch": 2.06, + "learning_rate": 8.940973380605606e-06, + "loss": 2.1198, + "step": 10158 + }, + { + "epoch": 2.06, + "learning_rate": 8.939255773919077e-06, + "loss": 2.0571, + "step": 10159 + }, + { + "epoch": 2.06, + "learning_rate": 8.937538198881868e-06, + "loss": 2.0862, + "step": 10160 + }, + { + "epoch": 2.06, + "learning_rate": 8.935820655545227e-06, + "loss": 2.0421, + "step": 10161 + }, + { + "epoch": 2.06, + "learning_rate": 8.934103143960403e-06, + "loss": 2.1115, + "step": 10162 + }, + { + "epoch": 2.06, + "learning_rate": 8.932385664178639e-06, + "loss": 2.0438, + "step": 10163 + }, + { + "epoch": 2.06, + "learning_rate": 8.930668216251182e-06, + "loss": 2.1, + "step": 10164 + }, + { + "epoch": 2.06, + "learning_rate": 8.92895080022927e-06, + "loss": 2.1726, + "step": 10165 + }, + { + "epoch": 2.06, + "learning_rate": 8.927233416164146e-06, + "loss": 2.1145, + "step": 10166 + }, + { + "epoch": 2.06, + "learning_rate": 8.925516064107058e-06, + "loss": 2.1044, + "step": 10167 + }, + { + "epoch": 2.06, + "learning_rate": 8.92379874410924e-06, + "loss": 2.0883, + "step": 10168 + }, + { + "epoch": 2.06, + "learning_rate": 8.922081456221936e-06, + "loss": 2.1087, + "step": 10169 + }, + { + "epoch": 2.06, + "learning_rate": 8.92036420049638e-06, + "loss": 2.1593, + "step": 10170 + }, + { + "epoch": 2.06, + "learning_rate": 8.91864697698381e-06, + "loss": 2.1388, + "step": 10171 + }, + { + "epoch": 2.06, + "learning_rate": 8.916929785735467e-06, + "loss": 2.1006, + "step": 10172 + }, + { + "epoch": 2.06, + "learning_rate": 8.915212626802585e-06, + "loss": 2.1805, + "step": 10173 + }, + { + "epoch": 2.07, + "learning_rate": 8.913495500236398e-06, + "loss": 2.145, + "step": 10174 + }, + { + "epoch": 2.07, + "learning_rate": 8.911778406088139e-06, + "loss": 2.0651, + "step": 10175 + }, + { + "epoch": 2.07, + "learning_rate": 8.910061344409037e-06, + "loss": 2.1303, + "step": 10176 + }, + { + "epoch": 2.07, + "learning_rate": 8.908344315250333e-06, + "loss": 2.1519, + "step": 10177 + }, + { + "epoch": 2.07, + "learning_rate": 8.906627318663252e-06, + "loss": 2.0451, + "step": 10178 + }, + { + "epoch": 2.07, + "learning_rate": 8.904910354699027e-06, + "loss": 2.0953, + "step": 10179 + }, + { + "epoch": 2.07, + "learning_rate": 8.903193423408882e-06, + "loss": 2.0459, + "step": 10180 + }, + { + "epoch": 2.07, + "learning_rate": 8.901476524844046e-06, + "loss": 2.1152, + "step": 10181 + }, + { + "epoch": 2.07, + "learning_rate": 8.89975965905575e-06, + "loss": 2.0722, + "step": 10182 + }, + { + "epoch": 2.07, + "learning_rate": 8.898042826095218e-06, + "loss": 2.1098, + "step": 10183 + }, + { + "epoch": 2.07, + "learning_rate": 8.896326026013675e-06, + "loss": 2.1457, + "step": 10184 + }, + { + "epoch": 2.07, + "learning_rate": 8.894609258862341e-06, + "loss": 2.1858, + "step": 10185 + }, + { + "epoch": 2.07, + "learning_rate": 8.892892524692444e-06, + "loss": 2.1078, + "step": 10186 + }, + { + "epoch": 2.07, + "learning_rate": 8.891175823555205e-06, + "loss": 2.1241, + "step": 10187 + }, + { + "epoch": 2.07, + "learning_rate": 8.889459155501845e-06, + "loss": 2.0531, + "step": 10188 + }, + { + "epoch": 2.07, + "learning_rate": 8.887742520583583e-06, + "loss": 2.0708, + "step": 10189 + }, + { + "epoch": 2.07, + "learning_rate": 8.88602591885164e-06, + "loss": 2.1548, + "step": 10190 + }, + { + "epoch": 2.07, + "learning_rate": 8.88430935035723e-06, + "loss": 2.1664, + "step": 10191 + }, + { + "epoch": 2.07, + "learning_rate": 8.882592815151578e-06, + "loss": 2.0766, + "step": 10192 + }, + { + "epoch": 2.07, + "learning_rate": 8.880876313285889e-06, + "loss": 2.1689, + "step": 10193 + }, + { + "epoch": 2.07, + "learning_rate": 8.879159844811387e-06, + "loss": 2.1681, + "step": 10194 + }, + { + "epoch": 2.07, + "learning_rate": 8.877443409779285e-06, + "loss": 2.1313, + "step": 10195 + }, + { + "epoch": 2.07, + "learning_rate": 8.875727008240788e-06, + "loss": 2.121, + "step": 10196 + }, + { + "epoch": 2.07, + "learning_rate": 8.87401064024712e-06, + "loss": 2.1561, + "step": 10197 + }, + { + "epoch": 2.07, + "learning_rate": 8.872294305849486e-06, + "loss": 2.1506, + "step": 10198 + }, + { + "epoch": 2.07, + "learning_rate": 8.870578005099097e-06, + "loss": 2.1442, + "step": 10199 + }, + { + "epoch": 2.07, + "learning_rate": 8.86886173804716e-06, + "loss": 2.0522, + "step": 10200 + }, + { + "epoch": 2.07, + "learning_rate": 8.867145504744882e-06, + "loss": 2.1122, + "step": 10201 + }, + { + "epoch": 2.07, + "learning_rate": 8.865429305243477e-06, + "loss": 2.0958, + "step": 10202 + }, + { + "epoch": 2.07, + "learning_rate": 8.863713139594147e-06, + "loss": 2.2106, + "step": 10203 + }, + { + "epoch": 2.07, + "learning_rate": 8.861997007848097e-06, + "loss": 2.192, + "step": 10204 + }, + { + "epoch": 2.07, + "learning_rate": 8.860280910056528e-06, + "loss": 2.1383, + "step": 10205 + }, + { + "epoch": 2.07, + "learning_rate": 8.858564846270646e-06, + "loss": 2.1238, + "step": 10206 + }, + { + "epoch": 2.07, + "learning_rate": 8.856848816541656e-06, + "loss": 2.1005, + "step": 10207 + }, + { + "epoch": 2.07, + "learning_rate": 8.855132820920756e-06, + "loss": 2.1131, + "step": 10208 + }, + { + "epoch": 2.07, + "learning_rate": 8.853416859459145e-06, + "loss": 2.1136, + "step": 10209 + }, + { + "epoch": 2.07, + "learning_rate": 8.851700932208024e-06, + "loss": 2.1558, + "step": 10210 + }, + { + "epoch": 2.07, + "learning_rate": 8.849985039218588e-06, + "loss": 2.1276, + "step": 10211 + }, + { + "epoch": 2.07, + "learning_rate": 8.848269180542038e-06, + "loss": 2.1083, + "step": 10212 + }, + { + "epoch": 2.07, + "learning_rate": 8.846553356229566e-06, + "loss": 2.0719, + "step": 10213 + }, + { + "epoch": 2.07, + "learning_rate": 8.844837566332371e-06, + "loss": 2.0971, + "step": 10214 + }, + { + "epoch": 2.07, + "learning_rate": 8.843121810901645e-06, + "loss": 2.1025, + "step": 10215 + }, + { + "epoch": 2.07, + "learning_rate": 8.841406089988578e-06, + "loss": 2.1563, + "step": 10216 + }, + { + "epoch": 2.07, + "learning_rate": 8.839690403644364e-06, + "loss": 2.1551, + "step": 10217 + }, + { + "epoch": 2.07, + "learning_rate": 8.837974751920197e-06, + "loss": 2.126, + "step": 10218 + }, + { + "epoch": 2.07, + "learning_rate": 8.836259134867264e-06, + "loss": 2.0985, + "step": 10219 + }, + { + "epoch": 2.07, + "learning_rate": 8.834543552536752e-06, + "loss": 2.0998, + "step": 10220 + }, + { + "epoch": 2.07, + "learning_rate": 8.832828004979845e-06, + "loss": 2.1468, + "step": 10221 + }, + { + "epoch": 2.07, + "learning_rate": 8.831112492247741e-06, + "loss": 2.1135, + "step": 10222 + }, + { + "epoch": 2.08, + "learning_rate": 8.829397014391619e-06, + "loss": 2.1349, + "step": 10223 + }, + { + "epoch": 2.08, + "learning_rate": 8.827681571462663e-06, + "loss": 2.0566, + "step": 10224 + }, + { + "epoch": 2.08, + "learning_rate": 8.825966163512059e-06, + "loss": 2.0845, + "step": 10225 + }, + { + "epoch": 2.08, + "learning_rate": 8.824250790590982e-06, + "loss": 2.0455, + "step": 10226 + }, + { + "epoch": 2.08, + "learning_rate": 8.822535452750625e-06, + "loss": 2.0708, + "step": 10227 + }, + { + "epoch": 2.08, + "learning_rate": 8.820820150042161e-06, + "loss": 2.1438, + "step": 10228 + }, + { + "epoch": 2.08, + "learning_rate": 8.81910488251677e-06, + "loss": 2.1089, + "step": 10229 + }, + { + "epoch": 2.08, + "learning_rate": 8.81738965022563e-06, + "loss": 2.0954, + "step": 10230 + }, + { + "epoch": 2.08, + "learning_rate": 8.815674453219922e-06, + "loss": 2.0533, + "step": 10231 + }, + { + "epoch": 2.08, + "learning_rate": 8.81395929155082e-06, + "loss": 2.0942, + "step": 10232 + }, + { + "epoch": 2.08, + "learning_rate": 8.812244165269496e-06, + "loss": 2.113, + "step": 10233 + }, + { + "epoch": 2.08, + "learning_rate": 8.81052907442713e-06, + "loss": 2.0852, + "step": 10234 + }, + { + "epoch": 2.08, + "learning_rate": 8.808814019074888e-06, + "loss": 2.1558, + "step": 10235 + }, + { + "epoch": 2.08, + "learning_rate": 8.807098999263948e-06, + "loss": 1.9884, + "step": 10236 + }, + { + "epoch": 2.08, + "learning_rate": 8.805384015045479e-06, + "loss": 2.1573, + "step": 10237 + }, + { + "epoch": 2.08, + "learning_rate": 8.80366906647065e-06, + "loss": 2.0678, + "step": 10238 + }, + { + "epoch": 2.08, + "learning_rate": 8.80195415359063e-06, + "loss": 2.1485, + "step": 10239 + }, + { + "epoch": 2.08, + "learning_rate": 8.800239276456586e-06, + "loss": 2.1559, + "step": 10240 + }, + { + "epoch": 2.08, + "learning_rate": 8.798524435119684e-06, + "loss": 2.1431, + "step": 10241 + }, + { + "epoch": 2.08, + "learning_rate": 8.796809629631096e-06, + "loss": 2.0805, + "step": 10242 + }, + { + "epoch": 2.08, + "learning_rate": 8.79509486004198e-06, + "loss": 2.079, + "step": 10243 + }, + { + "epoch": 2.08, + "learning_rate": 8.793380126403499e-06, + "loss": 2.0818, + "step": 10244 + }, + { + "epoch": 2.08, + "learning_rate": 8.791665428766814e-06, + "loss": 2.0605, + "step": 10245 + }, + { + "epoch": 2.08, + "learning_rate": 8.789950767183096e-06, + "loss": 2.1561, + "step": 10246 + }, + { + "epoch": 2.08, + "learning_rate": 8.788236141703496e-06, + "loss": 1.9823, + "step": 10247 + }, + { + "epoch": 2.08, + "learning_rate": 8.786521552379178e-06, + "loss": 2.0811, + "step": 10248 + }, + { + "epoch": 2.08, + "learning_rate": 8.784806999261294e-06, + "loss": 2.181, + "step": 10249 + }, + { + "epoch": 2.08, + "learning_rate": 8.783092482401003e-06, + "loss": 2.1144, + "step": 10250 + }, + { + "epoch": 2.08, + "learning_rate": 8.781378001849468e-06, + "loss": 2.1581, + "step": 10251 + }, + { + "epoch": 2.08, + "learning_rate": 8.779663557657838e-06, + "loss": 2.1487, + "step": 10252 + }, + { + "epoch": 2.08, + "learning_rate": 8.777949149877264e-06, + "loss": 2.1841, + "step": 10253 + }, + { + "epoch": 2.08, + "learning_rate": 8.776234778558904e-06, + "loss": 2.01, + "step": 10254 + }, + { + "epoch": 2.08, + "learning_rate": 8.774520443753905e-06, + "loss": 2.148, + "step": 10255 + }, + { + "epoch": 2.08, + "learning_rate": 8.772806145513423e-06, + "loss": 2.1689, + "step": 10256 + }, + { + "epoch": 2.08, + "learning_rate": 8.771091883888601e-06, + "loss": 2.0673, + "step": 10257 + }, + { + "epoch": 2.08, + "learning_rate": 8.769377658930594e-06, + "loss": 2.1619, + "step": 10258 + }, + { + "epoch": 2.08, + "learning_rate": 8.767663470690543e-06, + "loss": 2.096, + "step": 10259 + }, + { + "epoch": 2.08, + "learning_rate": 8.765949319219595e-06, + "loss": 2.0882, + "step": 10260 + }, + { + "epoch": 2.08, + "learning_rate": 8.764235204568898e-06, + "loss": 2.0514, + "step": 10261 + }, + { + "epoch": 2.08, + "learning_rate": 8.762521126789596e-06, + "loss": 2.1085, + "step": 10262 + }, + { + "epoch": 2.08, + "learning_rate": 8.760807085932829e-06, + "loss": 2.0159, + "step": 10263 + }, + { + "epoch": 2.08, + "learning_rate": 8.759093082049742e-06, + "loss": 2.0446, + "step": 10264 + }, + { + "epoch": 2.08, + "learning_rate": 8.757379115191468e-06, + "loss": 2.0836, + "step": 10265 + }, + { + "epoch": 2.08, + "learning_rate": 8.755665185409157e-06, + "loss": 2.1583, + "step": 10266 + }, + { + "epoch": 2.08, + "learning_rate": 8.753951292753942e-06, + "loss": 2.0173, + "step": 10267 + }, + { + "epoch": 2.08, + "learning_rate": 8.75223743727696e-06, + "loss": 2.0878, + "step": 10268 + }, + { + "epoch": 2.08, + "learning_rate": 8.750523619029347e-06, + "loss": 2.0836, + "step": 10269 + }, + { + "epoch": 2.08, + "learning_rate": 8.748809838062234e-06, + "loss": 2.1127, + "step": 10270 + }, + { + "epoch": 2.08, + "learning_rate": 8.747096094426766e-06, + "loss": 2.1728, + "step": 10271 + }, + { + "epoch": 2.09, + "learning_rate": 8.745382388174068e-06, + "loss": 2.1032, + "step": 10272 + }, + { + "epoch": 2.09, + "learning_rate": 8.743668719355273e-06, + "loss": 2.1726, + "step": 10273 + }, + { + "epoch": 2.09, + "learning_rate": 8.74195508802151e-06, + "loss": 2.1157, + "step": 10274 + }, + { + "epoch": 2.09, + "learning_rate": 8.74024149422391e-06, + "loss": 2.062, + "step": 10275 + }, + { + "epoch": 2.09, + "learning_rate": 8.738527938013603e-06, + "loss": 2.0996, + "step": 10276 + }, + { + "epoch": 2.09, + "learning_rate": 8.736814419441712e-06, + "loss": 2.1501, + "step": 10277 + }, + { + "epoch": 2.09, + "learning_rate": 8.735100938559369e-06, + "loss": 2.1647, + "step": 10278 + }, + { + "epoch": 2.09, + "learning_rate": 8.733387495417695e-06, + "loss": 2.0477, + "step": 10279 + }, + { + "epoch": 2.09, + "learning_rate": 8.731674090067814e-06, + "loss": 2.1425, + "step": 10280 + }, + { + "epoch": 2.09, + "learning_rate": 8.729960722560848e-06, + "loss": 2.1279, + "step": 10281 + }, + { + "epoch": 2.09, + "learning_rate": 8.728247392947921e-06, + "loss": 2.0998, + "step": 10282 + }, + { + "epoch": 2.09, + "learning_rate": 8.726534101280153e-06, + "loss": 2.1803, + "step": 10283 + }, + { + "epoch": 2.09, + "learning_rate": 8.724820847608663e-06, + "loss": 2.1071, + "step": 10284 + }, + { + "epoch": 2.09, + "learning_rate": 8.723107631984564e-06, + "loss": 2.1421, + "step": 10285 + }, + { + "epoch": 2.09, + "learning_rate": 8.721394454458983e-06, + "loss": 2.1142, + "step": 10286 + }, + { + "epoch": 2.09, + "learning_rate": 8.71968131508303e-06, + "loss": 2.1198, + "step": 10287 + }, + { + "epoch": 2.09, + "learning_rate": 8.71796821390782e-06, + "loss": 2.1272, + "step": 10288 + }, + { + "epoch": 2.09, + "learning_rate": 8.716255150984467e-06, + "loss": 2.0808, + "step": 10289 + }, + { + "epoch": 2.09, + "learning_rate": 8.71454212636408e-06, + "loss": 2.1426, + "step": 10290 + }, + { + "epoch": 2.09, + "learning_rate": 8.712829140097779e-06, + "loss": 2.1033, + "step": 10291 + }, + { + "epoch": 2.09, + "learning_rate": 8.711116192236668e-06, + "loss": 2.122, + "step": 10292 + }, + { + "epoch": 2.09, + "learning_rate": 8.709403282831856e-06, + "loss": 2.1342, + "step": 10293 + }, + { + "epoch": 2.09, + "learning_rate": 8.707690411934452e-06, + "loss": 2.1211, + "step": 10294 + }, + { + "epoch": 2.09, + "learning_rate": 8.705977579595562e-06, + "loss": 2.1038, + "step": 10295 + }, + { + "epoch": 2.09, + "learning_rate": 8.704264785866295e-06, + "loss": 2.1006, + "step": 10296 + }, + { + "epoch": 2.09, + "learning_rate": 8.702552030797753e-06, + "loss": 2.1189, + "step": 10297 + }, + { + "epoch": 2.09, + "learning_rate": 8.700839314441036e-06, + "loss": 2.0742, + "step": 10298 + }, + { + "epoch": 2.09, + "learning_rate": 8.699126636847253e-06, + "loss": 2.175, + "step": 10299 + }, + { + "epoch": 2.09, + "learning_rate": 8.697413998067498e-06, + "loss": 2.0265, + "step": 10300 + }, + { + "epoch": 2.09, + "learning_rate": 8.695701398152875e-06, + "loss": 2.191, + "step": 10301 + }, + { + "epoch": 2.09, + "learning_rate": 8.693988837154484e-06, + "loss": 2.0923, + "step": 10302 + }, + { + "epoch": 2.09, + "learning_rate": 8.69227631512342e-06, + "loss": 2.0876, + "step": 10303 + }, + { + "epoch": 2.09, + "learning_rate": 8.69056383211078e-06, + "loss": 2.1212, + "step": 10304 + }, + { + "epoch": 2.09, + "learning_rate": 8.688851388167654e-06, + "loss": 2.0971, + "step": 10305 + }, + { + "epoch": 2.09, + "learning_rate": 8.687138983345147e-06, + "loss": 2.1379, + "step": 10306 + }, + { + "epoch": 2.09, + "learning_rate": 8.685426617694344e-06, + "loss": 2.1705, + "step": 10307 + }, + { + "epoch": 2.09, + "learning_rate": 8.68371429126634e-06, + "loss": 2.1453, + "step": 10308 + }, + { + "epoch": 2.09, + "learning_rate": 8.682002004112223e-06, + "loss": 2.073, + "step": 10309 + }, + { + "epoch": 2.09, + "learning_rate": 8.68028975628308e-06, + "loss": 2.0681, + "step": 10310 + }, + { + "epoch": 2.09, + "learning_rate": 8.678577547830006e-06, + "loss": 2.0971, + "step": 10311 + }, + { + "epoch": 2.09, + "learning_rate": 8.676865378804085e-06, + "loss": 2.113, + "step": 10312 + }, + { + "epoch": 2.09, + "learning_rate": 8.675153249256403e-06, + "loss": 2.1257, + "step": 10313 + }, + { + "epoch": 2.09, + "learning_rate": 8.673441159238041e-06, + "loss": 2.0489, + "step": 10314 + }, + { + "epoch": 2.09, + "learning_rate": 8.671729108800086e-06, + "loss": 2.0995, + "step": 10315 + }, + { + "epoch": 2.09, + "learning_rate": 8.670017097993621e-06, + "loss": 2.0666, + "step": 10316 + }, + { + "epoch": 2.09, + "learning_rate": 8.668305126869726e-06, + "loss": 2.0365, + "step": 10317 + }, + { + "epoch": 2.09, + "learning_rate": 8.666593195479478e-06, + "loss": 2.1135, + "step": 10318 + }, + { + "epoch": 2.09, + "learning_rate": 8.66488130387396e-06, + "loss": 2.0223, + "step": 10319 + }, + { + "epoch": 2.09, + "learning_rate": 8.66316945210425e-06, + "loss": 2.0646, + "step": 10320 + }, + { + "epoch": 2.09, + "learning_rate": 8.661457640221423e-06, + "loss": 2.1637, + "step": 10321 + }, + { + "epoch": 2.1, + "learning_rate": 8.65974586827655e-06, + "loss": 2.1105, + "step": 10322 + }, + { + "epoch": 2.1, + "learning_rate": 8.65803413632071e-06, + "loss": 2.2287, + "step": 10323 + }, + { + "epoch": 2.1, + "learning_rate": 8.656322444404974e-06, + "loss": 2.136, + "step": 10324 + }, + { + "epoch": 2.1, + "learning_rate": 8.654610792580414e-06, + "loss": 1.9859, + "step": 10325 + }, + { + "epoch": 2.1, + "learning_rate": 8.652899180898101e-06, + "loss": 2.187, + "step": 10326 + }, + { + "epoch": 2.1, + "learning_rate": 8.651187609409105e-06, + "loss": 2.1563, + "step": 10327 + }, + { + "epoch": 2.1, + "learning_rate": 8.649476078164492e-06, + "loss": 2.1092, + "step": 10328 + }, + { + "epoch": 2.1, + "learning_rate": 8.647764587215323e-06, + "loss": 2.1183, + "step": 10329 + }, + { + "epoch": 2.1, + "learning_rate": 8.646053136612677e-06, + "loss": 2.1278, + "step": 10330 + }, + { + "epoch": 2.1, + "learning_rate": 8.644341726407609e-06, + "loss": 2.1362, + "step": 10331 + }, + { + "epoch": 2.1, + "learning_rate": 8.642630356651186e-06, + "loss": 2.078, + "step": 10332 + }, + { + "epoch": 2.1, + "learning_rate": 8.640919027394468e-06, + "loss": 2.0568, + "step": 10333 + }, + { + "epoch": 2.1, + "learning_rate": 8.639207738688512e-06, + "loss": 2.1165, + "step": 10334 + }, + { + "epoch": 2.1, + "learning_rate": 8.637496490584385e-06, + "loss": 2.1927, + "step": 10335 + }, + { + "epoch": 2.1, + "learning_rate": 8.635785283133143e-06, + "loss": 2.167, + "step": 10336 + }, + { + "epoch": 2.1, + "learning_rate": 8.63407411638584e-06, + "loss": 2.153, + "step": 10337 + }, + { + "epoch": 2.1, + "learning_rate": 8.632362990393534e-06, + "loss": 2.1656, + "step": 10338 + }, + { + "epoch": 2.1, + "learning_rate": 8.63065190520728e-06, + "loss": 2.0907, + "step": 10339 + }, + { + "epoch": 2.1, + "learning_rate": 8.628940860878134e-06, + "loss": 2.1575, + "step": 10340 + }, + { + "epoch": 2.1, + "learning_rate": 8.627229857457143e-06, + "loss": 2.1664, + "step": 10341 + }, + { + "epoch": 2.1, + "learning_rate": 8.625518894995361e-06, + "loss": 2.1513, + "step": 10342 + }, + { + "epoch": 2.1, + "learning_rate": 8.623807973543838e-06, + "loss": 2.1216, + "step": 10343 + }, + { + "epoch": 2.1, + "learning_rate": 8.62209709315362e-06, + "loss": 2.1031, + "step": 10344 + }, + { + "epoch": 2.1, + "learning_rate": 8.620386253875759e-06, + "loss": 2.1324, + "step": 10345 + }, + { + "epoch": 2.1, + "learning_rate": 8.618675455761296e-06, + "loss": 2.1634, + "step": 10346 + }, + { + "epoch": 2.1, + "learning_rate": 8.616964698861281e-06, + "loss": 2.0749, + "step": 10347 + }, + { + "epoch": 2.1, + "learning_rate": 8.615253983226755e-06, + "loss": 2.0824, + "step": 10348 + }, + { + "epoch": 2.1, + "learning_rate": 8.613543308908756e-06, + "loss": 1.9934, + "step": 10349 + }, + { + "epoch": 2.1, + "learning_rate": 8.611832675958333e-06, + "loss": 2.0479, + "step": 10350 + }, + { + "epoch": 2.1, + "learning_rate": 8.610122084426524e-06, + "loss": 2.1222, + "step": 10351 + }, + { + "epoch": 2.1, + "learning_rate": 8.608411534364367e-06, + "loss": 2.1579, + "step": 10352 + }, + { + "epoch": 2.1, + "learning_rate": 8.606701025822898e-06, + "loss": 2.1397, + "step": 10353 + }, + { + "epoch": 2.1, + "learning_rate": 8.60499055885315e-06, + "loss": 2.1785, + "step": 10354 + }, + { + "epoch": 2.1, + "learning_rate": 8.603280133506168e-06, + "loss": 2.1522, + "step": 10355 + }, + { + "epoch": 2.1, + "learning_rate": 8.601569749832978e-06, + "loss": 2.1309, + "step": 10356 + }, + { + "epoch": 2.1, + "learning_rate": 8.599859407884618e-06, + "loss": 2.0511, + "step": 10357 + }, + { + "epoch": 2.1, + "learning_rate": 8.59814910771211e-06, + "loss": 2.2051, + "step": 10358 + }, + { + "epoch": 2.1, + "learning_rate": 8.596438849366493e-06, + "loss": 2.2399, + "step": 10359 + }, + { + "epoch": 2.1, + "learning_rate": 8.594728632898794e-06, + "loss": 2.1015, + "step": 10360 + }, + { + "epoch": 2.1, + "learning_rate": 8.59301845836004e-06, + "loss": 2.1189, + "step": 10361 + }, + { + "epoch": 2.1, + "learning_rate": 8.591308325801252e-06, + "loss": 2.0898, + "step": 10362 + }, + { + "epoch": 2.1, + "learning_rate": 8.589598235273465e-06, + "loss": 2.1288, + "step": 10363 + }, + { + "epoch": 2.1, + "learning_rate": 8.587888186827694e-06, + "loss": 2.1037, + "step": 10364 + }, + { + "epoch": 2.1, + "learning_rate": 8.586178180514968e-06, + "loss": 2.1759, + "step": 10365 + }, + { + "epoch": 2.1, + "learning_rate": 8.584468216386303e-06, + "loss": 2.0512, + "step": 10366 + }, + { + "epoch": 2.1, + "learning_rate": 8.582758294492724e-06, + "loss": 2.1256, + "step": 10367 + }, + { + "epoch": 2.1, + "learning_rate": 8.581048414885246e-06, + "loss": 2.1739, + "step": 10368 + }, + { + "epoch": 2.1, + "learning_rate": 8.579338577614886e-06, + "loss": 2.0785, + "step": 10369 + }, + { + "epoch": 2.1, + "learning_rate": 8.577628782732663e-06, + "loss": 2.1077, + "step": 10370 + }, + { + "epoch": 2.11, + "learning_rate": 8.575919030289592e-06, + "loss": 2.0778, + "step": 10371 + }, + { + "epoch": 2.11, + "learning_rate": 8.574209320336686e-06, + "loss": 2.1057, + "step": 10372 + }, + { + "epoch": 2.11, + "learning_rate": 8.572499652924958e-06, + "loss": 2.0958, + "step": 10373 + }, + { + "epoch": 2.11, + "learning_rate": 8.570790028105414e-06, + "loss": 2.0454, + "step": 10374 + }, + { + "epoch": 2.11, + "learning_rate": 8.569080445929071e-06, + "loss": 2.0246, + "step": 10375 + }, + { + "epoch": 2.11, + "learning_rate": 8.567370906446937e-06, + "loss": 2.1441, + "step": 10376 + }, + { + "epoch": 2.11, + "learning_rate": 8.565661409710016e-06, + "loss": 2.1303, + "step": 10377 + }, + { + "epoch": 2.11, + "learning_rate": 8.563951955769315e-06, + "loss": 2.0486, + "step": 10378 + }, + { + "epoch": 2.11, + "learning_rate": 8.562242544675835e-06, + "loss": 2.0512, + "step": 10379 + }, + { + "epoch": 2.11, + "learning_rate": 8.560533176480588e-06, + "loss": 2.116, + "step": 10380 + }, + { + "epoch": 2.11, + "learning_rate": 8.558823851234573e-06, + "loss": 2.1395, + "step": 10381 + }, + { + "epoch": 2.11, + "learning_rate": 8.557114568988787e-06, + "loss": 2.089, + "step": 10382 + }, + { + "epoch": 2.11, + "learning_rate": 8.555405329794234e-06, + "loss": 2.0987, + "step": 10383 + }, + { + "epoch": 2.11, + "learning_rate": 8.55369613370191e-06, + "loss": 2.1614, + "step": 10384 + }, + { + "epoch": 2.11, + "learning_rate": 8.551986980762814e-06, + "loss": 2.1006, + "step": 10385 + }, + { + "epoch": 2.11, + "learning_rate": 8.55027787102794e-06, + "loss": 2.1357, + "step": 10386 + }, + { + "epoch": 2.11, + "learning_rate": 8.548568804548286e-06, + "loss": 2.1465, + "step": 10387 + }, + { + "epoch": 2.11, + "learning_rate": 8.546859781374842e-06, + "loss": 2.1734, + "step": 10388 + }, + { + "epoch": 2.11, + "learning_rate": 8.545150801558597e-06, + "loss": 2.0591, + "step": 10389 + }, + { + "epoch": 2.11, + "learning_rate": 8.543441865150546e-06, + "loss": 2.1423, + "step": 10390 + }, + { + "epoch": 2.11, + "learning_rate": 8.54173297220168e-06, + "loss": 2.1347, + "step": 10391 + }, + { + "epoch": 2.11, + "learning_rate": 8.540024122762986e-06, + "loss": 2.0644, + "step": 10392 + }, + { + "epoch": 2.11, + "learning_rate": 8.53831531688545e-06, + "loss": 2.0552, + "step": 10393 + }, + { + "epoch": 2.11, + "learning_rate": 8.53660655462005e-06, + "loss": 2.1078, + "step": 10394 + }, + { + "epoch": 2.11, + "learning_rate": 8.534897836017784e-06, + "loss": 2.1705, + "step": 10395 + }, + { + "epoch": 2.11, + "learning_rate": 8.533189161129627e-06, + "loss": 2.0346, + "step": 10396 + }, + { + "epoch": 2.11, + "learning_rate": 8.531480530006562e-06, + "loss": 2.1512, + "step": 10397 + }, + { + "epoch": 2.11, + "learning_rate": 8.529771942699569e-06, + "loss": 2.0914, + "step": 10398 + }, + { + "epoch": 2.11, + "learning_rate": 8.528063399259621e-06, + "loss": 2.1326, + "step": 10399 + }, + { + "epoch": 2.11, + "learning_rate": 8.526354899737708e-06, + "loss": 2.1423, + "step": 10400 + }, + { + "epoch": 2.11, + "learning_rate": 8.524646444184798e-06, + "loss": 2.0782, + "step": 10401 + }, + { + "epoch": 2.11, + "learning_rate": 8.52293803265187e-06, + "loss": 2.1964, + "step": 10402 + }, + { + "epoch": 2.11, + "learning_rate": 8.521229665189891e-06, + "loss": 2.1637, + "step": 10403 + }, + { + "epoch": 2.11, + "learning_rate": 8.519521341849839e-06, + "loss": 2.1611, + "step": 10404 + }, + { + "epoch": 2.11, + "learning_rate": 8.517813062682687e-06, + "loss": 2.093, + "step": 10405 + }, + { + "epoch": 2.11, + "learning_rate": 8.516104827739399e-06, + "loss": 2.038, + "step": 10406 + }, + { + "epoch": 2.11, + "learning_rate": 8.514396637070947e-06, + "loss": 2.1196, + "step": 10407 + }, + { + "epoch": 2.11, + "learning_rate": 8.512688490728297e-06, + "loss": 2.192, + "step": 10408 + }, + { + "epoch": 2.11, + "learning_rate": 8.510980388762418e-06, + "loss": 2.1141, + "step": 10409 + }, + { + "epoch": 2.11, + "learning_rate": 8.509272331224267e-06, + "loss": 2.162, + "step": 10410 + }, + { + "epoch": 2.11, + "learning_rate": 8.507564318164816e-06, + "loss": 2.1146, + "step": 10411 + }, + { + "epoch": 2.11, + "learning_rate": 8.505856349635022e-06, + "loss": 2.1246, + "step": 10412 + }, + { + "epoch": 2.11, + "learning_rate": 8.504148425685844e-06, + "loss": 2.1252, + "step": 10413 + }, + { + "epoch": 2.11, + "learning_rate": 8.502440546368241e-06, + "loss": 2.1553, + "step": 10414 + }, + { + "epoch": 2.11, + "learning_rate": 8.500732711733177e-06, + "loss": 2.1382, + "step": 10415 + }, + { + "epoch": 2.11, + "learning_rate": 8.499024921831605e-06, + "loss": 2.1853, + "step": 10416 + }, + { + "epoch": 2.11, + "learning_rate": 8.497317176714479e-06, + "loss": 2.1671, + "step": 10417 + }, + { + "epoch": 2.11, + "learning_rate": 8.49560947643275e-06, + "loss": 2.1515, + "step": 10418 + }, + { + "epoch": 2.11, + "learning_rate": 8.493901821037377e-06, + "loss": 2.131, + "step": 10419 + }, + { + "epoch": 2.12, + "learning_rate": 8.49219421057931e-06, + "loss": 2.1736, + "step": 10420 + }, + { + "epoch": 2.12, + "learning_rate": 8.490486645109494e-06, + "loss": 2.1799, + "step": 10421 + }, + { + "epoch": 2.12, + "learning_rate": 8.488779124678883e-06, + "loss": 2.1806, + "step": 10422 + }, + { + "epoch": 2.12, + "learning_rate": 8.487071649338416e-06, + "loss": 2.1004, + "step": 10423 + }, + { + "epoch": 2.12, + "learning_rate": 8.48536421913905e-06, + "loss": 2.0841, + "step": 10424 + }, + { + "epoch": 2.12, + "learning_rate": 8.483656834131723e-06, + "loss": 2.1608, + "step": 10425 + }, + { + "epoch": 2.12, + "learning_rate": 8.481949494367378e-06, + "loss": 2.109, + "step": 10426 + }, + { + "epoch": 2.12, + "learning_rate": 8.480242199896957e-06, + "loss": 2.1154, + "step": 10427 + }, + { + "epoch": 2.12, + "learning_rate": 8.4785349507714e-06, + "loss": 2.1699, + "step": 10428 + }, + { + "epoch": 2.12, + "learning_rate": 8.47682774704165e-06, + "loss": 2.0705, + "step": 10429 + }, + { + "epoch": 2.12, + "learning_rate": 8.475120588758638e-06, + "loss": 2.0762, + "step": 10430 + }, + { + "epoch": 2.12, + "learning_rate": 8.473413475973307e-06, + "loss": 2.0911, + "step": 10431 + }, + { + "epoch": 2.12, + "learning_rate": 8.47170640873659e-06, + "loss": 2.0501, + "step": 10432 + }, + { + "epoch": 2.12, + "learning_rate": 8.469999387099414e-06, + "loss": 2.1049, + "step": 10433 + }, + { + "epoch": 2.12, + "learning_rate": 8.46829241111272e-06, + "loss": 2.0734, + "step": 10434 + }, + { + "epoch": 2.12, + "learning_rate": 8.466585480827436e-06, + "loss": 2.0855, + "step": 10435 + }, + { + "epoch": 2.12, + "learning_rate": 8.464878596294492e-06, + "loss": 2.1054, + "step": 10436 + }, + { + "epoch": 2.12, + "learning_rate": 8.463171757564816e-06, + "loss": 2.0916, + "step": 10437 + }, + { + "epoch": 2.12, + "learning_rate": 8.461464964689327e-06, + "loss": 2.0903, + "step": 10438 + }, + { + "epoch": 2.12, + "learning_rate": 8.459758217718964e-06, + "loss": 2.1491, + "step": 10439 + }, + { + "epoch": 2.12, + "learning_rate": 8.458051516704644e-06, + "loss": 2.1566, + "step": 10440 + }, + { + "epoch": 2.12, + "learning_rate": 8.456344861697291e-06, + "loss": 2.1421, + "step": 10441 + }, + { + "epoch": 2.12, + "learning_rate": 8.454638252747824e-06, + "loss": 2.1217, + "step": 10442 + }, + { + "epoch": 2.12, + "learning_rate": 8.452931689907161e-06, + "loss": 2.1765, + "step": 10443 + }, + { + "epoch": 2.12, + "learning_rate": 8.451225173226228e-06, + "loss": 2.1079, + "step": 10444 + }, + { + "epoch": 2.12, + "learning_rate": 8.449518702755938e-06, + "loss": 2.0671, + "step": 10445 + }, + { + "epoch": 2.12, + "learning_rate": 8.447812278547208e-06, + "loss": 2.0905, + "step": 10446 + }, + { + "epoch": 2.12, + "learning_rate": 8.446105900650948e-06, + "loss": 2.0635, + "step": 10447 + }, + { + "epoch": 2.12, + "learning_rate": 8.444399569118075e-06, + "loss": 2.0844, + "step": 10448 + }, + { + "epoch": 2.12, + "learning_rate": 8.442693283999502e-06, + "loss": 2.1375, + "step": 10449 + }, + { + "epoch": 2.12, + "learning_rate": 8.440987045346138e-06, + "loss": 2.1256, + "step": 10450 + }, + { + "epoch": 2.12, + "learning_rate": 8.439280853208887e-06, + "loss": 2.0934, + "step": 10451 + }, + { + "epoch": 2.12, + "learning_rate": 8.437574707638666e-06, + "loss": 2.1873, + "step": 10452 + }, + { + "epoch": 2.12, + "learning_rate": 8.43586860868637e-06, + "loss": 2.0723, + "step": 10453 + }, + { + "epoch": 2.12, + "learning_rate": 8.434162556402911e-06, + "loss": 2.0572, + "step": 10454 + }, + { + "epoch": 2.12, + "learning_rate": 8.432456550839194e-06, + "loss": 2.0646, + "step": 10455 + }, + { + "epoch": 2.12, + "learning_rate": 8.430750592046117e-06, + "loss": 2.1077, + "step": 10456 + }, + { + "epoch": 2.12, + "learning_rate": 8.42904468007458e-06, + "loss": 2.1595, + "step": 10457 + }, + { + "epoch": 2.12, + "learning_rate": 8.42733881497548e-06, + "loss": 2.0755, + "step": 10458 + }, + { + "epoch": 2.12, + "learning_rate": 8.425632996799721e-06, + "loss": 2.0721, + "step": 10459 + }, + { + "epoch": 2.12, + "learning_rate": 8.423927225598199e-06, + "loss": 2.0498, + "step": 10460 + }, + { + "epoch": 2.12, + "learning_rate": 8.422221501421805e-06, + "loss": 2.058, + "step": 10461 + }, + { + "epoch": 2.12, + "learning_rate": 8.420515824321434e-06, + "loss": 2.1229, + "step": 10462 + }, + { + "epoch": 2.12, + "learning_rate": 8.418810194347973e-06, + "loss": 2.0979, + "step": 10463 + }, + { + "epoch": 2.12, + "learning_rate": 8.417104611552323e-06, + "loss": 2.0405, + "step": 10464 + }, + { + "epoch": 2.12, + "learning_rate": 8.415399075985367e-06, + "loss": 2.1534, + "step": 10465 + }, + { + "epoch": 2.12, + "learning_rate": 8.413693587697994e-06, + "loss": 2.0523, + "step": 10466 + }, + { + "epoch": 2.12, + "learning_rate": 8.41198814674109e-06, + "loss": 2.1732, + "step": 10467 + }, + { + "epoch": 2.12, + "learning_rate": 8.410282753165538e-06, + "loss": 2.1645, + "step": 10468 + }, + { + "epoch": 2.13, + "learning_rate": 8.408577407022226e-06, + "loss": 2.179, + "step": 10469 + }, + { + "epoch": 2.13, + "learning_rate": 8.406872108362037e-06, + "loss": 2.1409, + "step": 10470 + }, + { + "epoch": 2.13, + "learning_rate": 8.405166857235846e-06, + "loss": 2.0604, + "step": 10471 + }, + { + "epoch": 2.13, + "learning_rate": 8.403461653694536e-06, + "loss": 2.1307, + "step": 10472 + }, + { + "epoch": 2.13, + "learning_rate": 8.401756497788986e-06, + "loss": 2.0527, + "step": 10473 + }, + { + "epoch": 2.13, + "learning_rate": 8.40005138957007e-06, + "loss": 2.1232, + "step": 10474 + }, + { + "epoch": 2.13, + "learning_rate": 8.398346329088663e-06, + "loss": 2.1268, + "step": 10475 + }, + { + "epoch": 2.13, + "learning_rate": 8.396641316395643e-06, + "loss": 2.1047, + "step": 10476 + }, + { + "epoch": 2.13, + "learning_rate": 8.394936351541878e-06, + "loss": 2.1085, + "step": 10477 + }, + { + "epoch": 2.13, + "learning_rate": 8.393231434578237e-06, + "loss": 2.1167, + "step": 10478 + }, + { + "epoch": 2.13, + "learning_rate": 8.391526565555598e-06, + "loss": 2.1163, + "step": 10479 + }, + { + "epoch": 2.13, + "learning_rate": 8.389821744524821e-06, + "loss": 2.034, + "step": 10480 + }, + { + "epoch": 2.13, + "learning_rate": 8.388116971536778e-06, + "loss": 2.1345, + "step": 10481 + }, + { + "epoch": 2.13, + "learning_rate": 8.386412246642331e-06, + "loss": 2.0833, + "step": 10482 + }, + { + "epoch": 2.13, + "learning_rate": 8.384707569892338e-06, + "loss": 2.1531, + "step": 10483 + }, + { + "epoch": 2.13, + "learning_rate": 8.383002941337672e-06, + "loss": 2.1807, + "step": 10484 + }, + { + "epoch": 2.13, + "learning_rate": 8.38129836102919e-06, + "loss": 2.077, + "step": 10485 + }, + { + "epoch": 2.13, + "learning_rate": 8.37959382901775e-06, + "loss": 2.1725, + "step": 10486 + }, + { + "epoch": 2.13, + "learning_rate": 8.37788934535421e-06, + "loss": 2.0329, + "step": 10487 + }, + { + "epoch": 2.13, + "learning_rate": 8.376184910089425e-06, + "loss": 2.112, + "step": 10488 + }, + { + "epoch": 2.13, + "learning_rate": 8.374480523274254e-06, + "loss": 2.144, + "step": 10489 + }, + { + "epoch": 2.13, + "learning_rate": 8.37277618495955e-06, + "loss": 2.12, + "step": 10490 + }, + { + "epoch": 2.13, + "learning_rate": 8.371071895196161e-06, + "loss": 2.1461, + "step": 10491 + }, + { + "epoch": 2.13, + "learning_rate": 8.369367654034945e-06, + "loss": 2.1457, + "step": 10492 + }, + { + "epoch": 2.13, + "learning_rate": 8.367663461526743e-06, + "loss": 2.1596, + "step": 10493 + }, + { + "epoch": 2.13, + "learning_rate": 8.365959317722408e-06, + "loss": 1.9882, + "step": 10494 + }, + { + "epoch": 2.13, + "learning_rate": 8.364255222672784e-06, + "loss": 2.0232, + "step": 10495 + }, + { + "epoch": 2.13, + "learning_rate": 8.362551176428719e-06, + "loss": 2.2228, + "step": 10496 + }, + { + "epoch": 2.13, + "learning_rate": 8.360847179041055e-06, + "loss": 2.0638, + "step": 10497 + }, + { + "epoch": 2.13, + "learning_rate": 8.359143230560632e-06, + "loss": 2.0566, + "step": 10498 + }, + { + "epoch": 2.13, + "learning_rate": 8.357439331038291e-06, + "loss": 2.042, + "step": 10499 + }, + { + "epoch": 2.13, + "learning_rate": 8.355735480524874e-06, + "loss": 2.1015, + "step": 10500 + }, + { + "epoch": 2.13, + "learning_rate": 8.354031679071218e-06, + "loss": 2.0674, + "step": 10501 + }, + { + "epoch": 2.13, + "learning_rate": 8.352327926728151e-06, + "loss": 2.1215, + "step": 10502 + }, + { + "epoch": 2.13, + "learning_rate": 8.350624223546521e-06, + "loss": 2.1642, + "step": 10503 + }, + { + "epoch": 2.13, + "learning_rate": 8.348920569577155e-06, + "loss": 2.0899, + "step": 10504 + }, + { + "epoch": 2.13, + "learning_rate": 8.347216964870884e-06, + "loss": 2.0716, + "step": 10505 + }, + { + "epoch": 2.13, + "learning_rate": 8.34551340947854e-06, + "loss": 2.0065, + "step": 10506 + }, + { + "epoch": 2.13, + "learning_rate": 8.343809903450945e-06, + "loss": 2.0992, + "step": 10507 + }, + { + "epoch": 2.13, + "learning_rate": 8.342106446838935e-06, + "loss": 2.0991, + "step": 10508 + }, + { + "epoch": 2.13, + "learning_rate": 8.340403039693335e-06, + "loss": 2.0903, + "step": 10509 + }, + { + "epoch": 2.13, + "learning_rate": 8.338699682064967e-06, + "loss": 2.1228, + "step": 10510 + }, + { + "epoch": 2.13, + "learning_rate": 8.336996374004654e-06, + "loss": 2.0917, + "step": 10511 + }, + { + "epoch": 2.13, + "learning_rate": 8.335293115563214e-06, + "loss": 2.1207, + "step": 10512 + }, + { + "epoch": 2.13, + "learning_rate": 8.333589906791475e-06, + "loss": 2.1244, + "step": 10513 + }, + { + "epoch": 2.13, + "learning_rate": 8.33188674774025e-06, + "loss": 2.188, + "step": 10514 + }, + { + "epoch": 2.13, + "learning_rate": 8.330183638460356e-06, + "loss": 2.0817, + "step": 10515 + }, + { + "epoch": 2.13, + "learning_rate": 8.32848057900261e-06, + "loss": 2.1189, + "step": 10516 + }, + { + "epoch": 2.13, + "learning_rate": 8.326777569417826e-06, + "loss": 1.9919, + "step": 10517 + }, + { + "epoch": 2.13, + "learning_rate": 8.325074609756816e-06, + "loss": 2.126, + "step": 10518 + }, + { + "epoch": 2.14, + "learning_rate": 8.323371700070391e-06, + "loss": 2.1303, + "step": 10519 + }, + { + "epoch": 2.14, + "learning_rate": 8.32166884040936e-06, + "loss": 2.1083, + "step": 10520 + }, + { + "epoch": 2.14, + "learning_rate": 8.319966030824534e-06, + "loss": 2.108, + "step": 10521 + }, + { + "epoch": 2.14, + "learning_rate": 8.318263271366715e-06, + "loss": 2.1221, + "step": 10522 + }, + { + "epoch": 2.14, + "learning_rate": 8.31656056208671e-06, + "loss": 2.1743, + "step": 10523 + }, + { + "epoch": 2.14, + "learning_rate": 8.314857903035325e-06, + "loss": 2.0863, + "step": 10524 + }, + { + "epoch": 2.14, + "learning_rate": 8.31315529426336e-06, + "loss": 2.0851, + "step": 10525 + }, + { + "epoch": 2.14, + "learning_rate": 8.311452735821616e-06, + "loss": 2.0861, + "step": 10526 + }, + { + "epoch": 2.14, + "learning_rate": 8.309750227760888e-06, + "loss": 2.0416, + "step": 10527 + }, + { + "epoch": 2.14, + "learning_rate": 8.30804777013198e-06, + "loss": 2.1597, + "step": 10528 + }, + { + "epoch": 2.14, + "learning_rate": 8.306345362985687e-06, + "loss": 2.1347, + "step": 10529 + }, + { + "epoch": 2.14, + "learning_rate": 8.3046430063728e-06, + "loss": 2.0662, + "step": 10530 + }, + { + "epoch": 2.14, + "learning_rate": 8.302940700344116e-06, + "loss": 2.1305, + "step": 10531 + }, + { + "epoch": 2.14, + "learning_rate": 8.301238444950419e-06, + "loss": 2.0765, + "step": 10532 + }, + { + "epoch": 2.14, + "learning_rate": 8.299536240242509e-06, + "loss": 2.1446, + "step": 10533 + }, + { + "epoch": 2.14, + "learning_rate": 8.29783408627117e-06, + "loss": 2.1684, + "step": 10534 + }, + { + "epoch": 2.14, + "learning_rate": 8.296131983087186e-06, + "loss": 2.0914, + "step": 10535 + }, + { + "epoch": 2.14, + "learning_rate": 8.294429930741348e-06, + "loss": 2.0992, + "step": 10536 + }, + { + "epoch": 2.14, + "learning_rate": 8.292727929284436e-06, + "loss": 2.068, + "step": 10537 + }, + { + "epoch": 2.14, + "learning_rate": 8.291025978767236e-06, + "loss": 2.1514, + "step": 10538 + }, + { + "epoch": 2.14, + "learning_rate": 8.289324079240525e-06, + "loss": 2.0924, + "step": 10539 + }, + { + "epoch": 2.14, + "learning_rate": 8.287622230755085e-06, + "loss": 2.0934, + "step": 10540 + }, + { + "epoch": 2.14, + "learning_rate": 8.285920433361695e-06, + "loss": 2.1287, + "step": 10541 + }, + { + "epoch": 2.14, + "learning_rate": 8.284218687111125e-06, + "loss": 2.0915, + "step": 10542 + }, + { + "epoch": 2.14, + "learning_rate": 8.282516992054156e-06, + "loss": 2.1311, + "step": 10543 + }, + { + "epoch": 2.14, + "learning_rate": 8.28081534824156e-06, + "loss": 2.0506, + "step": 10544 + }, + { + "epoch": 2.14, + "learning_rate": 8.27911375572411e-06, + "loss": 2.1017, + "step": 10545 + }, + { + "epoch": 2.14, + "learning_rate": 8.277412214552574e-06, + "loss": 2.0828, + "step": 10546 + }, + { + "epoch": 2.14, + "learning_rate": 8.275710724777717e-06, + "loss": 2.1391, + "step": 10547 + }, + { + "epoch": 2.14, + "learning_rate": 8.274009286450316e-06, + "loss": 2.0621, + "step": 10548 + }, + { + "epoch": 2.14, + "learning_rate": 8.272307899621131e-06, + "loss": 2.154, + "step": 10549 + }, + { + "epoch": 2.14, + "learning_rate": 8.270606564340925e-06, + "loss": 2.0531, + "step": 10550 + }, + { + "epoch": 2.14, + "learning_rate": 8.268905280660464e-06, + "loss": 2.1843, + "step": 10551 + }, + { + "epoch": 2.14, + "learning_rate": 8.267204048630501e-06, + "loss": 2.0693, + "step": 10552 + }, + { + "epoch": 2.14, + "learning_rate": 8.265502868301807e-06, + "loss": 2.0709, + "step": 10553 + }, + { + "epoch": 2.14, + "learning_rate": 8.263801739725133e-06, + "loss": 2.1154, + "step": 10554 + }, + { + "epoch": 2.14, + "learning_rate": 8.262100662951238e-06, + "loss": 2.1682, + "step": 10555 + }, + { + "epoch": 2.14, + "learning_rate": 8.260399638030873e-06, + "loss": 2.1631, + "step": 10556 + }, + { + "epoch": 2.14, + "learning_rate": 8.258698665014795e-06, + "loss": 2.0715, + "step": 10557 + }, + { + "epoch": 2.14, + "learning_rate": 8.256997743953755e-06, + "loss": 2.1715, + "step": 10558 + }, + { + "epoch": 2.14, + "learning_rate": 8.255296874898501e-06, + "loss": 2.0925, + "step": 10559 + }, + { + "epoch": 2.14, + "learning_rate": 8.253596057899788e-06, + "loss": 2.1723, + "step": 10560 + }, + { + "epoch": 2.14, + "learning_rate": 8.251895293008355e-06, + "loss": 2.0623, + "step": 10561 + }, + { + "epoch": 2.14, + "learning_rate": 8.250194580274953e-06, + "loss": 2.1878, + "step": 10562 + }, + { + "epoch": 2.14, + "learning_rate": 8.248493919750322e-06, + "loss": 2.1677, + "step": 10563 + }, + { + "epoch": 2.14, + "learning_rate": 8.246793311485209e-06, + "loss": 2.1683, + "step": 10564 + }, + { + "epoch": 2.14, + "learning_rate": 8.245092755530355e-06, + "loss": 2.1942, + "step": 10565 + }, + { + "epoch": 2.14, + "learning_rate": 8.243392251936494e-06, + "loss": 2.1325, + "step": 10566 + }, + { + "epoch": 2.14, + "learning_rate": 8.241691800754363e-06, + "loss": 2.0589, + "step": 10567 + }, + { + "epoch": 2.15, + "learning_rate": 8.239991402034706e-06, + "loss": 2.1219, + "step": 10568 + }, + { + "epoch": 2.15, + "learning_rate": 8.238291055828254e-06, + "loss": 2.0668, + "step": 10569 + }, + { + "epoch": 2.15, + "learning_rate": 8.236590762185741e-06, + "loss": 2.1858, + "step": 10570 + }, + { + "epoch": 2.15, + "learning_rate": 8.234890521157896e-06, + "loss": 2.0411, + "step": 10571 + }, + { + "epoch": 2.15, + "learning_rate": 8.233190332795444e-06, + "loss": 2.0996, + "step": 10572 + }, + { + "epoch": 2.15, + "learning_rate": 8.231490197149126e-06, + "loss": 2.1435, + "step": 10573 + }, + { + "epoch": 2.15, + "learning_rate": 8.22979011426966e-06, + "loss": 2.0853, + "step": 10574 + }, + { + "epoch": 2.15, + "learning_rate": 8.228090084207776e-06, + "loss": 2.1859, + "step": 10575 + }, + { + "epoch": 2.15, + "learning_rate": 8.22639010701419e-06, + "loss": 2.1021, + "step": 10576 + }, + { + "epoch": 2.15, + "learning_rate": 8.224690182739632e-06, + "loss": 2.1312, + "step": 10577 + }, + { + "epoch": 2.15, + "learning_rate": 8.22299031143482e-06, + "loss": 2.1604, + "step": 10578 + }, + { + "epoch": 2.15, + "learning_rate": 8.221290493150473e-06, + "loss": 2.1232, + "step": 10579 + }, + { + "epoch": 2.15, + "learning_rate": 8.219590727937304e-06, + "loss": 2.1676, + "step": 10580 + }, + { + "epoch": 2.15, + "learning_rate": 8.217891015846035e-06, + "loss": 2.1516, + "step": 10581 + }, + { + "epoch": 2.15, + "learning_rate": 8.216191356927376e-06, + "loss": 2.1615, + "step": 10582 + }, + { + "epoch": 2.15, + "learning_rate": 8.214491751232041e-06, + "loss": 2.0626, + "step": 10583 + }, + { + "epoch": 2.15, + "learning_rate": 8.212792198810744e-06, + "loss": 2.0634, + "step": 10584 + }, + { + "epoch": 2.15, + "learning_rate": 8.211092699714192e-06, + "loss": 2.1591, + "step": 10585 + }, + { + "epoch": 2.15, + "learning_rate": 8.209393253993089e-06, + "loss": 2.0931, + "step": 10586 + }, + { + "epoch": 2.15, + "learning_rate": 8.207693861698144e-06, + "loss": 2.1307, + "step": 10587 + }, + { + "epoch": 2.15, + "learning_rate": 8.205994522880065e-06, + "loss": 2.1151, + "step": 10588 + }, + { + "epoch": 2.15, + "learning_rate": 8.204295237589552e-06, + "loss": 2.1056, + "step": 10589 + }, + { + "epoch": 2.15, + "learning_rate": 8.202596005877307e-06, + "loss": 2.056, + "step": 10590 + }, + { + "epoch": 2.15, + "learning_rate": 8.200896827794022e-06, + "loss": 2.1915, + "step": 10591 + }, + { + "epoch": 2.15, + "learning_rate": 8.19919770339041e-06, + "loss": 2.1427, + "step": 10592 + }, + { + "epoch": 2.15, + "learning_rate": 8.197498632717159e-06, + "loss": 2.0803, + "step": 10593 + }, + { + "epoch": 2.15, + "learning_rate": 8.195799615824966e-06, + "loss": 2.1307, + "step": 10594 + }, + { + "epoch": 2.15, + "learning_rate": 8.194100652764523e-06, + "loss": 2.1444, + "step": 10595 + }, + { + "epoch": 2.15, + "learning_rate": 8.192401743586518e-06, + "loss": 2.1383, + "step": 10596 + }, + { + "epoch": 2.15, + "learning_rate": 8.19070288834165e-06, + "loss": 2.1255, + "step": 10597 + }, + { + "epoch": 2.15, + "learning_rate": 8.189004087080603e-06, + "loss": 2.0707, + "step": 10598 + }, + { + "epoch": 2.15, + "learning_rate": 8.187305339854063e-06, + "loss": 2.1394, + "step": 10599 + }, + { + "epoch": 2.15, + "learning_rate": 8.185606646712714e-06, + "loss": 2.1112, + "step": 10600 + }, + { + "epoch": 2.15, + "learning_rate": 8.183908007707244e-06, + "loss": 2.1066, + "step": 10601 + }, + { + "epoch": 2.15, + "learning_rate": 8.182209422888335e-06, + "loss": 2.0694, + "step": 10602 + }, + { + "epoch": 2.15, + "learning_rate": 8.180510892306665e-06, + "loss": 2.2003, + "step": 10603 + }, + { + "epoch": 2.15, + "learning_rate": 8.178812416012912e-06, + "loss": 2.0161, + "step": 10604 + }, + { + "epoch": 2.15, + "learning_rate": 8.177113994057757e-06, + "loss": 2.1286, + "step": 10605 + }, + { + "epoch": 2.15, + "learning_rate": 8.175415626491872e-06, + "loss": 2.0661, + "step": 10606 + }, + { + "epoch": 2.15, + "learning_rate": 8.173717313365932e-06, + "loss": 2.1616, + "step": 10607 + }, + { + "epoch": 2.15, + "learning_rate": 8.172019054730611e-06, + "loss": 2.1732, + "step": 10608 + }, + { + "epoch": 2.15, + "learning_rate": 8.170320850636581e-06, + "loss": 2.1432, + "step": 10609 + }, + { + "epoch": 2.15, + "learning_rate": 8.168622701134507e-06, + "loss": 2.1234, + "step": 10610 + }, + { + "epoch": 2.15, + "learning_rate": 8.166924606275056e-06, + "loss": 2.0974, + "step": 10611 + }, + { + "epoch": 2.15, + "learning_rate": 8.1652265661089e-06, + "loss": 2.0434, + "step": 10612 + }, + { + "epoch": 2.15, + "learning_rate": 8.163528580686699e-06, + "loss": 2.0881, + "step": 10613 + }, + { + "epoch": 2.15, + "learning_rate": 8.161830650059117e-06, + "loss": 2.1437, + "step": 10614 + }, + { + "epoch": 2.15, + "learning_rate": 8.160132774276814e-06, + "loss": 2.0132, + "step": 10615 + }, + { + "epoch": 2.15, + "learning_rate": 8.158434953390446e-06, + "loss": 2.1533, + "step": 10616 + }, + { + "epoch": 2.16, + "learning_rate": 8.156737187450677e-06, + "loss": 2.146, + "step": 10617 + }, + { + "epoch": 2.16, + "learning_rate": 8.155039476508162e-06, + "loss": 2.1524, + "step": 10618 + }, + { + "epoch": 2.16, + "learning_rate": 8.153341820613553e-06, + "loss": 2.1798, + "step": 10619 + }, + { + "epoch": 2.16, + "learning_rate": 8.1516442198175e-06, + "loss": 2.1925, + "step": 10620 + }, + { + "epoch": 2.16, + "learning_rate": 8.14994667417066e-06, + "loss": 2.1095, + "step": 10621 + }, + { + "epoch": 2.16, + "learning_rate": 8.148249183723682e-06, + "loss": 2.1478, + "step": 10622 + }, + { + "epoch": 2.16, + "learning_rate": 8.146551748527212e-06, + "loss": 2.2199, + "step": 10623 + }, + { + "epoch": 2.16, + "learning_rate": 8.144854368631894e-06, + "loss": 2.1148, + "step": 10624 + }, + { + "epoch": 2.16, + "learning_rate": 8.143157044088376e-06, + "loss": 2.0809, + "step": 10625 + }, + { + "epoch": 2.16, + "learning_rate": 8.1414597749473e-06, + "loss": 2.2079, + "step": 10626 + }, + { + "epoch": 2.16, + "learning_rate": 8.139762561259309e-06, + "loss": 2.0507, + "step": 10627 + }, + { + "epoch": 2.16, + "learning_rate": 8.138065403075034e-06, + "loss": 2.0371, + "step": 10628 + }, + { + "epoch": 2.16, + "learning_rate": 8.136368300445127e-06, + "loss": 2.1241, + "step": 10629 + }, + { + "epoch": 2.16, + "learning_rate": 8.134671253420214e-06, + "loss": 2.1797, + "step": 10630 + }, + { + "epoch": 2.16, + "learning_rate": 8.13297426205093e-06, + "loss": 2.1966, + "step": 10631 + }, + { + "epoch": 2.16, + "learning_rate": 8.131277326387914e-06, + "loss": 2.1323, + "step": 10632 + }, + { + "epoch": 2.16, + "learning_rate": 8.129580446481793e-06, + "loss": 2.089, + "step": 10633 + }, + { + "epoch": 2.16, + "learning_rate": 8.127883622383198e-06, + "loss": 2.0942, + "step": 10634 + }, + { + "epoch": 2.16, + "learning_rate": 8.126186854142757e-06, + "loss": 2.2212, + "step": 10635 + }, + { + "epoch": 2.16, + "learning_rate": 8.12449014181109e-06, + "loss": 2.1138, + "step": 10636 + }, + { + "epoch": 2.16, + "learning_rate": 8.122793485438833e-06, + "loss": 2.1347, + "step": 10637 + }, + { + "epoch": 2.16, + "learning_rate": 8.121096885076602e-06, + "loss": 2.1276, + "step": 10638 + }, + { + "epoch": 2.16, + "learning_rate": 8.119400340775021e-06, + "loss": 2.068, + "step": 10639 + }, + { + "epoch": 2.16, + "learning_rate": 8.117703852584706e-06, + "loss": 2.1625, + "step": 10640 + }, + { + "epoch": 2.16, + "learning_rate": 8.116007420556275e-06, + "loss": 2.1386, + "step": 10641 + }, + { + "epoch": 2.16, + "learning_rate": 8.114311044740351e-06, + "loss": 2.0994, + "step": 10642 + }, + { + "epoch": 2.16, + "learning_rate": 8.112614725187545e-06, + "loss": 2.1851, + "step": 10643 + }, + { + "epoch": 2.16, + "learning_rate": 8.110918461948464e-06, + "loss": 2.1023, + "step": 10644 + }, + { + "epoch": 2.16, + "learning_rate": 8.109222255073728e-06, + "loss": 2.093, + "step": 10645 + }, + { + "epoch": 2.16, + "learning_rate": 8.107526104613939e-06, + "loss": 2.1051, + "step": 10646 + }, + { + "epoch": 2.16, + "learning_rate": 8.105830010619711e-06, + "loss": 2.0871, + "step": 10647 + }, + { + "epoch": 2.16, + "learning_rate": 8.104133973141648e-06, + "loss": 2.0703, + "step": 10648 + }, + { + "epoch": 2.16, + "learning_rate": 8.102437992230354e-06, + "loss": 2.1097, + "step": 10649 + }, + { + "epoch": 2.16, + "learning_rate": 8.100742067936433e-06, + "loss": 2.1357, + "step": 10650 + }, + { + "epoch": 2.16, + "learning_rate": 8.099046200310484e-06, + "loss": 2.0994, + "step": 10651 + }, + { + "epoch": 2.16, + "learning_rate": 8.097350389403107e-06, + "loss": 2.0499, + "step": 10652 + }, + { + "epoch": 2.16, + "learning_rate": 8.095654635264904e-06, + "loss": 2.1364, + "step": 10653 + }, + { + "epoch": 2.16, + "learning_rate": 8.093958937946465e-06, + "loss": 2.1228, + "step": 10654 + }, + { + "epoch": 2.16, + "learning_rate": 8.092263297498389e-06, + "loss": 2.1701, + "step": 10655 + }, + { + "epoch": 2.16, + "learning_rate": 8.09056771397126e-06, + "loss": 2.1376, + "step": 10656 + }, + { + "epoch": 2.16, + "learning_rate": 8.088872187415682e-06, + "loss": 2.1959, + "step": 10657 + }, + { + "epoch": 2.16, + "learning_rate": 8.087176717882238e-06, + "loss": 2.1451, + "step": 10658 + }, + { + "epoch": 2.16, + "learning_rate": 8.085481305421513e-06, + "loss": 2.0778, + "step": 10659 + }, + { + "epoch": 2.16, + "learning_rate": 8.083785950084096e-06, + "loss": 2.0963, + "step": 10660 + }, + { + "epoch": 2.16, + "learning_rate": 8.082090651920567e-06, + "loss": 2.137, + "step": 10661 + }, + { + "epoch": 2.16, + "learning_rate": 8.080395410981515e-06, + "loss": 2.1295, + "step": 10662 + }, + { + "epoch": 2.16, + "learning_rate": 8.078700227317518e-06, + "loss": 2.1445, + "step": 10663 + }, + { + "epoch": 2.16, + "learning_rate": 8.077005100979153e-06, + "loss": 2.0894, + "step": 10664 + }, + { + "epoch": 2.16, + "learning_rate": 8.075310032017e-06, + "loss": 1.9961, + "step": 10665 + }, + { + "epoch": 2.17, + "learning_rate": 8.07361502048163e-06, + "loss": 2.0168, + "step": 10666 + }, + { + "epoch": 2.17, + "learning_rate": 8.071920066423625e-06, + "loss": 2.0666, + "step": 10667 + }, + { + "epoch": 2.17, + "learning_rate": 8.070225169893552e-06, + "loss": 2.1314, + "step": 10668 + }, + { + "epoch": 2.17, + "learning_rate": 8.068530330941979e-06, + "loss": 2.1904, + "step": 10669 + }, + { + "epoch": 2.17, + "learning_rate": 8.06683554961948e-06, + "loss": 2.1743, + "step": 10670 + }, + { + "epoch": 2.17, + "learning_rate": 8.065140825976619e-06, + "loss": 2.1065, + "step": 10671 + }, + { + "epoch": 2.17, + "learning_rate": 8.06344616006396e-06, + "loss": 2.1504, + "step": 10672 + }, + { + "epoch": 2.17, + "learning_rate": 8.061751551932073e-06, + "loss": 2.0938, + "step": 10673 + }, + { + "epoch": 2.17, + "learning_rate": 8.060057001631514e-06, + "loss": 2.1492, + "step": 10674 + }, + { + "epoch": 2.17, + "learning_rate": 8.058362509212847e-06, + "loss": 2.1639, + "step": 10675 + }, + { + "epoch": 2.17, + "learning_rate": 8.05666807472662e-06, + "loss": 2.0766, + "step": 10676 + }, + { + "epoch": 2.17, + "learning_rate": 8.054973698223403e-06, + "loss": 2.1681, + "step": 10677 + }, + { + "epoch": 2.17, + "learning_rate": 8.053279379753748e-06, + "loss": 2.163, + "step": 10678 + }, + { + "epoch": 2.17, + "learning_rate": 8.051585119368204e-06, + "loss": 2.1084, + "step": 10679 + }, + { + "epoch": 2.17, + "learning_rate": 8.049890917117324e-06, + "loss": 2.0645, + "step": 10680 + }, + { + "epoch": 2.17, + "learning_rate": 8.048196773051656e-06, + "loss": 2.0695, + "step": 10681 + }, + { + "epoch": 2.17, + "learning_rate": 8.04650268722175e-06, + "loss": 2.142, + "step": 10682 + }, + { + "epoch": 2.17, + "learning_rate": 8.044808659678156e-06, + "loss": 2.148, + "step": 10683 + }, + { + "epoch": 2.17, + "learning_rate": 8.043114690471415e-06, + "loss": 2.0966, + "step": 10684 + }, + { + "epoch": 2.17, + "learning_rate": 8.041420779652064e-06, + "loss": 2.1627, + "step": 10685 + }, + { + "epoch": 2.17, + "learning_rate": 8.039726927270655e-06, + "loss": 2.0799, + "step": 10686 + }, + { + "epoch": 2.17, + "learning_rate": 8.038033133377723e-06, + "loss": 2.1088, + "step": 10687 + }, + { + "epoch": 2.17, + "learning_rate": 8.0363393980238e-06, + "loss": 2.1823, + "step": 10688 + }, + { + "epoch": 2.17, + "learning_rate": 8.034645721259431e-06, + "loss": 2.1253, + "step": 10689 + }, + { + "epoch": 2.17, + "learning_rate": 8.032952103135141e-06, + "loss": 2.1257, + "step": 10690 + }, + { + "epoch": 2.17, + "learning_rate": 8.031258543701471e-06, + "loss": 2.1205, + "step": 10691 + }, + { + "epoch": 2.17, + "learning_rate": 8.029565043008944e-06, + "loss": 2.0431, + "step": 10692 + }, + { + "epoch": 2.17, + "learning_rate": 8.027871601108095e-06, + "loss": 2.1272, + "step": 10693 + }, + { + "epoch": 2.17, + "learning_rate": 8.026178218049448e-06, + "loss": 2.1345, + "step": 10694 + }, + { + "epoch": 2.17, + "learning_rate": 8.024484893883526e-06, + "loss": 2.1089, + "step": 10695 + }, + { + "epoch": 2.17, + "learning_rate": 8.022791628660856e-06, + "loss": 2.1231, + "step": 10696 + }, + { + "epoch": 2.17, + "learning_rate": 8.021098422431961e-06, + "loss": 2.1142, + "step": 10697 + }, + { + "epoch": 2.17, + "learning_rate": 8.01940527524736e-06, + "loss": 2.1235, + "step": 10698 + }, + { + "epoch": 2.17, + "learning_rate": 8.017712187157568e-06, + "loss": 2.0796, + "step": 10699 + }, + { + "epoch": 2.17, + "learning_rate": 8.0160191582131e-06, + "loss": 2.085, + "step": 10700 + }, + { + "epoch": 2.17, + "learning_rate": 8.01432618846448e-06, + "loss": 2.1636, + "step": 10701 + }, + { + "epoch": 2.17, + "learning_rate": 8.012633277962214e-06, + "loss": 2.121, + "step": 10702 + }, + { + "epoch": 2.17, + "learning_rate": 8.010940426756816e-06, + "loss": 2.1265, + "step": 10703 + }, + { + "epoch": 2.17, + "learning_rate": 8.009247634898792e-06, + "loss": 2.1072, + "step": 10704 + }, + { + "epoch": 2.17, + "learning_rate": 8.00755490243865e-06, + "loss": 2.1519, + "step": 10705 + }, + { + "epoch": 2.17, + "learning_rate": 8.0058622294269e-06, + "loss": 2.1867, + "step": 10706 + }, + { + "epoch": 2.17, + "learning_rate": 8.004169615914045e-06, + "loss": 2.0239, + "step": 10707 + }, + { + "epoch": 2.17, + "learning_rate": 8.002477061950586e-06, + "loss": 2.0868, + "step": 10708 + }, + { + "epoch": 2.17, + "learning_rate": 8.00078456758702e-06, + "loss": 2.2111, + "step": 10709 + }, + { + "epoch": 2.17, + "learning_rate": 7.999092132873851e-06, + "loss": 2.1407, + "step": 10710 + }, + { + "epoch": 2.17, + "learning_rate": 7.997399757861575e-06, + "loss": 2.1025, + "step": 10711 + }, + { + "epoch": 2.17, + "learning_rate": 7.995707442600686e-06, + "loss": 2.105, + "step": 10712 + }, + { + "epoch": 2.17, + "learning_rate": 7.99401518714168e-06, + "loss": 2.2018, + "step": 10713 + }, + { + "epoch": 2.17, + "learning_rate": 7.992322991535047e-06, + "loss": 2.1693, + "step": 10714 + }, + { + "epoch": 2.17, + "learning_rate": 7.990630855831275e-06, + "loss": 2.0345, + "step": 10715 + }, + { + "epoch": 2.18, + "learning_rate": 7.988938780080852e-06, + "loss": 2.1124, + "step": 10716 + }, + { + "epoch": 2.18, + "learning_rate": 7.98724676433427e-06, + "loss": 2.1665, + "step": 10717 + }, + { + "epoch": 2.18, + "learning_rate": 7.985554808642008e-06, + "loss": 2.0765, + "step": 10718 + }, + { + "epoch": 2.18, + "learning_rate": 7.983862913054552e-06, + "loss": 2.1497, + "step": 10719 + }, + { + "epoch": 2.18, + "learning_rate": 7.982171077622376e-06, + "loss": 2.1614, + "step": 10720 + }, + { + "epoch": 2.18, + "learning_rate": 7.98047930239597e-06, + "loss": 2.1447, + "step": 10721 + }, + { + "epoch": 2.18, + "learning_rate": 7.978787587425802e-06, + "loss": 2.1237, + "step": 10722 + }, + { + "epoch": 2.18, + "learning_rate": 7.977095932762355e-06, + "loss": 2.1584, + "step": 10723 + }, + { + "epoch": 2.18, + "learning_rate": 7.975404338456096e-06, + "loss": 2.1297, + "step": 10724 + }, + { + "epoch": 2.18, + "learning_rate": 7.973712804557498e-06, + "loss": 2.1967, + "step": 10725 + }, + { + "epoch": 2.18, + "learning_rate": 7.972021331117038e-06, + "loss": 2.1037, + "step": 10726 + }, + { + "epoch": 2.18, + "learning_rate": 7.970329918185175e-06, + "loss": 2.0677, + "step": 10727 + }, + { + "epoch": 2.18, + "learning_rate": 7.968638565812384e-06, + "loss": 2.1173, + "step": 10728 + }, + { + "epoch": 2.18, + "learning_rate": 7.966947274049121e-06, + "loss": 2.1048, + "step": 10729 + }, + { + "epoch": 2.18, + "learning_rate": 7.965256042945854e-06, + "loss": 2.1569, + "step": 10730 + }, + { + "epoch": 2.18, + "learning_rate": 7.963564872553046e-06, + "loss": 2.0956, + "step": 10731 + }, + { + "epoch": 2.18, + "learning_rate": 7.961873762921155e-06, + "loss": 2.1405, + "step": 10732 + }, + { + "epoch": 2.18, + "learning_rate": 7.960182714100635e-06, + "loss": 2.1151, + "step": 10733 + }, + { + "epoch": 2.18, + "learning_rate": 7.958491726141944e-06, + "loss": 2.0525, + "step": 10734 + }, + { + "epoch": 2.18, + "learning_rate": 7.956800799095536e-06, + "loss": 2.174, + "step": 10735 + }, + { + "epoch": 2.18, + "learning_rate": 7.955109933011862e-06, + "loss": 2.156, + "step": 10736 + }, + { + "epoch": 2.18, + "learning_rate": 7.953419127941375e-06, + "loss": 2.0773, + "step": 10737 + }, + { + "epoch": 2.18, + "learning_rate": 7.951728383934521e-06, + "loss": 2.1098, + "step": 10738 + }, + { + "epoch": 2.18, + "learning_rate": 7.950037701041748e-06, + "loss": 2.1607, + "step": 10739 + }, + { + "epoch": 2.18, + "learning_rate": 7.948347079313494e-06, + "loss": 2.1178, + "step": 10740 + }, + { + "epoch": 2.18, + "learning_rate": 7.946656518800214e-06, + "loss": 2.1466, + "step": 10741 + }, + { + "epoch": 2.18, + "learning_rate": 7.94496601955234e-06, + "loss": 2.1524, + "step": 10742 + }, + { + "epoch": 2.18, + "learning_rate": 7.943275581620317e-06, + "loss": 2.1043, + "step": 10743 + }, + { + "epoch": 2.18, + "learning_rate": 7.941585205054578e-06, + "loss": 2.1059, + "step": 10744 + }, + { + "epoch": 2.18, + "learning_rate": 7.939894889905557e-06, + "loss": 2.1736, + "step": 10745 + }, + { + "epoch": 2.18, + "learning_rate": 7.938204636223695e-06, + "loss": 2.1895, + "step": 10746 + }, + { + "epoch": 2.18, + "learning_rate": 7.936514444059417e-06, + "loss": 2.1325, + "step": 10747 + }, + { + "epoch": 2.18, + "learning_rate": 7.934824313463158e-06, + "loss": 2.1671, + "step": 10748 + }, + { + "epoch": 2.18, + "learning_rate": 7.933134244485342e-06, + "loss": 2.1274, + "step": 10749 + }, + { + "epoch": 2.18, + "learning_rate": 7.931444237176397e-06, + "loss": 2.1926, + "step": 10750 + }, + { + "epoch": 2.18, + "learning_rate": 7.929754291586751e-06, + "loss": 2.1097, + "step": 10751 + }, + { + "epoch": 2.18, + "learning_rate": 7.928064407766822e-06, + "loss": 2.1014, + "step": 10752 + }, + { + "epoch": 2.18, + "learning_rate": 7.926374585767032e-06, + "loss": 2.0504, + "step": 10753 + }, + { + "epoch": 2.18, + "learning_rate": 7.924684825637803e-06, + "loss": 2.1276, + "step": 10754 + }, + { + "epoch": 2.18, + "learning_rate": 7.922995127429547e-06, + "loss": 2.1967, + "step": 10755 + }, + { + "epoch": 2.18, + "learning_rate": 7.921305491192687e-06, + "loss": 2.0577, + "step": 10756 + }, + { + "epoch": 2.18, + "learning_rate": 7.919615916977627e-06, + "loss": 2.1288, + "step": 10757 + }, + { + "epoch": 2.18, + "learning_rate": 7.917926404834786e-06, + "loss": 2.176, + "step": 10758 + }, + { + "epoch": 2.18, + "learning_rate": 7.916236954814574e-06, + "loss": 2.1384, + "step": 10759 + }, + { + "epoch": 2.18, + "learning_rate": 7.91454756696739e-06, + "loss": 2.1318, + "step": 10760 + }, + { + "epoch": 2.18, + "learning_rate": 7.912858241343651e-06, + "loss": 2.1718, + "step": 10761 + }, + { + "epoch": 2.18, + "learning_rate": 7.911168977993755e-06, + "loss": 2.1468, + "step": 10762 + }, + { + "epoch": 2.18, + "learning_rate": 7.909479776968107e-06, + "loss": 2.1004, + "step": 10763 + }, + { + "epoch": 2.18, + "learning_rate": 7.907790638317107e-06, + "loss": 2.124, + "step": 10764 + }, + { + "epoch": 2.19, + "learning_rate": 7.90610156209115e-06, + "loss": 2.1158, + "step": 10765 + }, + { + "epoch": 2.19, + "learning_rate": 7.90441254834064e-06, + "loss": 2.0763, + "step": 10766 + }, + { + "epoch": 2.19, + "learning_rate": 7.902723597115967e-06, + "loss": 2.1227, + "step": 10767 + }, + { + "epoch": 2.19, + "learning_rate": 7.901034708467526e-06, + "loss": 2.2072, + "step": 10768 + }, + { + "epoch": 2.19, + "learning_rate": 7.899345882445703e-06, + "loss": 2.103, + "step": 10769 + }, + { + "epoch": 2.19, + "learning_rate": 7.897657119100894e-06, + "loss": 2.1247, + "step": 10770 + }, + { + "epoch": 2.19, + "learning_rate": 7.895968418483485e-06, + "loss": 2.1773, + "step": 10771 + }, + { + "epoch": 2.19, + "learning_rate": 7.894279780643861e-06, + "loss": 2.1189, + "step": 10772 + }, + { + "epoch": 2.19, + "learning_rate": 7.892591205632403e-06, + "loss": 2.1843, + "step": 10773 + }, + { + "epoch": 2.19, + "learning_rate": 7.890902693499496e-06, + "loss": 2.138, + "step": 10774 + }, + { + "epoch": 2.19, + "learning_rate": 7.88921424429552e-06, + "loss": 2.1379, + "step": 10775 + }, + { + "epoch": 2.19, + "learning_rate": 7.887525858070854e-06, + "loss": 2.1456, + "step": 10776 + }, + { + "epoch": 2.19, + "learning_rate": 7.88583753487587e-06, + "loss": 2.1021, + "step": 10777 + }, + { + "epoch": 2.19, + "learning_rate": 7.884149274760944e-06, + "loss": 2.1262, + "step": 10778 + }, + { + "epoch": 2.19, + "learning_rate": 7.88246107777645e-06, + "loss": 2.1415, + "step": 10779 + }, + { + "epoch": 2.19, + "learning_rate": 7.88077294397276e-06, + "loss": 2.1583, + "step": 10780 + }, + { + "epoch": 2.19, + "learning_rate": 7.879084873400238e-06, + "loss": 2.0818, + "step": 10781 + }, + { + "epoch": 2.19, + "learning_rate": 7.877396866109255e-06, + "loss": 2.1375, + "step": 10782 + }, + { + "epoch": 2.19, + "learning_rate": 7.875708922150175e-06, + "loss": 2.1792, + "step": 10783 + }, + { + "epoch": 2.19, + "learning_rate": 7.874021041573355e-06, + "loss": 2.1238, + "step": 10784 + }, + { + "epoch": 2.19, + "learning_rate": 7.872333224429166e-06, + "loss": 2.1305, + "step": 10785 + }, + { + "epoch": 2.19, + "learning_rate": 7.870645470767963e-06, + "loss": 2.0426, + "step": 10786 + }, + { + "epoch": 2.19, + "learning_rate": 7.868957780640104e-06, + "loss": 2.1162, + "step": 10787 + }, + { + "epoch": 2.19, + "learning_rate": 7.867270154095942e-06, + "loss": 2.0993, + "step": 10788 + }, + { + "epoch": 2.19, + "learning_rate": 7.865582591185828e-06, + "loss": 2.197, + "step": 10789 + }, + { + "epoch": 2.19, + "learning_rate": 7.863895091960121e-06, + "loss": 2.1553, + "step": 10790 + }, + { + "epoch": 2.19, + "learning_rate": 7.862207656469169e-06, + "loss": 2.1925, + "step": 10791 + }, + { + "epoch": 2.19, + "learning_rate": 7.860520284763317e-06, + "loss": 2.1584, + "step": 10792 + }, + { + "epoch": 2.19, + "learning_rate": 7.858832976892911e-06, + "loss": 2.1381, + "step": 10793 + }, + { + "epoch": 2.19, + "learning_rate": 7.857145732908296e-06, + "loss": 2.1818, + "step": 10794 + }, + { + "epoch": 2.19, + "learning_rate": 7.855458552859816e-06, + "loss": 2.0639, + "step": 10795 + }, + { + "epoch": 2.19, + "learning_rate": 7.85377143679781e-06, + "loss": 2.0552, + "step": 10796 + }, + { + "epoch": 2.19, + "learning_rate": 7.852084384772612e-06, + "loss": 2.1005, + "step": 10797 + }, + { + "epoch": 2.19, + "learning_rate": 7.850397396834565e-06, + "loss": 2.1236, + "step": 10798 + }, + { + "epoch": 2.19, + "learning_rate": 7.848710473034e-06, + "loss": 2.1201, + "step": 10799 + }, + { + "epoch": 2.19, + "learning_rate": 7.847023613421251e-06, + "loss": 2.0966, + "step": 10800 + }, + { + "epoch": 2.19, + "learning_rate": 7.845336818046649e-06, + "loss": 2.1764, + "step": 10801 + }, + { + "epoch": 2.19, + "learning_rate": 7.843650086960521e-06, + "loss": 2.0936, + "step": 10802 + }, + { + "epoch": 2.19, + "learning_rate": 7.841963420213195e-06, + "loss": 2.0509, + "step": 10803 + }, + { + "epoch": 2.19, + "learning_rate": 7.840276817854994e-06, + "loss": 2.1305, + "step": 10804 + }, + { + "epoch": 2.19, + "learning_rate": 7.838590279936243e-06, + "loss": 2.1482, + "step": 10805 + }, + { + "epoch": 2.19, + "learning_rate": 7.836903806507263e-06, + "loss": 2.0629, + "step": 10806 + }, + { + "epoch": 2.19, + "learning_rate": 7.835217397618372e-06, + "loss": 2.1739, + "step": 10807 + }, + { + "epoch": 2.19, + "learning_rate": 7.83353105331989e-06, + "loss": 2.0265, + "step": 10808 + }, + { + "epoch": 2.19, + "learning_rate": 7.831844773662125e-06, + "loss": 2.1588, + "step": 10809 + }, + { + "epoch": 2.19, + "learning_rate": 7.830158558695402e-06, + "loss": 2.1759, + "step": 10810 + }, + { + "epoch": 2.19, + "learning_rate": 7.828472408470024e-06, + "loss": 2.2264, + "step": 10811 + }, + { + "epoch": 2.19, + "learning_rate": 7.826786323036304e-06, + "loss": 2.1535, + "step": 10812 + }, + { + "epoch": 2.19, + "learning_rate": 7.825100302444546e-06, + "loss": 2.0396, + "step": 10813 + }, + { + "epoch": 2.2, + "learning_rate": 7.823414346745053e-06, + "loss": 2.0682, + "step": 10814 + }, + { + "epoch": 2.2, + "learning_rate": 7.82172845598814e-06, + "loss": 2.0994, + "step": 10815 + }, + { + "epoch": 2.2, + "learning_rate": 7.8200426302241e-06, + "loss": 2.1005, + "step": 10816 + }, + { + "epoch": 2.2, + "learning_rate": 7.818356869503235e-06, + "loss": 2.1466, + "step": 10817 + }, + { + "epoch": 2.2, + "learning_rate": 7.816671173875843e-06, + "loss": 2.0767, + "step": 10818 + }, + { + "epoch": 2.2, + "learning_rate": 7.81498554339222e-06, + "loss": 2.1828, + "step": 10819 + }, + { + "epoch": 2.2, + "learning_rate": 7.81329997810266e-06, + "loss": 2.0775, + "step": 10820 + }, + { + "epoch": 2.2, + "learning_rate": 7.811614478057453e-06, + "loss": 2.1364, + "step": 10821 + }, + { + "epoch": 2.2, + "learning_rate": 7.809929043306894e-06, + "loss": 2.0923, + "step": 10822 + }, + { + "epoch": 2.2, + "learning_rate": 7.808243673901266e-06, + "loss": 2.1495, + "step": 10823 + }, + { + "epoch": 2.2, + "learning_rate": 7.806558369890858e-06, + "loss": 2.0959, + "step": 10824 + }, + { + "epoch": 2.2, + "learning_rate": 7.804873131325951e-06, + "loss": 2.1529, + "step": 10825 + }, + { + "epoch": 2.2, + "learning_rate": 7.803187958256833e-06, + "loss": 2.1113, + "step": 10826 + }, + { + "epoch": 2.2, + "learning_rate": 7.801502850733782e-06, + "loss": 2.1252, + "step": 10827 + }, + { + "epoch": 2.2, + "learning_rate": 7.799817808807075e-06, + "loss": 2.1346, + "step": 10828 + }, + { + "epoch": 2.2, + "learning_rate": 7.798132832526983e-06, + "loss": 2.1155, + "step": 10829 + }, + { + "epoch": 2.2, + "learning_rate": 7.796447921943793e-06, + "loss": 2.1837, + "step": 10830 + }, + { + "epoch": 2.2, + "learning_rate": 7.794763077107771e-06, + "loss": 2.1771, + "step": 10831 + }, + { + "epoch": 2.2, + "learning_rate": 7.793078298069186e-06, + "loss": 2.085, + "step": 10832 + }, + { + "epoch": 2.2, + "learning_rate": 7.791393584878309e-06, + "loss": 2.0877, + "step": 10833 + }, + { + "epoch": 2.2, + "learning_rate": 7.789708937585402e-06, + "loss": 2.1639, + "step": 10834 + }, + { + "epoch": 2.2, + "learning_rate": 7.788024356240736e-06, + "loss": 2.0382, + "step": 10835 + }, + { + "epoch": 2.2, + "learning_rate": 7.786339840894573e-06, + "loss": 2.1256, + "step": 10836 + }, + { + "epoch": 2.2, + "learning_rate": 7.78465539159717e-06, + "loss": 2.0899, + "step": 10837 + }, + { + "epoch": 2.2, + "learning_rate": 7.782971008398786e-06, + "loss": 2.0323, + "step": 10838 + }, + { + "epoch": 2.2, + "learning_rate": 7.78128669134968e-06, + "loss": 2.1237, + "step": 10839 + }, + { + "epoch": 2.2, + "learning_rate": 7.779602440500107e-06, + "loss": 2.0284, + "step": 10840 + }, + { + "epoch": 2.2, + "learning_rate": 7.777918255900318e-06, + "loss": 2.1592, + "step": 10841 + }, + { + "epoch": 2.2, + "learning_rate": 7.776234137600568e-06, + "loss": 2.1694, + "step": 10842 + }, + { + "epoch": 2.2, + "learning_rate": 7.7745500856511e-06, + "loss": 2.1063, + "step": 10843 + }, + { + "epoch": 2.2, + "learning_rate": 7.772866100102164e-06, + "loss": 2.1115, + "step": 10844 + }, + { + "epoch": 2.2, + "learning_rate": 7.771182181004004e-06, + "loss": 2.1319, + "step": 10845 + }, + { + "epoch": 2.2, + "learning_rate": 7.769498328406864e-06, + "loss": 2.1717, + "step": 10846 + }, + { + "epoch": 2.2, + "learning_rate": 7.767814542360985e-06, + "loss": 2.0935, + "step": 10847 + }, + { + "epoch": 2.2, + "learning_rate": 7.766130822916607e-06, + "loss": 2.1607, + "step": 10848 + }, + { + "epoch": 2.2, + "learning_rate": 7.764447170123959e-06, + "loss": 2.1038, + "step": 10849 + }, + { + "epoch": 2.2, + "learning_rate": 7.762763584033286e-06, + "loss": 2.1179, + "step": 10850 + }, + { + "epoch": 2.2, + "learning_rate": 7.76108006469482e-06, + "loss": 2.0272, + "step": 10851 + }, + { + "epoch": 2.2, + "learning_rate": 7.759396612158788e-06, + "loss": 2.135, + "step": 10852 + }, + { + "epoch": 2.2, + "learning_rate": 7.757713226475418e-06, + "loss": 2.0995, + "step": 10853 + }, + { + "epoch": 2.2, + "learning_rate": 7.756029907694937e-06, + "loss": 2.0635, + "step": 10854 + }, + { + "epoch": 2.2, + "learning_rate": 7.754346655867578e-06, + "loss": 1.9844, + "step": 10855 + }, + { + "epoch": 2.2, + "learning_rate": 7.752663471043557e-06, + "loss": 2.1388, + "step": 10856 + }, + { + "epoch": 2.2, + "learning_rate": 7.750980353273096e-06, + "loss": 2.1429, + "step": 10857 + }, + { + "epoch": 2.2, + "learning_rate": 7.749297302606414e-06, + "loss": 2.0869, + "step": 10858 + }, + { + "epoch": 2.2, + "learning_rate": 7.747614319093725e-06, + "loss": 2.1314, + "step": 10859 + }, + { + "epoch": 2.2, + "learning_rate": 7.745931402785252e-06, + "loss": 2.068, + "step": 10860 + }, + { + "epoch": 2.2, + "learning_rate": 7.744248553731203e-06, + "loss": 2.079, + "step": 10861 + }, + { + "epoch": 2.2, + "learning_rate": 7.742565771981786e-06, + "loss": 2.0217, + "step": 10862 + }, + { + "epoch": 2.21, + "learning_rate": 7.740883057587216e-06, + "loss": 2.0825, + "step": 10863 + }, + { + "epoch": 2.21, + "learning_rate": 7.739200410597699e-06, + "loss": 2.1108, + "step": 10864 + }, + { + "epoch": 2.21, + "learning_rate": 7.737517831063435e-06, + "loss": 2.0837, + "step": 10865 + }, + { + "epoch": 2.21, + "learning_rate": 7.735835319034633e-06, + "loss": 2.0649, + "step": 10866 + }, + { + "epoch": 2.21, + "learning_rate": 7.734152874561491e-06, + "loss": 2.157, + "step": 10867 + }, + { + "epoch": 2.21, + "learning_rate": 7.732470497694207e-06, + "loss": 2.205, + "step": 10868 + }, + { + "epoch": 2.21, + "learning_rate": 7.730788188482979e-06, + "loss": 2.1219, + "step": 10869 + }, + { + "epoch": 2.21, + "learning_rate": 7.729105946978002e-06, + "loss": 2.1262, + "step": 10870 + }, + { + "epoch": 2.21, + "learning_rate": 7.72742377322947e-06, + "loss": 2.1797, + "step": 10871 + }, + { + "epoch": 2.21, + "learning_rate": 7.725741667287574e-06, + "loss": 2.1041, + "step": 10872 + }, + { + "epoch": 2.21, + "learning_rate": 7.724059629202498e-06, + "loss": 2.0575, + "step": 10873 + }, + { + "epoch": 2.21, + "learning_rate": 7.722377659024433e-06, + "loss": 2.2173, + "step": 10874 + }, + { + "epoch": 2.21, + "learning_rate": 7.720695756803567e-06, + "loss": 2.1748, + "step": 10875 + }, + { + "epoch": 2.21, + "learning_rate": 7.719013922590077e-06, + "loss": 2.1041, + "step": 10876 + }, + { + "epoch": 2.21, + "learning_rate": 7.717332156434147e-06, + "loss": 2.1426, + "step": 10877 + }, + { + "epoch": 2.21, + "learning_rate": 7.715650458385948e-06, + "loss": 2.1475, + "step": 10878 + }, + { + "epoch": 2.21, + "learning_rate": 7.713968828495669e-06, + "loss": 2.1259, + "step": 10879 + }, + { + "epoch": 2.21, + "learning_rate": 7.712287266813478e-06, + "loss": 2.1777, + "step": 10880 + }, + { + "epoch": 2.21, + "learning_rate": 7.710605773389547e-06, + "loss": 2.0068, + "step": 10881 + }, + { + "epoch": 2.21, + "learning_rate": 7.708924348274048e-06, + "loss": 2.0958, + "step": 10882 + }, + { + "epoch": 2.21, + "learning_rate": 7.707242991517147e-06, + "loss": 2.1147, + "step": 10883 + }, + { + "epoch": 2.21, + "learning_rate": 7.705561703169017e-06, + "loss": 2.163, + "step": 10884 + }, + { + "epoch": 2.21, + "learning_rate": 7.703880483279818e-06, + "loss": 2.0945, + "step": 10885 + }, + { + "epoch": 2.21, + "learning_rate": 7.702199331899709e-06, + "loss": 2.0775, + "step": 10886 + }, + { + "epoch": 2.21, + "learning_rate": 7.700518249078857e-06, + "loss": 2.1535, + "step": 10887 + }, + { + "epoch": 2.21, + "learning_rate": 7.698837234867416e-06, + "loss": 2.1087, + "step": 10888 + }, + { + "epoch": 2.21, + "learning_rate": 7.697156289315541e-06, + "loss": 2.1749, + "step": 10889 + }, + { + "epoch": 2.21, + "learning_rate": 7.695475412473393e-06, + "loss": 2.0949, + "step": 10890 + }, + { + "epoch": 2.21, + "learning_rate": 7.69379460439112e-06, + "loss": 2.0933, + "step": 10891 + }, + { + "epoch": 2.21, + "learning_rate": 7.69211386511887e-06, + "loss": 2.1197, + "step": 10892 + }, + { + "epoch": 2.21, + "learning_rate": 7.69043319470679e-06, + "loss": 2.1773, + "step": 10893 + }, + { + "epoch": 2.21, + "learning_rate": 7.688752593205034e-06, + "loss": 2.0703, + "step": 10894 + }, + { + "epoch": 2.21, + "learning_rate": 7.687072060663739e-06, + "loss": 2.0877, + "step": 10895 + }, + { + "epoch": 2.21, + "learning_rate": 7.68539159713305e-06, + "loss": 2.0969, + "step": 10896 + }, + { + "epoch": 2.21, + "learning_rate": 7.683711202663106e-06, + "loss": 2.1516, + "step": 10897 + }, + { + "epoch": 2.21, + "learning_rate": 7.68203087730404e-06, + "loss": 2.0725, + "step": 10898 + }, + { + "epoch": 2.21, + "learning_rate": 7.680350621105996e-06, + "loss": 2.147, + "step": 10899 + }, + { + "epoch": 2.21, + "learning_rate": 7.678670434119105e-06, + "loss": 2.1115, + "step": 10900 + }, + { + "epoch": 2.21, + "learning_rate": 7.676990316393496e-06, + "loss": 2.0136, + "step": 10901 + }, + { + "epoch": 2.21, + "learning_rate": 7.6753102679793e-06, + "loss": 2.1238, + "step": 10902 + }, + { + "epoch": 2.21, + "learning_rate": 7.67363028892664e-06, + "loss": 2.1705, + "step": 10903 + }, + { + "epoch": 2.21, + "learning_rate": 7.671950379285652e-06, + "loss": 2.1303, + "step": 10904 + }, + { + "epoch": 2.21, + "learning_rate": 7.670270539106452e-06, + "loss": 2.1597, + "step": 10905 + }, + { + "epoch": 2.21, + "learning_rate": 7.66859076843916e-06, + "loss": 2.155, + "step": 10906 + }, + { + "epoch": 2.21, + "learning_rate": 7.666911067333898e-06, + "loss": 2.0453, + "step": 10907 + }, + { + "epoch": 2.21, + "learning_rate": 7.665231435840782e-06, + "loss": 2.0768, + "step": 10908 + }, + { + "epoch": 2.21, + "learning_rate": 7.663551874009928e-06, + "loss": 2.0729, + "step": 10909 + }, + { + "epoch": 2.21, + "learning_rate": 7.661872381891447e-06, + "loss": 2.0827, + "step": 10910 + }, + { + "epoch": 2.21, + "learning_rate": 7.660192959535455e-06, + "loss": 2.093, + "step": 10911 + }, + { + "epoch": 2.21, + "learning_rate": 7.658513606992055e-06, + "loss": 2.0764, + "step": 10912 + }, + { + "epoch": 2.22, + "learning_rate": 7.656834324311352e-06, + "loss": 2.1403, + "step": 10913 + }, + { + "epoch": 2.22, + "learning_rate": 7.65515511154346e-06, + "loss": 2.1433, + "step": 10914 + }, + { + "epoch": 2.22, + "learning_rate": 7.653475968738474e-06, + "loss": 2.0066, + "step": 10915 + }, + { + "epoch": 2.22, + "learning_rate": 7.651796895946495e-06, + "loss": 2.1203, + "step": 10916 + }, + { + "epoch": 2.22, + "learning_rate": 7.650117893217623e-06, + "loss": 2.1147, + "step": 10917 + }, + { + "epoch": 2.22, + "learning_rate": 7.648438960601951e-06, + "loss": 2.1756, + "step": 10918 + }, + { + "epoch": 2.22, + "learning_rate": 7.646760098149578e-06, + "loss": 2.1475, + "step": 10919 + }, + { + "epoch": 2.22, + "learning_rate": 7.645081305910596e-06, + "loss": 2.0894, + "step": 10920 + }, + { + "epoch": 2.22, + "learning_rate": 7.643402583935094e-06, + "loss": 2.1905, + "step": 10921 + }, + { + "epoch": 2.22, + "learning_rate": 7.641723932273154e-06, + "loss": 2.1867, + "step": 10922 + }, + { + "epoch": 2.22, + "learning_rate": 7.640045350974867e-06, + "loss": 2.1467, + "step": 10923 + }, + { + "epoch": 2.22, + "learning_rate": 7.63836684009032e-06, + "loss": 2.1155, + "step": 10924 + }, + { + "epoch": 2.22, + "learning_rate": 7.636688399669589e-06, + "loss": 2.1921, + "step": 10925 + }, + { + "epoch": 2.22, + "learning_rate": 7.635010029762754e-06, + "loss": 2.08, + "step": 10926 + }, + { + "epoch": 2.22, + "learning_rate": 7.633331730419895e-06, + "loss": 2.098, + "step": 10927 + }, + { + "epoch": 2.22, + "learning_rate": 7.631653501691086e-06, + "loss": 2.1033, + "step": 10928 + }, + { + "epoch": 2.22, + "learning_rate": 7.6299753436264e-06, + "loss": 2.0978, + "step": 10929 + }, + { + "epoch": 2.22, + "learning_rate": 7.628297256275905e-06, + "loss": 2.1351, + "step": 10930 + }, + { + "epoch": 2.22, + "learning_rate": 7.626619239689676e-06, + "loss": 2.1739, + "step": 10931 + }, + { + "epoch": 2.22, + "learning_rate": 7.624941293917778e-06, + "loss": 2.0805, + "step": 10932 + }, + { + "epoch": 2.22, + "learning_rate": 7.623263419010272e-06, + "loss": 2.0983, + "step": 10933 + }, + { + "epoch": 2.22, + "learning_rate": 7.621585615017222e-06, + "loss": 2.061, + "step": 10934 + }, + { + "epoch": 2.22, + "learning_rate": 7.619907881988692e-06, + "loss": 2.1099, + "step": 10935 + }, + { + "epoch": 2.22, + "learning_rate": 7.618230219974738e-06, + "loss": 2.1248, + "step": 10936 + }, + { + "epoch": 2.22, + "learning_rate": 7.616552629025415e-06, + "loss": 2.0215, + "step": 10937 + }, + { + "epoch": 2.22, + "learning_rate": 7.6148751091907755e-06, + "loss": 2.1009, + "step": 10938 + }, + { + "epoch": 2.22, + "learning_rate": 7.613197660520879e-06, + "loss": 2.0564, + "step": 10939 + }, + { + "epoch": 2.22, + "learning_rate": 7.6115202830657694e-06, + "loss": 2.0699, + "step": 10940 + }, + { + "epoch": 2.22, + "learning_rate": 7.609842976875497e-06, + "loss": 2.0683, + "step": 10941 + }, + { + "epoch": 2.22, + "learning_rate": 7.608165742000104e-06, + "loss": 2.0475, + "step": 10942 + }, + { + "epoch": 2.22, + "learning_rate": 7.606488578489634e-06, + "loss": 2.0849, + "step": 10943 + }, + { + "epoch": 2.22, + "learning_rate": 7.604811486394133e-06, + "loss": 2.1097, + "step": 10944 + }, + { + "epoch": 2.22, + "learning_rate": 7.6031344657636376e-06, + "loss": 2.1002, + "step": 10945 + }, + { + "epoch": 2.22, + "learning_rate": 7.601457516648184e-06, + "loss": 2.1304, + "step": 10946 + }, + { + "epoch": 2.22, + "learning_rate": 7.599780639097809e-06, + "loss": 2.1268, + "step": 10947 + }, + { + "epoch": 2.22, + "learning_rate": 7.598103833162542e-06, + "loss": 2.0417, + "step": 10948 + }, + { + "epoch": 2.22, + "learning_rate": 7.596427098892421e-06, + "loss": 2.1214, + "step": 10949 + }, + { + "epoch": 2.22, + "learning_rate": 7.594750436337465e-06, + "loss": 2.1092, + "step": 10950 + }, + { + "epoch": 2.22, + "learning_rate": 7.593073845547707e-06, + "loss": 2.1885, + "step": 10951 + }, + { + "epoch": 2.22, + "learning_rate": 7.591397326573171e-06, + "loss": 2.1242, + "step": 10952 + }, + { + "epoch": 2.22, + "learning_rate": 7.589720879463875e-06, + "loss": 2.0945, + "step": 10953 + }, + { + "epoch": 2.22, + "learning_rate": 7.588044504269841e-06, + "loss": 2.1152, + "step": 10954 + }, + { + "epoch": 2.22, + "learning_rate": 7.58636820104109e-06, + "loss": 2.082, + "step": 10955 + }, + { + "epoch": 2.22, + "learning_rate": 7.584691969827635e-06, + "loss": 2.155, + "step": 10956 + }, + { + "epoch": 2.22, + "learning_rate": 7.583015810679487e-06, + "loss": 2.077, + "step": 10957 + }, + { + "epoch": 2.22, + "learning_rate": 7.5813397236466605e-06, + "loss": 2.0839, + "step": 10958 + }, + { + "epoch": 2.22, + "learning_rate": 7.579663708779166e-06, + "loss": 2.2025, + "step": 10959 + }, + { + "epoch": 2.22, + "learning_rate": 7.577987766127009e-06, + "loss": 2.1039, + "step": 10960 + }, + { + "epoch": 2.22, + "learning_rate": 7.576311895740195e-06, + "loss": 2.1044, + "step": 10961 + }, + { + "epoch": 2.23, + "learning_rate": 7.574636097668722e-06, + "loss": 2.0789, + "step": 10962 + }, + { + "epoch": 2.23, + "learning_rate": 7.5729603719625976e-06, + "loss": 2.1221, + "step": 10963 + }, + { + "epoch": 2.23, + "learning_rate": 7.571284718671819e-06, + "loss": 2.0712, + "step": 10964 + }, + { + "epoch": 2.23, + "learning_rate": 7.569609137846379e-06, + "loss": 2.0581, + "step": 10965 + }, + { + "epoch": 2.23, + "learning_rate": 7.567933629536275e-06, + "loss": 2.0764, + "step": 10966 + }, + { + "epoch": 2.23, + "learning_rate": 7.566258193791492e-06, + "loss": 2.1967, + "step": 10967 + }, + { + "epoch": 2.23, + "learning_rate": 7.564582830662031e-06, + "loss": 2.07, + "step": 10968 + }, + { + "epoch": 2.23, + "learning_rate": 7.5629075401978735e-06, + "loss": 1.9931, + "step": 10969 + }, + { + "epoch": 2.23, + "learning_rate": 7.5612323224490035e-06, + "loss": 2.2183, + "step": 10970 + }, + { + "epoch": 2.23, + "learning_rate": 7.559557177465409e-06, + "loss": 2.1422, + "step": 10971 + }, + { + "epoch": 2.23, + "learning_rate": 7.557882105297065e-06, + "loss": 2.0973, + "step": 10972 + }, + { + "epoch": 2.23, + "learning_rate": 7.556207105993957e-06, + "loss": 2.0402, + "step": 10973 + }, + { + "epoch": 2.23, + "learning_rate": 7.554532179606056e-06, + "loss": 2.1595, + "step": 10974 + }, + { + "epoch": 2.23, + "learning_rate": 7.5528573261833425e-06, + "loss": 2.1736, + "step": 10975 + }, + { + "epoch": 2.23, + "learning_rate": 7.551182545775785e-06, + "loss": 2.0805, + "step": 10976 + }, + { + "epoch": 2.23, + "learning_rate": 7.549507838433352e-06, + "loss": 2.1768, + "step": 10977 + }, + { + "epoch": 2.23, + "learning_rate": 7.547833204206014e-06, + "loss": 2.1395, + "step": 10978 + }, + { + "epoch": 2.23, + "learning_rate": 7.5461586431437405e-06, + "loss": 2.0588, + "step": 10979 + }, + { + "epoch": 2.23, + "learning_rate": 7.544484155296492e-06, + "loss": 2.0485, + "step": 10980 + }, + { + "epoch": 2.23, + "learning_rate": 7.542809740714228e-06, + "loss": 2.1517, + "step": 10981 + }, + { + "epoch": 2.23, + "learning_rate": 7.541135399446906e-06, + "loss": 2.0678, + "step": 10982 + }, + { + "epoch": 2.23, + "learning_rate": 7.539461131544492e-06, + "loss": 2.0432, + "step": 10983 + }, + { + "epoch": 2.23, + "learning_rate": 7.537786937056935e-06, + "loss": 2.1912, + "step": 10984 + }, + { + "epoch": 2.23, + "learning_rate": 7.5361128160341876e-06, + "loss": 2.1815, + "step": 10985 + }, + { + "epoch": 2.23, + "learning_rate": 7.5344387685262025e-06, + "loss": 2.1272, + "step": 10986 + }, + { + "epoch": 2.23, + "learning_rate": 7.532764794582922e-06, + "loss": 2.0933, + "step": 10987 + }, + { + "epoch": 2.23, + "learning_rate": 7.531090894254301e-06, + "loss": 2.1369, + "step": 10988 + }, + { + "epoch": 2.23, + "learning_rate": 7.529417067590281e-06, + "loss": 1.9973, + "step": 10989 + }, + { + "epoch": 2.23, + "learning_rate": 7.527743314640802e-06, + "loss": 2.1057, + "step": 10990 + }, + { + "epoch": 2.23, + "learning_rate": 7.526069635455802e-06, + "loss": 2.1142, + "step": 10991 + }, + { + "epoch": 2.23, + "learning_rate": 7.5243960300852205e-06, + "loss": 2.1849, + "step": 10992 + }, + { + "epoch": 2.23, + "learning_rate": 7.522722498578994e-06, + "loss": 2.0906, + "step": 10993 + }, + { + "epoch": 2.23, + "learning_rate": 7.521049040987053e-06, + "loss": 2.0914, + "step": 10994 + }, + { + "epoch": 2.23, + "learning_rate": 7.519375657359331e-06, + "loss": 2.1333, + "step": 10995 + }, + { + "epoch": 2.23, + "learning_rate": 7.517702347745756e-06, + "loss": 2.1025, + "step": 10996 + }, + { + "epoch": 2.23, + "learning_rate": 7.51602911219625e-06, + "loss": 2.1607, + "step": 10997 + }, + { + "epoch": 2.23, + "learning_rate": 7.514355950760741e-06, + "loss": 2.0865, + "step": 10998 + }, + { + "epoch": 2.23, + "learning_rate": 7.512682863489153e-06, + "loss": 2.0901, + "step": 10999 + }, + { + "epoch": 2.23, + "learning_rate": 7.511009850431403e-06, + "loss": 2.17, + "step": 11000 + }, + { + "epoch": 2.23, + "learning_rate": 7.509336911637409e-06, + "loss": 2.1335, + "step": 11001 + }, + { + "epoch": 2.23, + "learning_rate": 7.507664047157081e-06, + "loss": 2.1948, + "step": 11002 + }, + { + "epoch": 2.23, + "learning_rate": 7.505991257040344e-06, + "loss": 2.094, + "step": 11003 + }, + { + "epoch": 2.23, + "learning_rate": 7.5043185413371e-06, + "loss": 2.1053, + "step": 11004 + }, + { + "epoch": 2.23, + "learning_rate": 7.502645900097261e-06, + "loss": 2.1809, + "step": 11005 + }, + { + "epoch": 2.23, + "learning_rate": 7.5009733333707315e-06, + "loss": 2.1365, + "step": 11006 + }, + { + "epoch": 2.23, + "learning_rate": 7.499300841207411e-06, + "loss": 2.0959, + "step": 11007 + }, + { + "epoch": 2.23, + "learning_rate": 7.497628423657213e-06, + "loss": 2.0847, + "step": 11008 + }, + { + "epoch": 2.23, + "learning_rate": 7.495956080770031e-06, + "loss": 2.1522, + "step": 11009 + }, + { + "epoch": 2.23, + "learning_rate": 7.49428381259576e-06, + "loss": 2.1394, + "step": 11010 + }, + { + "epoch": 2.24, + "learning_rate": 7.492611619184299e-06, + "loss": 2.1091, + "step": 11011 + }, + { + "epoch": 2.24, + "learning_rate": 7.490939500585539e-06, + "loss": 2.0457, + "step": 11012 + }, + { + "epoch": 2.24, + "learning_rate": 7.489267456849374e-06, + "loss": 2.1171, + "step": 11013 + }, + { + "epoch": 2.24, + "learning_rate": 7.4875954880256895e-06, + "loss": 2.1057, + "step": 11014 + }, + { + "epoch": 2.24, + "learning_rate": 7.485923594164372e-06, + "loss": 2.1372, + "step": 11015 + }, + { + "epoch": 2.24, + "learning_rate": 7.484251775315309e-06, + "loss": 2.0528, + "step": 11016 + }, + { + "epoch": 2.24, + "learning_rate": 7.482580031528377e-06, + "loss": 2.1034, + "step": 11017 + }, + { + "epoch": 2.24, + "learning_rate": 7.480908362853459e-06, + "loss": 2.1249, + "step": 11018 + }, + { + "epoch": 2.24, + "learning_rate": 7.479236769340433e-06, + "loss": 2.06, + "step": 11019 + }, + { + "epoch": 2.24, + "learning_rate": 7.477565251039174e-06, + "loss": 2.2161, + "step": 11020 + }, + { + "epoch": 2.24, + "learning_rate": 7.475893807999554e-06, + "loss": 2.0763, + "step": 11021 + }, + { + "epoch": 2.24, + "learning_rate": 7.474222440271439e-06, + "loss": 2.1505, + "step": 11022 + }, + { + "epoch": 2.24, + "learning_rate": 7.472551147904708e-06, + "loss": 2.1077, + "step": 11023 + }, + { + "epoch": 2.24, + "learning_rate": 7.47087993094922e-06, + "loss": 2.0472, + "step": 11024 + }, + { + "epoch": 2.24, + "learning_rate": 7.469208789454841e-06, + "loss": 2.1494, + "step": 11025 + }, + { + "epoch": 2.24, + "learning_rate": 7.4675377234714295e-06, + "loss": 2.1926, + "step": 11026 + }, + { + "epoch": 2.24, + "learning_rate": 7.465866733048845e-06, + "loss": 2.1089, + "step": 11027 + }, + { + "epoch": 2.24, + "learning_rate": 7.464195818236952e-06, + "loss": 2.1345, + "step": 11028 + }, + { + "epoch": 2.24, + "learning_rate": 7.462524979085601e-06, + "loss": 2.1525, + "step": 11029 + }, + { + "epoch": 2.24, + "learning_rate": 7.4608542156446425e-06, + "loss": 2.1222, + "step": 11030 + }, + { + "epoch": 2.24, + "learning_rate": 7.459183527963928e-06, + "loss": 2.1103, + "step": 11031 + }, + { + "epoch": 2.24, + "learning_rate": 7.457512916093306e-06, + "loss": 2.0664, + "step": 11032 + }, + { + "epoch": 2.24, + "learning_rate": 7.455842380082623e-06, + "loss": 2.1938, + "step": 11033 + }, + { + "epoch": 2.24, + "learning_rate": 7.454171919981724e-06, + "loss": 2.1838, + "step": 11034 + }, + { + "epoch": 2.24, + "learning_rate": 7.452501535840446e-06, + "loss": 2.1076, + "step": 11035 + }, + { + "epoch": 2.24, + "learning_rate": 7.450831227708633e-06, + "loss": 2.1131, + "step": 11036 + }, + { + "epoch": 2.24, + "learning_rate": 7.4491609956361165e-06, + "loss": 2.1737, + "step": 11037 + }, + { + "epoch": 2.24, + "learning_rate": 7.447490839672737e-06, + "loss": 2.162, + "step": 11038 + }, + { + "epoch": 2.24, + "learning_rate": 7.445820759868322e-06, + "loss": 2.1614, + "step": 11039 + }, + { + "epoch": 2.24, + "learning_rate": 7.444150756272704e-06, + "loss": 2.097, + "step": 11040 + }, + { + "epoch": 2.24, + "learning_rate": 7.442480828935712e-06, + "loss": 2.0839, + "step": 11041 + }, + { + "epoch": 2.24, + "learning_rate": 7.440810977907165e-06, + "loss": 2.1325, + "step": 11042 + }, + { + "epoch": 2.24, + "learning_rate": 7.439141203236892e-06, + "loss": 2.1711, + "step": 11043 + }, + { + "epoch": 2.24, + "learning_rate": 7.437471504974716e-06, + "loss": 2.054, + "step": 11044 + }, + { + "epoch": 2.24, + "learning_rate": 7.435801883170451e-06, + "loss": 2.1383, + "step": 11045 + }, + { + "epoch": 2.24, + "learning_rate": 7.434132337873911e-06, + "loss": 2.1652, + "step": 11046 + }, + { + "epoch": 2.24, + "learning_rate": 7.432462869134917e-06, + "loss": 2.0565, + "step": 11047 + }, + { + "epoch": 2.24, + "learning_rate": 7.430793477003279e-06, + "loss": 2.1055, + "step": 11048 + }, + { + "epoch": 2.24, + "learning_rate": 7.429124161528803e-06, + "loss": 2.0986, + "step": 11049 + }, + { + "epoch": 2.24, + "learning_rate": 7.427454922761299e-06, + "loss": 2.1138, + "step": 11050 + }, + { + "epoch": 2.24, + "learning_rate": 7.425785760750567e-06, + "loss": 2.0906, + "step": 11051 + }, + { + "epoch": 2.24, + "learning_rate": 7.424116675546418e-06, + "loss": 2.1004, + "step": 11052 + }, + { + "epoch": 2.24, + "learning_rate": 7.422447667198647e-06, + "loss": 2.1065, + "step": 11053 + }, + { + "epoch": 2.24, + "learning_rate": 7.420778735757054e-06, + "loss": 2.1504, + "step": 11054 + }, + { + "epoch": 2.24, + "learning_rate": 7.419109881271431e-06, + "loss": 2.1513, + "step": 11055 + }, + { + "epoch": 2.24, + "learning_rate": 7.4174411037915735e-06, + "loss": 2.1203, + "step": 11056 + }, + { + "epoch": 2.24, + "learning_rate": 7.415772403367277e-06, + "loss": 2.1098, + "step": 11057 + }, + { + "epoch": 2.24, + "learning_rate": 7.414103780048327e-06, + "loss": 2.2936, + "step": 11058 + }, + { + "epoch": 2.24, + "learning_rate": 7.412435233884507e-06, + "loss": 2.0916, + "step": 11059 + }, + { + "epoch": 2.25, + "learning_rate": 7.410766764925606e-06, + "loss": 2.1333, + "step": 11060 + }, + { + "epoch": 2.25, + "learning_rate": 7.4090983732214016e-06, + "loss": 2.1167, + "step": 11061 + }, + { + "epoch": 2.25, + "learning_rate": 7.407430058821679e-06, + "loss": 2.1582, + "step": 11062 + }, + { + "epoch": 2.25, + "learning_rate": 7.405761821776208e-06, + "loss": 2.1318, + "step": 11063 + }, + { + "epoch": 2.25, + "learning_rate": 7.404093662134772e-06, + "loss": 2.107, + "step": 11064 + }, + { + "epoch": 2.25, + "learning_rate": 7.402425579947139e-06, + "loss": 2.0496, + "step": 11065 + }, + { + "epoch": 2.25, + "learning_rate": 7.400757575263075e-06, + "loss": 2.1818, + "step": 11066 + }, + { + "epoch": 2.25, + "learning_rate": 7.39908964813236e-06, + "loss": 2.1725, + "step": 11067 + }, + { + "epoch": 2.25, + "learning_rate": 7.397421798604751e-06, + "loss": 2.181, + "step": 11068 + }, + { + "epoch": 2.25, + "learning_rate": 7.395754026730013e-06, + "loss": 2.1454, + "step": 11069 + }, + { + "epoch": 2.25, + "learning_rate": 7.394086332557909e-06, + "loss": 2.089, + "step": 11070 + }, + { + "epoch": 2.25, + "learning_rate": 7.392418716138192e-06, + "loss": 2.1495, + "step": 11071 + }, + { + "epoch": 2.25, + "learning_rate": 7.390751177520626e-06, + "loss": 2.1039, + "step": 11072 + }, + { + "epoch": 2.25, + "learning_rate": 7.389083716754964e-06, + "loss": 2.0505, + "step": 11073 + }, + { + "epoch": 2.25, + "learning_rate": 7.387416333890953e-06, + "loss": 2.1542, + "step": 11074 + }, + { + "epoch": 2.25, + "learning_rate": 7.385749028978347e-06, + "loss": 2.1528, + "step": 11075 + }, + { + "epoch": 2.25, + "learning_rate": 7.384081802066889e-06, + "loss": 2.1417, + "step": 11076 + }, + { + "epoch": 2.25, + "learning_rate": 7.382414653206329e-06, + "loss": 2.1108, + "step": 11077 + }, + { + "epoch": 2.25, + "learning_rate": 7.380747582446408e-06, + "loss": 2.188, + "step": 11078 + }, + { + "epoch": 2.25, + "learning_rate": 7.379080589836863e-06, + "loss": 2.1218, + "step": 11079 + }, + { + "epoch": 2.25, + "learning_rate": 7.377413675427436e-06, + "loss": 2.0864, + "step": 11080 + }, + { + "epoch": 2.25, + "learning_rate": 7.375746839267859e-06, + "loss": 2.1708, + "step": 11081 + }, + { + "epoch": 2.25, + "learning_rate": 7.3740800814078696e-06, + "loss": 2.122, + "step": 11082 + }, + { + "epoch": 2.25, + "learning_rate": 7.372413401897194e-06, + "loss": 2.0988, + "step": 11083 + }, + { + "epoch": 2.25, + "learning_rate": 7.3707468007855646e-06, + "loss": 2.0336, + "step": 11084 + }, + { + "epoch": 2.25, + "learning_rate": 7.3690802781227056e-06, + "loss": 2.1513, + "step": 11085 + }, + { + "epoch": 2.25, + "learning_rate": 7.367413833958339e-06, + "loss": 2.1254, + "step": 11086 + }, + { + "epoch": 2.25, + "learning_rate": 7.365747468342191e-06, + "loss": 2.0932, + "step": 11087 + }, + { + "epoch": 2.25, + "learning_rate": 7.364081181323976e-06, + "loss": 2.1274, + "step": 11088 + }, + { + "epoch": 2.25, + "learning_rate": 7.362414972953417e-06, + "loss": 2.1655, + "step": 11089 + }, + { + "epoch": 2.25, + "learning_rate": 7.360748843280223e-06, + "loss": 2.1007, + "step": 11090 + }, + { + "epoch": 2.25, + "learning_rate": 7.3590827923541045e-06, + "loss": 2.1136, + "step": 11091 + }, + { + "epoch": 2.25, + "learning_rate": 7.357416820224778e-06, + "loss": 2.0401, + "step": 11092 + }, + { + "epoch": 2.25, + "learning_rate": 7.355750926941947e-06, + "loss": 2.1738, + "step": 11093 + }, + { + "epoch": 2.25, + "learning_rate": 7.354085112555318e-06, + "loss": 2.1193, + "step": 11094 + }, + { + "epoch": 2.25, + "learning_rate": 7.352419377114594e-06, + "loss": 2.0402, + "step": 11095 + }, + { + "epoch": 2.25, + "learning_rate": 7.3507537206694676e-06, + "loss": 2.1023, + "step": 11096 + }, + { + "epoch": 2.25, + "learning_rate": 7.349088143269649e-06, + "loss": 2.1225, + "step": 11097 + }, + { + "epoch": 2.25, + "learning_rate": 7.347422644964829e-06, + "loss": 2.0881, + "step": 11098 + }, + { + "epoch": 2.25, + "learning_rate": 7.345757225804698e-06, + "loss": 2.0961, + "step": 11099 + }, + { + "epoch": 2.25, + "learning_rate": 7.344091885838951e-06, + "loss": 2.1261, + "step": 11100 + }, + { + "epoch": 2.25, + "learning_rate": 7.342426625117271e-06, + "loss": 2.1662, + "step": 11101 + }, + { + "epoch": 2.25, + "learning_rate": 7.340761443689353e-06, + "loss": 2.1358, + "step": 11102 + }, + { + "epoch": 2.25, + "learning_rate": 7.339096341604872e-06, + "loss": 2.0313, + "step": 11103 + }, + { + "epoch": 2.25, + "learning_rate": 7.337431318913516e-06, + "loss": 2.0807, + "step": 11104 + }, + { + "epoch": 2.25, + "learning_rate": 7.335766375664961e-06, + "loss": 2.0546, + "step": 11105 + }, + { + "epoch": 2.25, + "learning_rate": 7.334101511908883e-06, + "loss": 2.1128, + "step": 11106 + }, + { + "epoch": 2.25, + "learning_rate": 7.3324367276949586e-06, + "loss": 2.1589, + "step": 11107 + }, + { + "epoch": 2.25, + "learning_rate": 7.330772023072858e-06, + "loss": 2.1586, + "step": 11108 + }, + { + "epoch": 2.25, + "learning_rate": 7.329107398092254e-06, + "loss": 2.1369, + "step": 11109 + }, + { + "epoch": 2.26, + "learning_rate": 7.3274428528028106e-06, + "loss": 2.0961, + "step": 11110 + }, + { + "epoch": 2.26, + "learning_rate": 7.32577838725419e-06, + "loss": 2.118, + "step": 11111 + }, + { + "epoch": 2.26, + "learning_rate": 7.324114001496061e-06, + "loss": 2.0462, + "step": 11112 + }, + { + "epoch": 2.26, + "learning_rate": 7.322449695578082e-06, + "loss": 2.1066, + "step": 11113 + }, + { + "epoch": 2.26, + "learning_rate": 7.320785469549911e-06, + "loss": 2.1685, + "step": 11114 + }, + { + "epoch": 2.26, + "learning_rate": 7.3191213234612004e-06, + "loss": 2.0681, + "step": 11115 + }, + { + "epoch": 2.26, + "learning_rate": 7.317457257361601e-06, + "loss": 2.16, + "step": 11116 + }, + { + "epoch": 2.26, + "learning_rate": 7.315793271300771e-06, + "loss": 2.1198, + "step": 11117 + }, + { + "epoch": 2.26, + "learning_rate": 7.314129365328355e-06, + "loss": 2.1155, + "step": 11118 + }, + { + "epoch": 2.26, + "learning_rate": 7.312465539494e-06, + "loss": 2.1155, + "step": 11119 + }, + { + "epoch": 2.26, + "learning_rate": 7.310801793847344e-06, + "loss": 2.1306, + "step": 11120 + }, + { + "epoch": 2.26, + "learning_rate": 7.309138128438033e-06, + "loss": 2.083, + "step": 11121 + }, + { + "epoch": 2.26, + "learning_rate": 7.3074745433157065e-06, + "loss": 2.054, + "step": 11122 + }, + { + "epoch": 2.26, + "learning_rate": 7.305811038529997e-06, + "loss": 2.1186, + "step": 11123 + }, + { + "epoch": 2.26, + "learning_rate": 7.304147614130543e-06, + "loss": 2.1239, + "step": 11124 + }, + { + "epoch": 2.26, + "learning_rate": 7.302484270166973e-06, + "loss": 2.1849, + "step": 11125 + }, + { + "epoch": 2.26, + "learning_rate": 7.300821006688913e-06, + "loss": 2.1183, + "step": 11126 + }, + { + "epoch": 2.26, + "learning_rate": 7.299157823745993e-06, + "loss": 2.0963, + "step": 11127 + }, + { + "epoch": 2.26, + "learning_rate": 7.29749472138784e-06, + "loss": 2.1414, + "step": 11128 + }, + { + "epoch": 2.26, + "learning_rate": 7.295831699664072e-06, + "loss": 2.0585, + "step": 11129 + }, + { + "epoch": 2.26, + "learning_rate": 7.294168758624309e-06, + "loss": 2.1307, + "step": 11130 + }, + { + "epoch": 2.26, + "learning_rate": 7.292505898318165e-06, + "loss": 2.0962, + "step": 11131 + }, + { + "epoch": 2.26, + "learning_rate": 7.290843118795262e-06, + "loss": 2.2088, + "step": 11132 + }, + { + "epoch": 2.26, + "learning_rate": 7.289180420105208e-06, + "loss": 2.0901, + "step": 11133 + }, + { + "epoch": 2.26, + "learning_rate": 7.287517802297612e-06, + "loss": 2.1089, + "step": 11134 + }, + { + "epoch": 2.26, + "learning_rate": 7.285855265422084e-06, + "loss": 2.0286, + "step": 11135 + }, + { + "epoch": 2.26, + "learning_rate": 7.284192809528221e-06, + "loss": 2.1697, + "step": 11136 + }, + { + "epoch": 2.26, + "learning_rate": 7.282530434665638e-06, + "loss": 2.1631, + "step": 11137 + }, + { + "epoch": 2.26, + "learning_rate": 7.280868140883928e-06, + "loss": 2.0345, + "step": 11138 + }, + { + "epoch": 2.26, + "learning_rate": 7.2792059282326884e-06, + "loss": 2.1032, + "step": 11139 + }, + { + "epoch": 2.26, + "learning_rate": 7.2775437967615125e-06, + "loss": 2.1522, + "step": 11140 + }, + { + "epoch": 2.26, + "learning_rate": 7.275881746519999e-06, + "loss": 2.1485, + "step": 11141 + }, + { + "epoch": 2.26, + "learning_rate": 7.274219777557737e-06, + "loss": 2.1676, + "step": 11142 + }, + { + "epoch": 2.26, + "learning_rate": 7.272557889924313e-06, + "loss": 2.032, + "step": 11143 + }, + { + "epoch": 2.26, + "learning_rate": 7.27089608366931e-06, + "loss": 2.1596, + "step": 11144 + }, + { + "epoch": 2.26, + "learning_rate": 7.269234358842314e-06, + "loss": 2.0592, + "step": 11145 + }, + { + "epoch": 2.26, + "learning_rate": 7.267572715492908e-06, + "loss": 2.1836, + "step": 11146 + }, + { + "epoch": 2.26, + "learning_rate": 7.265911153670666e-06, + "loss": 2.1197, + "step": 11147 + }, + { + "epoch": 2.26, + "learning_rate": 7.264249673425166e-06, + "loss": 2.0981, + "step": 11148 + }, + { + "epoch": 2.26, + "learning_rate": 7.262588274805984e-06, + "loss": 2.0913, + "step": 11149 + }, + { + "epoch": 2.26, + "learning_rate": 7.260926957862684e-06, + "loss": 2.1162, + "step": 11150 + }, + { + "epoch": 2.26, + "learning_rate": 7.259265722644838e-06, + "loss": 2.1749, + "step": 11151 + }, + { + "epoch": 2.26, + "learning_rate": 7.257604569202017e-06, + "loss": 2.1262, + "step": 11152 + }, + { + "epoch": 2.26, + "learning_rate": 7.2559434975837785e-06, + "loss": 2.1428, + "step": 11153 + }, + { + "epoch": 2.26, + "learning_rate": 7.254282507839687e-06, + "loss": 2.0977, + "step": 11154 + }, + { + "epoch": 2.26, + "learning_rate": 7.252621600019294e-06, + "loss": 2.0484, + "step": 11155 + }, + { + "epoch": 2.26, + "learning_rate": 7.250960774172168e-06, + "loss": 2.1216, + "step": 11156 + }, + { + "epoch": 2.26, + "learning_rate": 7.249300030347856e-06, + "loss": 2.1186, + "step": 11157 + }, + { + "epoch": 2.26, + "learning_rate": 7.247639368595909e-06, + "loss": 2.0545, + "step": 11158 + }, + { + "epoch": 2.27, + "learning_rate": 7.24597878896588e-06, + "loss": 2.0566, + "step": 11159 + }, + { + "epoch": 2.27, + "learning_rate": 7.2443182915073065e-06, + "loss": 2.0819, + "step": 11160 + }, + { + "epoch": 2.27, + "learning_rate": 7.242657876269743e-06, + "loss": 2.1246, + "step": 11161 + }, + { + "epoch": 2.27, + "learning_rate": 7.240997543302726e-06, + "loss": 2.1455, + "step": 11162 + }, + { + "epoch": 2.27, + "learning_rate": 7.239337292655797e-06, + "loss": 2.069, + "step": 11163 + }, + { + "epoch": 2.27, + "learning_rate": 7.23767712437849e-06, + "loss": 2.1583, + "step": 11164 + }, + { + "epoch": 2.27, + "learning_rate": 7.236017038520339e-06, + "loss": 2.2122, + "step": 11165 + }, + { + "epoch": 2.27, + "learning_rate": 7.23435703513088e-06, + "loss": 2.107, + "step": 11166 + }, + { + "epoch": 2.27, + "learning_rate": 7.23269711425964e-06, + "loss": 2.099, + "step": 11167 + }, + { + "epoch": 2.27, + "learning_rate": 7.231037275956144e-06, + "loss": 2.0925, + "step": 11168 + }, + { + "epoch": 2.27, + "learning_rate": 7.22937752026992e-06, + "loss": 2.1946, + "step": 11169 + }, + { + "epoch": 2.27, + "learning_rate": 7.227717847250486e-06, + "loss": 2.0904, + "step": 11170 + }, + { + "epoch": 2.27, + "learning_rate": 7.2260582569473635e-06, + "loss": 2.0379, + "step": 11171 + }, + { + "epoch": 2.27, + "learning_rate": 7.224398749410072e-06, + "loss": 2.1426, + "step": 11172 + }, + { + "epoch": 2.27, + "learning_rate": 7.222739324688124e-06, + "loss": 2.1337, + "step": 11173 + }, + { + "epoch": 2.27, + "learning_rate": 7.221079982831031e-06, + "loss": 2.1298, + "step": 11174 + }, + { + "epoch": 2.27, + "learning_rate": 7.2194207238882985e-06, + "loss": 2.1441, + "step": 11175 + }, + { + "epoch": 2.27, + "learning_rate": 7.217761547909443e-06, + "loss": 2.1046, + "step": 11176 + }, + { + "epoch": 2.27, + "learning_rate": 7.216102454943963e-06, + "loss": 2.1535, + "step": 11177 + }, + { + "epoch": 2.27, + "learning_rate": 7.214443445041364e-06, + "loss": 2.0939, + "step": 11178 + }, + { + "epoch": 2.27, + "learning_rate": 7.212784518251142e-06, + "loss": 2.0333, + "step": 11179 + }, + { + "epoch": 2.27, + "learning_rate": 7.211125674622792e-06, + "loss": 2.1388, + "step": 11180 + }, + { + "epoch": 2.27, + "learning_rate": 7.209466914205816e-06, + "loss": 2.1215, + "step": 11181 + }, + { + "epoch": 2.27, + "learning_rate": 7.207808237049705e-06, + "loss": 2.0793, + "step": 11182 + }, + { + "epoch": 2.27, + "learning_rate": 7.206149643203945e-06, + "loss": 2.1079, + "step": 11183 + }, + { + "epoch": 2.27, + "learning_rate": 7.204491132718024e-06, + "loss": 2.1408, + "step": 11184 + }, + { + "epoch": 2.27, + "learning_rate": 7.202832705641427e-06, + "loss": 2.1106, + "step": 11185 + }, + { + "epoch": 2.27, + "learning_rate": 7.2011743620236396e-06, + "loss": 2.0552, + "step": 11186 + }, + { + "epoch": 2.27, + "learning_rate": 7.199516101914139e-06, + "loss": 2.0494, + "step": 11187 + }, + { + "epoch": 2.27, + "learning_rate": 7.197857925362401e-06, + "loss": 2.1157, + "step": 11188 + }, + { + "epoch": 2.27, + "learning_rate": 7.196199832417905e-06, + "loss": 2.1537, + "step": 11189 + }, + { + "epoch": 2.27, + "learning_rate": 7.194541823130117e-06, + "loss": 2.1255, + "step": 11190 + }, + { + "epoch": 2.27, + "learning_rate": 7.192883897548513e-06, + "loss": 2.1619, + "step": 11191 + }, + { + "epoch": 2.27, + "learning_rate": 7.191226055722556e-06, + "loss": 2.0948, + "step": 11192 + }, + { + "epoch": 2.27, + "learning_rate": 7.1895682977017145e-06, + "loss": 2.0964, + "step": 11193 + }, + { + "epoch": 2.27, + "learning_rate": 7.18791062353545e-06, + "loss": 2.117, + "step": 11194 + }, + { + "epoch": 2.27, + "learning_rate": 7.186253033273216e-06, + "loss": 2.1139, + "step": 11195 + }, + { + "epoch": 2.27, + "learning_rate": 7.18459552696448e-06, + "loss": 2.0846, + "step": 11196 + }, + { + "epoch": 2.27, + "learning_rate": 7.182938104658692e-06, + "loss": 2.079, + "step": 11197 + }, + { + "epoch": 2.27, + "learning_rate": 7.1812807664053045e-06, + "loss": 2.0752, + "step": 11198 + }, + { + "epoch": 2.27, + "learning_rate": 7.179623512253766e-06, + "loss": 2.0694, + "step": 11199 + }, + { + "epoch": 2.27, + "learning_rate": 7.177966342253522e-06, + "loss": 2.0704, + "step": 11200 + }, + { + "epoch": 2.27, + "learning_rate": 7.176309256454026e-06, + "loss": 2.1546, + "step": 11201 + }, + { + "epoch": 2.27, + "learning_rate": 7.174652254904712e-06, + "loss": 2.116, + "step": 11202 + }, + { + "epoch": 2.27, + "learning_rate": 7.172995337655024e-06, + "loss": 2.0897, + "step": 11203 + }, + { + "epoch": 2.27, + "learning_rate": 7.171338504754396e-06, + "loss": 2.2157, + "step": 11204 + }, + { + "epoch": 2.27, + "learning_rate": 7.169681756252265e-06, + "loss": 2.0886, + "step": 11205 + }, + { + "epoch": 2.27, + "learning_rate": 7.168025092198063e-06, + "loss": 2.0938, + "step": 11206 + }, + { + "epoch": 2.27, + "learning_rate": 7.166368512641221e-06, + "loss": 2.1339, + "step": 11207 + }, + { + "epoch": 2.28, + "learning_rate": 7.164712017631162e-06, + "loss": 2.1545, + "step": 11208 + }, + { + "epoch": 2.28, + "learning_rate": 7.163055607217315e-06, + "loss": 2.1746, + "step": 11209 + }, + { + "epoch": 2.28, + "learning_rate": 7.161399281449099e-06, + "loss": 2.0964, + "step": 11210 + }, + { + "epoch": 2.28, + "learning_rate": 7.159743040375938e-06, + "loss": 2.1009, + "step": 11211 + }, + { + "epoch": 2.28, + "learning_rate": 7.158086884047244e-06, + "loss": 2.1308, + "step": 11212 + }, + { + "epoch": 2.28, + "learning_rate": 7.156430812512436e-06, + "loss": 2.1574, + "step": 11213 + }, + { + "epoch": 2.28, + "learning_rate": 7.1547748258209225e-06, + "loss": 2.0869, + "step": 11214 + }, + { + "epoch": 2.28, + "learning_rate": 7.153118924022114e-06, + "loss": 2.0706, + "step": 11215 + }, + { + "epoch": 2.28, + "learning_rate": 7.151463107165417e-06, + "loss": 2.1624, + "step": 11216 + }, + { + "epoch": 2.28, + "learning_rate": 7.149807375300239e-06, + "loss": 2.1583, + "step": 11217 + }, + { + "epoch": 2.28, + "learning_rate": 7.148151728475981e-06, + "loss": 2.1105, + "step": 11218 + }, + { + "epoch": 2.28, + "learning_rate": 7.146496166742041e-06, + "loss": 2.1781, + "step": 11219 + }, + { + "epoch": 2.28, + "learning_rate": 7.1448406901478096e-06, + "loss": 2.1202, + "step": 11220 + }, + { + "epoch": 2.28, + "learning_rate": 7.143185298742694e-06, + "loss": 2.0814, + "step": 11221 + }, + { + "epoch": 2.28, + "learning_rate": 7.141529992576077e-06, + "loss": 2.1405, + "step": 11222 + }, + { + "epoch": 2.28, + "learning_rate": 7.139874771697353e-06, + "loss": 2.1532, + "step": 11223 + }, + { + "epoch": 2.28, + "learning_rate": 7.138219636155904e-06, + "loss": 2.1537, + "step": 11224 + }, + { + "epoch": 2.28, + "learning_rate": 7.1365645860011135e-06, + "loss": 2.1827, + "step": 11225 + }, + { + "epoch": 2.28, + "learning_rate": 7.134909621282369e-06, + "loss": 2.186, + "step": 11226 + }, + { + "epoch": 2.28, + "learning_rate": 7.133254742049045e-06, + "loss": 2.1354, + "step": 11227 + }, + { + "epoch": 2.28, + "learning_rate": 7.131599948350518e-06, + "loss": 2.0916, + "step": 11228 + }, + { + "epoch": 2.28, + "learning_rate": 7.129945240236162e-06, + "loss": 2.1647, + "step": 11229 + }, + { + "epoch": 2.28, + "learning_rate": 7.128290617755352e-06, + "loss": 2.1815, + "step": 11230 + }, + { + "epoch": 2.28, + "learning_rate": 7.126636080957454e-06, + "loss": 2.1202, + "step": 11231 + }, + { + "epoch": 2.28, + "learning_rate": 7.124981629891834e-06, + "loss": 2.0905, + "step": 11232 + }, + { + "epoch": 2.28, + "learning_rate": 7.123327264607855e-06, + "loss": 2.1258, + "step": 11233 + }, + { + "epoch": 2.28, + "learning_rate": 7.1216729851548805e-06, + "loss": 2.0537, + "step": 11234 + }, + { + "epoch": 2.28, + "learning_rate": 7.120018791582268e-06, + "loss": 2.1661, + "step": 11235 + }, + { + "epoch": 2.28, + "learning_rate": 7.118364683939372e-06, + "loss": 2.1155, + "step": 11236 + }, + { + "epoch": 2.28, + "learning_rate": 7.116710662275549e-06, + "loss": 2.1861, + "step": 11237 + }, + { + "epoch": 2.28, + "learning_rate": 7.115056726640148e-06, + "loss": 2.1679, + "step": 11238 + }, + { + "epoch": 2.28, + "learning_rate": 7.113402877082516e-06, + "loss": 2.1358, + "step": 11239 + }, + { + "epoch": 2.28, + "learning_rate": 7.111749113652e-06, + "loss": 2.1161, + "step": 11240 + }, + { + "epoch": 2.28, + "learning_rate": 7.110095436397944e-06, + "loss": 2.1872, + "step": 11241 + }, + { + "epoch": 2.28, + "learning_rate": 7.1084418453696906e-06, + "loss": 2.0979, + "step": 11242 + }, + { + "epoch": 2.28, + "learning_rate": 7.106788340616573e-06, + "loss": 2.1352, + "step": 11243 + }, + { + "epoch": 2.28, + "learning_rate": 7.105134922187926e-06, + "loss": 2.0918, + "step": 11244 + }, + { + "epoch": 2.28, + "learning_rate": 7.10348159013309e-06, + "loss": 2.0655, + "step": 11245 + }, + { + "epoch": 2.28, + "learning_rate": 7.101828344501389e-06, + "loss": 2.0493, + "step": 11246 + }, + { + "epoch": 2.28, + "learning_rate": 7.100175185342153e-06, + "loss": 2.0752, + "step": 11247 + }, + { + "epoch": 2.28, + "learning_rate": 7.098522112704707e-06, + "loss": 2.0942, + "step": 11248 + }, + { + "epoch": 2.28, + "learning_rate": 7.0968691266383696e-06, + "loss": 2.1191, + "step": 11249 + }, + { + "epoch": 2.28, + "learning_rate": 7.095216227192467e-06, + "loss": 2.0832, + "step": 11250 + }, + { + "epoch": 2.28, + "learning_rate": 7.093563414416316e-06, + "loss": 2.1055, + "step": 11251 + }, + { + "epoch": 2.28, + "learning_rate": 7.091910688359225e-06, + "loss": 2.1325, + "step": 11252 + }, + { + "epoch": 2.28, + "learning_rate": 7.090258049070514e-06, + "loss": 2.1176, + "step": 11253 + }, + { + "epoch": 2.28, + "learning_rate": 7.0886054965994865e-06, + "loss": 2.1021, + "step": 11254 + }, + { + "epoch": 2.28, + "learning_rate": 7.0869530309954545e-06, + "loss": 2.1172, + "step": 11255 + }, + { + "epoch": 2.28, + "learning_rate": 7.085300652307719e-06, + "loss": 2.0971, + "step": 11256 + }, + { + "epoch": 2.28, + "learning_rate": 7.083648360585585e-06, + "loss": 2.1599, + "step": 11257 + }, + { + "epoch": 2.29, + "learning_rate": 7.081996155878348e-06, + "loss": 2.1423, + "step": 11258 + }, + { + "epoch": 2.29, + "learning_rate": 7.080344038235305e-06, + "loss": 2.157, + "step": 11259 + }, + { + "epoch": 2.29, + "learning_rate": 7.078692007705752e-06, + "loss": 2.0559, + "step": 11260 + }, + { + "epoch": 2.29, + "learning_rate": 7.077040064338982e-06, + "loss": 2.1467, + "step": 11261 + }, + { + "epoch": 2.29, + "learning_rate": 7.075388208184283e-06, + "loss": 2.1295, + "step": 11262 + }, + { + "epoch": 2.29, + "learning_rate": 7.073736439290939e-06, + "loss": 2.1331, + "step": 11263 + }, + { + "epoch": 2.29, + "learning_rate": 7.072084757708231e-06, + "loss": 2.0412, + "step": 11264 + }, + { + "epoch": 2.29, + "learning_rate": 7.070433163485448e-06, + "loss": 2.1365, + "step": 11265 + }, + { + "epoch": 2.29, + "learning_rate": 7.068781656671865e-06, + "loss": 2.1007, + "step": 11266 + }, + { + "epoch": 2.29, + "learning_rate": 7.067130237316756e-06, + "loss": 2.0527, + "step": 11267 + }, + { + "epoch": 2.29, + "learning_rate": 7.065478905469396e-06, + "loss": 2.0818, + "step": 11268 + }, + { + "epoch": 2.29, + "learning_rate": 7.063827661179051e-06, + "loss": 2.1226, + "step": 11269 + }, + { + "epoch": 2.29, + "learning_rate": 7.062176504494998e-06, + "loss": 2.1649, + "step": 11270 + }, + { + "epoch": 2.29, + "learning_rate": 7.060525435466496e-06, + "loss": 2.1636, + "step": 11271 + }, + { + "epoch": 2.29, + "learning_rate": 7.05887445414281e-06, + "loss": 2.0828, + "step": 11272 + }, + { + "epoch": 2.29, + "learning_rate": 7.057223560573199e-06, + "loss": 2.0183, + "step": 11273 + }, + { + "epoch": 2.29, + "learning_rate": 7.055572754806918e-06, + "loss": 2.1032, + "step": 11274 + }, + { + "epoch": 2.29, + "learning_rate": 7.0539220368932295e-06, + "loss": 2.1122, + "step": 11275 + }, + { + "epoch": 2.29, + "learning_rate": 7.052271406881377e-06, + "loss": 2.1481, + "step": 11276 + }, + { + "epoch": 2.29, + "learning_rate": 7.050620864820618e-06, + "loss": 2.0991, + "step": 11277 + }, + { + "epoch": 2.29, + "learning_rate": 7.048970410760197e-06, + "loss": 2.1545, + "step": 11278 + }, + { + "epoch": 2.29, + "learning_rate": 7.047320044749353e-06, + "loss": 2.1274, + "step": 11279 + }, + { + "epoch": 2.29, + "learning_rate": 7.045669766837333e-06, + "loss": 2.0893, + "step": 11280 + }, + { + "epoch": 2.29, + "learning_rate": 7.044019577073378e-06, + "loss": 2.123, + "step": 11281 + }, + { + "epoch": 2.29, + "learning_rate": 7.0423694755067206e-06, + "loss": 2.1088, + "step": 11282 + }, + { + "epoch": 2.29, + "learning_rate": 7.040719462186598e-06, + "loss": 2.0684, + "step": 11283 + }, + { + "epoch": 2.29, + "learning_rate": 7.039069537162234e-06, + "loss": 2.0788, + "step": 11284 + }, + { + "epoch": 2.29, + "learning_rate": 7.0374197004828685e-06, + "loss": 2.1589, + "step": 11285 + }, + { + "epoch": 2.29, + "learning_rate": 7.035769952197719e-06, + "loss": 2.1219, + "step": 11286 + }, + { + "epoch": 2.29, + "learning_rate": 7.034120292356015e-06, + "loss": 1.9923, + "step": 11287 + }, + { + "epoch": 2.29, + "learning_rate": 7.032470721006972e-06, + "loss": 2.1079, + "step": 11288 + }, + { + "epoch": 2.29, + "learning_rate": 7.030821238199807e-06, + "loss": 2.0451, + "step": 11289 + }, + { + "epoch": 2.29, + "learning_rate": 7.029171843983742e-06, + "loss": 2.0352, + "step": 11290 + }, + { + "epoch": 2.29, + "learning_rate": 7.027522538407987e-06, + "loss": 2.1796, + "step": 11291 + }, + { + "epoch": 2.29, + "learning_rate": 7.02587332152175e-06, + "loss": 2.0352, + "step": 11292 + }, + { + "epoch": 2.29, + "learning_rate": 7.024224193374239e-06, + "loss": 2.152, + "step": 11293 + }, + { + "epoch": 2.29, + "learning_rate": 7.02257515401466e-06, + "loss": 2.1312, + "step": 11294 + }, + { + "epoch": 2.29, + "learning_rate": 7.020926203492218e-06, + "loss": 2.1101, + "step": 11295 + }, + { + "epoch": 2.29, + "learning_rate": 7.019277341856108e-06, + "loss": 2.0919, + "step": 11296 + }, + { + "epoch": 2.29, + "learning_rate": 7.017628569155526e-06, + "loss": 2.0259, + "step": 11297 + }, + { + "epoch": 2.29, + "learning_rate": 7.015979885439674e-06, + "loss": 2.1695, + "step": 11298 + }, + { + "epoch": 2.29, + "learning_rate": 7.014331290757734e-06, + "loss": 2.1362, + "step": 11299 + }, + { + "epoch": 2.29, + "learning_rate": 7.0126827851588995e-06, + "loss": 2.0865, + "step": 11300 + }, + { + "epoch": 2.29, + "learning_rate": 7.01103436869236e-06, + "loss": 2.1232, + "step": 11301 + }, + { + "epoch": 2.29, + "learning_rate": 7.009386041407296e-06, + "loss": 2.0157, + "step": 11302 + }, + { + "epoch": 2.29, + "learning_rate": 7.0077378033528865e-06, + "loss": 2.1163, + "step": 11303 + }, + { + "epoch": 2.29, + "learning_rate": 7.006089654578309e-06, + "loss": 2.0555, + "step": 11304 + }, + { + "epoch": 2.29, + "learning_rate": 7.004441595132745e-06, + "loss": 2.1361, + "step": 11305 + }, + { + "epoch": 2.29, + "learning_rate": 7.002793625065365e-06, + "loss": 2.1459, + "step": 11306 + }, + { + "epoch": 2.3, + "learning_rate": 7.00114574442534e-06, + "loss": 2.1992, + "step": 11307 + }, + { + "epoch": 2.3, + "learning_rate": 6.999497953261835e-06, + "loss": 2.1023, + "step": 11308 + }, + { + "epoch": 2.3, + "learning_rate": 6.997850251624012e-06, + "loss": 2.1105, + "step": 11309 + }, + { + "epoch": 2.3, + "learning_rate": 6.9962026395610416e-06, + "loss": 2.1215, + "step": 11310 + }, + { + "epoch": 2.3, + "learning_rate": 6.994555117122079e-06, + "loss": 2.1313, + "step": 11311 + }, + { + "epoch": 2.3, + "learning_rate": 6.992907684356282e-06, + "loss": 2.1092, + "step": 11312 + }, + { + "epoch": 2.3, + "learning_rate": 6.9912603413128045e-06, + "loss": 2.2066, + "step": 11313 + }, + { + "epoch": 2.3, + "learning_rate": 6.9896130880407965e-06, + "loss": 2.1163, + "step": 11314 + }, + { + "epoch": 2.3, + "learning_rate": 6.987965924589411e-06, + "loss": 2.1487, + "step": 11315 + }, + { + "epoch": 2.3, + "learning_rate": 6.986318851007792e-06, + "loss": 2.1, + "step": 11316 + }, + { + "epoch": 2.3, + "learning_rate": 6.984671867345079e-06, + "loss": 2.0849, + "step": 11317 + }, + { + "epoch": 2.3, + "learning_rate": 6.9830249736504186e-06, + "loss": 2.1381, + "step": 11318 + }, + { + "epoch": 2.3, + "learning_rate": 6.981378169972949e-06, + "loss": 2.1451, + "step": 11319 + }, + { + "epoch": 2.3, + "learning_rate": 6.9797314563618025e-06, + "loss": 2.1581, + "step": 11320 + }, + { + "epoch": 2.3, + "learning_rate": 6.978084832866113e-06, + "loss": 2.0929, + "step": 11321 + }, + { + "epoch": 2.3, + "learning_rate": 6.976438299535013e-06, + "loss": 2.1489, + "step": 11322 + }, + { + "epoch": 2.3, + "learning_rate": 6.974791856417624e-06, + "loss": 2.1715, + "step": 11323 + }, + { + "epoch": 2.3, + "learning_rate": 6.973145503563077e-06, + "loss": 2.0991, + "step": 11324 + }, + { + "epoch": 2.3, + "learning_rate": 6.971499241020493e-06, + "loss": 2.1504, + "step": 11325 + }, + { + "epoch": 2.3, + "learning_rate": 6.96985306883899e-06, + "loss": 2.0834, + "step": 11326 + }, + { + "epoch": 2.3, + "learning_rate": 6.968206987067685e-06, + "loss": 2.0983, + "step": 11327 + }, + { + "epoch": 2.3, + "learning_rate": 6.966560995755687e-06, + "loss": 2.2032, + "step": 11328 + }, + { + "epoch": 2.3, + "learning_rate": 6.9649150949521175e-06, + "loss": 2.1115, + "step": 11329 + }, + { + "epoch": 2.3, + "learning_rate": 6.963269284706078e-06, + "loss": 2.143, + "step": 11330 + }, + { + "epoch": 2.3, + "learning_rate": 6.961623565066678e-06, + "loss": 2.1949, + "step": 11331 + }, + { + "epoch": 2.3, + "learning_rate": 6.9599779360830176e-06, + "loss": 2.1923, + "step": 11332 + }, + { + "epoch": 2.3, + "learning_rate": 6.958332397804194e-06, + "loss": 2.1058, + "step": 11333 + }, + { + "epoch": 2.3, + "learning_rate": 6.9566869502793135e-06, + "loss": 2.1091, + "step": 11334 + }, + { + "epoch": 2.3, + "learning_rate": 6.955041593557468e-06, + "loss": 2.1646, + "step": 11335 + }, + { + "epoch": 2.3, + "learning_rate": 6.953396327687747e-06, + "loss": 2.0798, + "step": 11336 + }, + { + "epoch": 2.3, + "learning_rate": 6.951751152719242e-06, + "loss": 2.1103, + "step": 11337 + }, + { + "epoch": 2.3, + "learning_rate": 6.950106068701039e-06, + "loss": 2.1726, + "step": 11338 + }, + { + "epoch": 2.3, + "learning_rate": 6.948461075682225e-06, + "loss": 2.1313, + "step": 11339 + }, + { + "epoch": 2.3, + "learning_rate": 6.94681617371188e-06, + "loss": 2.1963, + "step": 11340 + }, + { + "epoch": 2.3, + "learning_rate": 6.94517136283908e-06, + "loss": 2.1483, + "step": 11341 + }, + { + "epoch": 2.3, + "learning_rate": 6.943526643112906e-06, + "loss": 2.0734, + "step": 11342 + }, + { + "epoch": 2.3, + "learning_rate": 6.941882014582426e-06, + "loss": 2.078, + "step": 11343 + }, + { + "epoch": 2.3, + "learning_rate": 6.940237477296716e-06, + "loss": 2.1, + "step": 11344 + }, + { + "epoch": 2.3, + "learning_rate": 6.93859303130484e-06, + "loss": 2.0877, + "step": 11345 + }, + { + "epoch": 2.3, + "learning_rate": 6.936948676655866e-06, + "loss": 2.1334, + "step": 11346 + }, + { + "epoch": 2.3, + "learning_rate": 6.935304413398854e-06, + "loss": 2.1598, + "step": 11347 + }, + { + "epoch": 2.3, + "learning_rate": 6.933660241582862e-06, + "loss": 2.0806, + "step": 11348 + }, + { + "epoch": 2.3, + "learning_rate": 6.932016161256954e-06, + "loss": 2.089, + "step": 11349 + }, + { + "epoch": 2.3, + "learning_rate": 6.930372172470179e-06, + "loss": 2.2062, + "step": 11350 + }, + { + "epoch": 2.3, + "learning_rate": 6.928728275271588e-06, + "loss": 1.9851, + "step": 11351 + }, + { + "epoch": 2.3, + "learning_rate": 6.927084469710233e-06, + "loss": 2.1624, + "step": 11352 + }, + { + "epoch": 2.3, + "learning_rate": 6.9254407558351545e-06, + "loss": 2.1439, + "step": 11353 + }, + { + "epoch": 2.3, + "learning_rate": 6.923797133695403e-06, + "loss": 2.186, + "step": 11354 + }, + { + "epoch": 2.3, + "learning_rate": 6.922153603340016e-06, + "loss": 2.1003, + "step": 11355 + }, + { + "epoch": 2.31, + "learning_rate": 6.9205101648180315e-06, + "loss": 2.1409, + "step": 11356 + }, + { + "epoch": 2.31, + "learning_rate": 6.918866818178482e-06, + "loss": 2.1197, + "step": 11357 + }, + { + "epoch": 2.31, + "learning_rate": 6.917223563470402e-06, + "loss": 2.0955, + "step": 11358 + }, + { + "epoch": 2.31, + "learning_rate": 6.915580400742823e-06, + "loss": 2.0786, + "step": 11359 + }, + { + "epoch": 2.31, + "learning_rate": 6.913937330044771e-06, + "loss": 2.0931, + "step": 11360 + }, + { + "epoch": 2.31, + "learning_rate": 6.912294351425266e-06, + "loss": 2.1134, + "step": 11361 + }, + { + "epoch": 2.31, + "learning_rate": 6.910651464933335e-06, + "loss": 2.1221, + "step": 11362 + }, + { + "epoch": 2.31, + "learning_rate": 6.909008670617994e-06, + "loss": 2.1149, + "step": 11363 + }, + { + "epoch": 2.31, + "learning_rate": 6.907365968528259e-06, + "loss": 2.2228, + "step": 11364 + }, + { + "epoch": 2.31, + "learning_rate": 6.905723358713141e-06, + "loss": 2.1343, + "step": 11365 + }, + { + "epoch": 2.31, + "learning_rate": 6.904080841221657e-06, + "loss": 2.0833, + "step": 11366 + }, + { + "epoch": 2.31, + "learning_rate": 6.902438416102809e-06, + "loss": 2.0889, + "step": 11367 + }, + { + "epoch": 2.31, + "learning_rate": 6.900796083405601e-06, + "loss": 2.0691, + "step": 11368 + }, + { + "epoch": 2.31, + "learning_rate": 6.8991538431790385e-06, + "loss": 2.1593, + "step": 11369 + }, + { + "epoch": 2.31, + "learning_rate": 6.897511695472121e-06, + "loss": 2.0481, + "step": 11370 + }, + { + "epoch": 2.31, + "learning_rate": 6.895869640333843e-06, + "loss": 2.1242, + "step": 11371 + }, + { + "epoch": 2.31, + "learning_rate": 6.8942276778132e-06, + "loss": 2.1248, + "step": 11372 + }, + { + "epoch": 2.31, + "learning_rate": 6.892585807959178e-06, + "loss": 2.1377, + "step": 11373 + }, + { + "epoch": 2.31, + "learning_rate": 6.890944030820774e-06, + "loss": 2.1163, + "step": 11374 + }, + { + "epoch": 2.31, + "learning_rate": 6.889302346446969e-06, + "loss": 2.0751, + "step": 11375 + }, + { + "epoch": 2.31, + "learning_rate": 6.887660754886745e-06, + "loss": 2.1155, + "step": 11376 + }, + { + "epoch": 2.31, + "learning_rate": 6.886019256189083e-06, + "loss": 2.0531, + "step": 11377 + }, + { + "epoch": 2.31, + "learning_rate": 6.884377850402957e-06, + "loss": 2.156, + "step": 11378 + }, + { + "epoch": 2.31, + "learning_rate": 6.882736537577348e-06, + "loss": 2.2016, + "step": 11379 + }, + { + "epoch": 2.31, + "learning_rate": 6.881095317761224e-06, + "loss": 2.1306, + "step": 11380 + }, + { + "epoch": 2.31, + "learning_rate": 6.879454191003553e-06, + "loss": 2.1204, + "step": 11381 + }, + { + "epoch": 2.31, + "learning_rate": 6.877813157353303e-06, + "loss": 2.1424, + "step": 11382 + }, + { + "epoch": 2.31, + "learning_rate": 6.8761722168594355e-06, + "loss": 2.0884, + "step": 11383 + }, + { + "epoch": 2.31, + "learning_rate": 6.874531369570915e-06, + "loss": 2.0727, + "step": 11384 + }, + { + "epoch": 2.31, + "learning_rate": 6.872890615536694e-06, + "loss": 2.1055, + "step": 11385 + }, + { + "epoch": 2.31, + "learning_rate": 6.871249954805732e-06, + "loss": 2.0727, + "step": 11386 + }, + { + "epoch": 2.31, + "learning_rate": 6.869609387426979e-06, + "loss": 2.0876, + "step": 11387 + }, + { + "epoch": 2.31, + "learning_rate": 6.8679689134493835e-06, + "loss": 2.1259, + "step": 11388 + }, + { + "epoch": 2.31, + "learning_rate": 6.866328532921891e-06, + "loss": 2.1752, + "step": 11389 + }, + { + "epoch": 2.31, + "learning_rate": 6.864688245893451e-06, + "loss": 2.0774, + "step": 11390 + }, + { + "epoch": 2.31, + "learning_rate": 6.8630480524130035e-06, + "loss": 2.0681, + "step": 11391 + }, + { + "epoch": 2.31, + "learning_rate": 6.861407952529483e-06, + "loss": 2.0876, + "step": 11392 + }, + { + "epoch": 2.31, + "learning_rate": 6.859767946291822e-06, + "loss": 2.0991, + "step": 11393 + }, + { + "epoch": 2.31, + "learning_rate": 6.8581280337489634e-06, + "loss": 2.0846, + "step": 11394 + }, + { + "epoch": 2.31, + "learning_rate": 6.85648821494983e-06, + "loss": 2.1338, + "step": 11395 + }, + { + "epoch": 2.31, + "learning_rate": 6.8548484899433525e-06, + "loss": 2.1522, + "step": 11396 + }, + { + "epoch": 2.31, + "learning_rate": 6.853208858778451e-06, + "loss": 2.1563, + "step": 11397 + }, + { + "epoch": 2.31, + "learning_rate": 6.851569321504045e-06, + "loss": 2.1699, + "step": 11398 + }, + { + "epoch": 2.31, + "learning_rate": 6.849929878169064e-06, + "loss": 2.1512, + "step": 11399 + }, + { + "epoch": 2.31, + "learning_rate": 6.848290528822417e-06, + "loss": 2.0903, + "step": 11400 + }, + { + "epoch": 2.31, + "learning_rate": 6.846651273513016e-06, + "loss": 2.1595, + "step": 11401 + }, + { + "epoch": 2.31, + "learning_rate": 6.84501211228977e-06, + "loss": 2.1423, + "step": 11402 + }, + { + "epoch": 2.31, + "learning_rate": 6.843373045201591e-06, + "loss": 2.1643, + "step": 11403 + }, + { + "epoch": 2.31, + "learning_rate": 6.841734072297383e-06, + "loss": 2.083, + "step": 11404 + }, + { + "epoch": 2.32, + "learning_rate": 6.840095193626046e-06, + "loss": 2.0826, + "step": 11405 + }, + { + "epoch": 2.32, + "learning_rate": 6.83845640923648e-06, + "loss": 2.1367, + "step": 11406 + }, + { + "epoch": 2.32, + "learning_rate": 6.836817719177582e-06, + "loss": 2.0749, + "step": 11407 + }, + { + "epoch": 2.32, + "learning_rate": 6.835179123498243e-06, + "loss": 2.1028, + "step": 11408 + }, + { + "epoch": 2.32, + "learning_rate": 6.833540622247354e-06, + "loss": 2.1647, + "step": 11409 + }, + { + "epoch": 2.32, + "learning_rate": 6.831902215473808e-06, + "loss": 2.049, + "step": 11410 + }, + { + "epoch": 2.32, + "learning_rate": 6.830263903226485e-06, + "loss": 2.1662, + "step": 11411 + }, + { + "epoch": 2.32, + "learning_rate": 6.828625685554265e-06, + "loss": 2.0728, + "step": 11412 + }, + { + "epoch": 2.32, + "learning_rate": 6.8269875625060315e-06, + "loss": 2.1012, + "step": 11413 + }, + { + "epoch": 2.32, + "learning_rate": 6.825349534130662e-06, + "loss": 2.2141, + "step": 11414 + }, + { + "epoch": 2.32, + "learning_rate": 6.823711600477028e-06, + "loss": 2.1398, + "step": 11415 + }, + { + "epoch": 2.32, + "learning_rate": 6.822073761594002e-06, + "loss": 2.0391, + "step": 11416 + }, + { + "epoch": 2.32, + "learning_rate": 6.820436017530443e-06, + "loss": 2.1169, + "step": 11417 + }, + { + "epoch": 2.32, + "learning_rate": 6.818798368335231e-06, + "loss": 2.1782, + "step": 11418 + }, + { + "epoch": 2.32, + "learning_rate": 6.81716081405722e-06, + "loss": 2.1425, + "step": 11419 + }, + { + "epoch": 2.32, + "learning_rate": 6.8155233547452704e-06, + "loss": 2.0949, + "step": 11420 + }, + { + "epoch": 2.32, + "learning_rate": 6.813885990448239e-06, + "loss": 2.1307, + "step": 11421 + }, + { + "epoch": 2.32, + "learning_rate": 6.812248721214977e-06, + "loss": 2.1223, + "step": 11422 + }, + { + "epoch": 2.32, + "learning_rate": 6.810611547094341e-06, + "loss": 2.1475, + "step": 11423 + }, + { + "epoch": 2.32, + "learning_rate": 6.808974468135178e-06, + "loss": 2.188, + "step": 11424 + }, + { + "epoch": 2.32, + "learning_rate": 6.807337484386332e-06, + "loss": 2.1086, + "step": 11425 + }, + { + "epoch": 2.32, + "learning_rate": 6.805700595896642e-06, + "loss": 2.1687, + "step": 11426 + }, + { + "epoch": 2.32, + "learning_rate": 6.80406380271495e-06, + "loss": 2.1735, + "step": 11427 + }, + { + "epoch": 2.32, + "learning_rate": 6.802427104890099e-06, + "loss": 2.1517, + "step": 11428 + }, + { + "epoch": 2.32, + "learning_rate": 6.800790502470915e-06, + "loss": 1.9938, + "step": 11429 + }, + { + "epoch": 2.32, + "learning_rate": 6.799153995506234e-06, + "loss": 2.0955, + "step": 11430 + }, + { + "epoch": 2.32, + "learning_rate": 6.797517584044882e-06, + "loss": 2.1102, + "step": 11431 + }, + { + "epoch": 2.32, + "learning_rate": 6.795881268135683e-06, + "loss": 2.0456, + "step": 11432 + }, + { + "epoch": 2.32, + "learning_rate": 6.794245047827461e-06, + "loss": 2.0488, + "step": 11433 + }, + { + "epoch": 2.32, + "learning_rate": 6.792608923169038e-06, + "loss": 2.1257, + "step": 11434 + }, + { + "epoch": 2.32, + "learning_rate": 6.79097289420923e-06, + "loss": 2.1786, + "step": 11435 + }, + { + "epoch": 2.32, + "learning_rate": 6.789336960996851e-06, + "loss": 2.0997, + "step": 11436 + }, + { + "epoch": 2.32, + "learning_rate": 6.787701123580706e-06, + "loss": 2.0605, + "step": 11437 + }, + { + "epoch": 2.32, + "learning_rate": 6.786065382009612e-06, + "loss": 2.1145, + "step": 11438 + }, + { + "epoch": 2.32, + "learning_rate": 6.784429736332372e-06, + "loss": 2.0992, + "step": 11439 + }, + { + "epoch": 2.32, + "learning_rate": 6.782794186597788e-06, + "loss": 2.1313, + "step": 11440 + }, + { + "epoch": 2.32, + "learning_rate": 6.781158732854658e-06, + "loss": 2.1861, + "step": 11441 + }, + { + "epoch": 2.32, + "learning_rate": 6.779523375151776e-06, + "loss": 2.1086, + "step": 11442 + }, + { + "epoch": 2.32, + "learning_rate": 6.777888113537946e-06, + "loss": 2.0909, + "step": 11443 + }, + { + "epoch": 2.32, + "learning_rate": 6.776252948061952e-06, + "loss": 2.1949, + "step": 11444 + }, + { + "epoch": 2.32, + "learning_rate": 6.774617878772585e-06, + "loss": 2.1036, + "step": 11445 + }, + { + "epoch": 2.32, + "learning_rate": 6.772982905718627e-06, + "loss": 2.1749, + "step": 11446 + }, + { + "epoch": 2.32, + "learning_rate": 6.771348028948862e-06, + "loss": 2.1318, + "step": 11447 + }, + { + "epoch": 2.32, + "learning_rate": 6.769713248512073e-06, + "loss": 2.0671, + "step": 11448 + }, + { + "epoch": 2.32, + "learning_rate": 6.768078564457034e-06, + "loss": 2.1692, + "step": 11449 + }, + { + "epoch": 2.32, + "learning_rate": 6.766443976832516e-06, + "loss": 2.1739, + "step": 11450 + }, + { + "epoch": 2.32, + "learning_rate": 6.764809485687296e-06, + "loss": 2.0443, + "step": 11451 + }, + { + "epoch": 2.32, + "learning_rate": 6.763175091070136e-06, + "loss": 2.0789, + "step": 11452 + }, + { + "epoch": 2.32, + "learning_rate": 6.761540793029806e-06, + "loss": 2.0909, + "step": 11453 + }, + { + "epoch": 2.32, + "learning_rate": 6.759906591615069e-06, + "loss": 2.0887, + "step": 11454 + }, + { + "epoch": 2.33, + "learning_rate": 6.75827248687468e-06, + "loss": 2.025, + "step": 11455 + }, + { + "epoch": 2.33, + "learning_rate": 6.7566384788574005e-06, + "loss": 2.0984, + "step": 11456 + }, + { + "epoch": 2.33, + "learning_rate": 6.755004567611976e-06, + "loss": 2.131, + "step": 11457 + }, + { + "epoch": 2.33, + "learning_rate": 6.753370753187168e-06, + "loss": 2.0537, + "step": 11458 + }, + { + "epoch": 2.33, + "learning_rate": 6.751737035631718e-06, + "loss": 2.0427, + "step": 11459 + }, + { + "epoch": 2.33, + "learning_rate": 6.750103414994374e-06, + "loss": 2.0156, + "step": 11460 + }, + { + "epoch": 2.33, + "learning_rate": 6.748469891323877e-06, + "loss": 2.2073, + "step": 11461 + }, + { + "epoch": 2.33, + "learning_rate": 6.7468364646689625e-06, + "loss": 2.1058, + "step": 11462 + }, + { + "epoch": 2.33, + "learning_rate": 6.745203135078373e-06, + "loss": 2.1032, + "step": 11463 + }, + { + "epoch": 2.33, + "learning_rate": 6.74356990260084e-06, + "loss": 2.1008, + "step": 11464 + }, + { + "epoch": 2.33, + "learning_rate": 6.741936767285093e-06, + "loss": 2.1903, + "step": 11465 + }, + { + "epoch": 2.33, + "learning_rate": 6.740303729179859e-06, + "loss": 2.1034, + "step": 11466 + }, + { + "epoch": 2.33, + "learning_rate": 6.738670788333863e-06, + "loss": 2.1562, + "step": 11467 + }, + { + "epoch": 2.33, + "learning_rate": 6.737037944795832e-06, + "loss": 2.1664, + "step": 11468 + }, + { + "epoch": 2.33, + "learning_rate": 6.7354051986144796e-06, + "loss": 2.1664, + "step": 11469 + }, + { + "epoch": 2.33, + "learning_rate": 6.733772549838521e-06, + "loss": 2.1344, + "step": 11470 + }, + { + "epoch": 2.33, + "learning_rate": 6.732139998516674e-06, + "loss": 2.0896, + "step": 11471 + }, + { + "epoch": 2.33, + "learning_rate": 6.730507544697644e-06, + "loss": 2.1259, + "step": 11472 + }, + { + "epoch": 2.33, + "learning_rate": 6.728875188430143e-06, + "loss": 2.1618, + "step": 11473 + }, + { + "epoch": 2.33, + "learning_rate": 6.727242929762872e-06, + "loss": 2.0741, + "step": 11474 + }, + { + "epoch": 2.33, + "learning_rate": 6.725610768744535e-06, + "loss": 2.0506, + "step": 11475 + }, + { + "epoch": 2.33, + "learning_rate": 6.72397870542383e-06, + "loss": 2.0765, + "step": 11476 + }, + { + "epoch": 2.33, + "learning_rate": 6.722346739849448e-06, + "loss": 2.0802, + "step": 11477 + }, + { + "epoch": 2.33, + "learning_rate": 6.720714872070089e-06, + "loss": 2.1153, + "step": 11478 + }, + { + "epoch": 2.33, + "learning_rate": 6.719083102134441e-06, + "loss": 2.1271, + "step": 11479 + }, + { + "epoch": 2.33, + "learning_rate": 6.71745143009119e-06, + "loss": 2.1175, + "step": 11480 + }, + { + "epoch": 2.33, + "learning_rate": 6.715819855989018e-06, + "loss": 2.118, + "step": 11481 + }, + { + "epoch": 2.33, + "learning_rate": 6.714188379876606e-06, + "loss": 2.0936, + "step": 11482 + }, + { + "epoch": 2.33, + "learning_rate": 6.7125570018026375e-06, + "loss": 2.1097, + "step": 11483 + }, + { + "epoch": 2.33, + "learning_rate": 6.710925721815784e-06, + "loss": 2.1471, + "step": 11484 + }, + { + "epoch": 2.33, + "learning_rate": 6.709294539964719e-06, + "loss": 2.1692, + "step": 11485 + }, + { + "epoch": 2.33, + "learning_rate": 6.707663456298109e-06, + "loss": 2.0329, + "step": 11486 + }, + { + "epoch": 2.33, + "learning_rate": 6.7060324708646225e-06, + "loss": 2.0793, + "step": 11487 + }, + { + "epoch": 2.33, + "learning_rate": 6.704401583712925e-06, + "loss": 2.1379, + "step": 11488 + }, + { + "epoch": 2.33, + "learning_rate": 6.702770794891675e-06, + "loss": 2.1493, + "step": 11489 + }, + { + "epoch": 2.33, + "learning_rate": 6.7011401044495304e-06, + "loss": 2.1688, + "step": 11490 + }, + { + "epoch": 2.33, + "learning_rate": 6.699509512435147e-06, + "loss": 2.2114, + "step": 11491 + }, + { + "epoch": 2.33, + "learning_rate": 6.697879018897173e-06, + "loss": 2.1278, + "step": 11492 + }, + { + "epoch": 2.33, + "learning_rate": 6.696248623884263e-06, + "loss": 2.0606, + "step": 11493 + }, + { + "epoch": 2.33, + "learning_rate": 6.694618327445057e-06, + "loss": 2.101, + "step": 11494 + }, + { + "epoch": 2.33, + "learning_rate": 6.692988129628204e-06, + "loss": 2.101, + "step": 11495 + }, + { + "epoch": 2.33, + "learning_rate": 6.69135803048234e-06, + "loss": 2.1333, + "step": 11496 + }, + { + "epoch": 2.33, + "learning_rate": 6.689728030056101e-06, + "loss": 2.1157, + "step": 11497 + }, + { + "epoch": 2.33, + "learning_rate": 6.688098128398124e-06, + "loss": 2.0576, + "step": 11498 + }, + { + "epoch": 2.33, + "learning_rate": 6.686468325557041e-06, + "loss": 2.2265, + "step": 11499 + }, + { + "epoch": 2.33, + "learning_rate": 6.684838621581477e-06, + "loss": 2.1267, + "step": 11500 + }, + { + "epoch": 2.33, + "learning_rate": 6.683209016520058e-06, + "loss": 2.1313, + "step": 11501 + }, + { + "epoch": 2.33, + "learning_rate": 6.68157951042141e-06, + "loss": 2.0492, + "step": 11502 + }, + { + "epoch": 2.33, + "learning_rate": 6.67995010333415e-06, + "loss": 2.1361, + "step": 11503 + }, + { + "epoch": 2.34, + "learning_rate": 6.678320795306894e-06, + "loss": 2.1274, + "step": 11504 + }, + { + "epoch": 2.34, + "learning_rate": 6.676691586388255e-06, + "loss": 2.1219, + "step": 11505 + }, + { + "epoch": 2.34, + "learning_rate": 6.675062476626841e-06, + "loss": 2.1266, + "step": 11506 + }, + { + "epoch": 2.34, + "learning_rate": 6.673433466071269e-06, + "loss": 2.1634, + "step": 11507 + }, + { + "epoch": 2.34, + "learning_rate": 6.671804554770135e-06, + "loss": 2.1475, + "step": 11508 + }, + { + "epoch": 2.34, + "learning_rate": 6.670175742772044e-06, + "loss": 2.135, + "step": 11509 + }, + { + "epoch": 2.34, + "learning_rate": 6.668547030125592e-06, + "loss": 2.0803, + "step": 11510 + }, + { + "epoch": 2.34, + "learning_rate": 6.666918416879376e-06, + "loss": 2.0476, + "step": 11511 + }, + { + "epoch": 2.34, + "learning_rate": 6.665289903081993e-06, + "loss": 2.0788, + "step": 11512 + }, + { + "epoch": 2.34, + "learning_rate": 6.663661488782027e-06, + "loss": 2.0894, + "step": 11513 + }, + { + "epoch": 2.34, + "learning_rate": 6.662033174028067e-06, + "loss": 2.0796, + "step": 11514 + }, + { + "epoch": 2.34, + "learning_rate": 6.660404958868698e-06, + "loss": 2.2123, + "step": 11515 + }, + { + "epoch": 2.34, + "learning_rate": 6.658776843352497e-06, + "loss": 2.1212, + "step": 11516 + }, + { + "epoch": 2.34, + "learning_rate": 6.657148827528048e-06, + "loss": 2.0735, + "step": 11517 + }, + { + "epoch": 2.34, + "learning_rate": 6.65552091144392e-06, + "loss": 2.0603, + "step": 11518 + }, + { + "epoch": 2.34, + "learning_rate": 6.6538930951486915e-06, + "loss": 2.0125, + "step": 11519 + }, + { + "epoch": 2.34, + "learning_rate": 6.652265378690925e-06, + "loss": 2.1886, + "step": 11520 + }, + { + "epoch": 2.34, + "learning_rate": 6.65063776211919e-06, + "loss": 2.0845, + "step": 11521 + }, + { + "epoch": 2.34, + "learning_rate": 6.649010245482046e-06, + "loss": 2.093, + "step": 11522 + }, + { + "epoch": 2.34, + "learning_rate": 6.6473828288280595e-06, + "loss": 2.1938, + "step": 11523 + }, + { + "epoch": 2.34, + "learning_rate": 6.645755512205783e-06, + "loss": 2.1713, + "step": 11524 + }, + { + "epoch": 2.34, + "learning_rate": 6.644128295663772e-06, + "loss": 2.1531, + "step": 11525 + }, + { + "epoch": 2.34, + "learning_rate": 6.642501179250573e-06, + "loss": 2.0554, + "step": 11526 + }, + { + "epoch": 2.34, + "learning_rate": 6.640874163014741e-06, + "loss": 2.1143, + "step": 11527 + }, + { + "epoch": 2.34, + "learning_rate": 6.639247247004821e-06, + "loss": 2.101, + "step": 11528 + }, + { + "epoch": 2.34, + "learning_rate": 6.63762043126935e-06, + "loss": 2.1378, + "step": 11529 + }, + { + "epoch": 2.34, + "learning_rate": 6.63599371585687e-06, + "loss": 2.1548, + "step": 11530 + }, + { + "epoch": 2.34, + "learning_rate": 6.634367100815913e-06, + "loss": 2.1237, + "step": 11531 + }, + { + "epoch": 2.34, + "learning_rate": 6.63274058619502e-06, + "loss": 2.1748, + "step": 11532 + }, + { + "epoch": 2.34, + "learning_rate": 6.631114172042716e-06, + "loss": 2.1899, + "step": 11533 + }, + { + "epoch": 2.34, + "learning_rate": 6.629487858407528e-06, + "loss": 2.0299, + "step": 11534 + }, + { + "epoch": 2.34, + "learning_rate": 6.627861645337984e-06, + "loss": 2.1439, + "step": 11535 + }, + { + "epoch": 2.34, + "learning_rate": 6.6262355328826e-06, + "loss": 2.0357, + "step": 11536 + }, + { + "epoch": 2.34, + "learning_rate": 6.624609521089898e-06, + "loss": 2.0499, + "step": 11537 + }, + { + "epoch": 2.34, + "learning_rate": 6.622983610008391e-06, + "loss": 2.0948, + "step": 11538 + }, + { + "epoch": 2.34, + "learning_rate": 6.6213577996865945e-06, + "loss": 2.1258, + "step": 11539 + }, + { + "epoch": 2.34, + "learning_rate": 6.619732090173012e-06, + "loss": 2.1846, + "step": 11540 + }, + { + "epoch": 2.34, + "learning_rate": 6.618106481516153e-06, + "loss": 2.0878, + "step": 11541 + }, + { + "epoch": 2.34, + "learning_rate": 6.6164809737645205e-06, + "loss": 2.0973, + "step": 11542 + }, + { + "epoch": 2.34, + "learning_rate": 6.614855566966616e-06, + "loss": 2.1283, + "step": 11543 + }, + { + "epoch": 2.34, + "learning_rate": 6.6132302611709355e-06, + "loss": 2.1128, + "step": 11544 + }, + { + "epoch": 2.34, + "learning_rate": 6.611605056425971e-06, + "loss": 2.0947, + "step": 11545 + }, + { + "epoch": 2.34, + "learning_rate": 6.609979952780213e-06, + "loss": 2.1602, + "step": 11546 + }, + { + "epoch": 2.34, + "learning_rate": 6.608354950282156e-06, + "loss": 2.1075, + "step": 11547 + }, + { + "epoch": 2.34, + "learning_rate": 6.60673004898028e-06, + "loss": 2.1456, + "step": 11548 + }, + { + "epoch": 2.34, + "learning_rate": 6.605105248923069e-06, + "loss": 2.069, + "step": 11549 + }, + { + "epoch": 2.34, + "learning_rate": 6.603480550158999e-06, + "loss": 2.0833, + "step": 11550 + }, + { + "epoch": 2.34, + "learning_rate": 6.601855952736545e-06, + "loss": 2.1022, + "step": 11551 + }, + { + "epoch": 2.34, + "learning_rate": 6.600231456704187e-06, + "loss": 2.1336, + "step": 11552 + }, + { + "epoch": 2.35, + "learning_rate": 6.598607062110389e-06, + "loss": 2.1929, + "step": 11553 + }, + { + "epoch": 2.35, + "learning_rate": 6.596982769003621e-06, + "loss": 2.2193, + "step": 11554 + }, + { + "epoch": 2.35, + "learning_rate": 6.595358577432345e-06, + "loss": 2.1533, + "step": 11555 + }, + { + "epoch": 2.35, + "learning_rate": 6.5937344874450204e-06, + "loss": 2.1076, + "step": 11556 + }, + { + "epoch": 2.35, + "learning_rate": 6.59211049909011e-06, + "loss": 2.1495, + "step": 11557 + }, + { + "epoch": 2.35, + "learning_rate": 6.5904866124160626e-06, + "loss": 2.0895, + "step": 11558 + }, + { + "epoch": 2.35, + "learning_rate": 6.588862827471334e-06, + "loss": 2.0694, + "step": 11559 + }, + { + "epoch": 2.35, + "learning_rate": 6.587239144304374e-06, + "loss": 2.2256, + "step": 11560 + }, + { + "epoch": 2.35, + "learning_rate": 6.585615562963623e-06, + "loss": 2.1101, + "step": 11561 + }, + { + "epoch": 2.35, + "learning_rate": 6.583992083497527e-06, + "loss": 2.1068, + "step": 11562 + }, + { + "epoch": 2.35, + "learning_rate": 6.5823687059545275e-06, + "loss": 2.0451, + "step": 11563 + }, + { + "epoch": 2.35, + "learning_rate": 6.580745430383059e-06, + "loss": 2.1228, + "step": 11564 + }, + { + "epoch": 2.35, + "learning_rate": 6.579122256831554e-06, + "loss": 2.1051, + "step": 11565 + }, + { + "epoch": 2.35, + "learning_rate": 6.577499185348441e-06, + "loss": 2.1249, + "step": 11566 + }, + { + "epoch": 2.35, + "learning_rate": 6.575876215982153e-06, + "loss": 2.0997, + "step": 11567 + }, + { + "epoch": 2.35, + "learning_rate": 6.574253348781112e-06, + "loss": 2.1441, + "step": 11568 + }, + { + "epoch": 2.35, + "learning_rate": 6.57263058379374e-06, + "loss": 2.1484, + "step": 11569 + }, + { + "epoch": 2.35, + "learning_rate": 6.571007921068453e-06, + "loss": 2.1131, + "step": 11570 + }, + { + "epoch": 2.35, + "learning_rate": 6.569385360653665e-06, + "loss": 2.1577, + "step": 11571 + }, + { + "epoch": 2.35, + "learning_rate": 6.567762902597794e-06, + "loss": 2.1107, + "step": 11572 + }, + { + "epoch": 2.35, + "learning_rate": 6.566140546949245e-06, + "loss": 2.1008, + "step": 11573 + }, + { + "epoch": 2.35, + "learning_rate": 6.564518293756426e-06, + "loss": 2.0855, + "step": 11574 + }, + { + "epoch": 2.35, + "learning_rate": 6.562896143067736e-06, + "loss": 2.1939, + "step": 11575 + }, + { + "epoch": 2.35, + "learning_rate": 6.561274094931579e-06, + "loss": 2.1076, + "step": 11576 + }, + { + "epoch": 2.35, + "learning_rate": 6.559652149396351e-06, + "loss": 2.076, + "step": 11577 + }, + { + "epoch": 2.35, + "learning_rate": 6.558030306510448e-06, + "loss": 2.1872, + "step": 11578 + }, + { + "epoch": 2.35, + "learning_rate": 6.556408566322254e-06, + "loss": 2.0732, + "step": 11579 + }, + { + "epoch": 2.35, + "learning_rate": 6.554786928880165e-06, + "loss": 2.048, + "step": 11580 + }, + { + "epoch": 2.35, + "learning_rate": 6.553165394232558e-06, + "loss": 2.1214, + "step": 11581 + }, + { + "epoch": 2.35, + "learning_rate": 6.5515439624278184e-06, + "loss": 2.1114, + "step": 11582 + }, + { + "epoch": 2.35, + "learning_rate": 6.549922633514328e-06, + "loss": 2.0495, + "step": 11583 + }, + { + "epoch": 2.35, + "learning_rate": 6.548301407540458e-06, + "loss": 2.0595, + "step": 11584 + }, + { + "epoch": 2.35, + "learning_rate": 6.546680284554581e-06, + "loss": 2.0422, + "step": 11585 + }, + { + "epoch": 2.35, + "learning_rate": 6.545059264605063e-06, + "loss": 2.0543, + "step": 11586 + }, + { + "epoch": 2.35, + "learning_rate": 6.543438347740278e-06, + "loss": 2.0519, + "step": 11587 + }, + { + "epoch": 2.35, + "learning_rate": 6.541817534008586e-06, + "loss": 2.144, + "step": 11588 + }, + { + "epoch": 2.35, + "learning_rate": 6.5401968234583444e-06, + "loss": 2.0913, + "step": 11589 + }, + { + "epoch": 2.35, + "learning_rate": 6.538576216137911e-06, + "loss": 2.1497, + "step": 11590 + }, + { + "epoch": 2.35, + "learning_rate": 6.536955712095642e-06, + "loss": 2.2229, + "step": 11591 + }, + { + "epoch": 2.35, + "learning_rate": 6.535335311379888e-06, + "loss": 2.1475, + "step": 11592 + }, + { + "epoch": 2.35, + "learning_rate": 6.533715014038996e-06, + "loss": 2.1924, + "step": 11593 + }, + { + "epoch": 2.35, + "learning_rate": 6.5320948201213095e-06, + "loss": 2.1684, + "step": 11594 + }, + { + "epoch": 2.35, + "learning_rate": 6.530474729675167e-06, + "loss": 2.1325, + "step": 11595 + }, + { + "epoch": 2.35, + "learning_rate": 6.528854742748916e-06, + "loss": 2.1115, + "step": 11596 + }, + { + "epoch": 2.35, + "learning_rate": 6.527234859390885e-06, + "loss": 2.1573, + "step": 11597 + }, + { + "epoch": 2.35, + "learning_rate": 6.52561507964941e-06, + "loss": 2.1504, + "step": 11598 + }, + { + "epoch": 2.35, + "learning_rate": 6.523995403572816e-06, + "loss": 2.1252, + "step": 11599 + }, + { + "epoch": 2.35, + "learning_rate": 6.5223758312094294e-06, + "loss": 2.1454, + "step": 11600 + }, + { + "epoch": 2.35, + "learning_rate": 6.520756362607579e-06, + "loss": 2.1198, + "step": 11601 + }, + { + "epoch": 2.36, + "learning_rate": 6.51913699781558e-06, + "loss": 2.1203, + "step": 11602 + }, + { + "epoch": 2.36, + "learning_rate": 6.517517736881748e-06, + "loss": 2.1066, + "step": 11603 + }, + { + "epoch": 2.36, + "learning_rate": 6.515898579854403e-06, + "loss": 2.0509, + "step": 11604 + }, + { + "epoch": 2.36, + "learning_rate": 6.514279526781848e-06, + "loss": 2.1108, + "step": 11605 + }, + { + "epoch": 2.36, + "learning_rate": 6.512660577712395e-06, + "loss": 2.0428, + "step": 11606 + }, + { + "epoch": 2.36, + "learning_rate": 6.511041732694349e-06, + "loss": 1.9975, + "step": 11607 + }, + { + "epoch": 2.36, + "learning_rate": 6.509422991776009e-06, + "loss": 2.1101, + "step": 11608 + }, + { + "epoch": 2.36, + "learning_rate": 6.507804355005675e-06, + "loss": 2.1674, + "step": 11609 + }, + { + "epoch": 2.36, + "learning_rate": 6.506185822431636e-06, + "loss": 2.1123, + "step": 11610 + }, + { + "epoch": 2.36, + "learning_rate": 6.504567394102195e-06, + "loss": 2.1372, + "step": 11611 + }, + { + "epoch": 2.36, + "learning_rate": 6.5029490700656336e-06, + "loss": 2.1624, + "step": 11612 + }, + { + "epoch": 2.36, + "learning_rate": 6.501330850370239e-06, + "loss": 2.0377, + "step": 11613 + }, + { + "epoch": 2.36, + "learning_rate": 6.499712735064294e-06, + "loss": 2.0728, + "step": 11614 + }, + { + "epoch": 2.36, + "learning_rate": 6.498094724196074e-06, + "loss": 2.0905, + "step": 11615 + }, + { + "epoch": 2.36, + "learning_rate": 6.496476817813864e-06, + "loss": 2.1255, + "step": 11616 + }, + { + "epoch": 2.36, + "learning_rate": 6.494859015965933e-06, + "loss": 2.0714, + "step": 11617 + }, + { + "epoch": 2.36, + "learning_rate": 6.49324131870055e-06, + "loss": 2.1195, + "step": 11618 + }, + { + "epoch": 2.36, + "learning_rate": 6.491623726065981e-06, + "loss": 2.0454, + "step": 11619 + }, + { + "epoch": 2.36, + "learning_rate": 6.4900062381104926e-06, + "loss": 2.1463, + "step": 11620 + }, + { + "epoch": 2.36, + "learning_rate": 6.488388854882346e-06, + "loss": 2.1438, + "step": 11621 + }, + { + "epoch": 2.36, + "learning_rate": 6.4867715764298e-06, + "loss": 2.0374, + "step": 11622 + }, + { + "epoch": 2.36, + "learning_rate": 6.485154402801104e-06, + "loss": 2.0901, + "step": 11623 + }, + { + "epoch": 2.36, + "learning_rate": 6.483537334044514e-06, + "loss": 2.086, + "step": 11624 + }, + { + "epoch": 2.36, + "learning_rate": 6.481920370208274e-06, + "loss": 2.1061, + "step": 11625 + }, + { + "epoch": 2.36, + "learning_rate": 6.480303511340635e-06, + "loss": 2.0833, + "step": 11626 + }, + { + "epoch": 2.36, + "learning_rate": 6.4786867574898345e-06, + "loss": 2.1469, + "step": 11627 + }, + { + "epoch": 2.36, + "learning_rate": 6.477070108704115e-06, + "loss": 2.0558, + "step": 11628 + }, + { + "epoch": 2.36, + "learning_rate": 6.4754535650317105e-06, + "loss": 2.0991, + "step": 11629 + }, + { + "epoch": 2.36, + "learning_rate": 6.47383712652085e-06, + "loss": 2.1041, + "step": 11630 + }, + { + "epoch": 2.36, + "learning_rate": 6.47222079321977e-06, + "loss": 2.0867, + "step": 11631 + }, + { + "epoch": 2.36, + "learning_rate": 6.470604565176693e-06, + "loss": 2.1369, + "step": 11632 + }, + { + "epoch": 2.36, + "learning_rate": 6.468988442439843e-06, + "loss": 2.169, + "step": 11633 + }, + { + "epoch": 2.36, + "learning_rate": 6.4673724250574385e-06, + "loss": 2.1166, + "step": 11634 + }, + { + "epoch": 2.36, + "learning_rate": 6.465756513077694e-06, + "loss": 2.1548, + "step": 11635 + }, + { + "epoch": 2.36, + "learning_rate": 6.464140706548833e-06, + "loss": 2.1218, + "step": 11636 + }, + { + "epoch": 2.36, + "learning_rate": 6.462525005519058e-06, + "loss": 2.0814, + "step": 11637 + }, + { + "epoch": 2.36, + "learning_rate": 6.46090941003658e-06, + "loss": 2.1095, + "step": 11638 + }, + { + "epoch": 2.36, + "learning_rate": 6.4592939201496e-06, + "loss": 2.1454, + "step": 11639 + }, + { + "epoch": 2.36, + "learning_rate": 6.45767853590632e-06, + "loss": 2.1538, + "step": 11640 + }, + { + "epoch": 2.36, + "learning_rate": 6.456063257354943e-06, + "loss": 2.0834, + "step": 11641 + }, + { + "epoch": 2.36, + "learning_rate": 6.454448084543659e-06, + "loss": 2.1142, + "step": 11642 + }, + { + "epoch": 2.36, + "learning_rate": 6.4528330175206585e-06, + "loss": 2.1775, + "step": 11643 + }, + { + "epoch": 2.36, + "learning_rate": 6.451218056334136e-06, + "loss": 2.0686, + "step": 11644 + }, + { + "epoch": 2.36, + "learning_rate": 6.44960320103227e-06, + "loss": 2.0965, + "step": 11645 + }, + { + "epoch": 2.36, + "learning_rate": 6.447988451663249e-06, + "loss": 2.2036, + "step": 11646 + }, + { + "epoch": 2.36, + "learning_rate": 6.446373808275248e-06, + "loss": 2.1548, + "step": 11647 + }, + { + "epoch": 2.36, + "learning_rate": 6.444759270916445e-06, + "loss": 2.0458, + "step": 11648 + }, + { + "epoch": 2.36, + "learning_rate": 6.443144839635014e-06, + "loss": 2.0799, + "step": 11649 + }, + { + "epoch": 2.36, + "learning_rate": 6.4415305144791195e-06, + "loss": 2.0902, + "step": 11650 + }, + { + "epoch": 2.36, + "learning_rate": 6.439916295496932e-06, + "loss": 2.1094, + "step": 11651 + }, + { + "epoch": 2.37, + "learning_rate": 6.438302182736616e-06, + "loss": 2.0983, + "step": 11652 + }, + { + "epoch": 2.37, + "learning_rate": 6.436688176246328e-06, + "loss": 2.1074, + "step": 11653 + }, + { + "epoch": 2.37, + "learning_rate": 6.435074276074229e-06, + "loss": 2.0851, + "step": 11654 + }, + { + "epoch": 2.37, + "learning_rate": 6.4334604822684645e-06, + "loss": 2.0101, + "step": 11655 + }, + { + "epoch": 2.37, + "learning_rate": 6.431846794877196e-06, + "loss": 2.0806, + "step": 11656 + }, + { + "epoch": 2.37, + "learning_rate": 6.430233213948564e-06, + "loss": 2.1008, + "step": 11657 + }, + { + "epoch": 2.37, + "learning_rate": 6.428619739530717e-06, + "loss": 2.1363, + "step": 11658 + }, + { + "epoch": 2.37, + "learning_rate": 6.427006371671794e-06, + "loss": 2.0331, + "step": 11659 + }, + { + "epoch": 2.37, + "learning_rate": 6.425393110419928e-06, + "loss": 2.1259, + "step": 11660 + }, + { + "epoch": 2.37, + "learning_rate": 6.4237799558232614e-06, + "loss": 2.1704, + "step": 11661 + }, + { + "epoch": 2.37, + "learning_rate": 6.422166907929923e-06, + "loss": 2.0988, + "step": 11662 + }, + { + "epoch": 2.37, + "learning_rate": 6.42055396678804e-06, + "loss": 2.1041, + "step": 11663 + }, + { + "epoch": 2.37, + "learning_rate": 6.4189411324457394e-06, + "loss": 2.1183, + "step": 11664 + }, + { + "epoch": 2.37, + "learning_rate": 6.4173284049511406e-06, + "loss": 2.1026, + "step": 11665 + }, + { + "epoch": 2.37, + "learning_rate": 6.4157157843523665e-06, + "loss": 2.0401, + "step": 11666 + }, + { + "epoch": 2.37, + "learning_rate": 6.414103270697527e-06, + "loss": 2.1699, + "step": 11667 + }, + { + "epoch": 2.37, + "learning_rate": 6.412490864034742e-06, + "loss": 2.1167, + "step": 11668 + }, + { + "epoch": 2.37, + "learning_rate": 6.410878564412114e-06, + "loss": 2.0496, + "step": 11669 + }, + { + "epoch": 2.37, + "learning_rate": 6.409266371877751e-06, + "loss": 2.2144, + "step": 11670 + }, + { + "epoch": 2.37, + "learning_rate": 6.407654286479755e-06, + "loss": 2.11, + "step": 11671 + }, + { + "epoch": 2.37, + "learning_rate": 6.406042308266229e-06, + "loss": 2.1565, + "step": 11672 + }, + { + "epoch": 2.37, + "learning_rate": 6.404430437285269e-06, + "loss": 2.1761, + "step": 11673 + }, + { + "epoch": 2.37, + "learning_rate": 6.402818673584965e-06, + "loss": 2.0671, + "step": 11674 + }, + { + "epoch": 2.37, + "learning_rate": 6.401207017213407e-06, + "loss": 2.0899, + "step": 11675 + }, + { + "epoch": 2.37, + "learning_rate": 6.399595468218684e-06, + "loss": 2.1498, + "step": 11676 + }, + { + "epoch": 2.37, + "learning_rate": 6.397984026648882e-06, + "loss": 2.1664, + "step": 11677 + }, + { + "epoch": 2.37, + "learning_rate": 6.396372692552077e-06, + "loss": 2.1205, + "step": 11678 + }, + { + "epoch": 2.37, + "learning_rate": 6.394761465976349e-06, + "loss": 2.137, + "step": 11679 + }, + { + "epoch": 2.37, + "learning_rate": 6.393150346969766e-06, + "loss": 2.1102, + "step": 11680 + }, + { + "epoch": 2.37, + "learning_rate": 6.391539335580408e-06, + "loss": 2.0985, + "step": 11681 + }, + { + "epoch": 2.37, + "learning_rate": 6.389928431856337e-06, + "loss": 2.1093, + "step": 11682 + }, + { + "epoch": 2.37, + "learning_rate": 6.388317635845619e-06, + "loss": 2.1647, + "step": 11683 + }, + { + "epoch": 2.37, + "learning_rate": 6.3867069475963104e-06, + "loss": 2.0949, + "step": 11684 + }, + { + "epoch": 2.37, + "learning_rate": 6.3850963671564785e-06, + "loss": 2.1491, + "step": 11685 + }, + { + "epoch": 2.37, + "learning_rate": 6.383485894574173e-06, + "loss": 2.1379, + "step": 11686 + }, + { + "epoch": 2.37, + "learning_rate": 6.381875529897442e-06, + "loss": 2.0522, + "step": 11687 + }, + { + "epoch": 2.37, + "learning_rate": 6.38026527317434e-06, + "loss": 2.0477, + "step": 11688 + }, + { + "epoch": 2.37, + "learning_rate": 6.378655124452908e-06, + "loss": 2.052, + "step": 11689 + }, + { + "epoch": 2.37, + "learning_rate": 6.37704508378119e-06, + "loss": 2.1879, + "step": 11690 + }, + { + "epoch": 2.37, + "learning_rate": 6.3754351512072225e-06, + "loss": 2.0951, + "step": 11691 + }, + { + "epoch": 2.37, + "learning_rate": 6.373825326779044e-06, + "loss": 2.0811, + "step": 11692 + }, + { + "epoch": 2.37, + "learning_rate": 6.372215610544684e-06, + "loss": 2.0227, + "step": 11693 + }, + { + "epoch": 2.37, + "learning_rate": 6.370606002552171e-06, + "loss": 2.0629, + "step": 11694 + }, + { + "epoch": 2.37, + "learning_rate": 6.368996502849531e-06, + "loss": 2.1686, + "step": 11695 + }, + { + "epoch": 2.37, + "learning_rate": 6.367387111484791e-06, + "loss": 2.1496, + "step": 11696 + }, + { + "epoch": 2.37, + "learning_rate": 6.3657778285059655e-06, + "loss": 2.1058, + "step": 11697 + }, + { + "epoch": 2.37, + "learning_rate": 6.364168653961074e-06, + "loss": 2.1202, + "step": 11698 + }, + { + "epoch": 2.37, + "learning_rate": 6.362559587898121e-06, + "loss": 2.1656, + "step": 11699 + }, + { + "epoch": 2.37, + "learning_rate": 6.360950630365126e-06, + "loss": 2.0914, + "step": 11700 + }, + { + "epoch": 2.38, + "learning_rate": 6.359341781410091e-06, + "loss": 2.1061, + "step": 11701 + }, + { + "epoch": 2.38, + "learning_rate": 6.35773304108102e-06, + "loss": 2.066, + "step": 11702 + }, + { + "epoch": 2.38, + "learning_rate": 6.356124409425913e-06, + "loss": 2.0402, + "step": 11703 + }, + { + "epoch": 2.38, + "learning_rate": 6.354515886492761e-06, + "loss": 2.1229, + "step": 11704 + }, + { + "epoch": 2.38, + "learning_rate": 6.352907472329566e-06, + "loss": 2.1255, + "step": 11705 + }, + { + "epoch": 2.38, + "learning_rate": 6.351299166984315e-06, + "loss": 2.1312, + "step": 11706 + }, + { + "epoch": 2.38, + "learning_rate": 6.349690970504994e-06, + "loss": 2.1247, + "step": 11707 + }, + { + "epoch": 2.38, + "learning_rate": 6.348082882939584e-06, + "loss": 2.1121, + "step": 11708 + }, + { + "epoch": 2.38, + "learning_rate": 6.3464749043360685e-06, + "loss": 2.1488, + "step": 11709 + }, + { + "epoch": 2.38, + "learning_rate": 6.344867034742426e-06, + "loss": 2.2228, + "step": 11710 + }, + { + "epoch": 2.38, + "learning_rate": 6.343259274206628e-06, + "loss": 2.141, + "step": 11711 + }, + { + "epoch": 2.38, + "learning_rate": 6.341651622776645e-06, + "loss": 2.1469, + "step": 11712 + }, + { + "epoch": 2.38, + "learning_rate": 6.340044080500447e-06, + "loss": 2.131, + "step": 11713 + }, + { + "epoch": 2.38, + "learning_rate": 6.338436647425994e-06, + "loss": 2.1113, + "step": 11714 + }, + { + "epoch": 2.38, + "learning_rate": 6.3368293236012475e-06, + "loss": 1.9996, + "step": 11715 + }, + { + "epoch": 2.38, + "learning_rate": 6.335222109074171e-06, + "loss": 2.0328, + "step": 11716 + }, + { + "epoch": 2.38, + "learning_rate": 6.333615003892711e-06, + "loss": 2.0198, + "step": 11717 + }, + { + "epoch": 2.38, + "learning_rate": 6.3320080081048245e-06, + "loss": 2.1754, + "step": 11718 + }, + { + "epoch": 2.38, + "learning_rate": 6.330401121758451e-06, + "loss": 2.144, + "step": 11719 + }, + { + "epoch": 2.38, + "learning_rate": 6.328794344901545e-06, + "loss": 2.0607, + "step": 11720 + }, + { + "epoch": 2.38, + "learning_rate": 6.327187677582043e-06, + "loss": 2.1593, + "step": 11721 + }, + { + "epoch": 2.38, + "learning_rate": 6.325581119847883e-06, + "loss": 2.0943, + "step": 11722 + }, + { + "epoch": 2.38, + "learning_rate": 6.323974671747001e-06, + "loss": 2.1265, + "step": 11723 + }, + { + "epoch": 2.38, + "learning_rate": 6.322368333327322e-06, + "loss": 2.1339, + "step": 11724 + }, + { + "epoch": 2.38, + "learning_rate": 6.320762104636786e-06, + "loss": 2.1679, + "step": 11725 + }, + { + "epoch": 2.38, + "learning_rate": 6.31915598572331e-06, + "loss": 2.1728, + "step": 11726 + }, + { + "epoch": 2.38, + "learning_rate": 6.3175499766348166e-06, + "loss": 2.1615, + "step": 11727 + }, + { + "epoch": 2.38, + "learning_rate": 6.3159440774192225e-06, + "loss": 2.096, + "step": 11728 + }, + { + "epoch": 2.38, + "learning_rate": 6.314338288124447e-06, + "loss": 2.1764, + "step": 11729 + }, + { + "epoch": 2.38, + "learning_rate": 6.3127326087984e-06, + "loss": 2.0977, + "step": 11730 + }, + { + "epoch": 2.38, + "learning_rate": 6.31112703948899e-06, + "loss": 2.0975, + "step": 11731 + }, + { + "epoch": 2.38, + "learning_rate": 6.30952158024412e-06, + "loss": 2.126, + "step": 11732 + }, + { + "epoch": 2.38, + "learning_rate": 6.307916231111696e-06, + "loss": 2.1135, + "step": 11733 + }, + { + "epoch": 2.38, + "learning_rate": 6.306310992139612e-06, + "loss": 2.1763, + "step": 11734 + }, + { + "epoch": 2.38, + "learning_rate": 6.304705863375766e-06, + "loss": 2.0892, + "step": 11735 + }, + { + "epoch": 2.38, + "learning_rate": 6.303100844868053e-06, + "loss": 2.1374, + "step": 11736 + }, + { + "epoch": 2.38, + "learning_rate": 6.3014959366643595e-06, + "loss": 2.1763, + "step": 11737 + }, + { + "epoch": 2.38, + "learning_rate": 6.299891138812568e-06, + "loss": 2.0811, + "step": 11738 + }, + { + "epoch": 2.38, + "learning_rate": 6.29828645136056e-06, + "loss": 2.088, + "step": 11739 + }, + { + "epoch": 2.38, + "learning_rate": 6.296681874356221e-06, + "loss": 2.141, + "step": 11740 + }, + { + "epoch": 2.38, + "learning_rate": 6.295077407847423e-06, + "loss": 2.0922, + "step": 11741 + }, + { + "epoch": 2.38, + "learning_rate": 6.293473051882039e-06, + "loss": 1.9662, + "step": 11742 + }, + { + "epoch": 2.38, + "learning_rate": 6.291868806507937e-06, + "loss": 2.0927, + "step": 11743 + }, + { + "epoch": 2.38, + "learning_rate": 6.2902646717729786e-06, + "loss": 2.1489, + "step": 11744 + }, + { + "epoch": 2.38, + "learning_rate": 6.2886606477250345e-06, + "loss": 2.236, + "step": 11745 + }, + { + "epoch": 2.38, + "learning_rate": 6.287056734411961e-06, + "loss": 2.0621, + "step": 11746 + }, + { + "epoch": 2.38, + "learning_rate": 6.28545293188161e-06, + "loss": 2.1787, + "step": 11747 + }, + { + "epoch": 2.38, + "learning_rate": 6.283849240181838e-06, + "loss": 2.1301, + "step": 11748 + }, + { + "epoch": 2.38, + "learning_rate": 6.282245659360491e-06, + "loss": 2.057, + "step": 11749 + }, + { + "epoch": 2.39, + "learning_rate": 6.28064218946542e-06, + "loss": 2.1906, + "step": 11750 + }, + { + "epoch": 2.39, + "learning_rate": 6.2790388305444634e-06, + "loss": 2.0907, + "step": 11751 + }, + { + "epoch": 2.39, + "learning_rate": 6.277435582645461e-06, + "loss": 2.0906, + "step": 11752 + }, + { + "epoch": 2.39, + "learning_rate": 6.27583244581625e-06, + "loss": 2.1234, + "step": 11753 + }, + { + "epoch": 2.39, + "learning_rate": 6.2742294201046615e-06, + "loss": 2.1477, + "step": 11754 + }, + { + "epoch": 2.39, + "learning_rate": 6.272626505558528e-06, + "loss": 2.0995, + "step": 11755 + }, + { + "epoch": 2.39, + "learning_rate": 6.2710237022256695e-06, + "loss": 2.1698, + "step": 11756 + }, + { + "epoch": 2.39, + "learning_rate": 6.2694210101539155e-06, + "loss": 2.1007, + "step": 11757 + }, + { + "epoch": 2.39, + "learning_rate": 6.267818429391081e-06, + "loss": 2.132, + "step": 11758 + }, + { + "epoch": 2.39, + "learning_rate": 6.26621595998498e-06, + "loss": 2.1598, + "step": 11759 + }, + { + "epoch": 2.39, + "learning_rate": 6.264613601983432e-06, + "loss": 2.0869, + "step": 11760 + }, + { + "epoch": 2.39, + "learning_rate": 6.263011355434243e-06, + "loss": 2.0492, + "step": 11761 + }, + { + "epoch": 2.39, + "learning_rate": 6.2614092203852194e-06, + "loss": 2.0991, + "step": 11762 + }, + { + "epoch": 2.39, + "learning_rate": 6.259807196884164e-06, + "loss": 2.1907, + "step": 11763 + }, + { + "epoch": 2.39, + "learning_rate": 6.258205284978871e-06, + "loss": 2.1557, + "step": 11764 + }, + { + "epoch": 2.39, + "learning_rate": 6.256603484717145e-06, + "loss": 2.1481, + "step": 11765 + }, + { + "epoch": 2.39, + "learning_rate": 6.255001796146776e-06, + "loss": 2.1441, + "step": 11766 + }, + { + "epoch": 2.39, + "learning_rate": 6.2534002193155505e-06, + "loss": 2.1471, + "step": 11767 + }, + { + "epoch": 2.39, + "learning_rate": 6.251798754271257e-06, + "loss": 2.1161, + "step": 11768 + }, + { + "epoch": 2.39, + "learning_rate": 6.250197401061675e-06, + "loss": 2.0927, + "step": 11769 + }, + { + "epoch": 2.39, + "learning_rate": 6.24859615973459e-06, + "loss": 2.0813, + "step": 11770 + }, + { + "epoch": 2.39, + "learning_rate": 6.246995030337774e-06, + "loss": 2.0604, + "step": 11771 + }, + { + "epoch": 2.39, + "learning_rate": 6.245394012919e-06, + "loss": 2.133, + "step": 11772 + }, + { + "epoch": 2.39, + "learning_rate": 6.243793107526037e-06, + "loss": 2.0902, + "step": 11773 + }, + { + "epoch": 2.39, + "learning_rate": 6.242192314206654e-06, + "loss": 2.0705, + "step": 11774 + }, + { + "epoch": 2.39, + "learning_rate": 6.2405916330086106e-06, + "loss": 2.1018, + "step": 11775 + }, + { + "epoch": 2.39, + "learning_rate": 6.238991063979667e-06, + "loss": 2.0726, + "step": 11776 + }, + { + "epoch": 2.39, + "learning_rate": 6.23739060716758e-06, + "loss": 2.1184, + "step": 11777 + }, + { + "epoch": 2.39, + "learning_rate": 6.2357902626201e-06, + "loss": 2.0539, + "step": 11778 + }, + { + "epoch": 2.39, + "learning_rate": 6.2341900303849816e-06, + "loss": 2.1401, + "step": 11779 + }, + { + "epoch": 2.39, + "learning_rate": 6.232589910509963e-06, + "loss": 2.0865, + "step": 11780 + }, + { + "epoch": 2.39, + "learning_rate": 6.230989903042794e-06, + "loss": 2.0998, + "step": 11781 + }, + { + "epoch": 2.39, + "learning_rate": 6.229390008031211e-06, + "loss": 2.1289, + "step": 11782 + }, + { + "epoch": 2.39, + "learning_rate": 6.227790225522945e-06, + "loss": 2.0578, + "step": 11783 + }, + { + "epoch": 2.39, + "learning_rate": 6.226190555565739e-06, + "loss": 2.0753, + "step": 11784 + }, + { + "epoch": 2.39, + "learning_rate": 6.224590998207314e-06, + "loss": 2.0525, + "step": 11785 + }, + { + "epoch": 2.39, + "learning_rate": 6.222991553495401e-06, + "loss": 2.1165, + "step": 11786 + }, + { + "epoch": 2.39, + "learning_rate": 6.221392221477718e-06, + "loss": 2.0906, + "step": 11787 + }, + { + "epoch": 2.39, + "learning_rate": 6.219793002201983e-06, + "loss": 2.0976, + "step": 11788 + }, + { + "epoch": 2.39, + "learning_rate": 6.218193895715919e-06, + "loss": 2.1702, + "step": 11789 + }, + { + "epoch": 2.39, + "learning_rate": 6.216594902067233e-06, + "loss": 2.0925, + "step": 11790 + }, + { + "epoch": 2.39, + "learning_rate": 6.2149960213036355e-06, + "loss": 2.0963, + "step": 11791 + }, + { + "epoch": 2.39, + "learning_rate": 6.21339725347283e-06, + "loss": 2.1025, + "step": 11792 + }, + { + "epoch": 2.39, + "learning_rate": 6.211798598622521e-06, + "loss": 2.0958, + "step": 11793 + }, + { + "epoch": 2.39, + "learning_rate": 6.210200056800408e-06, + "loss": 2.1321, + "step": 11794 + }, + { + "epoch": 2.39, + "learning_rate": 6.208601628054185e-06, + "loss": 2.1372, + "step": 11795 + }, + { + "epoch": 2.39, + "learning_rate": 6.207003312431543e-06, + "loss": 2.1206, + "step": 11796 + }, + { + "epoch": 2.39, + "learning_rate": 6.205405109980176e-06, + "loss": 2.0636, + "step": 11797 + }, + { + "epoch": 2.39, + "learning_rate": 6.203807020747763e-06, + "loss": 2.0884, + "step": 11798 + }, + { + "epoch": 2.4, + "learning_rate": 6.202209044781991e-06, + "loss": 2.075, + "step": 11799 + }, + { + "epoch": 2.4, + "learning_rate": 6.200611182130533e-06, + "loss": 2.0983, + "step": 11800 + }, + { + "epoch": 2.4, + "learning_rate": 6.199013432841072e-06, + "loss": 2.114, + "step": 11801 + }, + { + "epoch": 2.4, + "learning_rate": 6.197415796961275e-06, + "loss": 2.0686, + "step": 11802 + }, + { + "epoch": 2.4, + "learning_rate": 6.195818274538809e-06, + "loss": 2.0883, + "step": 11803 + }, + { + "epoch": 2.4, + "learning_rate": 6.194220865621341e-06, + "loss": 2.1405, + "step": 11804 + }, + { + "epoch": 2.4, + "learning_rate": 6.192623570256535e-06, + "loss": 2.1427, + "step": 11805 + }, + { + "epoch": 2.4, + "learning_rate": 6.1910263884920475e-06, + "loss": 2.1477, + "step": 11806 + }, + { + "epoch": 2.4, + "learning_rate": 6.189429320375534e-06, + "loss": 2.0573, + "step": 11807 + }, + { + "epoch": 2.4, + "learning_rate": 6.1878323659546416e-06, + "loss": 2.0871, + "step": 11808 + }, + { + "epoch": 2.4, + "learning_rate": 6.186235525277026e-06, + "loss": 2.1566, + "step": 11809 + }, + { + "epoch": 2.4, + "learning_rate": 6.1846387983903276e-06, + "loss": 2.1633, + "step": 11810 + }, + { + "epoch": 2.4, + "learning_rate": 6.183042185342189e-06, + "loss": 2.1005, + "step": 11811 + }, + { + "epoch": 2.4, + "learning_rate": 6.1814456861802475e-06, + "loss": 2.1126, + "step": 11812 + }, + { + "epoch": 2.4, + "learning_rate": 6.179849300952135e-06, + "loss": 2.0641, + "step": 11813 + }, + { + "epoch": 2.4, + "learning_rate": 6.178253029705487e-06, + "loss": 2.0598, + "step": 11814 + }, + { + "epoch": 2.4, + "learning_rate": 6.1766568724879316e-06, + "loss": 2.0946, + "step": 11815 + }, + { + "epoch": 2.4, + "learning_rate": 6.175060829347089e-06, + "loss": 2.1441, + "step": 11816 + }, + { + "epoch": 2.4, + "learning_rate": 6.173464900330585e-06, + "loss": 2.1529, + "step": 11817 + }, + { + "epoch": 2.4, + "learning_rate": 6.171869085486033e-06, + "loss": 2.1477, + "step": 11818 + }, + { + "epoch": 2.4, + "learning_rate": 6.17027338486105e-06, + "loss": 2.0655, + "step": 11819 + }, + { + "epoch": 2.4, + "learning_rate": 6.168677798503244e-06, + "loss": 2.1612, + "step": 11820 + }, + { + "epoch": 2.4, + "learning_rate": 6.1670823264602255e-06, + "loss": 2.1405, + "step": 11821 + }, + { + "epoch": 2.4, + "learning_rate": 6.165486968779598e-06, + "loss": 2.1217, + "step": 11822 + }, + { + "epoch": 2.4, + "learning_rate": 6.163891725508959e-06, + "loss": 2.1029, + "step": 11823 + }, + { + "epoch": 2.4, + "learning_rate": 6.162296596695907e-06, + "loss": 2.0965, + "step": 11824 + }, + { + "epoch": 2.4, + "learning_rate": 6.160701582388039e-06, + "loss": 2.1287, + "step": 11825 + }, + { + "epoch": 2.4, + "learning_rate": 6.159106682632942e-06, + "loss": 2.0988, + "step": 11826 + }, + { + "epoch": 2.4, + "learning_rate": 6.157511897478204e-06, + "loss": 2.1185, + "step": 11827 + }, + { + "epoch": 2.4, + "learning_rate": 6.155917226971404e-06, + "loss": 2.0567, + "step": 11828 + }, + { + "epoch": 2.4, + "learning_rate": 6.154322671160129e-06, + "loss": 2.1099, + "step": 11829 + }, + { + "epoch": 2.4, + "learning_rate": 6.1527282300919536e-06, + "loss": 2.14, + "step": 11830 + }, + { + "epoch": 2.4, + "learning_rate": 6.15113390381445e-06, + "loss": 2.1062, + "step": 11831 + }, + { + "epoch": 2.4, + "learning_rate": 6.149539692375188e-06, + "loss": 2.1529, + "step": 11832 + }, + { + "epoch": 2.4, + "learning_rate": 6.147945595821729e-06, + "loss": 2.0433, + "step": 11833 + }, + { + "epoch": 2.4, + "learning_rate": 6.146351614201646e-06, + "loss": 2.0841, + "step": 11834 + }, + { + "epoch": 2.4, + "learning_rate": 6.144757747562492e-06, + "loss": 2.1112, + "step": 11835 + }, + { + "epoch": 2.4, + "learning_rate": 6.143163995951824e-06, + "loss": 2.1682, + "step": 11836 + }, + { + "epoch": 2.4, + "learning_rate": 6.141570359417195e-06, + "loss": 2.2222, + "step": 11837 + }, + { + "epoch": 2.4, + "learning_rate": 6.139976838006152e-06, + "loss": 2.0802, + "step": 11838 + }, + { + "epoch": 2.4, + "learning_rate": 6.1383834317662464e-06, + "loss": 2.0714, + "step": 11839 + }, + { + "epoch": 2.4, + "learning_rate": 6.1367901407450126e-06, + "loss": 2.0232, + "step": 11840 + }, + { + "epoch": 2.4, + "learning_rate": 6.1351969649899974e-06, + "loss": 2.1459, + "step": 11841 + }, + { + "epoch": 2.4, + "learning_rate": 6.133603904548733e-06, + "loss": 2.1105, + "step": 11842 + }, + { + "epoch": 2.4, + "learning_rate": 6.132010959468748e-06, + "loss": 2.0535, + "step": 11843 + }, + { + "epoch": 2.4, + "learning_rate": 6.130418129797575e-06, + "loss": 2.1322, + "step": 11844 + }, + { + "epoch": 2.4, + "learning_rate": 6.12882541558274e-06, + "loss": 2.1321, + "step": 11845 + }, + { + "epoch": 2.4, + "learning_rate": 6.127232816871762e-06, + "loss": 2.1001, + "step": 11846 + }, + { + "epoch": 2.4, + "learning_rate": 6.12564033371216e-06, + "loss": 2.135, + "step": 11847 + }, + { + "epoch": 2.4, + "learning_rate": 6.124047966151445e-06, + "loss": 2.0698, + "step": 11848 + }, + { + "epoch": 2.41, + "learning_rate": 6.122455714237136e-06, + "loss": 2.0842, + "step": 11849 + }, + { + "epoch": 2.41, + "learning_rate": 6.120863578016736e-06, + "loss": 2.1189, + "step": 11850 + }, + { + "epoch": 2.41, + "learning_rate": 6.119271557537752e-06, + "loss": 2.088, + "step": 11851 + }, + { + "epoch": 2.41, + "learning_rate": 6.117679652847681e-06, + "loss": 2.0677, + "step": 11852 + }, + { + "epoch": 2.41, + "learning_rate": 6.116087863994021e-06, + "loss": 2.1475, + "step": 11853 + }, + { + "epoch": 2.41, + "learning_rate": 6.114496191024271e-06, + "loss": 2.1663, + "step": 11854 + }, + { + "epoch": 2.41, + "learning_rate": 6.112904633985916e-06, + "loss": 2.0503, + "step": 11855 + }, + { + "epoch": 2.41, + "learning_rate": 6.111313192926449e-06, + "loss": 2.1025, + "step": 11856 + }, + { + "epoch": 2.41, + "learning_rate": 6.109721867893347e-06, + "loss": 2.1894, + "step": 11857 + }, + { + "epoch": 2.41, + "learning_rate": 6.108130658934093e-06, + "loss": 2.0574, + "step": 11858 + }, + { + "epoch": 2.41, + "learning_rate": 6.106539566096166e-06, + "loss": 2.0672, + "step": 11859 + }, + { + "epoch": 2.41, + "learning_rate": 6.104948589427037e-06, + "loss": 2.1329, + "step": 11860 + }, + { + "epoch": 2.41, + "learning_rate": 6.103357728974177e-06, + "loss": 2.1709, + "step": 11861 + }, + { + "epoch": 2.41, + "learning_rate": 6.101766984785052e-06, + "loss": 2.1036, + "step": 11862 + }, + { + "epoch": 2.41, + "learning_rate": 6.100176356907121e-06, + "loss": 2.1739, + "step": 11863 + }, + { + "epoch": 2.41, + "learning_rate": 6.098585845387848e-06, + "loss": 2.1742, + "step": 11864 + }, + { + "epoch": 2.41, + "learning_rate": 6.09699545027469e-06, + "loss": 2.1284, + "step": 11865 + }, + { + "epoch": 2.41, + "learning_rate": 6.095405171615096e-06, + "loss": 2.1071, + "step": 11866 + }, + { + "epoch": 2.41, + "learning_rate": 6.093815009456517e-06, + "loss": 2.1486, + "step": 11867 + }, + { + "epoch": 2.41, + "learning_rate": 6.092224963846396e-06, + "loss": 2.1151, + "step": 11868 + }, + { + "epoch": 2.41, + "learning_rate": 6.090635034832177e-06, + "loss": 2.0677, + "step": 11869 + }, + { + "epoch": 2.41, + "learning_rate": 6.089045222461301e-06, + "loss": 2.1776, + "step": 11870 + }, + { + "epoch": 2.41, + "learning_rate": 6.087455526781201e-06, + "loss": 2.0888, + "step": 11871 + }, + { + "epoch": 2.41, + "learning_rate": 6.085865947839302e-06, + "loss": 2.1106, + "step": 11872 + }, + { + "epoch": 2.41, + "learning_rate": 6.084276485683043e-06, + "loss": 2.2101, + "step": 11873 + }, + { + "epoch": 2.41, + "learning_rate": 6.082687140359844e-06, + "loss": 2.1726, + "step": 11874 + }, + { + "epoch": 2.41, + "learning_rate": 6.081097911917126e-06, + "loss": 2.1985, + "step": 11875 + }, + { + "epoch": 2.41, + "learning_rate": 6.079508800402305e-06, + "loss": 2.1215, + "step": 11876 + }, + { + "epoch": 2.41, + "learning_rate": 6.077919805862794e-06, + "loss": 2.1007, + "step": 11877 + }, + { + "epoch": 2.41, + "learning_rate": 6.0763309283460105e-06, + "loss": 2.1046, + "step": 11878 + }, + { + "epoch": 2.41, + "learning_rate": 6.074742167899358e-06, + "loss": 2.1296, + "step": 11879 + }, + { + "epoch": 2.41, + "learning_rate": 6.073153524570239e-06, + "loss": 2.1075, + "step": 11880 + }, + { + "epoch": 2.41, + "learning_rate": 6.071564998406052e-06, + "loss": 2.0727, + "step": 11881 + }, + { + "epoch": 2.41, + "learning_rate": 6.069976589454198e-06, + "loss": 2.0737, + "step": 11882 + }, + { + "epoch": 2.41, + "learning_rate": 6.068388297762069e-06, + "loss": 2.1596, + "step": 11883 + }, + { + "epoch": 2.41, + "learning_rate": 6.066800123377055e-06, + "loss": 2.1352, + "step": 11884 + }, + { + "epoch": 2.41, + "learning_rate": 6.065212066346538e-06, + "loss": 2.0568, + "step": 11885 + }, + { + "epoch": 2.41, + "learning_rate": 6.0636241267179065e-06, + "loss": 2.114, + "step": 11886 + }, + { + "epoch": 2.41, + "learning_rate": 6.062036304538535e-06, + "loss": 2.0283, + "step": 11887 + }, + { + "epoch": 2.41, + "learning_rate": 6.060448599855802e-06, + "loss": 2.0931, + "step": 11888 + }, + { + "epoch": 2.41, + "learning_rate": 6.058861012717081e-06, + "loss": 2.1369, + "step": 11889 + }, + { + "epoch": 2.41, + "learning_rate": 6.057273543169739e-06, + "loss": 2.0421, + "step": 11890 + }, + { + "epoch": 2.41, + "learning_rate": 6.055686191261141e-06, + "loss": 2.0453, + "step": 11891 + }, + { + "epoch": 2.41, + "learning_rate": 6.054098957038643e-06, + "loss": 2.0629, + "step": 11892 + }, + { + "epoch": 2.41, + "learning_rate": 6.052511840549614e-06, + "loss": 2.0289, + "step": 11893 + }, + { + "epoch": 2.41, + "learning_rate": 6.050924841841403e-06, + "loss": 2.1452, + "step": 11894 + }, + { + "epoch": 2.41, + "learning_rate": 6.049337960961362e-06, + "loss": 2.0519, + "step": 11895 + }, + { + "epoch": 2.41, + "learning_rate": 6.047751197956838e-06, + "loss": 2.0809, + "step": 11896 + }, + { + "epoch": 2.41, + "learning_rate": 6.046164552875171e-06, + "loss": 2.1094, + "step": 11897 + }, + { + "epoch": 2.42, + "learning_rate": 6.04457802576371e-06, + "loss": 2.1278, + "step": 11898 + }, + { + "epoch": 2.42, + "learning_rate": 6.042991616669789e-06, + "loss": 2.1865, + "step": 11899 + }, + { + "epoch": 2.42, + "learning_rate": 6.041405325640739e-06, + "loss": 2.1454, + "step": 11900 + }, + { + "epoch": 2.42, + "learning_rate": 6.039819152723889e-06, + "loss": 2.1452, + "step": 11901 + }, + { + "epoch": 2.42, + "learning_rate": 6.038233097966568e-06, + "loss": 2.2197, + "step": 11902 + }, + { + "epoch": 2.42, + "learning_rate": 6.036647161416102e-06, + "loss": 2.1755, + "step": 11903 + }, + { + "epoch": 2.42, + "learning_rate": 6.035061343119806e-06, + "loss": 2.1239, + "step": 11904 + }, + { + "epoch": 2.42, + "learning_rate": 6.0334756431249956e-06, + "loss": 2.0624, + "step": 11905 + }, + { + "epoch": 2.42, + "learning_rate": 6.031890061478986e-06, + "loss": 2.0156, + "step": 11906 + }, + { + "epoch": 2.42, + "learning_rate": 6.030304598229084e-06, + "loss": 2.1315, + "step": 11907 + }, + { + "epoch": 2.42, + "learning_rate": 6.028719253422598e-06, + "loss": 2.0561, + "step": 11908 + }, + { + "epoch": 2.42, + "learning_rate": 6.0271340271068246e-06, + "loss": 2.1384, + "step": 11909 + }, + { + "epoch": 2.42, + "learning_rate": 6.0255489193290675e-06, + "loss": 2.0694, + "step": 11910 + }, + { + "epoch": 2.42, + "learning_rate": 6.0239639301366184e-06, + "loss": 2.1136, + "step": 11911 + }, + { + "epoch": 2.42, + "learning_rate": 6.022379059576765e-06, + "loss": 2.0643, + "step": 11912 + }, + { + "epoch": 2.42, + "learning_rate": 6.020794307696802e-06, + "loss": 2.0231, + "step": 11913 + }, + { + "epoch": 2.42, + "learning_rate": 6.019209674544012e-06, + "loss": 2.1331, + "step": 11914 + }, + { + "epoch": 2.42, + "learning_rate": 6.017625160165673e-06, + "loss": 2.2347, + "step": 11915 + }, + { + "epoch": 2.42, + "learning_rate": 6.016040764609064e-06, + "loss": 2.1431, + "step": 11916 + }, + { + "epoch": 2.42, + "learning_rate": 6.0144564879214516e-06, + "loss": 1.9672, + "step": 11917 + }, + { + "epoch": 2.42, + "learning_rate": 6.012872330150116e-06, + "loss": 2.079, + "step": 11918 + }, + { + "epoch": 2.42, + "learning_rate": 6.0112882913423186e-06, + "loss": 2.1242, + "step": 11919 + }, + { + "epoch": 2.42, + "learning_rate": 6.009704371545324e-06, + "loss": 2.089, + "step": 11920 + }, + { + "epoch": 2.42, + "learning_rate": 6.008120570806386e-06, + "loss": 2.1855, + "step": 11921 + }, + { + "epoch": 2.42, + "learning_rate": 6.006536889172766e-06, + "loss": 2.1452, + "step": 11922 + }, + { + "epoch": 2.42, + "learning_rate": 6.004953326691716e-06, + "loss": 2.0797, + "step": 11923 + }, + { + "epoch": 2.42, + "learning_rate": 6.003369883410483e-06, + "loss": 2.0859, + "step": 11924 + }, + { + "epoch": 2.42, + "learning_rate": 6.00178655937631e-06, + "loss": 2.1179, + "step": 11925 + }, + { + "epoch": 2.42, + "learning_rate": 6.0002033546364435e-06, + "loss": 2.07, + "step": 11926 + }, + { + "epoch": 2.42, + "learning_rate": 5.9986202692381156e-06, + "loss": 2.0998, + "step": 11927 + }, + { + "epoch": 2.42, + "learning_rate": 5.997037303228566e-06, + "loss": 2.1634, + "step": 11928 + }, + { + "epoch": 2.42, + "learning_rate": 5.995454456655021e-06, + "loss": 2.1433, + "step": 11929 + }, + { + "epoch": 2.42, + "learning_rate": 5.993871729564713e-06, + "loss": 2.1309, + "step": 11930 + }, + { + "epoch": 2.42, + "learning_rate": 5.9922891220048615e-06, + "loss": 2.0861, + "step": 11931 + }, + { + "epoch": 2.42, + "learning_rate": 5.990706634022686e-06, + "loss": 2.1326, + "step": 11932 + }, + { + "epoch": 2.42, + "learning_rate": 5.989124265665405e-06, + "loss": 2.1229, + "step": 11933 + }, + { + "epoch": 2.42, + "learning_rate": 5.987542016980234e-06, + "loss": 2.0524, + "step": 11934 + }, + { + "epoch": 2.42, + "learning_rate": 5.9859598880143785e-06, + "loss": 2.0966, + "step": 11935 + }, + { + "epoch": 2.42, + "learning_rate": 5.984377878815045e-06, + "loss": 2.1159, + "step": 11936 + }, + { + "epoch": 2.42, + "learning_rate": 5.982795989429433e-06, + "loss": 2.1235, + "step": 11937 + }, + { + "epoch": 2.42, + "learning_rate": 5.981214219904749e-06, + "loss": 2.0047, + "step": 11938 + }, + { + "epoch": 2.42, + "learning_rate": 5.979632570288183e-06, + "loss": 2.1716, + "step": 11939 + }, + { + "epoch": 2.42, + "learning_rate": 5.978051040626926e-06, + "loss": 2.0793, + "step": 11940 + }, + { + "epoch": 2.42, + "learning_rate": 5.976469630968169e-06, + "loss": 2.1775, + "step": 11941 + }, + { + "epoch": 2.42, + "learning_rate": 5.974888341359089e-06, + "loss": 2.1445, + "step": 11942 + }, + { + "epoch": 2.42, + "learning_rate": 5.973307171846877e-06, + "loss": 2.1318, + "step": 11943 + }, + { + "epoch": 2.42, + "learning_rate": 5.9717261224787045e-06, + "loss": 2.1441, + "step": 11944 + }, + { + "epoch": 2.42, + "learning_rate": 5.970145193301744e-06, + "loss": 2.0926, + "step": 11945 + }, + { + "epoch": 2.42, + "learning_rate": 5.968564384363172e-06, + "loss": 2.1122, + "step": 11946 + }, + { + "epoch": 2.43, + "learning_rate": 5.966983695710144e-06, + "loss": 2.1329, + "step": 11947 + }, + { + "epoch": 2.43, + "learning_rate": 5.965403127389835e-06, + "loss": 2.0864, + "step": 11948 + }, + { + "epoch": 2.43, + "learning_rate": 5.963822679449394e-06, + "loss": 2.0896, + "step": 11949 + }, + { + "epoch": 2.43, + "learning_rate": 5.962242351935985e-06, + "loss": 2.1304, + "step": 11950 + }, + { + "epoch": 2.43, + "learning_rate": 5.960662144896756e-06, + "loss": 2.1393, + "step": 11951 + }, + { + "epoch": 2.43, + "learning_rate": 5.9590820583788535e-06, + "loss": 2.1231, + "step": 11952 + }, + { + "epoch": 2.43, + "learning_rate": 5.957502092429425e-06, + "loss": 2.1935, + "step": 11953 + }, + { + "epoch": 2.43, + "learning_rate": 5.955922247095614e-06, + "loss": 2.0869, + "step": 11954 + }, + { + "epoch": 2.43, + "learning_rate": 5.954342522424553e-06, + "loss": 2.0723, + "step": 11955 + }, + { + "epoch": 2.43, + "learning_rate": 5.95276291846338e-06, + "loss": 2.1314, + "step": 11956 + }, + { + "epoch": 2.43, + "learning_rate": 5.951183435259223e-06, + "loss": 2.0849, + "step": 11957 + }, + { + "epoch": 2.43, + "learning_rate": 5.949604072859211e-06, + "loss": 2.1659, + "step": 11958 + }, + { + "epoch": 2.43, + "learning_rate": 5.948024831310466e-06, + "loss": 2.1339, + "step": 11959 + }, + { + "epoch": 2.43, + "learning_rate": 5.94644571066011e-06, + "loss": 2.0746, + "step": 11960 + }, + { + "epoch": 2.43, + "learning_rate": 5.944866710955253e-06, + "loss": 2.1832, + "step": 11961 + }, + { + "epoch": 2.43, + "learning_rate": 5.943287832243014e-06, + "loss": 2.0487, + "step": 11962 + }, + { + "epoch": 2.43, + "learning_rate": 5.941709074570501e-06, + "loss": 2.1569, + "step": 11963 + }, + { + "epoch": 2.43, + "learning_rate": 5.940130437984817e-06, + "loss": 2.0676, + "step": 11964 + }, + { + "epoch": 2.43, + "learning_rate": 5.938551922533064e-06, + "loss": 2.1021, + "step": 11965 + }, + { + "epoch": 2.43, + "learning_rate": 5.936973528262338e-06, + "loss": 2.1083, + "step": 11966 + }, + { + "epoch": 2.43, + "learning_rate": 5.935395255219738e-06, + "loss": 2.0401, + "step": 11967 + }, + { + "epoch": 2.43, + "learning_rate": 5.933817103452353e-06, + "loss": 2.1232, + "step": 11968 + }, + { + "epoch": 2.43, + "learning_rate": 5.932239073007269e-06, + "loss": 2.1175, + "step": 11969 + }, + { + "epoch": 2.43, + "learning_rate": 5.930661163931571e-06, + "loss": 2.0939, + "step": 11970 + }, + { + "epoch": 2.43, + "learning_rate": 5.929083376272335e-06, + "loss": 2.1122, + "step": 11971 + }, + { + "epoch": 2.43, + "learning_rate": 5.927505710076644e-06, + "loss": 2.0971, + "step": 11972 + }, + { + "epoch": 2.43, + "learning_rate": 5.925928165391564e-06, + "loss": 2.0826, + "step": 11973 + }, + { + "epoch": 2.43, + "learning_rate": 5.92435074226417e-06, + "loss": 2.0935, + "step": 11974 + }, + { + "epoch": 2.43, + "learning_rate": 5.922773440741525e-06, + "loss": 2.096, + "step": 11975 + }, + { + "epoch": 2.43, + "learning_rate": 5.921196260870688e-06, + "loss": 2.1409, + "step": 11976 + }, + { + "epoch": 2.43, + "learning_rate": 5.919619202698718e-06, + "loss": 2.0896, + "step": 11977 + }, + { + "epoch": 2.43, + "learning_rate": 5.918042266272674e-06, + "loss": 2.0913, + "step": 11978 + }, + { + "epoch": 2.43, + "learning_rate": 5.916465451639603e-06, + "loss": 2.0735, + "step": 11979 + }, + { + "epoch": 2.43, + "learning_rate": 5.914888758846555e-06, + "loss": 2.0476, + "step": 11980 + }, + { + "epoch": 2.43, + "learning_rate": 5.9133121879405675e-06, + "loss": 2.0362, + "step": 11981 + }, + { + "epoch": 2.43, + "learning_rate": 5.911735738968689e-06, + "loss": 2.1186, + "step": 11982 + }, + { + "epoch": 2.43, + "learning_rate": 5.91015941197795e-06, + "loss": 2.2101, + "step": 11983 + }, + { + "epoch": 2.43, + "learning_rate": 5.908583207015386e-06, + "loss": 2.1525, + "step": 11984 + }, + { + "epoch": 2.43, + "learning_rate": 5.907007124128026e-06, + "loss": 2.1308, + "step": 11985 + }, + { + "epoch": 2.43, + "learning_rate": 5.905431163362889e-06, + "loss": 2.1045, + "step": 11986 + }, + { + "epoch": 2.43, + "learning_rate": 5.9038553247670075e-06, + "loss": 2.1612, + "step": 11987 + }, + { + "epoch": 2.43, + "learning_rate": 5.902279608387392e-06, + "loss": 2.0451, + "step": 11988 + }, + { + "epoch": 2.43, + "learning_rate": 5.900704014271061e-06, + "loss": 2.1345, + "step": 11989 + }, + { + "epoch": 2.43, + "learning_rate": 5.8991285424650215e-06, + "loss": 2.1154, + "step": 11990 + }, + { + "epoch": 2.43, + "learning_rate": 5.897553193016283e-06, + "loss": 2.1345, + "step": 11991 + }, + { + "epoch": 2.43, + "learning_rate": 5.89597796597185e-06, + "loss": 2.1332, + "step": 11992 + }, + { + "epoch": 2.43, + "learning_rate": 5.894402861378721e-06, + "loss": 2.1932, + "step": 11993 + }, + { + "epoch": 2.43, + "learning_rate": 5.892827879283893e-06, + "loss": 2.1245, + "step": 11994 + }, + { + "epoch": 2.43, + "learning_rate": 5.891253019734358e-06, + "loss": 2.0956, + "step": 11995 + }, + { + "epoch": 2.43, + "learning_rate": 5.889678282777104e-06, + "loss": 2.101, + "step": 11996 + }, + { + "epoch": 2.44, + "learning_rate": 5.888103668459118e-06, + "loss": 2.1095, + "step": 11997 + }, + { + "epoch": 2.44, + "learning_rate": 5.88652917682738e-06, + "loss": 2.0991, + "step": 11998 + }, + { + "epoch": 2.44, + "learning_rate": 5.884954807928871e-06, + "loss": 2.135, + "step": 11999 + }, + { + "epoch": 2.44, + "learning_rate": 5.8833805618105635e-06, + "loss": 2.0847, + "step": 12000 + }, + { + "epoch": 2.44, + "learning_rate": 5.881806438519424e-06, + "loss": 2.1394, + "step": 12001 + }, + { + "epoch": 2.44, + "learning_rate": 5.880232438102427e-06, + "loss": 2.0219, + "step": 12002 + }, + { + "epoch": 2.44, + "learning_rate": 5.878658560606531e-06, + "loss": 2.0438, + "step": 12003 + }, + { + "epoch": 2.44, + "learning_rate": 5.877084806078698e-06, + "loss": 2.0815, + "step": 12004 + }, + { + "epoch": 2.44, + "learning_rate": 5.875511174565882e-06, + "loss": 2.0664, + "step": 12005 + }, + { + "epoch": 2.44, + "learning_rate": 5.873937666115033e-06, + "loss": 2.2021, + "step": 12006 + }, + { + "epoch": 2.44, + "learning_rate": 5.872364280773104e-06, + "loss": 2.1528, + "step": 12007 + }, + { + "epoch": 2.44, + "learning_rate": 5.870791018587041e-06, + "loss": 2.1344, + "step": 12008 + }, + { + "epoch": 2.44, + "learning_rate": 5.869217879603781e-06, + "loss": 2.1062, + "step": 12009 + }, + { + "epoch": 2.44, + "learning_rate": 5.867644863870263e-06, + "loss": 2.1514, + "step": 12010 + }, + { + "epoch": 2.44, + "learning_rate": 5.866071971433421e-06, + "loss": 2.1154, + "step": 12011 + }, + { + "epoch": 2.44, + "learning_rate": 5.864499202340188e-06, + "loss": 2.152, + "step": 12012 + }, + { + "epoch": 2.44, + "learning_rate": 5.862926556637487e-06, + "loss": 2.001, + "step": 12013 + }, + { + "epoch": 2.44, + "learning_rate": 5.86135403437224e-06, + "loss": 2.117, + "step": 12014 + }, + { + "epoch": 2.44, + "learning_rate": 5.85978163559137e-06, + "loss": 2.1007, + "step": 12015 + }, + { + "epoch": 2.44, + "learning_rate": 5.85820936034179e-06, + "loss": 2.1604, + "step": 12016 + }, + { + "epoch": 2.44, + "learning_rate": 5.856637208670412e-06, + "loss": 2.1602, + "step": 12017 + }, + { + "epoch": 2.44, + "learning_rate": 5.855065180624147e-06, + "loss": 2.1807, + "step": 12018 + }, + { + "epoch": 2.44, + "learning_rate": 5.853493276249896e-06, + "loss": 2.137, + "step": 12019 + }, + { + "epoch": 2.44, + "learning_rate": 5.851921495594562e-06, + "loss": 2.1468, + "step": 12020 + }, + { + "epoch": 2.44, + "learning_rate": 5.850349838705034e-06, + "loss": 2.1517, + "step": 12021 + }, + { + "epoch": 2.44, + "learning_rate": 5.84877830562822e-06, + "loss": 2.1157, + "step": 12022 + }, + { + "epoch": 2.44, + "learning_rate": 5.847206896411e-06, + "loss": 2.1079, + "step": 12023 + }, + { + "epoch": 2.44, + "learning_rate": 5.8456356111002625e-06, + "loss": 2.1252, + "step": 12024 + }, + { + "epoch": 2.44, + "learning_rate": 5.8440644497428875e-06, + "loss": 2.1492, + "step": 12025 + }, + { + "epoch": 2.44, + "learning_rate": 5.842493412385755e-06, + "loss": 2.1362, + "step": 12026 + }, + { + "epoch": 2.44, + "learning_rate": 5.840922499075741e-06, + "loss": 2.0912, + "step": 12027 + }, + { + "epoch": 2.44, + "learning_rate": 5.839351709859718e-06, + "loss": 2.1688, + "step": 12028 + }, + { + "epoch": 2.44, + "learning_rate": 5.8377810447845495e-06, + "loss": 2.1153, + "step": 12029 + }, + { + "epoch": 2.44, + "learning_rate": 5.836210503897099e-06, + "loss": 2.1418, + "step": 12030 + }, + { + "epoch": 2.44, + "learning_rate": 5.8346400872442295e-06, + "loss": 2.1072, + "step": 12031 + }, + { + "epoch": 2.44, + "learning_rate": 5.8330697948727984e-06, + "loss": 2.1945, + "step": 12032 + }, + { + "epoch": 2.44, + "learning_rate": 5.831499626829659e-06, + "loss": 2.0725, + "step": 12033 + }, + { + "epoch": 2.44, + "learning_rate": 5.829929583161653e-06, + "loss": 2.0766, + "step": 12034 + }, + { + "epoch": 2.44, + "learning_rate": 5.828359663915636e-06, + "loss": 2.1106, + "step": 12035 + }, + { + "epoch": 2.44, + "learning_rate": 5.826789869138435e-06, + "loss": 2.1516, + "step": 12036 + }, + { + "epoch": 2.44, + "learning_rate": 5.825220198876906e-06, + "loss": 2.1653, + "step": 12037 + }, + { + "epoch": 2.44, + "learning_rate": 5.823650653177868e-06, + "loss": 2.1533, + "step": 12038 + }, + { + "epoch": 2.44, + "learning_rate": 5.822081232088163e-06, + "loss": 2.0371, + "step": 12039 + }, + { + "epoch": 2.44, + "learning_rate": 5.820511935654607e-06, + "loss": 2.0781, + "step": 12040 + }, + { + "epoch": 2.44, + "learning_rate": 5.818942763924027e-06, + "loss": 2.0775, + "step": 12041 + }, + { + "epoch": 2.44, + "learning_rate": 5.817373716943244e-06, + "loss": 2.2064, + "step": 12042 + }, + { + "epoch": 2.44, + "learning_rate": 5.815804794759077e-06, + "loss": 2.1123, + "step": 12043 + }, + { + "epoch": 2.44, + "learning_rate": 5.8142359974183294e-06, + "loss": 2.2033, + "step": 12044 + }, + { + "epoch": 2.44, + "learning_rate": 5.812667324967813e-06, + "loss": 2.179, + "step": 12045 + }, + { + "epoch": 2.45, + "learning_rate": 5.811098777454333e-06, + "loss": 2.1562, + "step": 12046 + }, + { + "epoch": 2.45, + "learning_rate": 5.809530354924693e-06, + "loss": 2.0607, + "step": 12047 + }, + { + "epoch": 2.45, + "learning_rate": 5.8079620574256815e-06, + "loss": 2.0836, + "step": 12048 + }, + { + "epoch": 2.45, + "learning_rate": 5.806393885004096e-06, + "loss": 2.0794, + "step": 12049 + }, + { + "epoch": 2.45, + "learning_rate": 5.8048258377067265e-06, + "loss": 2.1319, + "step": 12050 + }, + { + "epoch": 2.45, + "learning_rate": 5.803257915580364e-06, + "loss": 2.1309, + "step": 12051 + }, + { + "epoch": 2.45, + "learning_rate": 5.80169011867178e-06, + "loss": 2.1661, + "step": 12052 + }, + { + "epoch": 2.45, + "learning_rate": 5.800122447027758e-06, + "loss": 2.1135, + "step": 12053 + }, + { + "epoch": 2.45, + "learning_rate": 5.7985549006950745e-06, + "loss": 2.0876, + "step": 12054 + }, + { + "epoch": 2.45, + "learning_rate": 5.79698747972049e-06, + "loss": 2.0876, + "step": 12055 + }, + { + "epoch": 2.45, + "learning_rate": 5.795420184150789e-06, + "loss": 2.1983, + "step": 12056 + }, + { + "epoch": 2.45, + "learning_rate": 5.79385301403272e-06, + "loss": 2.114, + "step": 12057 + }, + { + "epoch": 2.45, + "learning_rate": 5.792285969413052e-06, + "loss": 2.0989, + "step": 12058 + }, + { + "epoch": 2.45, + "learning_rate": 5.790719050338532e-06, + "loss": 2.1698, + "step": 12059 + }, + { + "epoch": 2.45, + "learning_rate": 5.789152256855917e-06, + "loss": 2.1172, + "step": 12060 + }, + { + "epoch": 2.45, + "learning_rate": 5.787585589011953e-06, + "loss": 2.2221, + "step": 12061 + }, + { + "epoch": 2.45, + "learning_rate": 5.786019046853391e-06, + "loss": 2.0197, + "step": 12062 + }, + { + "epoch": 2.45, + "learning_rate": 5.784452630426962e-06, + "loss": 2.1602, + "step": 12063 + }, + { + "epoch": 2.45, + "learning_rate": 5.782886339779409e-06, + "loss": 2.0816, + "step": 12064 + }, + { + "epoch": 2.45, + "learning_rate": 5.781320174957463e-06, + "loss": 2.1108, + "step": 12065 + }, + { + "epoch": 2.45, + "learning_rate": 5.77975413600786e-06, + "loss": 2.113, + "step": 12066 + }, + { + "epoch": 2.45, + "learning_rate": 5.778188222977315e-06, + "loss": 2.1093, + "step": 12067 + }, + { + "epoch": 2.45, + "learning_rate": 5.776622435912555e-06, + "loss": 2.1846, + "step": 12068 + }, + { + "epoch": 2.45, + "learning_rate": 5.7750567748603035e-06, + "loss": 2.218, + "step": 12069 + }, + { + "epoch": 2.45, + "learning_rate": 5.773491239867267e-06, + "loss": 2.1499, + "step": 12070 + }, + { + "epoch": 2.45, + "learning_rate": 5.771925830980158e-06, + "loss": 2.1082, + "step": 12071 + }, + { + "epoch": 2.45, + "learning_rate": 5.770360548245686e-06, + "loss": 2.0971, + "step": 12072 + }, + { + "epoch": 2.45, + "learning_rate": 5.768795391710555e-06, + "loss": 2.1971, + "step": 12073 + }, + { + "epoch": 2.45, + "learning_rate": 5.76723036142146e-06, + "loss": 2.1267, + "step": 12074 + }, + { + "epoch": 2.45, + "learning_rate": 5.765665457425098e-06, + "loss": 2.0934, + "step": 12075 + }, + { + "epoch": 2.45, + "learning_rate": 5.764100679768163e-06, + "loss": 2.0831, + "step": 12076 + }, + { + "epoch": 2.45, + "learning_rate": 5.762536028497346e-06, + "loss": 2.094, + "step": 12077 + }, + { + "epoch": 2.45, + "learning_rate": 5.76097150365932e-06, + "loss": 2.1686, + "step": 12078 + }, + { + "epoch": 2.45, + "learning_rate": 5.759407105300781e-06, + "loss": 2.1081, + "step": 12079 + }, + { + "epoch": 2.45, + "learning_rate": 5.757842833468388e-06, + "loss": 2.1312, + "step": 12080 + }, + { + "epoch": 2.45, + "learning_rate": 5.756278688208832e-06, + "loss": 2.1001, + "step": 12081 + }, + { + "epoch": 2.45, + "learning_rate": 5.754714669568771e-06, + "loss": 2.1933, + "step": 12082 + }, + { + "epoch": 2.45, + "learning_rate": 5.753150777594876e-06, + "loss": 2.0904, + "step": 12083 + }, + { + "epoch": 2.45, + "learning_rate": 5.751587012333802e-06, + "loss": 2.144, + "step": 12084 + }, + { + "epoch": 2.45, + "learning_rate": 5.750023373832212e-06, + "loss": 2.0463, + "step": 12085 + }, + { + "epoch": 2.45, + "learning_rate": 5.748459862136759e-06, + "loss": 2.0983, + "step": 12086 + }, + { + "epoch": 2.45, + "learning_rate": 5.746896477294097e-06, + "loss": 2.1274, + "step": 12087 + }, + { + "epoch": 2.45, + "learning_rate": 5.745333219350866e-06, + "loss": 2.183, + "step": 12088 + }, + { + "epoch": 2.45, + "learning_rate": 5.7437700883537115e-06, + "loss": 2.0156, + "step": 12089 + }, + { + "epoch": 2.45, + "learning_rate": 5.742207084349274e-06, + "loss": 2.0323, + "step": 12090 + }, + { + "epoch": 2.45, + "learning_rate": 5.740644207384189e-06, + "loss": 2.1425, + "step": 12091 + }, + { + "epoch": 2.45, + "learning_rate": 5.739081457505085e-06, + "loss": 2.1106, + "step": 12092 + }, + { + "epoch": 2.45, + "learning_rate": 5.73751883475859e-06, + "loss": 2.0488, + "step": 12093 + }, + { + "epoch": 2.45, + "learning_rate": 5.735956339191333e-06, + "loss": 2.1192, + "step": 12094 + }, + { + "epoch": 2.46, + "learning_rate": 5.734393970849925e-06, + "loss": 2.1215, + "step": 12095 + }, + { + "epoch": 2.46, + "learning_rate": 5.732831729780988e-06, + "loss": 2.1094, + "step": 12096 + }, + { + "epoch": 2.46, + "learning_rate": 5.731269616031134e-06, + "loss": 2.1084, + "step": 12097 + }, + { + "epoch": 2.46, + "learning_rate": 5.729707629646975e-06, + "loss": 2.0162, + "step": 12098 + }, + { + "epoch": 2.46, + "learning_rate": 5.728145770675109e-06, + "loss": 2.1276, + "step": 12099 + }, + { + "epoch": 2.46, + "learning_rate": 5.726584039162139e-06, + "loss": 2.0889, + "step": 12100 + }, + { + "epoch": 2.46, + "learning_rate": 5.7250224351546636e-06, + "loss": 2.0971, + "step": 12101 + }, + { + "epoch": 2.46, + "learning_rate": 5.723460958699279e-06, + "loss": 2.0854, + "step": 12102 + }, + { + "epoch": 2.46, + "learning_rate": 5.721899609842569e-06, + "loss": 2.1438, + "step": 12103 + }, + { + "epoch": 2.46, + "learning_rate": 5.720338388631126e-06, + "loss": 2.1028, + "step": 12104 + }, + { + "epoch": 2.46, + "learning_rate": 5.718777295111519e-06, + "loss": 2.1114, + "step": 12105 + }, + { + "epoch": 2.46, + "learning_rate": 5.717216329330345e-06, + "loss": 2.1021, + "step": 12106 + }, + { + "epoch": 2.46, + "learning_rate": 5.7156554913341644e-06, + "loss": 2.1358, + "step": 12107 + }, + { + "epoch": 2.46, + "learning_rate": 5.714094781169553e-06, + "loss": 2.0829, + "step": 12108 + }, + { + "epoch": 2.46, + "learning_rate": 5.712534198883078e-06, + "loss": 2.1177, + "step": 12109 + }, + { + "epoch": 2.46, + "learning_rate": 5.7109737445213e-06, + "loss": 2.1878, + "step": 12110 + }, + { + "epoch": 2.46, + "learning_rate": 5.709413418130778e-06, + "loss": 2.1303, + "step": 12111 + }, + { + "epoch": 2.46, + "learning_rate": 5.707853219758068e-06, + "loss": 2.0568, + "step": 12112 + }, + { + "epoch": 2.46, + "learning_rate": 5.7062931494497266e-06, + "loss": 2.1414, + "step": 12113 + }, + { + "epoch": 2.46, + "learning_rate": 5.7047332072522935e-06, + "loss": 2.1278, + "step": 12114 + }, + { + "epoch": 2.46, + "learning_rate": 5.703173393212313e-06, + "loss": 2.068, + "step": 12115 + }, + { + "epoch": 2.46, + "learning_rate": 5.701613707376331e-06, + "loss": 2.0232, + "step": 12116 + }, + { + "epoch": 2.46, + "learning_rate": 5.7000541497908834e-06, + "loss": 2.1196, + "step": 12117 + }, + { + "epoch": 2.46, + "learning_rate": 5.6984947205024964e-06, + "loss": 2.0544, + "step": 12118 + }, + { + "epoch": 2.46, + "learning_rate": 5.6969354195577054e-06, + "loss": 2.1874, + "step": 12119 + }, + { + "epoch": 2.46, + "learning_rate": 5.695376247003025e-06, + "loss": 2.041, + "step": 12120 + }, + { + "epoch": 2.46, + "learning_rate": 5.69381720288499e-06, + "loss": 2.1229, + "step": 12121 + }, + { + "epoch": 2.46, + "learning_rate": 5.692258287250106e-06, + "loss": 2.1453, + "step": 12122 + }, + { + "epoch": 2.46, + "learning_rate": 5.690699500144894e-06, + "loss": 2.1239, + "step": 12123 + }, + { + "epoch": 2.46, + "learning_rate": 5.689140841615857e-06, + "loss": 2.1101, + "step": 12124 + }, + { + "epoch": 2.46, + "learning_rate": 5.687582311709503e-06, + "loss": 2.027, + "step": 12125 + }, + { + "epoch": 2.46, + "learning_rate": 5.6860239104723335e-06, + "loss": 2.141, + "step": 12126 + }, + { + "epoch": 2.46, + "learning_rate": 5.684465637950851e-06, + "loss": 2.1612, + "step": 12127 + }, + { + "epoch": 2.46, + "learning_rate": 5.682907494191543e-06, + "loss": 2.1353, + "step": 12128 + }, + { + "epoch": 2.46, + "learning_rate": 5.681349479240901e-06, + "loss": 2.1403, + "step": 12129 + }, + { + "epoch": 2.46, + "learning_rate": 5.6797915931454115e-06, + "loss": 2.1412, + "step": 12130 + }, + { + "epoch": 2.46, + "learning_rate": 5.678233835951563e-06, + "loss": 2.1711, + "step": 12131 + }, + { + "epoch": 2.46, + "learning_rate": 5.676676207705825e-06, + "loss": 2.111, + "step": 12132 + }, + { + "epoch": 2.46, + "learning_rate": 5.675118708454677e-06, + "loss": 2.0604, + "step": 12133 + }, + { + "epoch": 2.46, + "learning_rate": 5.673561338244593e-06, + "loss": 2.1379, + "step": 12134 + }, + { + "epoch": 2.46, + "learning_rate": 5.672004097122033e-06, + "loss": 2.0874, + "step": 12135 + }, + { + "epoch": 2.46, + "learning_rate": 5.670446985133462e-06, + "loss": 2.18, + "step": 12136 + }, + { + "epoch": 2.46, + "learning_rate": 5.668890002325342e-06, + "loss": 2.0622, + "step": 12137 + }, + { + "epoch": 2.46, + "learning_rate": 5.6673331487441315e-06, + "loss": 2.175, + "step": 12138 + }, + { + "epoch": 2.46, + "learning_rate": 5.665776424436275e-06, + "loss": 2.0958, + "step": 12139 + }, + { + "epoch": 2.46, + "learning_rate": 5.664219829448223e-06, + "loss": 2.1187, + "step": 12140 + }, + { + "epoch": 2.46, + "learning_rate": 5.6626633638264214e-06, + "loss": 2.1011, + "step": 12141 + }, + { + "epoch": 2.46, + "learning_rate": 5.661107027617312e-06, + "loss": 2.0659, + "step": 12142 + }, + { + "epoch": 2.46, + "learning_rate": 5.659550820867325e-06, + "loss": 2.2138, + "step": 12143 + }, + { + "epoch": 2.47, + "learning_rate": 5.657994743622895e-06, + "loss": 2.1459, + "step": 12144 + }, + { + "epoch": 2.47, + "learning_rate": 5.656438795930452e-06, + "loss": 2.2144, + "step": 12145 + }, + { + "epoch": 2.47, + "learning_rate": 5.654882977836424e-06, + "loss": 2.0584, + "step": 12146 + }, + { + "epoch": 2.47, + "learning_rate": 5.653327289387225e-06, + "loss": 2.1792, + "step": 12147 + }, + { + "epoch": 2.47, + "learning_rate": 5.6517717306292764e-06, + "loss": 2.1666, + "step": 12148 + }, + { + "epoch": 2.47, + "learning_rate": 5.650216301608984e-06, + "loss": 2.1069, + "step": 12149 + }, + { + "epoch": 2.47, + "learning_rate": 5.648661002372769e-06, + "loss": 2.2305, + "step": 12150 + }, + { + "epoch": 2.47, + "learning_rate": 5.647105832967027e-06, + "loss": 2.1293, + "step": 12151 + }, + { + "epoch": 2.47, + "learning_rate": 5.645550793438167e-06, + "loss": 2.0772, + "step": 12152 + }, + { + "epoch": 2.47, + "learning_rate": 5.643995883832578e-06, + "loss": 2.1331, + "step": 12153 + }, + { + "epoch": 2.47, + "learning_rate": 5.642441104196659e-06, + "loss": 2.1817, + "step": 12154 + }, + { + "epoch": 2.47, + "learning_rate": 5.640886454576797e-06, + "loss": 2.0585, + "step": 12155 + }, + { + "epoch": 2.47, + "learning_rate": 5.639331935019385e-06, + "loss": 2.112, + "step": 12156 + }, + { + "epoch": 2.47, + "learning_rate": 5.6377775455707975e-06, + "loss": 2.0745, + "step": 12157 + }, + { + "epoch": 2.47, + "learning_rate": 5.636223286277412e-06, + "loss": 2.022, + "step": 12158 + }, + { + "epoch": 2.47, + "learning_rate": 5.634669157185608e-06, + "loss": 2.1351, + "step": 12159 + }, + { + "epoch": 2.47, + "learning_rate": 5.633115158341753e-06, + "loss": 2.1124, + "step": 12160 + }, + { + "epoch": 2.47, + "learning_rate": 5.631561289792218e-06, + "loss": 2.1217, + "step": 12161 + }, + { + "epoch": 2.47, + "learning_rate": 5.630007551583358e-06, + "loss": 2.1594, + "step": 12162 + }, + { + "epoch": 2.47, + "learning_rate": 5.628453943761538e-06, + "loss": 2.147, + "step": 12163 + }, + { + "epoch": 2.47, + "learning_rate": 5.6269004663731045e-06, + "loss": 2.0873, + "step": 12164 + }, + { + "epoch": 2.47, + "learning_rate": 5.625347119464422e-06, + "loss": 2.1145, + "step": 12165 + }, + { + "epoch": 2.47, + "learning_rate": 5.623793903081826e-06, + "loss": 2.0827, + "step": 12166 + }, + { + "epoch": 2.47, + "learning_rate": 5.622240817271667e-06, + "loss": 2.0522, + "step": 12167 + }, + { + "epoch": 2.47, + "learning_rate": 5.620687862080276e-06, + "loss": 2.1216, + "step": 12168 + }, + { + "epoch": 2.47, + "learning_rate": 5.619135037553995e-06, + "loss": 2.1338, + "step": 12169 + }, + { + "epoch": 2.47, + "learning_rate": 5.617582343739153e-06, + "loss": 2.1279, + "step": 12170 + }, + { + "epoch": 2.47, + "learning_rate": 5.616029780682082e-06, + "loss": 2.0848, + "step": 12171 + }, + { + "epoch": 2.47, + "learning_rate": 5.614477348429099e-06, + "loss": 2.0915, + "step": 12172 + }, + { + "epoch": 2.47, + "learning_rate": 5.612925047026525e-06, + "loss": 2.142, + "step": 12173 + }, + { + "epoch": 2.47, + "learning_rate": 5.61137287652068e-06, + "loss": 2.1273, + "step": 12174 + }, + { + "epoch": 2.47, + "learning_rate": 5.6098208369578755e-06, + "loss": 2.1267, + "step": 12175 + }, + { + "epoch": 2.47, + "learning_rate": 5.608268928384415e-06, + "loss": 2.1028, + "step": 12176 + }, + { + "epoch": 2.47, + "learning_rate": 5.606717150846603e-06, + "loss": 2.1097, + "step": 12177 + }, + { + "epoch": 2.47, + "learning_rate": 5.6051655043907485e-06, + "loss": 2.1168, + "step": 12178 + }, + { + "epoch": 2.47, + "learning_rate": 5.603613989063136e-06, + "loss": 2.0714, + "step": 12179 + }, + { + "epoch": 2.47, + "learning_rate": 5.602062604910064e-06, + "loss": 2.0462, + "step": 12180 + }, + { + "epoch": 2.47, + "learning_rate": 5.600511351977818e-06, + "loss": 2.0169, + "step": 12181 + }, + { + "epoch": 2.47, + "learning_rate": 5.598960230312691e-06, + "loss": 2.0387, + "step": 12182 + }, + { + "epoch": 2.47, + "learning_rate": 5.597409239960954e-06, + "loss": 2.1518, + "step": 12183 + }, + { + "epoch": 2.47, + "learning_rate": 5.595858380968886e-06, + "loss": 2.0927, + "step": 12184 + }, + { + "epoch": 2.47, + "learning_rate": 5.59430765338276e-06, + "loss": 2.0479, + "step": 12185 + }, + { + "epoch": 2.47, + "learning_rate": 5.592757057248852e-06, + "loss": 2.1658, + "step": 12186 + }, + { + "epoch": 2.47, + "learning_rate": 5.591206592613416e-06, + "loss": 2.0827, + "step": 12187 + }, + { + "epoch": 2.47, + "learning_rate": 5.5896562595227225e-06, + "loss": 2.191, + "step": 12188 + }, + { + "epoch": 2.47, + "learning_rate": 5.588106058023015e-06, + "loss": 2.0599, + "step": 12189 + }, + { + "epoch": 2.47, + "learning_rate": 5.5865559881605644e-06, + "loss": 2.1654, + "step": 12190 + }, + { + "epoch": 2.47, + "learning_rate": 5.585006049981607e-06, + "loss": 2.1227, + "step": 12191 + }, + { + "epoch": 2.47, + "learning_rate": 5.583456243532397e-06, + "loss": 2.0574, + "step": 12192 + }, + { + "epoch": 2.47, + "learning_rate": 5.5819065688591675e-06, + "loss": 2.183, + "step": 12193 + }, + { + "epoch": 2.48, + "learning_rate": 5.580357026008161e-06, + "loss": 2.1548, + "step": 12194 + }, + { + "epoch": 2.48, + "learning_rate": 5.5788076150256075e-06, + "loss": 2.0112, + "step": 12195 + }, + { + "epoch": 2.48, + "learning_rate": 5.5772583359577445e-06, + "loss": 2.0815, + "step": 12196 + }, + { + "epoch": 2.48, + "learning_rate": 5.575709188850788e-06, + "loss": 2.1115, + "step": 12197 + }, + { + "epoch": 2.48, + "learning_rate": 5.574160173750964e-06, + "loss": 2.0945, + "step": 12198 + }, + { + "epoch": 2.48, + "learning_rate": 5.572611290704491e-06, + "loss": 2.1546, + "step": 12199 + }, + { + "epoch": 2.48, + "learning_rate": 5.571062539757586e-06, + "loss": 2.0748, + "step": 12200 + }, + { + "epoch": 2.48, + "learning_rate": 5.56951392095645e-06, + "loss": 2.0849, + "step": 12201 + }, + { + "epoch": 2.48, + "learning_rate": 5.567965434347296e-06, + "loss": 2.1818, + "step": 12202 + }, + { + "epoch": 2.48, + "learning_rate": 5.566417079976325e-06, + "loss": 2.1178, + "step": 12203 + }, + { + "epoch": 2.48, + "learning_rate": 5.5648688578897334e-06, + "loss": 2.111, + "step": 12204 + }, + { + "epoch": 2.48, + "learning_rate": 5.563320768133716e-06, + "loss": 2.062, + "step": 12205 + }, + { + "epoch": 2.48, + "learning_rate": 5.561772810754461e-06, + "loss": 2.0578, + "step": 12206 + }, + { + "epoch": 2.48, + "learning_rate": 5.560224985798162e-06, + "loss": 2.1509, + "step": 12207 + }, + { + "epoch": 2.48, + "learning_rate": 5.558677293310992e-06, + "loss": 2.1114, + "step": 12208 + }, + { + "epoch": 2.48, + "learning_rate": 5.557129733339135e-06, + "loss": 2.1031, + "step": 12209 + }, + { + "epoch": 2.48, + "learning_rate": 5.555582305928762e-06, + "loss": 2.1725, + "step": 12210 + }, + { + "epoch": 2.48, + "learning_rate": 5.55403501112605e-06, + "loss": 2.1254, + "step": 12211 + }, + { + "epoch": 2.48, + "learning_rate": 5.552487848977157e-06, + "loss": 2.0456, + "step": 12212 + }, + { + "epoch": 2.48, + "learning_rate": 5.550940819528249e-06, + "loss": 2.1324, + "step": 12213 + }, + { + "epoch": 2.48, + "learning_rate": 5.549393922825486e-06, + "loss": 2.1881, + "step": 12214 + }, + { + "epoch": 2.48, + "learning_rate": 5.547847158915024e-06, + "loss": 2.0604, + "step": 12215 + }, + { + "epoch": 2.48, + "learning_rate": 5.5463005278430075e-06, + "loss": 2.071, + "step": 12216 + }, + { + "epoch": 2.48, + "learning_rate": 5.544754029655588e-06, + "loss": 2.1262, + "step": 12217 + }, + { + "epoch": 2.48, + "learning_rate": 5.54320766439891e-06, + "loss": 2.1465, + "step": 12218 + }, + { + "epoch": 2.48, + "learning_rate": 5.541661432119105e-06, + "loss": 2.0732, + "step": 12219 + }, + { + "epoch": 2.48, + "learning_rate": 5.540115332862312e-06, + "loss": 2.1051, + "step": 12220 + }, + { + "epoch": 2.48, + "learning_rate": 5.538569366674664e-06, + "loss": 2.0616, + "step": 12221 + }, + { + "epoch": 2.48, + "learning_rate": 5.537023533602288e-06, + "loss": 2.0827, + "step": 12222 + }, + { + "epoch": 2.48, + "learning_rate": 5.535477833691301e-06, + "loss": 2.1294, + "step": 12223 + }, + { + "epoch": 2.48, + "learning_rate": 5.533932266987825e-06, + "loss": 2.1645, + "step": 12224 + }, + { + "epoch": 2.48, + "learning_rate": 5.5323868335379775e-06, + "loss": 2.1058, + "step": 12225 + }, + { + "epoch": 2.48, + "learning_rate": 5.5308415333878685e-06, + "loss": 2.0605, + "step": 12226 + }, + { + "epoch": 2.48, + "learning_rate": 5.529296366583601e-06, + "loss": 2.1023, + "step": 12227 + }, + { + "epoch": 2.48, + "learning_rate": 5.5277513331712804e-06, + "loss": 2.0755, + "step": 12228 + }, + { + "epoch": 2.48, + "learning_rate": 5.526206433197008e-06, + "loss": 2.0878, + "step": 12229 + }, + { + "epoch": 2.48, + "learning_rate": 5.52466166670688e-06, + "loss": 2.1192, + "step": 12230 + }, + { + "epoch": 2.48, + "learning_rate": 5.52311703374698e-06, + "loss": 2.1918, + "step": 12231 + }, + { + "epoch": 2.48, + "learning_rate": 5.521572534363404e-06, + "loss": 2.0983, + "step": 12232 + }, + { + "epoch": 2.48, + "learning_rate": 5.520028168602222e-06, + "loss": 2.1506, + "step": 12233 + }, + { + "epoch": 2.48, + "learning_rate": 5.51848393650953e-06, + "loss": 2.0465, + "step": 12234 + }, + { + "epoch": 2.48, + "learning_rate": 5.51693983813139e-06, + "loss": 2.0732, + "step": 12235 + }, + { + "epoch": 2.48, + "learning_rate": 5.5153958735138825e-06, + "loss": 2.1981, + "step": 12236 + }, + { + "epoch": 2.48, + "learning_rate": 5.513852042703067e-06, + "loss": 2.0604, + "step": 12237 + }, + { + "epoch": 2.48, + "learning_rate": 5.512308345745008e-06, + "loss": 2.0806, + "step": 12238 + }, + { + "epoch": 2.48, + "learning_rate": 5.5107647826857665e-06, + "loss": 2.0866, + "step": 12239 + }, + { + "epoch": 2.48, + "learning_rate": 5.509221353571404e-06, + "loss": 2.1036, + "step": 12240 + }, + { + "epoch": 2.48, + "learning_rate": 5.507678058447958e-06, + "loss": 2.1458, + "step": 12241 + }, + { + "epoch": 2.48, + "learning_rate": 5.5061348973614835e-06, + "loss": 2.0792, + "step": 12242 + }, + { + "epoch": 2.49, + "learning_rate": 5.504591870358022e-06, + "loss": 2.0441, + "step": 12243 + }, + { + "epoch": 2.49, + "learning_rate": 5.503048977483621e-06, + "loss": 2.1714, + "step": 12244 + }, + { + "epoch": 2.49, + "learning_rate": 5.5015062187843e-06, + "loss": 2.1354, + "step": 12245 + }, + { + "epoch": 2.49, + "learning_rate": 5.4999635943061005e-06, + "loss": 2.1667, + "step": 12246 + }, + { + "epoch": 2.49, + "learning_rate": 5.498421104095051e-06, + "loss": 2.2577, + "step": 12247 + }, + { + "epoch": 2.49, + "learning_rate": 5.496878748197167e-06, + "loss": 2.1237, + "step": 12248 + }, + { + "epoch": 2.49, + "learning_rate": 5.49533652665847e-06, + "loss": 2.1583, + "step": 12249 + }, + { + "epoch": 2.49, + "learning_rate": 5.49379443952498e-06, + "loss": 2.1437, + "step": 12250 + }, + { + "epoch": 2.49, + "learning_rate": 5.492252486842706e-06, + "loss": 2.1143, + "step": 12251 + }, + { + "epoch": 2.49, + "learning_rate": 5.490710668657652e-06, + "loss": 2.1338, + "step": 12252 + }, + { + "epoch": 2.49, + "learning_rate": 5.489168985015823e-06, + "loss": 2.1195, + "step": 12253 + }, + { + "epoch": 2.49, + "learning_rate": 5.4876274359632165e-06, + "loss": 2.0517, + "step": 12254 + }, + { + "epoch": 2.49, + "learning_rate": 5.486086021545833e-06, + "loss": 2.226, + "step": 12255 + }, + { + "epoch": 2.49, + "learning_rate": 5.484544741809656e-06, + "loss": 2.17, + "step": 12256 + }, + { + "epoch": 2.49, + "learning_rate": 5.483003596800682e-06, + "loss": 2.1099, + "step": 12257 + }, + { + "epoch": 2.49, + "learning_rate": 5.481462586564878e-06, + "loss": 2.163, + "step": 12258 + }, + { + "epoch": 2.49, + "learning_rate": 5.479921711148244e-06, + "loss": 2.1472, + "step": 12259 + }, + { + "epoch": 2.49, + "learning_rate": 5.478380970596737e-06, + "loss": 2.1759, + "step": 12260 + }, + { + "epoch": 2.49, + "learning_rate": 5.476840364956338e-06, + "loss": 2.0875, + "step": 12261 + }, + { + "epoch": 2.49, + "learning_rate": 5.475299894273012e-06, + "loss": 2.0156, + "step": 12262 + }, + { + "epoch": 2.49, + "learning_rate": 5.473759558592718e-06, + "loss": 2.1017, + "step": 12263 + }, + { + "epoch": 2.49, + "learning_rate": 5.472219357961419e-06, + "loss": 2.1379, + "step": 12264 + }, + { + "epoch": 2.49, + "learning_rate": 5.470679292425067e-06, + "loss": 2.1452, + "step": 12265 + }, + { + "epoch": 2.49, + "learning_rate": 5.469139362029618e-06, + "loss": 2.0456, + "step": 12266 + }, + { + "epoch": 2.49, + "learning_rate": 5.4675995668210115e-06, + "loss": 2.1552, + "step": 12267 + }, + { + "epoch": 2.49, + "learning_rate": 5.466059906845193e-06, + "loss": 2.1337, + "step": 12268 + }, + { + "epoch": 2.49, + "learning_rate": 5.464520382148104e-06, + "loss": 2.0547, + "step": 12269 + }, + { + "epoch": 2.49, + "learning_rate": 5.4629809927756794e-06, + "loss": 2.0874, + "step": 12270 + }, + { + "epoch": 2.49, + "learning_rate": 5.461441738773844e-06, + "loss": 2.139, + "step": 12271 + }, + { + "epoch": 2.49, + "learning_rate": 5.4599026201885315e-06, + "loss": 2.1186, + "step": 12272 + }, + { + "epoch": 2.49, + "learning_rate": 5.458363637065654e-06, + "loss": 2.134, + "step": 12273 + }, + { + "epoch": 2.49, + "learning_rate": 5.4568247894511425e-06, + "loss": 2.1397, + "step": 12274 + }, + { + "epoch": 2.49, + "learning_rate": 5.4552860773909035e-06, + "loss": 2.1913, + "step": 12275 + }, + { + "epoch": 2.49, + "learning_rate": 5.453747500930853e-06, + "loss": 2.1175, + "step": 12276 + }, + { + "epoch": 2.49, + "learning_rate": 5.452209060116891e-06, + "loss": 2.2129, + "step": 12277 + }, + { + "epoch": 2.49, + "learning_rate": 5.450670754994921e-06, + "loss": 2.1049, + "step": 12278 + }, + { + "epoch": 2.49, + "learning_rate": 5.449132585610844e-06, + "loss": 2.1501, + "step": 12279 + }, + { + "epoch": 2.49, + "learning_rate": 5.4475945520105564e-06, + "loss": 2.0507, + "step": 12280 + }, + { + "epoch": 2.49, + "learning_rate": 5.4460566542399415e-06, + "loss": 2.0952, + "step": 12281 + }, + { + "epoch": 2.49, + "learning_rate": 5.44451889234489e-06, + "loss": 2.2029, + "step": 12282 + }, + { + "epoch": 2.49, + "learning_rate": 5.442981266371283e-06, + "loss": 2.1761, + "step": 12283 + }, + { + "epoch": 2.49, + "learning_rate": 5.441443776365003e-06, + "loss": 2.1194, + "step": 12284 + }, + { + "epoch": 2.49, + "learning_rate": 5.439906422371914e-06, + "loss": 2.1272, + "step": 12285 + }, + { + "epoch": 2.49, + "learning_rate": 5.438369204437892e-06, + "loss": 2.0651, + "step": 12286 + }, + { + "epoch": 2.49, + "learning_rate": 5.436832122608806e-06, + "loss": 2.1273, + "step": 12287 + }, + { + "epoch": 2.49, + "learning_rate": 5.43529517693051e-06, + "loss": 2.047, + "step": 12288 + }, + { + "epoch": 2.49, + "learning_rate": 5.433758367448867e-06, + "loss": 2.152, + "step": 12289 + }, + { + "epoch": 2.49, + "learning_rate": 5.4322216942097275e-06, + "loss": 2.1522, + "step": 12290 + }, + { + "epoch": 2.49, + "learning_rate": 5.430685157258948e-06, + "loss": 2.1396, + "step": 12291 + }, + { + "epoch": 2.5, + "learning_rate": 5.429148756642364e-06, + "loss": 2.1815, + "step": 12292 + }, + { + "epoch": 2.5, + "learning_rate": 5.427612492405822e-06, + "loss": 2.1566, + "step": 12293 + }, + { + "epoch": 2.5, + "learning_rate": 5.426076364595158e-06, + "loss": 2.0703, + "step": 12294 + }, + { + "epoch": 2.5, + "learning_rate": 5.424540373256213e-06, + "loss": 2.2041, + "step": 12295 + }, + { + "epoch": 2.5, + "learning_rate": 5.423004518434805e-06, + "loss": 2.1088, + "step": 12296 + }, + { + "epoch": 2.5, + "learning_rate": 5.4214688001767665e-06, + "loss": 2.1599, + "step": 12297 + }, + { + "epoch": 2.5, + "learning_rate": 5.419933218527909e-06, + "loss": 2.0658, + "step": 12298 + }, + { + "epoch": 2.5, + "learning_rate": 5.418397773534064e-06, + "loss": 2.2002, + "step": 12299 + }, + { + "epoch": 2.5, + "learning_rate": 5.416862465241033e-06, + "loss": 2.1454, + "step": 12300 + }, + { + "epoch": 2.5, + "learning_rate": 5.4153272936946325e-06, + "loss": 2.096, + "step": 12301 + }, + { + "epoch": 2.5, + "learning_rate": 5.41379225894066e-06, + "loss": 2.0485, + "step": 12302 + }, + { + "epoch": 2.5, + "learning_rate": 5.412257361024919e-06, + "loss": 1.9992, + "step": 12303 + }, + { + "epoch": 2.5, + "learning_rate": 5.410722599993208e-06, + "loss": 2.1046, + "step": 12304 + }, + { + "epoch": 2.5, + "learning_rate": 5.40918797589132e-06, + "loss": 2.1669, + "step": 12305 + }, + { + "epoch": 2.5, + "learning_rate": 5.407653488765039e-06, + "loss": 2.1792, + "step": 12306 + }, + { + "epoch": 2.5, + "learning_rate": 5.406119138660152e-06, + "loss": 2.085, + "step": 12307 + }, + { + "epoch": 2.5, + "learning_rate": 5.404584925622438e-06, + "loss": 2.027, + "step": 12308 + }, + { + "epoch": 2.5, + "learning_rate": 5.40305084969768e-06, + "loss": 2.1476, + "step": 12309 + }, + { + "epoch": 2.5, + "learning_rate": 5.401516910931641e-06, + "loss": 2.0989, + "step": 12310 + }, + { + "epoch": 2.5, + "learning_rate": 5.39998310937009e-06, + "loss": 2.1741, + "step": 12311 + }, + { + "epoch": 2.5, + "learning_rate": 5.3984494450588e-06, + "loss": 2.1844, + "step": 12312 + }, + { + "epoch": 2.5, + "learning_rate": 5.396915918043515e-06, + "loss": 2.1786, + "step": 12313 + }, + { + "epoch": 2.5, + "learning_rate": 5.395382528370008e-06, + "loss": 2.0936, + "step": 12314 + }, + { + "epoch": 2.5, + "learning_rate": 5.3938492760840176e-06, + "loss": 2.0496, + "step": 12315 + }, + { + "epoch": 2.5, + "learning_rate": 5.3923161612313015e-06, + "loss": 2.0501, + "step": 12316 + }, + { + "epoch": 2.5, + "learning_rate": 5.390783183857593e-06, + "loss": 2.1716, + "step": 12317 + }, + { + "epoch": 2.5, + "learning_rate": 5.389250344008636e-06, + "loss": 2.1293, + "step": 12318 + }, + { + "epoch": 2.5, + "learning_rate": 5.387717641730166e-06, + "loss": 2.1061, + "step": 12319 + }, + { + "epoch": 2.5, + "learning_rate": 5.386185077067918e-06, + "loss": 2.1308, + "step": 12320 + }, + { + "epoch": 2.5, + "learning_rate": 5.384652650067611e-06, + "loss": 2.1187, + "step": 12321 + }, + { + "epoch": 2.5, + "learning_rate": 5.383120360774972e-06, + "loss": 2.1281, + "step": 12322 + }, + { + "epoch": 2.5, + "learning_rate": 5.381588209235719e-06, + "loss": 2.0683, + "step": 12323 + }, + { + "epoch": 2.5, + "learning_rate": 5.380056195495571e-06, + "loss": 2.1415, + "step": 12324 + }, + { + "epoch": 2.5, + "learning_rate": 5.378524319600231e-06, + "loss": 2.1315, + "step": 12325 + }, + { + "epoch": 2.5, + "learning_rate": 5.37699258159541e-06, + "loss": 2.0842, + "step": 12326 + }, + { + "epoch": 2.5, + "learning_rate": 5.375460981526809e-06, + "loss": 2.1871, + "step": 12327 + }, + { + "epoch": 2.5, + "learning_rate": 5.3739295194401305e-06, + "loss": 2.0661, + "step": 12328 + }, + { + "epoch": 2.5, + "learning_rate": 5.372398195381061e-06, + "loss": 2.1451, + "step": 12329 + }, + { + "epoch": 2.5, + "learning_rate": 5.370867009395294e-06, + "loss": 2.1371, + "step": 12330 + }, + { + "epoch": 2.5, + "learning_rate": 5.3693359615285205e-06, + "loss": 2.0576, + "step": 12331 + }, + { + "epoch": 2.5, + "learning_rate": 5.367805051826411e-06, + "loss": 2.2261, + "step": 12332 + }, + { + "epoch": 2.5, + "learning_rate": 5.366274280334651e-06, + "loss": 2.1021, + "step": 12333 + }, + { + "epoch": 2.5, + "learning_rate": 5.364743647098913e-06, + "loss": 2.0944, + "step": 12334 + }, + { + "epoch": 2.5, + "learning_rate": 5.36321315216487e-06, + "loss": 2.0442, + "step": 12335 + }, + { + "epoch": 2.5, + "learning_rate": 5.361682795578178e-06, + "loss": 2.1178, + "step": 12336 + }, + { + "epoch": 2.5, + "learning_rate": 5.360152577384503e-06, + "loss": 2.1091, + "step": 12337 + }, + { + "epoch": 2.5, + "learning_rate": 5.358622497629502e-06, + "loss": 2.1305, + "step": 12338 + }, + { + "epoch": 2.5, + "learning_rate": 5.3570925563588316e-06, + "loss": 2.0896, + "step": 12339 + }, + { + "epoch": 2.5, + "learning_rate": 5.355562753618133e-06, + "loss": 2.0964, + "step": 12340 + }, + { + "epoch": 2.51, + "learning_rate": 5.354033089453059e-06, + "loss": 2.0614, + "step": 12341 + }, + { + "epoch": 2.51, + "learning_rate": 5.3525035639092385e-06, + "loss": 2.1293, + "step": 12342 + }, + { + "epoch": 2.51, + "learning_rate": 5.350974177032322e-06, + "loss": 2.2093, + "step": 12343 + }, + { + "epoch": 2.51, + "learning_rate": 5.34944492886793e-06, + "loss": 2.1149, + "step": 12344 + }, + { + "epoch": 2.51, + "learning_rate": 5.3479158194617e-06, + "loss": 2.0144, + "step": 12345 + }, + { + "epoch": 2.51, + "learning_rate": 5.346386848859246e-06, + "loss": 2.0862, + "step": 12346 + }, + { + "epoch": 2.51, + "learning_rate": 5.344858017106193e-06, + "loss": 2.1511, + "step": 12347 + }, + { + "epoch": 2.51, + "learning_rate": 5.343329324248159e-06, + "loss": 2.0816, + "step": 12348 + }, + { + "epoch": 2.51, + "learning_rate": 5.341800770330754e-06, + "loss": 2.0451, + "step": 12349 + }, + { + "epoch": 2.51, + "learning_rate": 5.34027235539958e-06, + "loss": 2.0899, + "step": 12350 + }, + { + "epoch": 2.51, + "learning_rate": 5.3387440795002455e-06, + "loss": 2.2241, + "step": 12351 + }, + { + "epoch": 2.51, + "learning_rate": 5.337215942678348e-06, + "loss": 2.1144, + "step": 12352 + }, + { + "epoch": 2.51, + "learning_rate": 5.335687944979486e-06, + "loss": 2.136, + "step": 12353 + }, + { + "epoch": 2.51, + "learning_rate": 5.334160086449243e-06, + "loss": 2.1512, + "step": 12354 + }, + { + "epoch": 2.51, + "learning_rate": 5.33263236713321e-06, + "loss": 2.1091, + "step": 12355 + }, + { + "epoch": 2.51, + "learning_rate": 5.331104787076973e-06, + "loss": 2.1341, + "step": 12356 + }, + { + "epoch": 2.51, + "learning_rate": 5.329577346326102e-06, + "loss": 2.1021, + "step": 12357 + }, + { + "epoch": 2.51, + "learning_rate": 5.328050044926177e-06, + "loss": 2.1081, + "step": 12358 + }, + { + "epoch": 2.51, + "learning_rate": 5.326522882922762e-06, + "loss": 2.0428, + "step": 12359 + }, + { + "epoch": 2.51, + "learning_rate": 5.3249958603614345e-06, + "loss": 2.0783, + "step": 12360 + }, + { + "epoch": 2.51, + "learning_rate": 5.3234689772877425e-06, + "loss": 2.1203, + "step": 12361 + }, + { + "epoch": 2.51, + "learning_rate": 5.321942233747251e-06, + "loss": 2.067, + "step": 12362 + }, + { + "epoch": 2.51, + "learning_rate": 5.320415629785511e-06, + "loss": 2.1705, + "step": 12363 + }, + { + "epoch": 2.51, + "learning_rate": 5.318889165448077e-06, + "loss": 2.1621, + "step": 12364 + }, + { + "epoch": 2.51, + "learning_rate": 5.317362840780484e-06, + "loss": 2.0998, + "step": 12365 + }, + { + "epoch": 2.51, + "learning_rate": 5.315836655828282e-06, + "loss": 2.0802, + "step": 12366 + }, + { + "epoch": 2.51, + "learning_rate": 5.314310610637e-06, + "loss": 2.0687, + "step": 12367 + }, + { + "epoch": 2.51, + "learning_rate": 5.31278470525218e-06, + "loss": 2.1051, + "step": 12368 + }, + { + "epoch": 2.51, + "learning_rate": 5.311258939719339e-06, + "loss": 2.1014, + "step": 12369 + }, + { + "epoch": 2.51, + "learning_rate": 5.309733314084009e-06, + "loss": 2.1455, + "step": 12370 + }, + { + "epoch": 2.51, + "learning_rate": 5.308207828391711e-06, + "loss": 2.1353, + "step": 12371 + }, + { + "epoch": 2.51, + "learning_rate": 5.306682482687953e-06, + "loss": 2.1104, + "step": 12372 + }, + { + "epoch": 2.51, + "learning_rate": 5.305157277018251e-06, + "loss": 2.1634, + "step": 12373 + }, + { + "epoch": 2.51, + "learning_rate": 5.303632211428114e-06, + "loss": 2.2173, + "step": 12374 + }, + { + "epoch": 2.51, + "learning_rate": 5.302107285963045e-06, + "loss": 2.087, + "step": 12375 + }, + { + "epoch": 2.51, + "learning_rate": 5.300582500668539e-06, + "loss": 2.1326, + "step": 12376 + }, + { + "epoch": 2.51, + "learning_rate": 5.299057855590094e-06, + "loss": 2.1859, + "step": 12377 + }, + { + "epoch": 2.51, + "learning_rate": 5.297533350773201e-06, + "loss": 2.1305, + "step": 12378 + }, + { + "epoch": 2.51, + "learning_rate": 5.296008986263348e-06, + "loss": 2.1474, + "step": 12379 + }, + { + "epoch": 2.51, + "learning_rate": 5.2944847621060115e-06, + "loss": 2.1702, + "step": 12380 + }, + { + "epoch": 2.51, + "learning_rate": 5.292960678346678e-06, + "loss": 2.1626, + "step": 12381 + }, + { + "epoch": 2.51, + "learning_rate": 5.291436735030808e-06, + "loss": 2.1241, + "step": 12382 + }, + { + "epoch": 2.51, + "learning_rate": 5.289912932203889e-06, + "loss": 2.1273, + "step": 12383 + }, + { + "epoch": 2.51, + "learning_rate": 5.288389269911373e-06, + "loss": 2.181, + "step": 12384 + }, + { + "epoch": 2.51, + "learning_rate": 5.286865748198728e-06, + "loss": 2.1418, + "step": 12385 + }, + { + "epoch": 2.51, + "learning_rate": 5.285342367111408e-06, + "loss": 1.9946, + "step": 12386 + }, + { + "epoch": 2.51, + "learning_rate": 5.283819126694865e-06, + "loss": 2.114, + "step": 12387 + }, + { + "epoch": 2.51, + "learning_rate": 5.282296026994551e-06, + "loss": 2.0392, + "step": 12388 + }, + { + "epoch": 2.51, + "learning_rate": 5.280773068055913e-06, + "loss": 2.0887, + "step": 12389 + }, + { + "epoch": 2.51, + "learning_rate": 5.279250249924384e-06, + "loss": 2.1122, + "step": 12390 + }, + { + "epoch": 2.52, + "learning_rate": 5.277727572645403e-06, + "loss": 2.1421, + "step": 12391 + }, + { + "epoch": 2.52, + "learning_rate": 5.276205036264403e-06, + "loss": 2.0878, + "step": 12392 + }, + { + "epoch": 2.52, + "learning_rate": 5.274682640826816e-06, + "loss": 2.1528, + "step": 12393 + }, + { + "epoch": 2.52, + "learning_rate": 5.273160386378057e-06, + "loss": 2.1248, + "step": 12394 + }, + { + "epoch": 2.52, + "learning_rate": 5.271638272963549e-06, + "loss": 2.1329, + "step": 12395 + }, + { + "epoch": 2.52, + "learning_rate": 5.270116300628713e-06, + "loss": 2.1643, + "step": 12396 + }, + { + "epoch": 2.52, + "learning_rate": 5.268594469418949e-06, + "loss": 2.1008, + "step": 12397 + }, + { + "epoch": 2.52, + "learning_rate": 5.26707277937967e-06, + "loss": 2.1459, + "step": 12398 + }, + { + "epoch": 2.52, + "learning_rate": 5.265551230556278e-06, + "loss": 2.1001, + "step": 12399 + }, + { + "epoch": 2.52, + "learning_rate": 5.264029822994177e-06, + "loss": 2.0532, + "step": 12400 + }, + { + "epoch": 2.52, + "learning_rate": 5.262508556738749e-06, + "loss": 2.0658, + "step": 12401 + }, + { + "epoch": 2.52, + "learning_rate": 5.260987431835391e-06, + "loss": 2.1822, + "step": 12402 + }, + { + "epoch": 2.52, + "learning_rate": 5.259466448329488e-06, + "loss": 2.0886, + "step": 12403 + }, + { + "epoch": 2.52, + "learning_rate": 5.257945606266425e-06, + "loss": 2.1131, + "step": 12404 + }, + { + "epoch": 2.52, + "learning_rate": 5.2564249056915704e-06, + "loss": 2.0807, + "step": 12405 + }, + { + "epoch": 2.52, + "learning_rate": 5.254904346650308e-06, + "loss": 2.1288, + "step": 12406 + }, + { + "epoch": 2.52, + "learning_rate": 5.253383929187992e-06, + "loss": 2.2022, + "step": 12407 + }, + { + "epoch": 2.52, + "learning_rate": 5.251863653350005e-06, + "loss": 2.0961, + "step": 12408 + }, + { + "epoch": 2.52, + "learning_rate": 5.250343519181691e-06, + "loss": 2.0964, + "step": 12409 + }, + { + "epoch": 2.52, + "learning_rate": 5.248823526728419e-06, + "loss": 2.1353, + "step": 12410 + }, + { + "epoch": 2.52, + "learning_rate": 5.247303676035529e-06, + "loss": 2.0056, + "step": 12411 + }, + { + "epoch": 2.52, + "learning_rate": 5.2457839671483805e-06, + "loss": 2.1032, + "step": 12412 + }, + { + "epoch": 2.52, + "learning_rate": 5.244264400112307e-06, + "loss": 2.1182, + "step": 12413 + }, + { + "epoch": 2.52, + "learning_rate": 5.242744974972652e-06, + "loss": 2.0507, + "step": 12414 + }, + { + "epoch": 2.52, + "learning_rate": 5.241225691774754e-06, + "loss": 2.1352, + "step": 12415 + }, + { + "epoch": 2.52, + "learning_rate": 5.239706550563937e-06, + "loss": 2.155, + "step": 12416 + }, + { + "epoch": 2.52, + "learning_rate": 5.23818755138553e-06, + "loss": 2.0324, + "step": 12417 + }, + { + "epoch": 2.52, + "learning_rate": 5.236668694284856e-06, + "loss": 2.1437, + "step": 12418 + }, + { + "epoch": 2.52, + "learning_rate": 5.235149979307234e-06, + "loss": 2.0526, + "step": 12419 + }, + { + "epoch": 2.52, + "learning_rate": 5.2336314064979766e-06, + "loss": 2.0785, + "step": 12420 + }, + { + "epoch": 2.52, + "learning_rate": 5.23211297590239e-06, + "loss": 2.2063, + "step": 12421 + }, + { + "epoch": 2.52, + "learning_rate": 5.230594687565784e-06, + "loss": 2.1185, + "step": 12422 + }, + { + "epoch": 2.52, + "learning_rate": 5.229076541533461e-06, + "loss": 2.1531, + "step": 12423 + }, + { + "epoch": 2.52, + "learning_rate": 5.227558537850712e-06, + "loss": 2.1234, + "step": 12424 + }, + { + "epoch": 2.52, + "learning_rate": 5.226040676562836e-06, + "loss": 2.0943, + "step": 12425 + }, + { + "epoch": 2.52, + "learning_rate": 5.224522957715109e-06, + "loss": 2.0707, + "step": 12426 + }, + { + "epoch": 2.52, + "learning_rate": 5.223005381352832e-06, + "loss": 2.1513, + "step": 12427 + }, + { + "epoch": 2.52, + "learning_rate": 5.221487947521273e-06, + "loss": 2.1421, + "step": 12428 + }, + { + "epoch": 2.52, + "learning_rate": 5.219970656265716e-06, + "loss": 2.1685, + "step": 12429 + }, + { + "epoch": 2.52, + "learning_rate": 5.2184535076314225e-06, + "loss": 2.1025, + "step": 12430 + }, + { + "epoch": 2.52, + "learning_rate": 5.216936501663663e-06, + "loss": 2.0789, + "step": 12431 + }, + { + "epoch": 2.52, + "learning_rate": 5.215419638407704e-06, + "loss": 2.2088, + "step": 12432 + }, + { + "epoch": 2.52, + "learning_rate": 5.213902917908804e-06, + "loss": 2.1607, + "step": 12433 + }, + { + "epoch": 2.52, + "learning_rate": 5.212386340212211e-06, + "loss": 2.1202, + "step": 12434 + }, + { + "epoch": 2.52, + "learning_rate": 5.210869905363178e-06, + "loss": 2.1411, + "step": 12435 + }, + { + "epoch": 2.52, + "learning_rate": 5.209353613406953e-06, + "loss": 2.119, + "step": 12436 + }, + { + "epoch": 2.52, + "learning_rate": 5.20783746438878e-06, + "loss": 2.1251, + "step": 12437 + }, + { + "epoch": 2.52, + "learning_rate": 5.206321458353888e-06, + "loss": 2.0732, + "step": 12438 + }, + { + "epoch": 2.52, + "learning_rate": 5.204805595347514e-06, + "loss": 2.1173, + "step": 12439 + }, + { + "epoch": 2.53, + "learning_rate": 5.203289875414891e-06, + "loss": 2.1472, + "step": 12440 + }, + { + "epoch": 2.53, + "learning_rate": 5.201774298601235e-06, + "loss": 2.041, + "step": 12441 + }, + { + "epoch": 2.53, + "learning_rate": 5.20025886495177e-06, + "loss": 2.1398, + "step": 12442 + }, + { + "epoch": 2.53, + "learning_rate": 5.1987435745117135e-06, + "loss": 2.2152, + "step": 12443 + }, + { + "epoch": 2.53, + "learning_rate": 5.197228427326278e-06, + "loss": 2.2306, + "step": 12444 + }, + { + "epoch": 2.53, + "learning_rate": 5.195713423440665e-06, + "loss": 2.0704, + "step": 12445 + }, + { + "epoch": 2.53, + "learning_rate": 5.19419856290008e-06, + "loss": 2.1256, + "step": 12446 + }, + { + "epoch": 2.53, + "learning_rate": 5.192683845749724e-06, + "loss": 2.1216, + "step": 12447 + }, + { + "epoch": 2.53, + "learning_rate": 5.191169272034795e-06, + "loss": 2.139, + "step": 12448 + }, + { + "epoch": 2.53, + "learning_rate": 5.189654841800474e-06, + "loss": 2.0857, + "step": 12449 + }, + { + "epoch": 2.53, + "learning_rate": 5.188140555091954e-06, + "loss": 2.0859, + "step": 12450 + }, + { + "epoch": 2.53, + "learning_rate": 5.186626411954406e-06, + "loss": 2.0335, + "step": 12451 + }, + { + "epoch": 2.53, + "learning_rate": 5.185112412433025e-06, + "loss": 2.1854, + "step": 12452 + }, + { + "epoch": 2.53, + "learning_rate": 5.183598556572969e-06, + "loss": 2.0874, + "step": 12453 + }, + { + "epoch": 2.53, + "learning_rate": 5.182084844419416e-06, + "loss": 2.0387, + "step": 12454 + }, + { + "epoch": 2.53, + "learning_rate": 5.180571276017523e-06, + "loss": 2.1791, + "step": 12455 + }, + { + "epoch": 2.53, + "learning_rate": 5.179057851412455e-06, + "loss": 2.0966, + "step": 12456 + }, + { + "epoch": 2.53, + "learning_rate": 5.177544570649364e-06, + "loss": 2.0764, + "step": 12457 + }, + { + "epoch": 2.53, + "learning_rate": 5.176031433773411e-06, + "loss": 2.1099, + "step": 12458 + }, + { + "epoch": 2.53, + "learning_rate": 5.174518440829731e-06, + "loss": 2.0823, + "step": 12459 + }, + { + "epoch": 2.53, + "learning_rate": 5.173005591863474e-06, + "loss": 2.066, + "step": 12460 + }, + { + "epoch": 2.53, + "learning_rate": 5.1714928869197755e-06, + "loss": 2.1861, + "step": 12461 + }, + { + "epoch": 2.53, + "learning_rate": 5.169980326043778e-06, + "loss": 2.0824, + "step": 12462 + }, + { + "epoch": 2.53, + "learning_rate": 5.168467909280599e-06, + "loss": 2.1217, + "step": 12463 + }, + { + "epoch": 2.53, + "learning_rate": 5.166955636675373e-06, + "loss": 2.1786, + "step": 12464 + }, + { + "epoch": 2.53, + "learning_rate": 5.165443508273223e-06, + "loss": 2.1057, + "step": 12465 + }, + { + "epoch": 2.53, + "learning_rate": 5.163931524119252e-06, + "loss": 2.1536, + "step": 12466 + }, + { + "epoch": 2.53, + "learning_rate": 5.162419684258594e-06, + "loss": 2.119, + "step": 12467 + }, + { + "epoch": 2.53, + "learning_rate": 5.1609079887363424e-06, + "loss": 2.0549, + "step": 12468 + }, + { + "epoch": 2.53, + "learning_rate": 5.15939643759761e-06, + "loss": 2.0892, + "step": 12469 + }, + { + "epoch": 2.53, + "learning_rate": 5.157885030887488e-06, + "loss": 2.0624, + "step": 12470 + }, + { + "epoch": 2.53, + "learning_rate": 5.1563737686510795e-06, + "loss": 2.2365, + "step": 12471 + }, + { + "epoch": 2.53, + "learning_rate": 5.154862650933472e-06, + "loss": 2.0985, + "step": 12472 + }, + { + "epoch": 2.53, + "learning_rate": 5.153351677779759e-06, + "loss": 2.0661, + "step": 12473 + }, + { + "epoch": 2.53, + "learning_rate": 5.1518408492350125e-06, + "loss": 2.1178, + "step": 12474 + }, + { + "epoch": 2.53, + "learning_rate": 5.150330165344317e-06, + "loss": 2.1092, + "step": 12475 + }, + { + "epoch": 2.53, + "learning_rate": 5.1488196261527476e-06, + "loss": 2.1264, + "step": 12476 + }, + { + "epoch": 2.53, + "learning_rate": 5.147309231705376e-06, + "loss": 2.1093, + "step": 12477 + }, + { + "epoch": 2.53, + "learning_rate": 5.145798982047261e-06, + "loss": 2.0991, + "step": 12478 + }, + { + "epoch": 2.53, + "learning_rate": 5.144288877223468e-06, + "loss": 2.0801, + "step": 12479 + }, + { + "epoch": 2.53, + "learning_rate": 5.1427789172790565e-06, + "loss": 2.1017, + "step": 12480 + }, + { + "epoch": 2.53, + "learning_rate": 5.141269102259071e-06, + "loss": 2.0971, + "step": 12481 + }, + { + "epoch": 2.53, + "learning_rate": 5.1397594322085655e-06, + "loss": 2.0678, + "step": 12482 + }, + { + "epoch": 2.53, + "learning_rate": 5.138249907172581e-06, + "loss": 2.1749, + "step": 12483 + }, + { + "epoch": 2.53, + "learning_rate": 5.136740527196165e-06, + "loss": 2.1687, + "step": 12484 + }, + { + "epoch": 2.53, + "learning_rate": 5.135231292324341e-06, + "loss": 2.2049, + "step": 12485 + }, + { + "epoch": 2.53, + "learning_rate": 5.133722202602147e-06, + "loss": 2.1533, + "step": 12486 + }, + { + "epoch": 2.53, + "learning_rate": 5.132213258074608e-06, + "loss": 2.1373, + "step": 12487 + }, + { + "epoch": 2.53, + "learning_rate": 5.1307044587867485e-06, + "loss": 2.1345, + "step": 12488 + }, + { + "epoch": 2.54, + "learning_rate": 5.129195804783581e-06, + "loss": 2.0039, + "step": 12489 + }, + { + "epoch": 2.54, + "learning_rate": 5.127687296110127e-06, + "loss": 2.2234, + "step": 12490 + }, + { + "epoch": 2.54, + "learning_rate": 5.126178932811381e-06, + "loss": 2.15, + "step": 12491 + }, + { + "epoch": 2.54, + "learning_rate": 5.124670714932367e-06, + "loss": 2.0861, + "step": 12492 + }, + { + "epoch": 2.54, + "learning_rate": 5.123162642518073e-06, + "loss": 2.2229, + "step": 12493 + }, + { + "epoch": 2.54, + "learning_rate": 5.121654715613502e-06, + "loss": 2.1195, + "step": 12494 + }, + { + "epoch": 2.54, + "learning_rate": 5.120146934263638e-06, + "loss": 2.1851, + "step": 12495 + }, + { + "epoch": 2.54, + "learning_rate": 5.1186392985134735e-06, + "loss": 2.2141, + "step": 12496 + }, + { + "epoch": 2.54, + "learning_rate": 5.117131808407991e-06, + "loss": 2.1071, + "step": 12497 + }, + { + "epoch": 2.54, + "learning_rate": 5.115624463992173e-06, + "loss": 2.015, + "step": 12498 + }, + { + "epoch": 2.54, + "learning_rate": 5.1141172653109874e-06, + "loss": 2.1795, + "step": 12499 + }, + { + "epoch": 2.54, + "learning_rate": 5.112610212409407e-06, + "loss": 2.2137, + "step": 12500 + }, + { + "epoch": 2.54, + "learning_rate": 5.111103305332399e-06, + "loss": 2.1098, + "step": 12501 + }, + { + "epoch": 2.54, + "learning_rate": 5.109596544124926e-06, + "loss": 2.1077, + "step": 12502 + }, + { + "epoch": 2.54, + "learning_rate": 5.10808992883194e-06, + "loss": 2.0678, + "step": 12503 + }, + { + "epoch": 2.54, + "learning_rate": 5.106583459498396e-06, + "loss": 2.0927, + "step": 12504 + }, + { + "epoch": 2.54, + "learning_rate": 5.105077136169243e-06, + "loss": 2.1339, + "step": 12505 + }, + { + "epoch": 2.54, + "learning_rate": 5.10357095888943e-06, + "loss": 2.0869, + "step": 12506 + }, + { + "epoch": 2.54, + "learning_rate": 5.102064927703887e-06, + "loss": 2.1724, + "step": 12507 + }, + { + "epoch": 2.54, + "learning_rate": 5.1005590426575544e-06, + "loss": 2.127, + "step": 12508 + }, + { + "epoch": 2.54, + "learning_rate": 5.099053303795367e-06, + "loss": 2.0546, + "step": 12509 + }, + { + "epoch": 2.54, + "learning_rate": 5.097547711162243e-06, + "loss": 2.1734, + "step": 12510 + }, + { + "epoch": 2.54, + "learning_rate": 5.096042264803109e-06, + "loss": 2.0637, + "step": 12511 + }, + { + "epoch": 2.54, + "learning_rate": 5.094536964762882e-06, + "loss": 2.1134, + "step": 12512 + }, + { + "epoch": 2.54, + "learning_rate": 5.093031811086479e-06, + "loss": 2.1331, + "step": 12513 + }, + { + "epoch": 2.54, + "learning_rate": 5.091526803818805e-06, + "loss": 2.0599, + "step": 12514 + }, + { + "epoch": 2.54, + "learning_rate": 5.090021943004765e-06, + "loss": 2.1628, + "step": 12515 + }, + { + "epoch": 2.54, + "learning_rate": 5.08851722868926e-06, + "loss": 2.1431, + "step": 12516 + }, + { + "epoch": 2.54, + "learning_rate": 5.08701266091719e-06, + "loss": 2.1005, + "step": 12517 + }, + { + "epoch": 2.54, + "learning_rate": 5.08550823973344e-06, + "loss": 2.0935, + "step": 12518 + }, + { + "epoch": 2.54, + "learning_rate": 5.0840039651829e-06, + "loss": 2.1337, + "step": 12519 + }, + { + "epoch": 2.54, + "learning_rate": 5.082499837310453e-06, + "loss": 2.235, + "step": 12520 + }, + { + "epoch": 2.54, + "learning_rate": 5.0809958561609815e-06, + "loss": 2.1178, + "step": 12521 + }, + { + "epoch": 2.54, + "learning_rate": 5.079492021779352e-06, + "loss": 2.0867, + "step": 12522 + }, + { + "epoch": 2.54, + "learning_rate": 5.077988334210438e-06, + "loss": 2.1852, + "step": 12523 + }, + { + "epoch": 2.54, + "learning_rate": 5.076484793499111e-06, + "loss": 2.0974, + "step": 12524 + }, + { + "epoch": 2.54, + "learning_rate": 5.074981399690219e-06, + "loss": 2.1195, + "step": 12525 + }, + { + "epoch": 2.54, + "learning_rate": 5.073478152828626e-06, + "loss": 2.0408, + "step": 12526 + }, + { + "epoch": 2.54, + "learning_rate": 5.071975052959186e-06, + "loss": 2.1835, + "step": 12527 + }, + { + "epoch": 2.54, + "learning_rate": 5.070472100126747e-06, + "loss": 2.1056, + "step": 12528 + }, + { + "epoch": 2.54, + "learning_rate": 5.068969294376147e-06, + "loss": 2.1273, + "step": 12529 + }, + { + "epoch": 2.54, + "learning_rate": 5.0674666357522275e-06, + "loss": 2.159, + "step": 12530 + }, + { + "epoch": 2.54, + "learning_rate": 5.0659641242998236e-06, + "loss": 2.1254, + "step": 12531 + }, + { + "epoch": 2.54, + "learning_rate": 5.06446176006377e-06, + "loss": 2.1033, + "step": 12532 + }, + { + "epoch": 2.54, + "learning_rate": 5.062959543088886e-06, + "loss": 2.116, + "step": 12533 + }, + { + "epoch": 2.54, + "learning_rate": 5.061457473419997e-06, + "loss": 2.0498, + "step": 12534 + }, + { + "epoch": 2.54, + "learning_rate": 5.059955551101914e-06, + "loss": 2.0649, + "step": 12535 + }, + { + "epoch": 2.54, + "learning_rate": 5.058453776179459e-06, + "loss": 2.1963, + "step": 12536 + }, + { + "epoch": 2.54, + "learning_rate": 5.056952148697434e-06, + "loss": 2.1364, + "step": 12537 + }, + { + "epoch": 2.55, + "learning_rate": 5.055450668700647e-06, + "loss": 2.1179, + "step": 12538 + }, + { + "epoch": 2.55, + "learning_rate": 5.053949336233892e-06, + "loss": 2.1044, + "step": 12539 + }, + { + "epoch": 2.55, + "learning_rate": 5.0524481513419675e-06, + "loss": 2.0842, + "step": 12540 + }, + { + "epoch": 2.55, + "learning_rate": 5.050947114069664e-06, + "loss": 2.1519, + "step": 12541 + }, + { + "epoch": 2.55, + "learning_rate": 5.049446224461771e-06, + "loss": 2.1761, + "step": 12542 + }, + { + "epoch": 2.55, + "learning_rate": 5.0479454825630635e-06, + "loss": 2.1461, + "step": 12543 + }, + { + "epoch": 2.55, + "learning_rate": 5.046444888418322e-06, + "loss": 2.0993, + "step": 12544 + }, + { + "epoch": 2.55, + "learning_rate": 5.044944442072321e-06, + "loss": 2.1709, + "step": 12545 + }, + { + "epoch": 2.55, + "learning_rate": 5.043444143569833e-06, + "loss": 2.0702, + "step": 12546 + }, + { + "epoch": 2.55, + "learning_rate": 5.0419439929556135e-06, + "loss": 2.175, + "step": 12547 + }, + { + "epoch": 2.55, + "learning_rate": 5.0404439902744266e-06, + "loss": 2.1522, + "step": 12548 + }, + { + "epoch": 2.55, + "learning_rate": 5.03894413557103e-06, + "loss": 2.1265, + "step": 12549 + }, + { + "epoch": 2.55, + "learning_rate": 5.037444428890169e-06, + "loss": 2.057, + "step": 12550 + }, + { + "epoch": 2.55, + "learning_rate": 5.035944870276594e-06, + "loss": 2.0988, + "step": 12551 + }, + { + "epoch": 2.55, + "learning_rate": 5.034445459775046e-06, + "loss": 2.112, + "step": 12552 + }, + { + "epoch": 2.55, + "learning_rate": 5.0329461974302685e-06, + "loss": 2.1271, + "step": 12553 + }, + { + "epoch": 2.55, + "learning_rate": 5.031447083286985e-06, + "loss": 2.12, + "step": 12554 + }, + { + "epoch": 2.55, + "learning_rate": 5.02994811738993e-06, + "loss": 1.9709, + "step": 12555 + }, + { + "epoch": 2.55, + "learning_rate": 5.028449299783827e-06, + "loss": 2.0894, + "step": 12556 + }, + { + "epoch": 2.55, + "learning_rate": 5.0269506305133995e-06, + "loss": 2.0299, + "step": 12557 + }, + { + "epoch": 2.55, + "learning_rate": 5.025452109623356e-06, + "loss": 2.1303, + "step": 12558 + }, + { + "epoch": 2.55, + "learning_rate": 5.023953737158418e-06, + "loss": 2.1611, + "step": 12559 + }, + { + "epoch": 2.55, + "learning_rate": 5.022455513163275e-06, + "loss": 2.1693, + "step": 12560 + }, + { + "epoch": 2.55, + "learning_rate": 5.02095743768265e-06, + "loss": 2.1547, + "step": 12561 + }, + { + "epoch": 2.55, + "learning_rate": 5.0194595107612275e-06, + "loss": 2.147, + "step": 12562 + }, + { + "epoch": 2.55, + "learning_rate": 5.017961732443708e-06, + "loss": 2.1589, + "step": 12563 + }, + { + "epoch": 2.55, + "learning_rate": 5.016464102774774e-06, + "loss": 2.1151, + "step": 12564 + }, + { + "epoch": 2.55, + "learning_rate": 5.014966621799113e-06, + "loss": 2.1838, + "step": 12565 + }, + { + "epoch": 2.55, + "learning_rate": 5.013469289561406e-06, + "loss": 2.0801, + "step": 12566 + }, + { + "epoch": 2.55, + "learning_rate": 5.011972106106329e-06, + "loss": 2.1726, + "step": 12567 + }, + { + "epoch": 2.55, + "learning_rate": 5.0104750714785554e-06, + "loss": 2.1518, + "step": 12568 + }, + { + "epoch": 2.55, + "learning_rate": 5.008978185722746e-06, + "loss": 2.0485, + "step": 12569 + }, + { + "epoch": 2.55, + "learning_rate": 5.007481448883567e-06, + "loss": 2.0999, + "step": 12570 + }, + { + "epoch": 2.55, + "learning_rate": 5.005984861005675e-06, + "loss": 2.0757, + "step": 12571 + }, + { + "epoch": 2.55, + "learning_rate": 5.0044884221337285e-06, + "loss": 2.1013, + "step": 12572 + }, + { + "epoch": 2.55, + "learning_rate": 5.00299213231237e-06, + "loss": 2.1208, + "step": 12573 + }, + { + "epoch": 2.55, + "learning_rate": 5.001495991586251e-06, + "loss": 2.2186, + "step": 12574 + }, + { + "epoch": 2.55, + "learning_rate": 4.999999999999998e-06, + "loss": 2.1065, + "step": 12575 + }, + { + "epoch": 2.55, + "learning_rate": 4.9985041575982655e-06, + "loss": 2.1829, + "step": 12576 + }, + { + "epoch": 2.55, + "learning_rate": 4.997008464425671e-06, + "loss": 2.1304, + "step": 12577 + }, + { + "epoch": 2.55, + "learning_rate": 4.99551292052685e-06, + "loss": 2.143, + "step": 12578 + }, + { + "epoch": 2.55, + "learning_rate": 4.994017525946418e-06, + "loss": 2.0695, + "step": 12579 + }, + { + "epoch": 2.55, + "learning_rate": 4.992522280728995e-06, + "loss": 2.1348, + "step": 12580 + }, + { + "epoch": 2.55, + "learning_rate": 4.991027184919195e-06, + "loss": 2.112, + "step": 12581 + }, + { + "epoch": 2.55, + "learning_rate": 4.989532238561632e-06, + "loss": 2.1039, + "step": 12582 + }, + { + "epoch": 2.55, + "learning_rate": 4.988037441700901e-06, + "loss": 1.9781, + "step": 12583 + }, + { + "epoch": 2.55, + "learning_rate": 4.986542794381607e-06, + "loss": 2.1128, + "step": 12584 + }, + { + "epoch": 2.55, + "learning_rate": 4.985048296648346e-06, + "loss": 2.0747, + "step": 12585 + }, + { + "epoch": 2.55, + "learning_rate": 4.983553948545713e-06, + "loss": 2.0236, + "step": 12586 + }, + { + "epoch": 2.55, + "learning_rate": 4.9820597501182865e-06, + "loss": 2.0996, + "step": 12587 + }, + { + "epoch": 2.56, + "learning_rate": 4.980565701410652e-06, + "loss": 2.1013, + "step": 12588 + }, + { + "epoch": 2.56, + "learning_rate": 4.979071802467393e-06, + "loss": 2.1336, + "step": 12589 + }, + { + "epoch": 2.56, + "learning_rate": 4.977578053333074e-06, + "loss": 2.1017, + "step": 12590 + }, + { + "epoch": 2.56, + "learning_rate": 4.976084454052268e-06, + "loss": 2.1629, + "step": 12591 + }, + { + "epoch": 2.56, + "learning_rate": 4.974591004669538e-06, + "loss": 2.1068, + "step": 12592 + }, + { + "epoch": 2.56, + "learning_rate": 4.97309770522945e-06, + "loss": 2.1561, + "step": 12593 + }, + { + "epoch": 2.56, + "learning_rate": 4.971604555776549e-06, + "loss": 2.0893, + "step": 12594 + }, + { + "epoch": 2.56, + "learning_rate": 4.970111556355392e-06, + "loss": 2.0353, + "step": 12595 + }, + { + "epoch": 2.56, + "learning_rate": 4.9686187070105264e-06, + "loss": 2.1732, + "step": 12596 + }, + { + "epoch": 2.56, + "learning_rate": 4.967126007786495e-06, + "loss": 2.1064, + "step": 12597 + }, + { + "epoch": 2.56, + "learning_rate": 4.9656334587278296e-06, + "loss": 2.1741, + "step": 12598 + }, + { + "epoch": 2.56, + "learning_rate": 4.9641410598790665e-06, + "loss": 2.1884, + "step": 12599 + }, + { + "epoch": 2.56, + "learning_rate": 4.962648811284735e-06, + "loss": 2.1264, + "step": 12600 + }, + { + "epoch": 2.56, + "learning_rate": 4.961156712989361e-06, + "loss": 2.0975, + "step": 12601 + }, + { + "epoch": 2.56, + "learning_rate": 4.959664765037459e-06, + "loss": 2.1874, + "step": 12602 + }, + { + "epoch": 2.56, + "learning_rate": 4.95817296747355e-06, + "loss": 2.0732, + "step": 12603 + }, + { + "epoch": 2.56, + "learning_rate": 4.956681320342135e-06, + "loss": 2.1093, + "step": 12604 + }, + { + "epoch": 2.56, + "learning_rate": 4.955189823687733e-06, + "loss": 2.0438, + "step": 12605 + }, + { + "epoch": 2.56, + "learning_rate": 4.953698477554836e-06, + "loss": 2.1148, + "step": 12606 + }, + { + "epoch": 2.56, + "learning_rate": 4.952207281987947e-06, + "loss": 2.0727, + "step": 12607 + }, + { + "epoch": 2.56, + "learning_rate": 4.950716237031553e-06, + "loss": 2.1167, + "step": 12608 + }, + { + "epoch": 2.56, + "learning_rate": 4.949225342730145e-06, + "loss": 2.1428, + "step": 12609 + }, + { + "epoch": 2.56, + "learning_rate": 4.947734599128207e-06, + "loss": 2.138, + "step": 12610 + }, + { + "epoch": 2.56, + "learning_rate": 4.946244006270221e-06, + "loss": 2.1307, + "step": 12611 + }, + { + "epoch": 2.56, + "learning_rate": 4.944753564200656e-06, + "loss": 2.1907, + "step": 12612 + }, + { + "epoch": 2.56, + "learning_rate": 4.943263272963984e-06, + "loss": 2.1667, + "step": 12613 + }, + { + "epoch": 2.56, + "learning_rate": 4.941773132604672e-06, + "loss": 2.1138, + "step": 12614 + }, + { + "epoch": 2.56, + "learning_rate": 4.9402831431671834e-06, + "loss": 2.1541, + "step": 12615 + }, + { + "epoch": 2.56, + "learning_rate": 4.938793304695969e-06, + "loss": 2.1094, + "step": 12616 + }, + { + "epoch": 2.56, + "learning_rate": 4.9373036172354845e-06, + "loss": 2.1511, + "step": 12617 + }, + { + "epoch": 2.56, + "learning_rate": 4.93581408083018e-06, + "loss": 2.1741, + "step": 12618 + }, + { + "epoch": 2.56, + "learning_rate": 4.934324695524489e-06, + "loss": 2.1287, + "step": 12619 + }, + { + "epoch": 2.56, + "learning_rate": 4.932835461362865e-06, + "loss": 2.0616, + "step": 12620 + }, + { + "epoch": 2.56, + "learning_rate": 4.93134637838973e-06, + "loss": 2.0591, + "step": 12621 + }, + { + "epoch": 2.56, + "learning_rate": 4.929857446649521e-06, + "loss": 2.0334, + "step": 12622 + }, + { + "epoch": 2.56, + "learning_rate": 4.928368666186656e-06, + "loss": 2.1062, + "step": 12623 + }, + { + "epoch": 2.56, + "learning_rate": 4.926880037045559e-06, + "loss": 2.1771, + "step": 12624 + }, + { + "epoch": 2.56, + "learning_rate": 4.925391559270647e-06, + "loss": 2.1283, + "step": 12625 + }, + { + "epoch": 2.56, + "learning_rate": 4.923903232906335e-06, + "loss": 2.1235, + "step": 12626 + }, + { + "epoch": 2.56, + "learning_rate": 4.922415057997022e-06, + "loss": 2.1378, + "step": 12627 + }, + { + "epoch": 2.56, + "learning_rate": 4.920927034587115e-06, + "loss": 2.1711, + "step": 12628 + }, + { + "epoch": 2.56, + "learning_rate": 4.919439162721012e-06, + "loss": 2.0346, + "step": 12629 + }, + { + "epoch": 2.56, + "learning_rate": 4.91795144244311e-06, + "loss": 2.1053, + "step": 12630 + }, + { + "epoch": 2.56, + "learning_rate": 4.916463873797789e-06, + "loss": 2.1404, + "step": 12631 + }, + { + "epoch": 2.56, + "learning_rate": 4.914976456829439e-06, + "loss": 2.1035, + "step": 12632 + }, + { + "epoch": 2.56, + "learning_rate": 4.9134891915824435e-06, + "loss": 2.1021, + "step": 12633 + }, + { + "epoch": 2.56, + "learning_rate": 4.91200207810117e-06, + "loss": 2.1888, + "step": 12634 + }, + { + "epoch": 2.56, + "learning_rate": 4.910515116429994e-06, + "loss": 2.157, + "step": 12635 + }, + { + "epoch": 2.56, + "learning_rate": 4.9090283066132806e-06, + "loss": 2.1047, + "step": 12636 + }, + { + "epoch": 2.57, + "learning_rate": 4.907541648695395e-06, + "loss": 2.0754, + "step": 12637 + }, + { + "epoch": 2.57, + "learning_rate": 4.906055142720688e-06, + "loss": 2.2321, + "step": 12638 + }, + { + "epoch": 2.57, + "learning_rate": 4.904568788733517e-06, + "loss": 2.0813, + "step": 12639 + }, + { + "epoch": 2.57, + "learning_rate": 4.9030825867782275e-06, + "loss": 2.07, + "step": 12640 + }, + { + "epoch": 2.57, + "learning_rate": 4.9015965368991695e-06, + "loss": 2.134, + "step": 12641 + }, + { + "epoch": 2.57, + "learning_rate": 4.900110639140673e-06, + "loss": 2.0774, + "step": 12642 + }, + { + "epoch": 2.57, + "learning_rate": 4.89862489354708e-06, + "loss": 2.1709, + "step": 12643 + }, + { + "epoch": 2.57, + "learning_rate": 4.897139300162711e-06, + "loss": 2.192, + "step": 12644 + }, + { + "epoch": 2.57, + "learning_rate": 4.895653859031906e-06, + "loss": 2.1168, + "step": 12645 + }, + { + "epoch": 2.57, + "learning_rate": 4.894168570198973e-06, + "loss": 2.208, + "step": 12646 + }, + { + "epoch": 2.57, + "learning_rate": 4.892683433708237e-06, + "loss": 2.1548, + "step": 12647 + }, + { + "epoch": 2.57, + "learning_rate": 4.891198449604004e-06, + "loss": 2.1262, + "step": 12648 + }, + { + "epoch": 2.57, + "learning_rate": 4.8897136179305814e-06, + "loss": 2.1675, + "step": 12649 + }, + { + "epoch": 2.57, + "learning_rate": 4.888228938732276e-06, + "loss": 2.1159, + "step": 12650 + }, + { + "epoch": 2.57, + "learning_rate": 4.886744412053387e-06, + "loss": 1.9938, + "step": 12651 + }, + { + "epoch": 2.57, + "learning_rate": 4.885260037938201e-06, + "loss": 2.1976, + "step": 12652 + }, + { + "epoch": 2.57, + "learning_rate": 4.883775816431012e-06, + "loss": 2.1063, + "step": 12653 + }, + { + "epoch": 2.57, + "learning_rate": 4.882291747576103e-06, + "loss": 2.1728, + "step": 12654 + }, + { + "epoch": 2.57, + "learning_rate": 4.88080783141776e-06, + "loss": 2.1501, + "step": 12655 + }, + { + "epoch": 2.57, + "learning_rate": 4.879324068000249e-06, + "loss": 2.0285, + "step": 12656 + }, + { + "epoch": 2.57, + "learning_rate": 4.877840457367845e-06, + "loss": 2.1326, + "step": 12657 + }, + { + "epoch": 2.57, + "learning_rate": 4.876356999564819e-06, + "loss": 2.1102, + "step": 12658 + }, + { + "epoch": 2.57, + "learning_rate": 4.874873694635425e-06, + "loss": 2.1399, + "step": 12659 + }, + { + "epoch": 2.57, + "learning_rate": 4.873390542623922e-06, + "loss": 2.1383, + "step": 12660 + }, + { + "epoch": 2.57, + "learning_rate": 4.871907543574565e-06, + "loss": 2.1361, + "step": 12661 + }, + { + "epoch": 2.57, + "learning_rate": 4.870424697531607e-06, + "loss": 2.1967, + "step": 12662 + }, + { + "epoch": 2.57, + "learning_rate": 4.86894200453928e-06, + "loss": 2.1436, + "step": 12663 + }, + { + "epoch": 2.57, + "learning_rate": 4.867459464641831e-06, + "loss": 2.11, + "step": 12664 + }, + { + "epoch": 2.57, + "learning_rate": 4.86597707788349e-06, + "loss": 2.1503, + "step": 12665 + }, + { + "epoch": 2.57, + "learning_rate": 4.864494844308494e-06, + "loss": 2.0548, + "step": 12666 + }, + { + "epoch": 2.57, + "learning_rate": 4.863012763961059e-06, + "loss": 2.1319, + "step": 12667 + }, + { + "epoch": 2.57, + "learning_rate": 4.861530836885414e-06, + "loss": 2.1204, + "step": 12668 + }, + { + "epoch": 2.57, + "learning_rate": 4.860049063125765e-06, + "loss": 2.1601, + "step": 12669 + }, + { + "epoch": 2.57, + "learning_rate": 4.8585674427263365e-06, + "loss": 2.0184, + "step": 12670 + }, + { + "epoch": 2.57, + "learning_rate": 4.857085975731324e-06, + "loss": 2.1251, + "step": 12671 + }, + { + "epoch": 2.57, + "learning_rate": 4.855604662184935e-06, + "loss": 2.1246, + "step": 12672 + }, + { + "epoch": 2.57, + "learning_rate": 4.854123502131371e-06, + "loss": 2.1094, + "step": 12673 + }, + { + "epoch": 2.57, + "learning_rate": 4.852642495614818e-06, + "loss": 2.1735, + "step": 12674 + }, + { + "epoch": 2.57, + "learning_rate": 4.851161642679466e-06, + "loss": 2.165, + "step": 12675 + }, + { + "epoch": 2.57, + "learning_rate": 4.849680943369502e-06, + "loss": 2.0945, + "step": 12676 + }, + { + "epoch": 2.57, + "learning_rate": 4.848200397729108e-06, + "loss": 2.088, + "step": 12677 + }, + { + "epoch": 2.57, + "learning_rate": 4.846720005802452e-06, + "loss": 2.093, + "step": 12678 + }, + { + "epoch": 2.57, + "learning_rate": 4.8452397676337074e-06, + "loss": 2.1232, + "step": 12679 + }, + { + "epoch": 2.57, + "learning_rate": 4.843759683267039e-06, + "loss": 2.1246, + "step": 12680 + }, + { + "epoch": 2.57, + "learning_rate": 4.8422797527466126e-06, + "loss": 2.1704, + "step": 12681 + }, + { + "epoch": 2.57, + "learning_rate": 4.840799976116578e-06, + "loss": 2.0907, + "step": 12682 + }, + { + "epoch": 2.57, + "learning_rate": 4.839320353421091e-06, + "loss": 2.0773, + "step": 12683 + }, + { + "epoch": 2.57, + "learning_rate": 4.837840884704297e-06, + "loss": 2.0551, + "step": 12684 + }, + { + "epoch": 2.57, + "learning_rate": 4.836361570010344e-06, + "loss": 2.1747, + "step": 12685 + }, + { + "epoch": 2.58, + "learning_rate": 4.834882409383362e-06, + "loss": 2.147, + "step": 12686 + }, + { + "epoch": 2.58, + "learning_rate": 4.8334034028674935e-06, + "loss": 2.0881, + "step": 12687 + }, + { + "epoch": 2.58, + "learning_rate": 4.831924550506856e-06, + "loss": 2.1256, + "step": 12688 + }, + { + "epoch": 2.58, + "learning_rate": 4.830445852345586e-06, + "loss": 2.1127, + "step": 12689 + }, + { + "epoch": 2.58, + "learning_rate": 4.828967308427795e-06, + "loss": 2.1278, + "step": 12690 + }, + { + "epoch": 2.58, + "learning_rate": 4.827488918797605e-06, + "loss": 2.0806, + "step": 12691 + }, + { + "epoch": 2.58, + "learning_rate": 4.826010683499118e-06, + "loss": 2.0671, + "step": 12692 + }, + { + "epoch": 2.58, + "learning_rate": 4.824532602576445e-06, + "loss": 2.183, + "step": 12693 + }, + { + "epoch": 2.58, + "learning_rate": 4.823054676073688e-06, + "loss": 2.1659, + "step": 12694 + }, + { + "epoch": 2.58, + "learning_rate": 4.821576904034945e-06, + "loss": 2.088, + "step": 12695 + }, + { + "epoch": 2.58, + "learning_rate": 4.820099286504301e-06, + "loss": 2.1245, + "step": 12696 + }, + { + "epoch": 2.58, + "learning_rate": 4.818621823525851e-06, + "loss": 2.1344, + "step": 12697 + }, + { + "epoch": 2.58, + "learning_rate": 4.8171445151436735e-06, + "loss": 2.0891, + "step": 12698 + }, + { + "epoch": 2.58, + "learning_rate": 4.815667361401852e-06, + "loss": 2.122, + "step": 12699 + }, + { + "epoch": 2.58, + "learning_rate": 4.814190362344454e-06, + "loss": 2.1366, + "step": 12700 + }, + { + "epoch": 2.58, + "learning_rate": 4.812713518015551e-06, + "loss": 2.0432, + "step": 12701 + }, + { + "epoch": 2.58, + "learning_rate": 4.811236828459211e-06, + "loss": 2.1719, + "step": 12702 + }, + { + "epoch": 2.58, + "learning_rate": 4.809760293719488e-06, + "loss": 2.0907, + "step": 12703 + }, + { + "epoch": 2.58, + "learning_rate": 4.808283913840439e-06, + "loss": 2.0271, + "step": 12704 + }, + { + "epoch": 2.58, + "learning_rate": 4.806807688866116e-06, + "loss": 2.1019, + "step": 12705 + }, + { + "epoch": 2.58, + "learning_rate": 4.805331618840567e-06, + "loss": 2.1606, + "step": 12706 + }, + { + "epoch": 2.58, + "learning_rate": 4.803855703807828e-06, + "loss": 2.086, + "step": 12707 + }, + { + "epoch": 2.58, + "learning_rate": 4.802379943811939e-06, + "loss": 2.1069, + "step": 12708 + }, + { + "epoch": 2.58, + "learning_rate": 4.800904338896931e-06, + "loss": 2.1419, + "step": 12709 + }, + { + "epoch": 2.58, + "learning_rate": 4.799428889106836e-06, + "loss": 2.1191, + "step": 12710 + }, + { + "epoch": 2.58, + "learning_rate": 4.797953594485668e-06, + "loss": 2.1273, + "step": 12711 + }, + { + "epoch": 2.58, + "learning_rate": 4.796478455077455e-06, + "loss": 2.1299, + "step": 12712 + }, + { + "epoch": 2.58, + "learning_rate": 4.795003470926198e-06, + "loss": 2.1373, + "step": 12713 + }, + { + "epoch": 2.58, + "learning_rate": 4.7935286420759215e-06, + "loss": 2.06, + "step": 12714 + }, + { + "epoch": 2.58, + "learning_rate": 4.792053968570618e-06, + "loss": 2.101, + "step": 12715 + }, + { + "epoch": 2.58, + "learning_rate": 4.790579450454296e-06, + "loss": 2.1758, + "step": 12716 + }, + { + "epoch": 2.58, + "learning_rate": 4.78910508777094e-06, + "loss": 2.2021, + "step": 12717 + }, + { + "epoch": 2.58, + "learning_rate": 4.787630880564547e-06, + "loss": 2.0463, + "step": 12718 + }, + { + "epoch": 2.58, + "learning_rate": 4.786156828879103e-06, + "loss": 2.1364, + "step": 12719 + }, + { + "epoch": 2.58, + "learning_rate": 4.7846829327585905e-06, + "loss": 2.0726, + "step": 12720 + }, + { + "epoch": 2.58, + "learning_rate": 4.78320919224698e-06, + "loss": 2.0136, + "step": 12721 + }, + { + "epoch": 2.58, + "learning_rate": 4.781735607388248e-06, + "loss": 2.1576, + "step": 12722 + }, + { + "epoch": 2.58, + "learning_rate": 4.780262178226361e-06, + "loss": 2.1111, + "step": 12723 + }, + { + "epoch": 2.58, + "learning_rate": 4.77878890480528e-06, + "loss": 2.1543, + "step": 12724 + }, + { + "epoch": 2.58, + "learning_rate": 4.777315787168968e-06, + "loss": 2.1877, + "step": 12725 + }, + { + "epoch": 2.58, + "learning_rate": 4.775842825361371e-06, + "loss": 2.1075, + "step": 12726 + }, + { + "epoch": 2.58, + "learning_rate": 4.774370019426444e-06, + "loss": 2.1091, + "step": 12727 + }, + { + "epoch": 2.58, + "learning_rate": 4.772897369408121e-06, + "loss": 2.1179, + "step": 12728 + }, + { + "epoch": 2.58, + "learning_rate": 4.771424875350356e-06, + "loss": 2.1233, + "step": 12729 + }, + { + "epoch": 2.58, + "learning_rate": 4.769952537297072e-06, + "loss": 2.1495, + "step": 12730 + }, + { + "epoch": 2.58, + "learning_rate": 4.768480355292206e-06, + "loss": 2.1986, + "step": 12731 + }, + { + "epoch": 2.58, + "learning_rate": 4.767008329379678e-06, + "loss": 2.0732, + "step": 12732 + }, + { + "epoch": 2.58, + "learning_rate": 4.765536459603411e-06, + "loss": 2.1205, + "step": 12733 + }, + { + "epoch": 2.58, + "learning_rate": 4.76406474600732e-06, + "loss": 2.1011, + "step": 12734 + }, + { + "epoch": 2.58, + "learning_rate": 4.7625931886353215e-06, + "loss": 2.1058, + "step": 12735 + }, + { + "epoch": 2.59, + "learning_rate": 4.7611217875313145e-06, + "loss": 2.1886, + "step": 12736 + }, + { + "epoch": 2.59, + "learning_rate": 4.759650542739205e-06, + "loss": 2.0725, + "step": 12737 + }, + { + "epoch": 2.59, + "learning_rate": 4.7581794543028895e-06, + "loss": 2.1817, + "step": 12738 + }, + { + "epoch": 2.59, + "learning_rate": 4.756708522266265e-06, + "loss": 2.0388, + "step": 12739 + }, + { + "epoch": 2.59, + "learning_rate": 4.755237746673213e-06, + "loss": 2.106, + "step": 12740 + }, + { + "epoch": 2.59, + "learning_rate": 4.753767127567618e-06, + "loss": 2.125, + "step": 12741 + }, + { + "epoch": 2.59, + "learning_rate": 4.7522966649933655e-06, + "loss": 2.0406, + "step": 12742 + }, + { + "epoch": 2.59, + "learning_rate": 4.7508263589943206e-06, + "loss": 2.0142, + "step": 12743 + }, + { + "epoch": 2.59, + "learning_rate": 4.749356209614356e-06, + "loss": 2.1396, + "step": 12744 + }, + { + "epoch": 2.59, + "learning_rate": 4.747886216897338e-06, + "loss": 2.2153, + "step": 12745 + }, + { + "epoch": 2.59, + "learning_rate": 4.746416380887129e-06, + "loss": 2.155, + "step": 12746 + }, + { + "epoch": 2.59, + "learning_rate": 4.744946701627578e-06, + "loss": 2.0349, + "step": 12747 + }, + { + "epoch": 2.59, + "learning_rate": 4.743477179162537e-06, + "loss": 2.1653, + "step": 12748 + }, + { + "epoch": 2.59, + "learning_rate": 4.742007813535854e-06, + "loss": 2.1644, + "step": 12749 + }, + { + "epoch": 2.59, + "learning_rate": 4.740538604791374e-06, + "loss": 2.1716, + "step": 12750 + }, + { + "epoch": 2.59, + "learning_rate": 4.7390695529729255e-06, + "loss": 2.0474, + "step": 12751 + }, + { + "epoch": 2.59, + "learning_rate": 4.737600658124349e-06, + "loss": 2.0574, + "step": 12752 + }, + { + "epoch": 2.59, + "learning_rate": 4.73613192028946e-06, + "loss": 2.1281, + "step": 12753 + }, + { + "epoch": 2.59, + "learning_rate": 4.734663339512096e-06, + "loss": 2.1213, + "step": 12754 + }, + { + "epoch": 2.59, + "learning_rate": 4.733194915836062e-06, + "loss": 2.1459, + "step": 12755 + }, + { + "epoch": 2.59, + "learning_rate": 4.731726649305181e-06, + "loss": 2.1019, + "step": 12756 + }, + { + "epoch": 2.59, + "learning_rate": 4.730258539963254e-06, + "loss": 2.0816, + "step": 12757 + }, + { + "epoch": 2.59, + "learning_rate": 4.728790587854087e-06, + "loss": 2.1483, + "step": 12758 + }, + { + "epoch": 2.59, + "learning_rate": 4.727322793021481e-06, + "loss": 2.0903, + "step": 12759 + }, + { + "epoch": 2.59, + "learning_rate": 4.725855155509234e-06, + "loss": 2.1594, + "step": 12760 + }, + { + "epoch": 2.59, + "learning_rate": 4.724387675361125e-06, + "loss": 2.1261, + "step": 12761 + }, + { + "epoch": 2.59, + "learning_rate": 4.7229203526209466e-06, + "loss": 2.1461, + "step": 12762 + }, + { + "epoch": 2.59, + "learning_rate": 4.7214531873324775e-06, + "loss": 2.1201, + "step": 12763 + }, + { + "epoch": 2.59, + "learning_rate": 4.719986179539499e-06, + "loss": 2.0614, + "step": 12764 + }, + { + "epoch": 2.59, + "learning_rate": 4.718519329285771e-06, + "loss": 2.0926, + "step": 12765 + }, + { + "epoch": 2.59, + "learning_rate": 4.717052636615066e-06, + "loss": 2.0998, + "step": 12766 + }, + { + "epoch": 2.59, + "learning_rate": 4.71558610157115e-06, + "loss": 2.0927, + "step": 12767 + }, + { + "epoch": 2.59, + "learning_rate": 4.71411972419777e-06, + "loss": 2.1435, + "step": 12768 + }, + { + "epoch": 2.59, + "learning_rate": 4.712653504538684e-06, + "loss": 2.0888, + "step": 12769 + }, + { + "epoch": 2.59, + "learning_rate": 4.711187442637638e-06, + "loss": 2.1112, + "step": 12770 + }, + { + "epoch": 2.59, + "learning_rate": 4.70972153853838e-06, + "loss": 2.1486, + "step": 12771 + }, + { + "epoch": 2.59, + "learning_rate": 4.708255792284639e-06, + "loss": 2.1853, + "step": 12772 + }, + { + "epoch": 2.59, + "learning_rate": 4.706790203920153e-06, + "loss": 2.1265, + "step": 12773 + }, + { + "epoch": 2.59, + "learning_rate": 4.7053247734886495e-06, + "loss": 2.1164, + "step": 12774 + }, + { + "epoch": 2.59, + "learning_rate": 4.703859501033857e-06, + "loss": 2.1458, + "step": 12775 + }, + { + "epoch": 2.59, + "learning_rate": 4.702394386599488e-06, + "loss": 2.1557, + "step": 12776 + }, + { + "epoch": 2.59, + "learning_rate": 4.70092943022926e-06, + "loss": 2.1904, + "step": 12777 + }, + { + "epoch": 2.59, + "learning_rate": 4.699464631966883e-06, + "loss": 2.1536, + "step": 12778 + }, + { + "epoch": 2.59, + "learning_rate": 4.697999991856063e-06, + "loss": 2.1397, + "step": 12779 + }, + { + "epoch": 2.59, + "learning_rate": 4.696535509940499e-06, + "loss": 2.1728, + "step": 12780 + }, + { + "epoch": 2.59, + "learning_rate": 4.695071186263884e-06, + "loss": 2.0225, + "step": 12781 + }, + { + "epoch": 2.59, + "learning_rate": 4.693607020869913e-06, + "loss": 2.123, + "step": 12782 + }, + { + "epoch": 2.59, + "learning_rate": 4.692143013802273e-06, + "loss": 2.0794, + "step": 12783 + }, + { + "epoch": 2.59, + "learning_rate": 4.69067916510464e-06, + "loss": 2.1539, + "step": 12784 + }, + { + "epoch": 2.6, + "learning_rate": 4.689215474820693e-06, + "loss": 2.0601, + "step": 12785 + }, + { + "epoch": 2.6, + "learning_rate": 4.68775194299411e-06, + "loss": 2.1702, + "step": 12786 + }, + { + "epoch": 2.6, + "learning_rate": 4.6862885696685486e-06, + "loss": 2.1879, + "step": 12787 + }, + { + "epoch": 2.6, + "learning_rate": 4.684825354887677e-06, + "loss": 2.181, + "step": 12788 + }, + { + "epoch": 2.6, + "learning_rate": 4.68336229869515e-06, + "loss": 2.079, + "step": 12789 + }, + { + "epoch": 2.6, + "learning_rate": 4.6818994011346264e-06, + "loss": 2.0833, + "step": 12790 + }, + { + "epoch": 2.6, + "learning_rate": 4.680436662249748e-06, + "loss": 2.0674, + "step": 12791 + }, + { + "epoch": 2.6, + "learning_rate": 4.67897408208416e-06, + "loss": 2.1755, + "step": 12792 + }, + { + "epoch": 2.6, + "learning_rate": 4.677511660681502e-06, + "loss": 2.1647, + "step": 12793 + }, + { + "epoch": 2.6, + "learning_rate": 4.676049398085413e-06, + "loss": 2.1014, + "step": 12794 + }, + { + "epoch": 2.6, + "learning_rate": 4.674587294339513e-06, + "loss": 2.0295, + "step": 12795 + }, + { + "epoch": 2.6, + "learning_rate": 4.673125349487436e-06, + "loss": 2.0718, + "step": 12796 + }, + { + "epoch": 2.6, + "learning_rate": 4.6716635635727905e-06, + "loss": 2.0776, + "step": 12797 + }, + { + "epoch": 2.6, + "learning_rate": 4.670201936639205e-06, + "loss": 2.1534, + "step": 12798 + }, + { + "epoch": 2.6, + "learning_rate": 4.668740468730279e-06, + "loss": 2.1027, + "step": 12799 + }, + { + "epoch": 2.6, + "learning_rate": 4.667279159889627e-06, + "loss": 2.1784, + "step": 12800 + }, + { + "epoch": 2.6, + "learning_rate": 4.665818010160842e-06, + "loss": 2.1489, + "step": 12801 + }, + { + "epoch": 2.6, + "learning_rate": 4.664357019587524e-06, + "loss": 2.1738, + "step": 12802 + }, + { + "epoch": 2.6, + "learning_rate": 4.662896188213264e-06, + "loss": 2.1837, + "step": 12803 + }, + { + "epoch": 2.6, + "learning_rate": 4.661435516081652e-06, + "loss": 2.0684, + "step": 12804 + }, + { + "epoch": 2.6, + "learning_rate": 4.659975003236262e-06, + "loss": 2.1227, + "step": 12805 + }, + { + "epoch": 2.6, + "learning_rate": 4.658514649720678e-06, + "loss": 2.0796, + "step": 12806 + }, + { + "epoch": 2.6, + "learning_rate": 4.657054455578469e-06, + "loss": 2.1514, + "step": 12807 + }, + { + "epoch": 2.6, + "learning_rate": 4.6555944208532074e-06, + "loss": 2.153, + "step": 12808 + }, + { + "epoch": 2.6, + "learning_rate": 4.65413454558845e-06, + "loss": 2.1155, + "step": 12809 + }, + { + "epoch": 2.6, + "learning_rate": 4.652674829827758e-06, + "loss": 2.12, + "step": 12810 + }, + { + "epoch": 2.6, + "learning_rate": 4.651215273614688e-06, + "loss": 2.1532, + "step": 12811 + }, + { + "epoch": 2.6, + "learning_rate": 4.64975587699278e-06, + "loss": 2.1786, + "step": 12812 + }, + { + "epoch": 2.6, + "learning_rate": 4.648296640005584e-06, + "loss": 2.084, + "step": 12813 + }, + { + "epoch": 2.6, + "learning_rate": 4.646837562696638e-06, + "loss": 2.0615, + "step": 12814 + }, + { + "epoch": 2.6, + "learning_rate": 4.64537864510948e-06, + "loss": 2.0839, + "step": 12815 + }, + { + "epoch": 2.6, + "learning_rate": 4.6439198872876324e-06, + "loss": 2.148, + "step": 12816 + }, + { + "epoch": 2.6, + "learning_rate": 4.642461289274624e-06, + "loss": 2.1897, + "step": 12817 + }, + { + "epoch": 2.6, + "learning_rate": 4.641002851113974e-06, + "loss": 2.1464, + "step": 12818 + }, + { + "epoch": 2.6, + "learning_rate": 4.639544572849202e-06, + "loss": 2.1251, + "step": 12819 + }, + { + "epoch": 2.6, + "learning_rate": 4.63808645452381e-06, + "loss": 2.1175, + "step": 12820 + }, + { + "epoch": 2.6, + "learning_rate": 4.636628496181313e-06, + "loss": 2.128, + "step": 12821 + }, + { + "epoch": 2.6, + "learning_rate": 4.6351706978652e-06, + "loss": 2.1629, + "step": 12822 + }, + { + "epoch": 2.6, + "learning_rate": 4.6337130596189814e-06, + "loss": 2.0611, + "step": 12823 + }, + { + "epoch": 2.6, + "learning_rate": 4.6322555814861395e-06, + "loss": 2.0787, + "step": 12824 + }, + { + "epoch": 2.6, + "learning_rate": 4.630798263510162e-06, + "loss": 2.0925, + "step": 12825 + }, + { + "epoch": 2.6, + "learning_rate": 4.629341105734536e-06, + "loss": 2.185, + "step": 12826 + }, + { + "epoch": 2.6, + "learning_rate": 4.62788410820273e-06, + "loss": 1.9922, + "step": 12827 + }, + { + "epoch": 2.6, + "learning_rate": 4.626427270958222e-06, + "loss": 2.1474, + "step": 12828 + }, + { + "epoch": 2.6, + "learning_rate": 4.624970594044478e-06, + "loss": 2.1828, + "step": 12829 + }, + { + "epoch": 2.6, + "learning_rate": 4.623514077504964e-06, + "loss": 2.1243, + "step": 12830 + }, + { + "epoch": 2.6, + "learning_rate": 4.622057721383132e-06, + "loss": 2.1536, + "step": 12831 + }, + { + "epoch": 2.6, + "learning_rate": 4.6206015257224376e-06, + "loss": 2.2439, + "step": 12832 + }, + { + "epoch": 2.6, + "learning_rate": 4.6191454905663305e-06, + "loss": 2.1191, + "step": 12833 + }, + { + "epoch": 2.61, + "learning_rate": 4.617689615958256e-06, + "loss": 2.1187, + "step": 12834 + }, + { + "epoch": 2.61, + "learning_rate": 4.616233901941648e-06, + "loss": 2.1067, + "step": 12835 + }, + { + "epoch": 2.61, + "learning_rate": 4.614778348559945e-06, + "loss": 2.0914, + "step": 12836 + }, + { + "epoch": 2.61, + "learning_rate": 4.613322955856569e-06, + "loss": 2.069, + "step": 12837 + }, + { + "epoch": 2.61, + "learning_rate": 4.611867723874954e-06, + "loss": 2.1574, + "step": 12838 + }, + { + "epoch": 2.61, + "learning_rate": 4.6104126526585136e-06, + "loss": 2.1349, + "step": 12839 + }, + { + "epoch": 2.61, + "learning_rate": 4.608957742250667e-06, + "loss": 2.1281, + "step": 12840 + }, + { + "epoch": 2.61, + "learning_rate": 4.607502992694818e-06, + "loss": 2.0976, + "step": 12841 + }, + { + "epoch": 2.61, + "learning_rate": 4.6060484040343746e-06, + "loss": 2.1202, + "step": 12842 + }, + { + "epoch": 2.61, + "learning_rate": 4.604593976312738e-06, + "loss": 2.1774, + "step": 12843 + }, + { + "epoch": 2.61, + "learning_rate": 4.603139709573307e-06, + "loss": 2.1894, + "step": 12844 + }, + { + "epoch": 2.61, + "learning_rate": 4.601685603859466e-06, + "loss": 2.0913, + "step": 12845 + }, + { + "epoch": 2.61, + "learning_rate": 4.600231659214604e-06, + "loss": 2.0792, + "step": 12846 + }, + { + "epoch": 2.61, + "learning_rate": 4.5987778756821015e-06, + "loss": 2.1699, + "step": 12847 + }, + { + "epoch": 2.61, + "learning_rate": 4.59732425330534e-06, + "loss": 2.1168, + "step": 12848 + }, + { + "epoch": 2.61, + "learning_rate": 4.595870792127682e-06, + "loss": 2.0746, + "step": 12849 + }, + { + "epoch": 2.61, + "learning_rate": 4.5944174921925e-06, + "loss": 2.1082, + "step": 12850 + }, + { + "epoch": 2.61, + "learning_rate": 4.592964353543158e-06, + "loss": 2.1435, + "step": 12851 + }, + { + "epoch": 2.61, + "learning_rate": 4.591511376223007e-06, + "loss": 2.1324, + "step": 12852 + }, + { + "epoch": 2.61, + "learning_rate": 4.5900585602754025e-06, + "loss": 2.0624, + "step": 12853 + }, + { + "epoch": 2.61, + "learning_rate": 4.588605905743691e-06, + "loss": 2.1216, + "step": 12854 + }, + { + "epoch": 2.61, + "learning_rate": 4.58715341267122e-06, + "loss": 2.0995, + "step": 12855 + }, + { + "epoch": 2.61, + "learning_rate": 4.58570108110132e-06, + "loss": 2.1697, + "step": 12856 + }, + { + "epoch": 2.61, + "learning_rate": 4.584248911077329e-06, + "loss": 2.1384, + "step": 12857 + }, + { + "epoch": 2.61, + "learning_rate": 4.582796902642573e-06, + "loss": 2.1581, + "step": 12858 + }, + { + "epoch": 2.61, + "learning_rate": 4.58134505584038e-06, + "loss": 2.1464, + "step": 12859 + }, + { + "epoch": 2.61, + "learning_rate": 4.579893370714062e-06, + "loss": 2.1616, + "step": 12860 + }, + { + "epoch": 2.61, + "learning_rate": 4.578441847306938e-06, + "loss": 2.155, + "step": 12861 + }, + { + "epoch": 2.61, + "learning_rate": 4.576990485662308e-06, + "loss": 2.3049, + "step": 12862 + }, + { + "epoch": 2.61, + "learning_rate": 4.57553928582349e-06, + "loss": 2.1078, + "step": 12863 + }, + { + "epoch": 2.61, + "learning_rate": 4.574088247833774e-06, + "loss": 2.0614, + "step": 12864 + }, + { + "epoch": 2.61, + "learning_rate": 4.572637371736458e-06, + "loss": 2.1437, + "step": 12865 + }, + { + "epoch": 2.61, + "learning_rate": 4.571186657574824e-06, + "loss": 2.1693, + "step": 12866 + }, + { + "epoch": 2.61, + "learning_rate": 4.569736105392171e-06, + "loss": 2.1437, + "step": 12867 + }, + { + "epoch": 2.61, + "learning_rate": 4.568285715231766e-06, + "loss": 2.1658, + "step": 12868 + }, + { + "epoch": 2.61, + "learning_rate": 4.566835487136894e-06, + "loss": 2.0935, + "step": 12869 + }, + { + "epoch": 2.61, + "learning_rate": 4.565385421150817e-06, + "loss": 2.0466, + "step": 12870 + }, + { + "epoch": 2.61, + "learning_rate": 4.5639355173168035e-06, + "loss": 2.0449, + "step": 12871 + }, + { + "epoch": 2.61, + "learning_rate": 4.562485775678114e-06, + "loss": 2.0929, + "step": 12872 + }, + { + "epoch": 2.61, + "learning_rate": 4.561036196278009e-06, + "loss": 2.194, + "step": 12873 + }, + { + "epoch": 2.61, + "learning_rate": 4.5595867791597326e-06, + "loss": 2.1814, + "step": 12874 + }, + { + "epoch": 2.61, + "learning_rate": 4.558137524366534e-06, + "loss": 2.1096, + "step": 12875 + }, + { + "epoch": 2.61, + "learning_rate": 4.5566884319416525e-06, + "loss": 2.0501, + "step": 12876 + }, + { + "epoch": 2.61, + "learning_rate": 4.555239501928327e-06, + "loss": 2.0147, + "step": 12877 + }, + { + "epoch": 2.61, + "learning_rate": 4.5537907343697925e-06, + "loss": 2.0416, + "step": 12878 + }, + { + "epoch": 2.61, + "learning_rate": 4.552342129309267e-06, + "loss": 2.0943, + "step": 12879 + }, + { + "epoch": 2.61, + "learning_rate": 4.550893686789982e-06, + "loss": 2.1507, + "step": 12880 + }, + { + "epoch": 2.61, + "learning_rate": 4.5494454068551405e-06, + "loss": 2.0505, + "step": 12881 + }, + { + "epoch": 2.61, + "learning_rate": 4.547997289547973e-06, + "loss": 2.1204, + "step": 12882 + }, + { + "epoch": 2.62, + "learning_rate": 4.546549334911672e-06, + "loss": 2.1163, + "step": 12883 + }, + { + "epoch": 2.62, + "learning_rate": 4.5451015429894495e-06, + "loss": 2.0631, + "step": 12884 + }, + { + "epoch": 2.62, + "learning_rate": 4.543653913824496e-06, + "loss": 2.059, + "step": 12885 + }, + { + "epoch": 2.62, + "learning_rate": 4.542206447460007e-06, + "loss": 2.0166, + "step": 12886 + }, + { + "epoch": 2.62, + "learning_rate": 4.540759143939171e-06, + "loss": 2.1396, + "step": 12887 + }, + { + "epoch": 2.62, + "learning_rate": 4.5393120033051745e-06, + "loss": 2.132, + "step": 12888 + }, + { + "epoch": 2.62, + "learning_rate": 4.537865025601188e-06, + "loss": 2.1994, + "step": 12889 + }, + { + "epoch": 2.62, + "learning_rate": 4.536418210870389e-06, + "loss": 2.1455, + "step": 12890 + }, + { + "epoch": 2.62, + "learning_rate": 4.534971559155946e-06, + "loss": 2.0449, + "step": 12891 + }, + { + "epoch": 2.62, + "learning_rate": 4.533525070501027e-06, + "loss": 2.0769, + "step": 12892 + }, + { + "epoch": 2.62, + "learning_rate": 4.532078744948781e-06, + "loss": 2.0918, + "step": 12893 + }, + { + "epoch": 2.62, + "learning_rate": 4.530632582542368e-06, + "loss": 2.0747, + "step": 12894 + }, + { + "epoch": 2.62, + "learning_rate": 4.5291865833249395e-06, + "loss": 2.0984, + "step": 12895 + }, + { + "epoch": 2.62, + "learning_rate": 4.527740747339632e-06, + "loss": 2.1476, + "step": 12896 + }, + { + "epoch": 2.62, + "learning_rate": 4.526295074629589e-06, + "loss": 2.1129, + "step": 12897 + }, + { + "epoch": 2.62, + "learning_rate": 4.5248495652379455e-06, + "loss": 2.1837, + "step": 12898 + }, + { + "epoch": 2.62, + "learning_rate": 4.523404219207834e-06, + "loss": 2.1212, + "step": 12899 + }, + { + "epoch": 2.62, + "learning_rate": 4.521959036582372e-06, + "loss": 2.0945, + "step": 12900 + }, + { + "epoch": 2.62, + "learning_rate": 4.520514017404682e-06, + "loss": 2.1539, + "step": 12901 + }, + { + "epoch": 2.62, + "learning_rate": 4.519069161717881e-06, + "loss": 2.0833, + "step": 12902 + }, + { + "epoch": 2.62, + "learning_rate": 4.5176244695650805e-06, + "loss": 2.1009, + "step": 12903 + }, + { + "epoch": 2.62, + "learning_rate": 4.516179940989379e-06, + "loss": 2.1022, + "step": 12904 + }, + { + "epoch": 2.62, + "learning_rate": 4.514735576033885e-06, + "loss": 2.1154, + "step": 12905 + }, + { + "epoch": 2.62, + "learning_rate": 4.513291374741681e-06, + "loss": 2.0355, + "step": 12906 + }, + { + "epoch": 2.62, + "learning_rate": 4.511847337155875e-06, + "loss": 2.0822, + "step": 12907 + }, + { + "epoch": 2.62, + "learning_rate": 4.5104034633195385e-06, + "loss": 2.1138, + "step": 12908 + }, + { + "epoch": 2.62, + "learning_rate": 4.508959753275761e-06, + "loss": 2.0748, + "step": 12909 + }, + { + "epoch": 2.62, + "learning_rate": 4.507516207067612e-06, + "loss": 2.0742, + "step": 12910 + }, + { + "epoch": 2.62, + "learning_rate": 4.506072824738164e-06, + "loss": 2.1018, + "step": 12911 + }, + { + "epoch": 2.62, + "learning_rate": 4.504629606330485e-06, + "loss": 2.0753, + "step": 12912 + }, + { + "epoch": 2.62, + "learning_rate": 4.503186551887639e-06, + "loss": 2.1664, + "step": 12913 + }, + { + "epoch": 2.62, + "learning_rate": 4.501743661452674e-06, + "loss": 2.1608, + "step": 12914 + }, + { + "epoch": 2.62, + "learning_rate": 4.500300935068647e-06, + "loss": 2.0938, + "step": 12915 + }, + { + "epoch": 2.62, + "learning_rate": 4.498858372778604e-06, + "loss": 2.2015, + "step": 12916 + }, + { + "epoch": 2.62, + "learning_rate": 4.497415974625589e-06, + "loss": 2.1994, + "step": 12917 + }, + { + "epoch": 2.62, + "learning_rate": 4.495973740652632e-06, + "loss": 2.1462, + "step": 12918 + }, + { + "epoch": 2.62, + "learning_rate": 4.4945316709027675e-06, + "loss": 2.092, + "step": 12919 + }, + { + "epoch": 2.62, + "learning_rate": 4.493089765419028e-06, + "loss": 2.0263, + "step": 12920 + }, + { + "epoch": 2.62, + "learning_rate": 4.491648024244428e-06, + "loss": 2.0727, + "step": 12921 + }, + { + "epoch": 2.62, + "learning_rate": 4.490206447421987e-06, + "loss": 2.1259, + "step": 12922 + }, + { + "epoch": 2.62, + "learning_rate": 4.4887650349947175e-06, + "loss": 2.1483, + "step": 12923 + }, + { + "epoch": 2.62, + "learning_rate": 4.4873237870056304e-06, + "loss": 2.0738, + "step": 12924 + }, + { + "epoch": 2.62, + "learning_rate": 4.485882703497722e-06, + "loss": 2.1834, + "step": 12925 + }, + { + "epoch": 2.62, + "learning_rate": 4.484441784513991e-06, + "loss": 2.0653, + "step": 12926 + }, + { + "epoch": 2.62, + "learning_rate": 4.483001030097432e-06, + "loss": 2.1332, + "step": 12927 + }, + { + "epoch": 2.62, + "learning_rate": 4.481560440291036e-06, + "loss": 2.1236, + "step": 12928 + }, + { + "epoch": 2.62, + "learning_rate": 4.4801200151377775e-06, + "loss": 2.0884, + "step": 12929 + }, + { + "epoch": 2.62, + "learning_rate": 4.478679754680639e-06, + "loss": 2.1627, + "step": 12930 + }, + { + "epoch": 2.62, + "learning_rate": 4.477239658962593e-06, + "loss": 2.1817, + "step": 12931 + }, + { + "epoch": 2.62, + "learning_rate": 4.4757997280266115e-06, + "loss": 2.062, + "step": 12932 + }, + { + "epoch": 2.63, + "learning_rate": 4.474359961915651e-06, + "loss": 2.0545, + "step": 12933 + }, + { + "epoch": 2.63, + "learning_rate": 4.4729203606726705e-06, + "loss": 2.1658, + "step": 12934 + }, + { + "epoch": 2.63, + "learning_rate": 4.4714809243406285e-06, + "loss": 2.1462, + "step": 12935 + }, + { + "epoch": 2.63, + "learning_rate": 4.470041652962467e-06, + "loss": 2.0169, + "step": 12936 + }, + { + "epoch": 2.63, + "learning_rate": 4.468602546581132e-06, + "loss": 2.1776, + "step": 12937 + }, + { + "epoch": 2.63, + "learning_rate": 4.4671636052395614e-06, + "loss": 2.1376, + "step": 12938 + }, + { + "epoch": 2.63, + "learning_rate": 4.465724828980694e-06, + "loss": 2.1345, + "step": 12939 + }, + { + "epoch": 2.63, + "learning_rate": 4.4642862178474514e-06, + "loss": 2.0506, + "step": 12940 + }, + { + "epoch": 2.63, + "learning_rate": 4.462847771882758e-06, + "loss": 2.1413, + "step": 12941 + }, + { + "epoch": 2.63, + "learning_rate": 4.461409491129536e-06, + "loss": 2.0935, + "step": 12942 + }, + { + "epoch": 2.63, + "learning_rate": 4.4599713756306996e-06, + "loss": 2.0505, + "step": 12943 + }, + { + "epoch": 2.63, + "learning_rate": 4.458533425429154e-06, + "loss": 2.0814, + "step": 12944 + }, + { + "epoch": 2.63, + "learning_rate": 4.457095640567806e-06, + "loss": 2.1384, + "step": 12945 + }, + { + "epoch": 2.63, + "learning_rate": 4.455658021089549e-06, + "loss": 2.1862, + "step": 12946 + }, + { + "epoch": 2.63, + "learning_rate": 4.4542205670372875e-06, + "loss": 2.088, + "step": 12947 + }, + { + "epoch": 2.63, + "learning_rate": 4.452783278453902e-06, + "loss": 2.155, + "step": 12948 + }, + { + "epoch": 2.63, + "learning_rate": 4.451346155382284e-06, + "loss": 2.085, + "step": 12949 + }, + { + "epoch": 2.63, + "learning_rate": 4.449909197865303e-06, + "loss": 2.1432, + "step": 12950 + }, + { + "epoch": 2.63, + "learning_rate": 4.44847240594584e-06, + "loss": 2.1757, + "step": 12951 + }, + { + "epoch": 2.63, + "learning_rate": 4.447035779666762e-06, + "loss": 2.1553, + "step": 12952 + }, + { + "epoch": 2.63, + "learning_rate": 4.44559931907094e-06, + "loss": 2.1011, + "step": 12953 + }, + { + "epoch": 2.63, + "learning_rate": 4.444163024201223e-06, + "loss": 2.1256, + "step": 12954 + }, + { + "epoch": 2.63, + "learning_rate": 4.442726895100472e-06, + "loss": 2.015, + "step": 12955 + }, + { + "epoch": 2.63, + "learning_rate": 4.441290931811535e-06, + "loss": 2.1545, + "step": 12956 + }, + { + "epoch": 2.63, + "learning_rate": 4.439855134377261e-06, + "loss": 2.0833, + "step": 12957 + }, + { + "epoch": 2.63, + "learning_rate": 4.438419502840482e-06, + "loss": 2.127, + "step": 12958 + }, + { + "epoch": 2.63, + "learning_rate": 4.4369840372440374e-06, + "loss": 2.1169, + "step": 12959 + }, + { + "epoch": 2.63, + "learning_rate": 4.435548737630756e-06, + "loss": 2.1624, + "step": 12960 + }, + { + "epoch": 2.63, + "learning_rate": 4.434113604043467e-06, + "loss": 2.0806, + "step": 12961 + }, + { + "epoch": 2.63, + "learning_rate": 4.432678636524982e-06, + "loss": 2.1019, + "step": 12962 + }, + { + "epoch": 2.63, + "learning_rate": 4.4312438351181195e-06, + "loss": 2.1352, + "step": 12963 + }, + { + "epoch": 2.63, + "learning_rate": 4.429809199865696e-06, + "loss": 2.0663, + "step": 12964 + }, + { + "epoch": 2.63, + "learning_rate": 4.428374730810507e-06, + "loss": 2.1412, + "step": 12965 + }, + { + "epoch": 2.63, + "learning_rate": 4.426940427995355e-06, + "loss": 2.1296, + "step": 12966 + }, + { + "epoch": 2.63, + "learning_rate": 4.4255062914630385e-06, + "loss": 2.1133, + "step": 12967 + }, + { + "epoch": 2.63, + "learning_rate": 4.424072321256348e-06, + "loss": 2.1356, + "step": 12968 + }, + { + "epoch": 2.63, + "learning_rate": 4.4226385174180655e-06, + "loss": 2.1656, + "step": 12969 + }, + { + "epoch": 2.63, + "learning_rate": 4.42120487999097e-06, + "loss": 2.0976, + "step": 12970 + }, + { + "epoch": 2.63, + "learning_rate": 4.41977140901784e-06, + "loss": 2.1012, + "step": 12971 + }, + { + "epoch": 2.63, + "learning_rate": 4.4183381045414496e-06, + "loss": 2.0803, + "step": 12972 + }, + { + "epoch": 2.63, + "learning_rate": 4.416904966604555e-06, + "loss": 2.1364, + "step": 12973 + }, + { + "epoch": 2.63, + "learning_rate": 4.415471995249924e-06, + "loss": 2.0827, + "step": 12974 + }, + { + "epoch": 2.63, + "learning_rate": 4.414039190520304e-06, + "loss": 2.1343, + "step": 12975 + }, + { + "epoch": 2.63, + "learning_rate": 4.412606552458457e-06, + "loss": 2.0825, + "step": 12976 + }, + { + "epoch": 2.63, + "learning_rate": 4.411174081107119e-06, + "loss": 2.1182, + "step": 12977 + }, + { + "epoch": 2.63, + "learning_rate": 4.409741776509033e-06, + "loss": 2.0679, + "step": 12978 + }, + { + "epoch": 2.63, + "learning_rate": 4.408309638706939e-06, + "loss": 2.1505, + "step": 12979 + }, + { + "epoch": 2.63, + "learning_rate": 4.406877667743561e-06, + "loss": 2.14, + "step": 12980 + }, + { + "epoch": 2.63, + "learning_rate": 4.405445863661627e-06, + "loss": 2.1032, + "step": 12981 + }, + { + "epoch": 2.64, + "learning_rate": 4.404014226503858e-06, + "loss": 2.1585, + "step": 12982 + }, + { + "epoch": 2.64, + "learning_rate": 4.402582756312972e-06, + "loss": 2.1525, + "step": 12983 + }, + { + "epoch": 2.64, + "learning_rate": 4.401151453131675e-06, + "loss": 2.1266, + "step": 12984 + }, + { + "epoch": 2.64, + "learning_rate": 4.3997203170026736e-06, + "loss": 2.1339, + "step": 12985 + }, + { + "epoch": 2.64, + "learning_rate": 4.398289347968671e-06, + "loss": 2.1337, + "step": 12986 + }, + { + "epoch": 2.64, + "learning_rate": 4.396858546072364e-06, + "loss": 2.0963, + "step": 12987 + }, + { + "epoch": 2.64, + "learning_rate": 4.395427911356439e-06, + "loss": 2.1564, + "step": 12988 + }, + { + "epoch": 2.64, + "learning_rate": 4.393997443863586e-06, + "loss": 2.0219, + "step": 12989 + }, + { + "epoch": 2.64, + "learning_rate": 4.392567143636477e-06, + "loss": 2.109, + "step": 12990 + }, + { + "epoch": 2.64, + "learning_rate": 4.3911370107178e-06, + "loss": 2.0383, + "step": 12991 + }, + { + "epoch": 2.64, + "learning_rate": 4.389707045150217e-06, + "loss": 2.1361, + "step": 12992 + }, + { + "epoch": 2.64, + "learning_rate": 4.3882772469764e-06, + "loss": 2.2435, + "step": 12993 + }, + { + "epoch": 2.64, + "learning_rate": 4.386847616239003e-06, + "loss": 2.1626, + "step": 12994 + }, + { + "epoch": 2.64, + "learning_rate": 4.385418152980685e-06, + "loss": 2.1663, + "step": 12995 + }, + { + "epoch": 2.64, + "learning_rate": 4.383988857244096e-06, + "loss": 2.0787, + "step": 12996 + }, + { + "epoch": 2.64, + "learning_rate": 4.382559729071887e-06, + "loss": 2.1065, + "step": 12997 + }, + { + "epoch": 2.64, + "learning_rate": 4.381130768506691e-06, + "loss": 2.0395, + "step": 12998 + }, + { + "epoch": 2.64, + "learning_rate": 4.379701975591147e-06, + "loss": 2.1029, + "step": 12999 + }, + { + "epoch": 2.64, + "learning_rate": 4.378273350367884e-06, + "loss": 2.1134, + "step": 13000 + }, + { + "epoch": 2.64, + "learning_rate": 4.376844892879535e-06, + "loss": 2.1372, + "step": 13001 + }, + { + "epoch": 2.64, + "learning_rate": 4.375416603168712e-06, + "loss": 2.0088, + "step": 13002 + }, + { + "epoch": 2.64, + "learning_rate": 4.3739884812780335e-06, + "loss": 2.1378, + "step": 13003 + }, + { + "epoch": 2.64, + "learning_rate": 4.3725605272501135e-06, + "loss": 2.1175, + "step": 13004 + }, + { + "epoch": 2.64, + "learning_rate": 4.371132741127553e-06, + "loss": 2.1261, + "step": 13005 + }, + { + "epoch": 2.64, + "learning_rate": 4.369705122952953e-06, + "loss": 2.1285, + "step": 13006 + }, + { + "epoch": 2.64, + "learning_rate": 4.3682776727689124e-06, + "loss": 2.0308, + "step": 13007 + }, + { + "epoch": 2.64, + "learning_rate": 4.3668503906180235e-06, + "loss": 2.0221, + "step": 13008 + }, + { + "epoch": 2.64, + "learning_rate": 4.365423276542865e-06, + "loss": 2.0907, + "step": 13009 + }, + { + "epoch": 2.64, + "learning_rate": 4.363996330586022e-06, + "loss": 2.1014, + "step": 13010 + }, + { + "epoch": 2.64, + "learning_rate": 4.36256955279007e-06, + "loss": 2.069, + "step": 13011 + }, + { + "epoch": 2.64, + "learning_rate": 4.3611429431975815e-06, + "loss": 2.1114, + "step": 13012 + }, + { + "epoch": 2.64, + "learning_rate": 4.359716501851118e-06, + "loss": 2.1344, + "step": 13013 + }, + { + "epoch": 2.64, + "learning_rate": 4.358290228793245e-06, + "loss": 2.1677, + "step": 13014 + }, + { + "epoch": 2.64, + "learning_rate": 4.356864124066507e-06, + "loss": 2.0434, + "step": 13015 + }, + { + "epoch": 2.64, + "learning_rate": 4.35543818771347e-06, + "loss": 2.0843, + "step": 13016 + }, + { + "epoch": 2.64, + "learning_rate": 4.354012419776669e-06, + "loss": 2.0383, + "step": 13017 + }, + { + "epoch": 2.64, + "learning_rate": 4.352586820298651e-06, + "loss": 2.1253, + "step": 13018 + }, + { + "epoch": 2.64, + "learning_rate": 4.351161389321944e-06, + "loss": 2.1804, + "step": 13019 + }, + { + "epoch": 2.64, + "learning_rate": 4.349736126889084e-06, + "loss": 2.1218, + "step": 13020 + }, + { + "epoch": 2.64, + "learning_rate": 4.348311033042594e-06, + "loss": 2.1946, + "step": 13021 + }, + { + "epoch": 2.64, + "learning_rate": 4.346886107824998e-06, + "loss": 2.095, + "step": 13022 + }, + { + "epoch": 2.64, + "learning_rate": 4.345461351278805e-06, + "loss": 2.1078, + "step": 13023 + }, + { + "epoch": 2.64, + "learning_rate": 4.344036763446531e-06, + "loss": 2.1182, + "step": 13024 + }, + { + "epoch": 2.64, + "learning_rate": 4.342612344370678e-06, + "loss": 2.1966, + "step": 13025 + }, + { + "epoch": 2.64, + "learning_rate": 4.3411880940937516e-06, + "loss": 2.1115, + "step": 13026 + }, + { + "epoch": 2.64, + "learning_rate": 4.339764012658239e-06, + "loss": 2.0936, + "step": 13027 + }, + { + "epoch": 2.64, + "learning_rate": 4.338340100106635e-06, + "loss": 2.0903, + "step": 13028 + }, + { + "epoch": 2.64, + "learning_rate": 4.336916356481427e-06, + "loss": 2.1045, + "step": 13029 + }, + { + "epoch": 2.64, + "learning_rate": 4.335492781825085e-06, + "loss": 2.1331, + "step": 13030 + }, + { + "epoch": 2.65, + "learning_rate": 4.3340693761800974e-06, + "loss": 2.1792, + "step": 13031 + }, + { + "epoch": 2.65, + "learning_rate": 4.332646139588926e-06, + "loss": 2.153, + "step": 13032 + }, + { + "epoch": 2.65, + "learning_rate": 4.331223072094041e-06, + "loss": 2.1118, + "step": 13033 + }, + { + "epoch": 2.65, + "learning_rate": 4.329800173737896e-06, + "loss": 2.2115, + "step": 13034 + }, + { + "epoch": 2.65, + "learning_rate": 4.328377444562948e-06, + "loss": 2.1389, + "step": 13035 + }, + { + "epoch": 2.65, + "learning_rate": 4.326954884611648e-06, + "loss": 2.1624, + "step": 13036 + }, + { + "epoch": 2.65, + "learning_rate": 4.325532493926445e-06, + "loss": 2.1776, + "step": 13037 + }, + { + "epoch": 2.65, + "learning_rate": 4.324110272549771e-06, + "loss": 2.1292, + "step": 13038 + }, + { + "epoch": 2.65, + "learning_rate": 4.322688220524063e-06, + "loss": 2.049, + "step": 13039 + }, + { + "epoch": 2.65, + "learning_rate": 4.321266337891752e-06, + "loss": 2.0923, + "step": 13040 + }, + { + "epoch": 2.65, + "learning_rate": 4.3198446246952665e-06, + "loss": 2.1153, + "step": 13041 + }, + { + "epoch": 2.65, + "learning_rate": 4.318423080977017e-06, + "loss": 2.0833, + "step": 13042 + }, + { + "epoch": 2.65, + "learning_rate": 4.317001706779423e-06, + "loss": 2.0872, + "step": 13043 + }, + { + "epoch": 2.65, + "learning_rate": 4.315580502144897e-06, + "loss": 2.1276, + "step": 13044 + }, + { + "epoch": 2.65, + "learning_rate": 4.3141594671158374e-06, + "loss": 2.1187, + "step": 13045 + }, + { + "epoch": 2.65, + "learning_rate": 4.312738601734645e-06, + "loss": 2.0678, + "step": 13046 + }, + { + "epoch": 2.65, + "learning_rate": 4.311317906043716e-06, + "loss": 2.135, + "step": 13047 + }, + { + "epoch": 2.65, + "learning_rate": 4.309897380085443e-06, + "loss": 2.1719, + "step": 13048 + }, + { + "epoch": 2.65, + "learning_rate": 4.3084770239022e-06, + "loss": 2.1707, + "step": 13049 + }, + { + "epoch": 2.65, + "learning_rate": 4.307056837536373e-06, + "loss": 2.1685, + "step": 13050 + }, + { + "epoch": 2.65, + "learning_rate": 4.305636821030334e-06, + "loss": 2.0281, + "step": 13051 + }, + { + "epoch": 2.65, + "learning_rate": 4.3042169744264576e-06, + "loss": 2.0968, + "step": 13052 + }, + { + "epoch": 2.65, + "learning_rate": 4.302797297767097e-06, + "loss": 2.0803, + "step": 13053 + }, + { + "epoch": 2.65, + "learning_rate": 4.301377791094618e-06, + "loss": 2.0515, + "step": 13054 + }, + { + "epoch": 2.65, + "learning_rate": 4.299958454451373e-06, + "loss": 2.069, + "step": 13055 + }, + { + "epoch": 2.65, + "learning_rate": 4.298539287879713e-06, + "loss": 2.0197, + "step": 13056 + }, + { + "epoch": 2.65, + "learning_rate": 4.2971202914219765e-06, + "loss": 2.1685, + "step": 13057 + }, + { + "epoch": 2.65, + "learning_rate": 4.295701465120507e-06, + "loss": 2.0626, + "step": 13058 + }, + { + "epoch": 2.65, + "learning_rate": 4.294282809017629e-06, + "loss": 2.034, + "step": 13059 + }, + { + "epoch": 2.65, + "learning_rate": 4.292864323155684e-06, + "loss": 2.0831, + "step": 13060 + }, + { + "epoch": 2.65, + "learning_rate": 4.291446007576985e-06, + "loss": 2.062, + "step": 13061 + }, + { + "epoch": 2.65, + "learning_rate": 4.290027862323858e-06, + "loss": 2.1524, + "step": 13062 + }, + { + "epoch": 2.65, + "learning_rate": 4.288609887438606e-06, + "loss": 2.1366, + "step": 13063 + }, + { + "epoch": 2.65, + "learning_rate": 4.287192082963544e-06, + "loss": 2.1126, + "step": 13064 + }, + { + "epoch": 2.65, + "learning_rate": 4.2857744489409725e-06, + "loss": 2.1094, + "step": 13065 + }, + { + "epoch": 2.65, + "learning_rate": 4.284356985413195e-06, + "loss": 2.0853, + "step": 13066 + }, + { + "epoch": 2.65, + "learning_rate": 4.2829396924224955e-06, + "loss": 2.1221, + "step": 13067 + }, + { + "epoch": 2.65, + "learning_rate": 4.2815225700111655e-06, + "loss": 2.0968, + "step": 13068 + }, + { + "epoch": 2.65, + "learning_rate": 4.280105618221487e-06, + "loss": 2.0713, + "step": 13069 + }, + { + "epoch": 2.65, + "learning_rate": 4.278688837095742e-06, + "loss": 2.0468, + "step": 13070 + }, + { + "epoch": 2.65, + "learning_rate": 4.277272226676197e-06, + "loss": 2.0499, + "step": 13071 + }, + { + "epoch": 2.65, + "learning_rate": 4.2758557870051184e-06, + "loss": 2.1688, + "step": 13072 + }, + { + "epoch": 2.65, + "learning_rate": 4.274439518124777e-06, + "loss": 2.0899, + "step": 13073 + }, + { + "epoch": 2.65, + "learning_rate": 4.27302342007742e-06, + "loss": 2.0684, + "step": 13074 + }, + { + "epoch": 2.65, + "learning_rate": 4.271607492905303e-06, + "loss": 2.0846, + "step": 13075 + }, + { + "epoch": 2.65, + "learning_rate": 4.270191736650674e-06, + "loss": 2.0797, + "step": 13076 + }, + { + "epoch": 2.65, + "learning_rate": 4.268776151355777e-06, + "loss": 2.1113, + "step": 13077 + }, + { + "epoch": 2.65, + "learning_rate": 4.267360737062843e-06, + "loss": 2.088, + "step": 13078 + }, + { + "epoch": 2.65, + "learning_rate": 4.265945493814105e-06, + "loss": 2.11, + "step": 13079 + }, + { + "epoch": 2.66, + "learning_rate": 4.264530421651792e-06, + "loss": 2.2011, + "step": 13080 + }, + { + "epoch": 2.66, + "learning_rate": 4.2631155206181285e-06, + "loss": 2.0834, + "step": 13081 + }, + { + "epoch": 2.66, + "learning_rate": 4.261700790755322e-06, + "loss": 2.2328, + "step": 13082 + }, + { + "epoch": 2.66, + "learning_rate": 4.260286232105588e-06, + "loss": 2.0275, + "step": 13083 + }, + { + "epoch": 2.66, + "learning_rate": 4.258871844711133e-06, + "loss": 2.1185, + "step": 13084 + }, + { + "epoch": 2.66, + "learning_rate": 4.257457628614161e-06, + "loss": 2.1627, + "step": 13085 + }, + { + "epoch": 2.66, + "learning_rate": 4.256043583856862e-06, + "loss": 2.1026, + "step": 13086 + }, + { + "epoch": 2.66, + "learning_rate": 4.254629710481427e-06, + "loss": 2.0387, + "step": 13087 + }, + { + "epoch": 2.66, + "learning_rate": 4.2532160085300485e-06, + "loss": 2.1173, + "step": 13088 + }, + { + "epoch": 2.66, + "learning_rate": 4.2518024780449e-06, + "loss": 2.0624, + "step": 13089 + }, + { + "epoch": 2.66, + "learning_rate": 4.2503891190681565e-06, + "loss": 2.0332, + "step": 13090 + }, + { + "epoch": 2.66, + "learning_rate": 4.248975931641993e-06, + "loss": 2.1401, + "step": 13091 + }, + { + "epoch": 2.66, + "learning_rate": 4.247562915808575e-06, + "loss": 2.0945, + "step": 13092 + }, + { + "epoch": 2.66, + "learning_rate": 4.246150071610058e-06, + "loss": 2.1481, + "step": 13093 + }, + { + "epoch": 2.66, + "learning_rate": 4.244737399088598e-06, + "loss": 2.0805, + "step": 13094 + }, + { + "epoch": 2.66, + "learning_rate": 4.243324898286345e-06, + "loss": 2.1748, + "step": 13095 + }, + { + "epoch": 2.66, + "learning_rate": 4.2419125692454474e-06, + "loss": 2.1949, + "step": 13096 + }, + { + "epoch": 2.66, + "learning_rate": 4.24050041200804e-06, + "loss": 2.0919, + "step": 13097 + }, + { + "epoch": 2.66, + "learning_rate": 4.239088426616261e-06, + "loss": 2.0913, + "step": 13098 + }, + { + "epoch": 2.66, + "learning_rate": 4.237676613112232e-06, + "loss": 2.1534, + "step": 13099 + }, + { + "epoch": 2.66, + "learning_rate": 4.236264971538089e-06, + "loss": 2.0946, + "step": 13100 + }, + { + "epoch": 2.66, + "learning_rate": 4.234853501935941e-06, + "loss": 2.0693, + "step": 13101 + }, + { + "epoch": 2.66, + "learning_rate": 4.233442204347909e-06, + "loss": 2.0986, + "step": 13102 + }, + { + "epoch": 2.66, + "learning_rate": 4.232031078816095e-06, + "loss": 2.1841, + "step": 13103 + }, + { + "epoch": 2.66, + "learning_rate": 4.2306201253826055e-06, + "loss": 2.1467, + "step": 13104 + }, + { + "epoch": 2.66, + "learning_rate": 4.22920934408954e-06, + "loss": 2.0802, + "step": 13105 + }, + { + "epoch": 2.66, + "learning_rate": 4.227798734978994e-06, + "loss": 2.1135, + "step": 13106 + }, + { + "epoch": 2.66, + "learning_rate": 4.22638829809305e-06, + "loss": 2.1211, + "step": 13107 + }, + { + "epoch": 2.66, + "learning_rate": 4.224978033473794e-06, + "loss": 2.1154, + "step": 13108 + }, + { + "epoch": 2.66, + "learning_rate": 4.223567941163305e-06, + "loss": 2.1068, + "step": 13109 + }, + { + "epoch": 2.66, + "learning_rate": 4.222158021203657e-06, + "loss": 2.129, + "step": 13110 + }, + { + "epoch": 2.66, + "learning_rate": 4.220748273636912e-06, + "loss": 2.0653, + "step": 13111 + }, + { + "epoch": 2.66, + "learning_rate": 4.2193386985051365e-06, + "loss": 2.0848, + "step": 13112 + }, + { + "epoch": 2.66, + "learning_rate": 4.2179292958503905e-06, + "loss": 2.1386, + "step": 13113 + }, + { + "epoch": 2.66, + "learning_rate": 4.216520065714718e-06, + "loss": 2.1097, + "step": 13114 + }, + { + "epoch": 2.66, + "learning_rate": 4.215111008140174e-06, + "loss": 2.1753, + "step": 13115 + }, + { + "epoch": 2.66, + "learning_rate": 4.213702123168796e-06, + "loss": 2.0649, + "step": 13116 + }, + { + "epoch": 2.66, + "learning_rate": 4.212293410842625e-06, + "loss": 2.1648, + "step": 13117 + }, + { + "epoch": 2.66, + "learning_rate": 4.2108848712036865e-06, + "loss": 2.132, + "step": 13118 + }, + { + "epoch": 2.66, + "learning_rate": 4.2094765042940115e-06, + "loss": 2.0555, + "step": 13119 + }, + { + "epoch": 2.66, + "learning_rate": 4.208068310155619e-06, + "loss": 2.0836, + "step": 13120 + }, + { + "epoch": 2.66, + "learning_rate": 4.20666028883053e-06, + "loss": 2.0696, + "step": 13121 + }, + { + "epoch": 2.66, + "learning_rate": 4.2052524403607476e-06, + "loss": 2.0695, + "step": 13122 + }, + { + "epoch": 2.66, + "learning_rate": 4.203844764788287e-06, + "loss": 2.0932, + "step": 13123 + }, + { + "epoch": 2.66, + "learning_rate": 4.2024372621551346e-06, + "loss": 2.089, + "step": 13124 + }, + { + "epoch": 2.66, + "learning_rate": 4.201029932503303e-06, + "loss": 2.1008, + "step": 13125 + }, + { + "epoch": 2.66, + "learning_rate": 4.199622775874771e-06, + "loss": 2.1324, + "step": 13126 + }, + { + "epoch": 2.66, + "learning_rate": 4.198215792311531e-06, + "loss": 2.1033, + "step": 13127 + }, + { + "epoch": 2.66, + "learning_rate": 4.1968089818555554e-06, + "loss": 2.076, + "step": 13128 + }, + { + "epoch": 2.66, + "learning_rate": 4.195402344548824e-06, + "loss": 2.0625, + "step": 13129 + }, + { + "epoch": 2.67, + "learning_rate": 4.193995880433303e-06, + "loss": 2.1546, + "step": 13130 + }, + { + "epoch": 2.67, + "learning_rate": 4.192589589550962e-06, + "loss": 2.1163, + "step": 13131 + }, + { + "epoch": 2.67, + "learning_rate": 4.19118347194376e-06, + "loss": 2.066, + "step": 13132 + }, + { + "epoch": 2.67, + "learning_rate": 4.1897775276536464e-06, + "loss": 2.0703, + "step": 13133 + }, + { + "epoch": 2.67, + "learning_rate": 4.1883717567225725e-06, + "loss": 2.0327, + "step": 13134 + }, + { + "epoch": 2.67, + "learning_rate": 4.186966159192481e-06, + "loss": 2.0877, + "step": 13135 + }, + { + "epoch": 2.67, + "learning_rate": 4.185560735105318e-06, + "loss": 2.0766, + "step": 13136 + }, + { + "epoch": 2.67, + "learning_rate": 4.184155484503007e-06, + "loss": 2.1609, + "step": 13137 + }, + { + "epoch": 2.67, + "learning_rate": 4.182750407427479e-06, + "loss": 2.0818, + "step": 13138 + }, + { + "epoch": 2.67, + "learning_rate": 4.181345503920659e-06, + "loss": 2.1227, + "step": 13139 + }, + { + "epoch": 2.67, + "learning_rate": 4.179940774024469e-06, + "loss": 2.0143, + "step": 13140 + }, + { + "epoch": 2.67, + "learning_rate": 4.178536217780813e-06, + "loss": 2.143, + "step": 13141 + }, + { + "epoch": 2.67, + "learning_rate": 4.177131835231607e-06, + "loss": 2.1168, + "step": 13142 + }, + { + "epoch": 2.67, + "learning_rate": 4.175727626418741e-06, + "loss": 2.1618, + "step": 13143 + }, + { + "epoch": 2.67, + "learning_rate": 4.174323591384128e-06, + "loss": 2.0886, + "step": 13144 + }, + { + "epoch": 2.67, + "learning_rate": 4.17291973016965e-06, + "loss": 2.1091, + "step": 13145 + }, + { + "epoch": 2.67, + "learning_rate": 4.1715160428172e-06, + "loss": 2.1975, + "step": 13146 + }, + { + "epoch": 2.67, + "learning_rate": 4.170112529368652e-06, + "loss": 2.1581, + "step": 13147 + }, + { + "epoch": 2.67, + "learning_rate": 4.168709189865886e-06, + "loss": 2.1307, + "step": 13148 + }, + { + "epoch": 2.67, + "learning_rate": 4.1673060243507755e-06, + "loss": 2.1989, + "step": 13149 + }, + { + "epoch": 2.67, + "learning_rate": 4.165903032865188e-06, + "loss": 2.0675, + "step": 13150 + }, + { + "epoch": 2.67, + "learning_rate": 4.1645002154509774e-06, + "loss": 2.0206, + "step": 13151 + }, + { + "epoch": 2.67, + "learning_rate": 4.1630975721500045e-06, + "loss": 2.1688, + "step": 13152 + }, + { + "epoch": 2.67, + "learning_rate": 4.1616951030041175e-06, + "loss": 2.1288, + "step": 13153 + }, + { + "epoch": 2.67, + "learning_rate": 4.160292808055167e-06, + "loss": 2.1388, + "step": 13154 + }, + { + "epoch": 2.67, + "learning_rate": 4.158890687344986e-06, + "loss": 2.1056, + "step": 13155 + }, + { + "epoch": 2.67, + "learning_rate": 4.157488740915413e-06, + "loss": 2.0981, + "step": 13156 + }, + { + "epoch": 2.67, + "learning_rate": 4.15608696880828e-06, + "loss": 2.162, + "step": 13157 + }, + { + "epoch": 2.67, + "learning_rate": 4.154685371065407e-06, + "loss": 2.1148, + "step": 13158 + }, + { + "epoch": 2.67, + "learning_rate": 4.153283947728614e-06, + "loss": 2.0723, + "step": 13159 + }, + { + "epoch": 2.67, + "learning_rate": 4.151882698839718e-06, + "loss": 2.1761, + "step": 13160 + }, + { + "epoch": 2.67, + "learning_rate": 4.150481624440528e-06, + "loss": 2.1555, + "step": 13161 + }, + { + "epoch": 2.67, + "learning_rate": 4.149080724572842e-06, + "loss": 2.0429, + "step": 13162 + }, + { + "epoch": 2.67, + "learning_rate": 4.147679999278465e-06, + "loss": 2.0546, + "step": 13163 + }, + { + "epoch": 2.67, + "learning_rate": 4.1462794485991864e-06, + "loss": 2.147, + "step": 13164 + }, + { + "epoch": 2.67, + "learning_rate": 4.144879072576799e-06, + "loss": 2.1381, + "step": 13165 + }, + { + "epoch": 2.67, + "learning_rate": 4.143478871253079e-06, + "loss": 2.1512, + "step": 13166 + }, + { + "epoch": 2.67, + "learning_rate": 4.142078844669812e-06, + "loss": 2.1648, + "step": 13167 + }, + { + "epoch": 2.67, + "learning_rate": 4.1406789928687585e-06, + "loss": 2.1173, + "step": 13168 + }, + { + "epoch": 2.67, + "learning_rate": 4.1392793158917e-06, + "loss": 2.0982, + "step": 13169 + }, + { + "epoch": 2.67, + "learning_rate": 4.137879813780388e-06, + "loss": 2.1254, + "step": 13170 + }, + { + "epoch": 2.67, + "learning_rate": 4.136480486576587e-06, + "loss": 2.1254, + "step": 13171 + }, + { + "epoch": 2.67, + "learning_rate": 4.135081334322041e-06, + "loss": 2.0805, + "step": 13172 + }, + { + "epoch": 2.67, + "learning_rate": 4.1336823570585e-06, + "loss": 2.052, + "step": 13173 + }, + { + "epoch": 2.67, + "learning_rate": 4.132283554827704e-06, + "loss": 2.0668, + "step": 13174 + }, + { + "epoch": 2.67, + "learning_rate": 4.130884927671396e-06, + "loss": 2.0621, + "step": 13175 + }, + { + "epoch": 2.67, + "learning_rate": 4.129486475631295e-06, + "loss": 2.1568, + "step": 13176 + }, + { + "epoch": 2.67, + "learning_rate": 4.128088198749134e-06, + "loss": 2.0378, + "step": 13177 + }, + { + "epoch": 2.67, + "learning_rate": 4.12669009706663e-06, + "loss": 2.1568, + "step": 13178 + }, + { + "epoch": 2.68, + "learning_rate": 4.125292170625504e-06, + "loss": 2.1825, + "step": 13179 + }, + { + "epoch": 2.68, + "learning_rate": 4.123894419467457e-06, + "loss": 2.0781, + "step": 13180 + }, + { + "epoch": 2.68, + "learning_rate": 4.1224968436341986e-06, + "loss": 2.2367, + "step": 13181 + }, + { + "epoch": 2.68, + "learning_rate": 4.121099443167431e-06, + "loss": 2.0965, + "step": 13182 + }, + { + "epoch": 2.68, + "learning_rate": 4.119702218108838e-06, + "loss": 2.118, + "step": 13183 + }, + { + "epoch": 2.68, + "learning_rate": 4.1183051685001206e-06, + "loss": 2.1087, + "step": 13184 + }, + { + "epoch": 2.68, + "learning_rate": 4.116908294382956e-06, + "loss": 2.1898, + "step": 13185 + }, + { + "epoch": 2.68, + "learning_rate": 4.1155115957990244e-06, + "loss": 2.0298, + "step": 13186 + }, + { + "epoch": 2.68, + "learning_rate": 4.114115072789996e-06, + "loss": 2.0971, + "step": 13187 + }, + { + "epoch": 2.68, + "learning_rate": 4.112718725397541e-06, + "loss": 2.1721, + "step": 13188 + }, + { + "epoch": 2.68, + "learning_rate": 4.1113225536633216e-06, + "loss": 2.0524, + "step": 13189 + }, + { + "epoch": 2.68, + "learning_rate": 4.109926557628999e-06, + "loss": 2.0507, + "step": 13190 + }, + { + "epoch": 2.68, + "learning_rate": 4.108530737336219e-06, + "loss": 2.082, + "step": 13191 + }, + { + "epoch": 2.68, + "learning_rate": 4.107135092826632e-06, + "loss": 2.0948, + "step": 13192 + }, + { + "epoch": 2.68, + "learning_rate": 4.105739624141878e-06, + "loss": 2.0497, + "step": 13193 + }, + { + "epoch": 2.68, + "learning_rate": 4.1043443313235986e-06, + "loss": 2.0585, + "step": 13194 + }, + { + "epoch": 2.68, + "learning_rate": 4.102949214413418e-06, + "loss": 2.0685, + "step": 13195 + }, + { + "epoch": 2.68, + "learning_rate": 4.1015542734529656e-06, + "loss": 2.0874, + "step": 13196 + }, + { + "epoch": 2.68, + "learning_rate": 4.100159508483865e-06, + "loss": 2.1475, + "step": 13197 + }, + { + "epoch": 2.68, + "learning_rate": 4.098764919547725e-06, + "loss": 2.0914, + "step": 13198 + }, + { + "epoch": 2.68, + "learning_rate": 4.097370506686159e-06, + "loss": 2.0792, + "step": 13199 + }, + { + "epoch": 2.68, + "learning_rate": 4.095976269940774e-06, + "loss": 2.0836, + "step": 13200 + }, + { + "epoch": 2.68, + "learning_rate": 4.094582209353169e-06, + "loss": 2.1724, + "step": 13201 + }, + { + "epoch": 2.68, + "learning_rate": 4.093188324964935e-06, + "loss": 2.0495, + "step": 13202 + }, + { + "epoch": 2.68, + "learning_rate": 4.091794616817663e-06, + "loss": 2.1386, + "step": 13203 + }, + { + "epoch": 2.68, + "learning_rate": 4.09040108495294e-06, + "loss": 2.127, + "step": 13204 + }, + { + "epoch": 2.68, + "learning_rate": 4.089007729412343e-06, + "loss": 2.1365, + "step": 13205 + }, + { + "epoch": 2.68, + "learning_rate": 4.087614550237442e-06, + "loss": 2.1275, + "step": 13206 + }, + { + "epoch": 2.68, + "learning_rate": 4.086221547469812e-06, + "loss": 2.1509, + "step": 13207 + }, + { + "epoch": 2.68, + "learning_rate": 4.084828721151004e-06, + "loss": 2.0612, + "step": 13208 + }, + { + "epoch": 2.68, + "learning_rate": 4.08343607132259e-06, + "loss": 2.1369, + "step": 13209 + }, + { + "epoch": 2.68, + "learning_rate": 4.082043598026113e-06, + "loss": 2.124, + "step": 13210 + }, + { + "epoch": 2.68, + "learning_rate": 4.080651301303126e-06, + "loss": 2.1571, + "step": 13211 + }, + { + "epoch": 2.68, + "learning_rate": 4.079259181195163e-06, + "loss": 2.1018, + "step": 13212 + }, + { + "epoch": 2.68, + "learning_rate": 4.077867237743767e-06, + "loss": 2.1037, + "step": 13213 + }, + { + "epoch": 2.68, + "learning_rate": 4.076475470990465e-06, + "loss": 2.1069, + "step": 13214 + }, + { + "epoch": 2.68, + "learning_rate": 4.075083880976791e-06, + "loss": 2.2019, + "step": 13215 + }, + { + "epoch": 2.68, + "learning_rate": 4.073692467744256e-06, + "loss": 2.1249, + "step": 13216 + }, + { + "epoch": 2.68, + "learning_rate": 4.072301231334379e-06, + "loss": 2.2068, + "step": 13217 + }, + { + "epoch": 2.68, + "learning_rate": 4.070910171788672e-06, + "loss": 2.0844, + "step": 13218 + }, + { + "epoch": 2.68, + "learning_rate": 4.069519289148641e-06, + "loss": 2.1501, + "step": 13219 + }, + { + "epoch": 2.68, + "learning_rate": 4.06812858345578e-06, + "loss": 2.1074, + "step": 13220 + }, + { + "epoch": 2.68, + "learning_rate": 4.066738054751588e-06, + "loss": 2.0401, + "step": 13221 + }, + { + "epoch": 2.68, + "learning_rate": 4.065347703077556e-06, + "loss": 2.1352, + "step": 13222 + }, + { + "epoch": 2.68, + "learning_rate": 4.06395752847516e-06, + "loss": 2.1166, + "step": 13223 + }, + { + "epoch": 2.68, + "learning_rate": 4.0625675309858824e-06, + "loss": 2.1127, + "step": 13224 + }, + { + "epoch": 2.68, + "learning_rate": 4.061177710651198e-06, + "loss": 2.0956, + "step": 13225 + }, + { + "epoch": 2.68, + "learning_rate": 4.0597880675125774e-06, + "loss": 2.0753, + "step": 13226 + }, + { + "epoch": 2.68, + "learning_rate": 4.058398601611478e-06, + "loss": 2.143, + "step": 13227 + }, + { + "epoch": 2.69, + "learning_rate": 4.057009312989356e-06, + "loss": 2.1005, + "step": 13228 + }, + { + "epoch": 2.69, + "learning_rate": 4.055620201687667e-06, + "loss": 2.1299, + "step": 13229 + }, + { + "epoch": 2.69, + "learning_rate": 4.054231267747862e-06, + "loss": 2.1451, + "step": 13230 + }, + { + "epoch": 2.69, + "learning_rate": 4.052842511211371e-06, + "loss": 2.1119, + "step": 13231 + }, + { + "epoch": 2.69, + "learning_rate": 4.051453932119639e-06, + "loss": 2.1948, + "step": 13232 + }, + { + "epoch": 2.69, + "learning_rate": 4.0500655305140945e-06, + "loss": 2.0092, + "step": 13233 + }, + { + "epoch": 2.69, + "learning_rate": 4.048677306436166e-06, + "loss": 2.1701, + "step": 13234 + }, + { + "epoch": 2.69, + "learning_rate": 4.047289259927268e-06, + "loss": 2.043, + "step": 13235 + }, + { + "epoch": 2.69, + "learning_rate": 4.045901391028818e-06, + "loss": 2.1086, + "step": 13236 + }, + { + "epoch": 2.69, + "learning_rate": 4.044513699782226e-06, + "loss": 1.9915, + "step": 13237 + }, + { + "epoch": 2.69, + "learning_rate": 4.0431261862289016e-06, + "loss": 2.1186, + "step": 13238 + }, + { + "epoch": 2.69, + "learning_rate": 4.041738850410235e-06, + "loss": 2.1347, + "step": 13239 + }, + { + "epoch": 2.69, + "learning_rate": 4.040351692367623e-06, + "loss": 2.0579, + "step": 13240 + }, + { + "epoch": 2.69, + "learning_rate": 4.0389647121424605e-06, + "loss": 2.1265, + "step": 13241 + }, + { + "epoch": 2.69, + "learning_rate": 4.037577909776119e-06, + "loss": 2.1451, + "step": 13242 + }, + { + "epoch": 2.69, + "learning_rate": 4.036191285309985e-06, + "loss": 2.0744, + "step": 13243 + }, + { + "epoch": 2.69, + "learning_rate": 4.034804838785428e-06, + "loss": 2.143, + "step": 13244 + }, + { + "epoch": 2.69, + "learning_rate": 4.033418570243819e-06, + "loss": 2.1208, + "step": 13245 + }, + { + "epoch": 2.69, + "learning_rate": 4.032032479726514e-06, + "loss": 2.1369, + "step": 13246 + }, + { + "epoch": 2.69, + "learning_rate": 4.0306465672748716e-06, + "loss": 2.0796, + "step": 13247 + }, + { + "epoch": 2.69, + "learning_rate": 4.029260832930245e-06, + "loss": 2.1126, + "step": 13248 + }, + { + "epoch": 2.69, + "learning_rate": 4.027875276733983e-06, + "loss": 2.1655, + "step": 13249 + }, + { + "epoch": 2.69, + "learning_rate": 4.026489898727419e-06, + "loss": 2.091, + "step": 13250 + }, + { + "epoch": 2.69, + "learning_rate": 4.025104698951896e-06, + "loss": 2.1581, + "step": 13251 + }, + { + "epoch": 2.69, + "learning_rate": 4.023719677448733e-06, + "loss": 2.1106, + "step": 13252 + }, + { + "epoch": 2.69, + "learning_rate": 4.022334834259269e-06, + "loss": 2.1455, + "step": 13253 + }, + { + "epoch": 2.69, + "learning_rate": 4.020950169424811e-06, + "loss": 2.1451, + "step": 13254 + }, + { + "epoch": 2.69, + "learning_rate": 4.0195656829866835e-06, + "loss": 2.0953, + "step": 13255 + }, + { + "epoch": 2.69, + "learning_rate": 4.018181374986186e-06, + "loss": 2.0993, + "step": 13256 + }, + { + "epoch": 2.69, + "learning_rate": 4.0167972454646275e-06, + "loss": 2.1358, + "step": 13257 + }, + { + "epoch": 2.69, + "learning_rate": 4.015413294463304e-06, + "loss": 2.0691, + "step": 13258 + }, + { + "epoch": 2.69, + "learning_rate": 4.014029522023511e-06, + "loss": 2.1125, + "step": 13259 + }, + { + "epoch": 2.69, + "learning_rate": 4.012645928186533e-06, + "loss": 2.1096, + "step": 13260 + }, + { + "epoch": 2.69, + "learning_rate": 4.011262512993652e-06, + "loss": 2.1294, + "step": 13261 + }, + { + "epoch": 2.69, + "learning_rate": 4.009879276486147e-06, + "loss": 2.1019, + "step": 13262 + }, + { + "epoch": 2.69, + "learning_rate": 4.008496218705291e-06, + "loss": 2.0901, + "step": 13263 + }, + { + "epoch": 2.69, + "learning_rate": 4.007113339692345e-06, + "loss": 2.0979, + "step": 13264 + }, + { + "epoch": 2.69, + "learning_rate": 4.005730639488572e-06, + "loss": 2.1089, + "step": 13265 + }, + { + "epoch": 2.69, + "learning_rate": 4.0043481181352316e-06, + "loss": 2.176, + "step": 13266 + }, + { + "epoch": 2.69, + "learning_rate": 4.002965775673567e-06, + "loss": 2.1508, + "step": 13267 + }, + { + "epoch": 2.69, + "learning_rate": 4.001583612144827e-06, + "loss": 2.1339, + "step": 13268 + }, + { + "epoch": 2.69, + "learning_rate": 4.00020162759025e-06, + "loss": 2.0609, + "step": 13269 + }, + { + "epoch": 2.69, + "learning_rate": 3.998819822051074e-06, + "loss": 2.0902, + "step": 13270 + }, + { + "epoch": 2.69, + "learning_rate": 3.997438195568521e-06, + "loss": 2.1074, + "step": 13271 + }, + { + "epoch": 2.69, + "learning_rate": 3.996056748183817e-06, + "loss": 2.1378, + "step": 13272 + }, + { + "epoch": 2.69, + "learning_rate": 3.994675479938183e-06, + "loss": 2.12, + "step": 13273 + }, + { + "epoch": 2.69, + "learning_rate": 3.993294390872832e-06, + "loss": 2.1189, + "step": 13274 + }, + { + "epoch": 2.69, + "learning_rate": 3.991913481028965e-06, + "loss": 2.082, + "step": 13275 + }, + { + "epoch": 2.69, + "learning_rate": 3.990532750447793e-06, + "loss": 2.1242, + "step": 13276 + }, + { + "epoch": 2.7, + "learning_rate": 3.989152199170501e-06, + "loss": 2.1187, + "step": 13277 + }, + { + "epoch": 2.7, + "learning_rate": 3.987771827238294e-06, + "loss": 2.1601, + "step": 13278 + }, + { + "epoch": 2.7, + "learning_rate": 3.986391634692347e-06, + "loss": 2.0839, + "step": 13279 + }, + { + "epoch": 2.7, + "learning_rate": 3.985011621573848e-06, + "loss": 2.1668, + "step": 13280 + }, + { + "epoch": 2.7, + "learning_rate": 3.983631787923969e-06, + "loss": 2.166, + "step": 13281 + }, + { + "epoch": 2.7, + "learning_rate": 3.982252133783878e-06, + "loss": 2.1354, + "step": 13282 + }, + { + "epoch": 2.7, + "learning_rate": 3.9808726591947425e-06, + "loss": 2.0425, + "step": 13283 + }, + { + "epoch": 2.7, + "learning_rate": 3.979493364197721e-06, + "loss": 2.1422, + "step": 13284 + }, + { + "epoch": 2.7, + "learning_rate": 3.978114248833971e-06, + "loss": 2.0158, + "step": 13285 + }, + { + "epoch": 2.7, + "learning_rate": 3.976735313144634e-06, + "loss": 2.0669, + "step": 13286 + }, + { + "epoch": 2.7, + "learning_rate": 3.975356557170857e-06, + "loss": 2.1535, + "step": 13287 + }, + { + "epoch": 2.7, + "learning_rate": 3.9739779809537775e-06, + "loss": 2.1452, + "step": 13288 + }, + { + "epoch": 2.7, + "learning_rate": 3.972599584534531e-06, + "loss": 2.0947, + "step": 13289 + }, + { + "epoch": 2.7, + "learning_rate": 3.971221367954239e-06, + "loss": 2.0953, + "step": 13290 + }, + { + "epoch": 2.7, + "learning_rate": 3.969843331254029e-06, + "loss": 2.1112, + "step": 13291 + }, + { + "epoch": 2.7, + "learning_rate": 3.968465474475006e-06, + "loss": 2.0796, + "step": 13292 + }, + { + "epoch": 2.7, + "learning_rate": 3.967087797658298e-06, + "loss": 2.114, + "step": 13293 + }, + { + "epoch": 2.7, + "learning_rate": 3.965710300844997e-06, + "loss": 2.0695, + "step": 13294 + }, + { + "epoch": 2.7, + "learning_rate": 3.9643329840762115e-06, + "loss": 2.0744, + "step": 13295 + }, + { + "epoch": 2.7, + "learning_rate": 3.96295584739303e-06, + "loss": 2.1157, + "step": 13296 + }, + { + "epoch": 2.7, + "learning_rate": 3.961578890836544e-06, + "loss": 2.1791, + "step": 13297 + }, + { + "epoch": 2.7, + "learning_rate": 3.960202114447839e-06, + "loss": 2.1026, + "step": 13298 + }, + { + "epoch": 2.7, + "learning_rate": 3.958825518267997e-06, + "loss": 2.2019, + "step": 13299 + }, + { + "epoch": 2.7, + "learning_rate": 3.957449102338083e-06, + "loss": 2.1748, + "step": 13300 + }, + { + "epoch": 2.7, + "learning_rate": 3.956072866699171e-06, + "loss": 2.0948, + "step": 13301 + }, + { + "epoch": 2.7, + "learning_rate": 3.954696811392321e-06, + "loss": 2.0596, + "step": 13302 + }, + { + "epoch": 2.7, + "learning_rate": 3.953320936458596e-06, + "loss": 2.0982, + "step": 13303 + }, + { + "epoch": 2.7, + "learning_rate": 3.951945241939038e-06, + "loss": 2.1283, + "step": 13304 + }, + { + "epoch": 2.7, + "learning_rate": 3.9505697278747e-06, + "loss": 2.0327, + "step": 13305 + }, + { + "epoch": 2.7, + "learning_rate": 3.949194394306624e-06, + "loss": 2.093, + "step": 13306 + }, + { + "epoch": 2.7, + "learning_rate": 3.947819241275842e-06, + "loss": 2.1043, + "step": 13307 + }, + { + "epoch": 2.7, + "learning_rate": 3.946444268823384e-06, + "loss": 2.1345, + "step": 13308 + }, + { + "epoch": 2.7, + "learning_rate": 3.945069476990278e-06, + "loss": 2.1599, + "step": 13309 + }, + { + "epoch": 2.7, + "learning_rate": 3.943694865817545e-06, + "loss": 2.0782, + "step": 13310 + }, + { + "epoch": 2.7, + "learning_rate": 3.9423204353461934e-06, + "loss": 2.2513, + "step": 13311 + }, + { + "epoch": 2.7, + "learning_rate": 3.940946185617234e-06, + "loss": 2.0907, + "step": 13312 + }, + { + "epoch": 2.7, + "learning_rate": 3.9395721166716705e-06, + "loss": 2.1252, + "step": 13313 + }, + { + "epoch": 2.7, + "learning_rate": 3.938198228550506e-06, + "loss": 2.0527, + "step": 13314 + }, + { + "epoch": 2.7, + "learning_rate": 3.936824521294726e-06, + "loss": 2.0641, + "step": 13315 + }, + { + "epoch": 2.7, + "learning_rate": 3.935450994945322e-06, + "loss": 2.1466, + "step": 13316 + }, + { + "epoch": 2.7, + "learning_rate": 3.934077649543267e-06, + "loss": 2.153, + "step": 13317 + }, + { + "epoch": 2.7, + "learning_rate": 3.932704485129552e-06, + "loss": 2.0926, + "step": 13318 + }, + { + "epoch": 2.7, + "learning_rate": 3.931331501745137e-06, + "loss": 2.1307, + "step": 13319 + }, + { + "epoch": 2.7, + "learning_rate": 3.929958699430993e-06, + "loss": 2.1364, + "step": 13320 + }, + { + "epoch": 2.7, + "learning_rate": 3.928586078228072e-06, + "loss": 2.0987, + "step": 13321 + }, + { + "epoch": 2.7, + "learning_rate": 3.927213638177342e-06, + "loss": 2.0749, + "step": 13322 + }, + { + "epoch": 2.7, + "learning_rate": 3.925841379319741e-06, + "loss": 2.1074, + "step": 13323 + }, + { + "epoch": 2.7, + "learning_rate": 3.924469301696221e-06, + "loss": 2.1194, + "step": 13324 + }, + { + "epoch": 2.7, + "learning_rate": 3.923097405347713e-06, + "loss": 1.9836, + "step": 13325 + }, + { + "epoch": 2.7, + "learning_rate": 3.921725690315154e-06, + "loss": 2.0466, + "step": 13326 + }, + { + "epoch": 2.71, + "learning_rate": 3.920354156639471e-06, + "loss": 2.1545, + "step": 13327 + }, + { + "epoch": 2.71, + "learning_rate": 3.918982804361591e-06, + "loss": 2.0954, + "step": 13328 + }, + { + "epoch": 2.71, + "learning_rate": 3.9176116335224225e-06, + "loss": 2.167, + "step": 13329 + }, + { + "epoch": 2.71, + "learning_rate": 3.9162406441628806e-06, + "loss": 2.1349, + "step": 13330 + }, + { + "epoch": 2.71, + "learning_rate": 3.914869836323874e-06, + "loss": 2.1349, + "step": 13331 + }, + { + "epoch": 2.71, + "learning_rate": 3.913499210046303e-06, + "loss": 2.1063, + "step": 13332 + }, + { + "epoch": 2.71, + "learning_rate": 3.912128765371058e-06, + "loss": 2.1081, + "step": 13333 + }, + { + "epoch": 2.71, + "learning_rate": 3.910758502339033e-06, + "loss": 2.1312, + "step": 13334 + }, + { + "epoch": 2.71, + "learning_rate": 3.909388420991113e-06, + "loss": 2.114, + "step": 13335 + }, + { + "epoch": 2.71, + "learning_rate": 3.908018521368171e-06, + "loss": 2.0518, + "step": 13336 + }, + { + "epoch": 2.71, + "learning_rate": 3.9066488035110905e-06, + "loss": 2.2206, + "step": 13337 + }, + { + "epoch": 2.71, + "learning_rate": 3.905279267460731e-06, + "loss": 2.0972, + "step": 13338 + }, + { + "epoch": 2.71, + "learning_rate": 3.903909913257962e-06, + "loss": 2.0797, + "step": 13339 + }, + { + "epoch": 2.71, + "learning_rate": 3.902540740943632e-06, + "loss": 2.1718, + "step": 13340 + }, + { + "epoch": 2.71, + "learning_rate": 3.901171750558599e-06, + "loss": 2.0582, + "step": 13341 + }, + { + "epoch": 2.71, + "learning_rate": 3.899802942143709e-06, + "loss": 2.0782, + "step": 13342 + }, + { + "epoch": 2.71, + "learning_rate": 3.898434315739805e-06, + "loss": 2.1028, + "step": 13343 + }, + { + "epoch": 2.71, + "learning_rate": 3.897065871387717e-06, + "loss": 2.122, + "step": 13344 + }, + { + "epoch": 2.71, + "learning_rate": 3.895697609128278e-06, + "loss": 2.168, + "step": 13345 + }, + { + "epoch": 2.71, + "learning_rate": 3.894329529002313e-06, + "loss": 2.1429, + "step": 13346 + }, + { + "epoch": 2.71, + "learning_rate": 3.892961631050645e-06, + "loss": 2.1252, + "step": 13347 + }, + { + "epoch": 2.71, + "learning_rate": 3.891593915314079e-06, + "loss": 2.1346, + "step": 13348 + }, + { + "epoch": 2.71, + "learning_rate": 3.890226381833428e-06, + "loss": 2.0519, + "step": 13349 + }, + { + "epoch": 2.71, + "learning_rate": 3.8888590306495015e-06, + "loss": 2.116, + "step": 13350 + }, + { + "epoch": 2.71, + "learning_rate": 3.887491861803085e-06, + "loss": 2.1646, + "step": 13351 + }, + { + "epoch": 2.71, + "learning_rate": 3.886124875334977e-06, + "loss": 2.1418, + "step": 13352 + }, + { + "epoch": 2.71, + "learning_rate": 3.884758071285963e-06, + "loss": 1.9917, + "step": 13353 + }, + { + "epoch": 2.71, + "learning_rate": 3.883391449696829e-06, + "loss": 2.0553, + "step": 13354 + }, + { + "epoch": 2.71, + "learning_rate": 3.8820250106083434e-06, + "loss": 2.1231, + "step": 13355 + }, + { + "epoch": 2.71, + "learning_rate": 3.880658754061279e-06, + "loss": 2.0712, + "step": 13356 + }, + { + "epoch": 2.71, + "learning_rate": 3.879292680096402e-06, + "loss": 2.134, + "step": 13357 + }, + { + "epoch": 2.71, + "learning_rate": 3.8779267887544725e-06, + "loss": 2.1012, + "step": 13358 + }, + { + "epoch": 2.71, + "learning_rate": 3.876561080076241e-06, + "loss": 2.0697, + "step": 13359 + }, + { + "epoch": 2.71, + "learning_rate": 3.8751955541024625e-06, + "loss": 2.1318, + "step": 13360 + }, + { + "epoch": 2.71, + "learning_rate": 3.873830210873867e-06, + "loss": 2.1176, + "step": 13361 + }, + { + "epoch": 2.71, + "learning_rate": 3.872465050431208e-06, + "loss": 2.0635, + "step": 13362 + }, + { + "epoch": 2.71, + "learning_rate": 3.871100072815207e-06, + "loss": 2.0831, + "step": 13363 + }, + { + "epoch": 2.71, + "learning_rate": 3.869735278066599e-06, + "loss": 2.1683, + "step": 13364 + }, + { + "epoch": 2.71, + "learning_rate": 3.8683706662260945e-06, + "loss": 2.1711, + "step": 13365 + }, + { + "epoch": 2.71, + "learning_rate": 3.867006237334416e-06, + "loss": 2.194, + "step": 13366 + }, + { + "epoch": 2.71, + "learning_rate": 3.865641991432274e-06, + "loss": 2.1764, + "step": 13367 + }, + { + "epoch": 2.71, + "learning_rate": 3.864277928560375e-06, + "loss": 2.2076, + "step": 13368 + }, + { + "epoch": 2.71, + "learning_rate": 3.862914048759412e-06, + "loss": 2.0545, + "step": 13369 + }, + { + "epoch": 2.71, + "learning_rate": 3.861550352070083e-06, + "loss": 2.1385, + "step": 13370 + }, + { + "epoch": 2.71, + "learning_rate": 3.860186838533077e-06, + "loss": 2.1082, + "step": 13371 + }, + { + "epoch": 2.71, + "learning_rate": 3.858823508189078e-06, + "loss": 2.1865, + "step": 13372 + }, + { + "epoch": 2.71, + "learning_rate": 3.85746036107876e-06, + "loss": 2.1475, + "step": 13373 + }, + { + "epoch": 2.71, + "learning_rate": 3.856097397242795e-06, + "loss": 2.0989, + "step": 13374 + }, + { + "epoch": 2.71, + "learning_rate": 3.854734616721857e-06, + "loss": 2.0522, + "step": 13375 + }, + { + "epoch": 2.72, + "learning_rate": 3.853372019556596e-06, + "loss": 2.1225, + "step": 13376 + }, + { + "epoch": 2.72, + "learning_rate": 3.852009605787674e-06, + "loss": 2.1073, + "step": 13377 + }, + { + "epoch": 2.72, + "learning_rate": 3.8506473754557415e-06, + "loss": 2.1249, + "step": 13378 + }, + { + "epoch": 2.72, + "learning_rate": 3.849285328601445e-06, + "loss": 2.1116, + "step": 13379 + }, + { + "epoch": 2.72, + "learning_rate": 3.847923465265418e-06, + "loss": 2.0845, + "step": 13380 + }, + { + "epoch": 2.72, + "learning_rate": 3.846561785488296e-06, + "loss": 2.1501, + "step": 13381 + }, + { + "epoch": 2.72, + "learning_rate": 3.84520028931071e-06, + "loss": 2.1158, + "step": 13382 + }, + { + "epoch": 2.72, + "learning_rate": 3.843838976773285e-06, + "loss": 2.0873, + "step": 13383 + }, + { + "epoch": 2.72, + "learning_rate": 3.84247784791663e-06, + "loss": 2.0818, + "step": 13384 + }, + { + "epoch": 2.72, + "learning_rate": 3.841116902781365e-06, + "loss": 2.1826, + "step": 13385 + }, + { + "epoch": 2.72, + "learning_rate": 3.839756141408088e-06, + "loss": 2.0996, + "step": 13386 + }, + { + "epoch": 2.72, + "learning_rate": 3.83839556383741e-06, + "loss": 2.1204, + "step": 13387 + }, + { + "epoch": 2.72, + "learning_rate": 3.837035170109919e-06, + "loss": 2.1206, + "step": 13388 + }, + { + "epoch": 2.72, + "learning_rate": 3.835674960266206e-06, + "loss": 2.17, + "step": 13389 + }, + { + "epoch": 2.72, + "learning_rate": 3.834314934346861e-06, + "loss": 2.0924, + "step": 13390 + }, + { + "epoch": 2.72, + "learning_rate": 3.8329550923924555e-06, + "loss": 2.0858, + "step": 13391 + }, + { + "epoch": 2.72, + "learning_rate": 3.831595434443566e-06, + "loss": 2.0103, + "step": 13392 + }, + { + "epoch": 2.72, + "learning_rate": 3.8302359605407604e-06, + "loss": 2.1791, + "step": 13393 + }, + { + "epoch": 2.72, + "learning_rate": 3.828876670724605e-06, + "loss": 2.0294, + "step": 13394 + }, + { + "epoch": 2.72, + "learning_rate": 3.8275175650356485e-06, + "loss": 2.093, + "step": 13395 + }, + { + "epoch": 2.72, + "learning_rate": 3.82615864351445e-06, + "loss": 2.1118, + "step": 13396 + }, + { + "epoch": 2.72, + "learning_rate": 3.8247999062015495e-06, + "loss": 2.1282, + "step": 13397 + }, + { + "epoch": 2.72, + "learning_rate": 3.8234413531374956e-06, + "loss": 2.1723, + "step": 13398 + }, + { + "epoch": 2.72, + "learning_rate": 3.822082984362815e-06, + "loss": 2.1487, + "step": 13399 + }, + { + "epoch": 2.72, + "learning_rate": 3.820724799918044e-06, + "loss": 2.1508, + "step": 13400 + }, + { + "epoch": 2.72, + "learning_rate": 3.819366799843697e-06, + "loss": 2.0686, + "step": 13401 + }, + { + "epoch": 2.72, + "learning_rate": 3.818008984180304e-06, + "loss": 2.1184, + "step": 13402 + }, + { + "epoch": 2.72, + "learning_rate": 3.81665135296837e-06, + "loss": 2.0163, + "step": 13403 + }, + { + "epoch": 2.72, + "learning_rate": 3.815293906248409e-06, + "loss": 2.0478, + "step": 13404 + }, + { + "epoch": 2.72, + "learning_rate": 3.8139366440609148e-06, + "loss": 2.0986, + "step": 13405 + }, + { + "epoch": 2.72, + "learning_rate": 3.8125795664463892e-06, + "loss": 2.1162, + "step": 13406 + }, + { + "epoch": 2.72, + "learning_rate": 3.811222673445323e-06, + "loss": 2.0969, + "step": 13407 + }, + { + "epoch": 2.72, + "learning_rate": 3.8098659650982027e-06, + "loss": 2.2045, + "step": 13408 + }, + { + "epoch": 2.72, + "learning_rate": 3.8085094414455036e-06, + "loss": 2.0818, + "step": 13409 + }, + { + "epoch": 2.72, + "learning_rate": 3.807153102527704e-06, + "loss": 2.1083, + "step": 13410 + }, + { + "epoch": 2.72, + "learning_rate": 3.805796948385272e-06, + "loss": 2.0392, + "step": 13411 + }, + { + "epoch": 2.72, + "learning_rate": 3.804440979058673e-06, + "loss": 2.0682, + "step": 13412 + }, + { + "epoch": 2.72, + "learning_rate": 3.8030851945883617e-06, + "loss": 2.0629, + "step": 13413 + }, + { + "epoch": 2.72, + "learning_rate": 3.8017295950147903e-06, + "loss": 2.0507, + "step": 13414 + }, + { + "epoch": 2.72, + "learning_rate": 3.8003741803784077e-06, + "loss": 2.1594, + "step": 13415 + }, + { + "epoch": 2.72, + "learning_rate": 3.7990189507196586e-06, + "loss": 2.1139, + "step": 13416 + }, + { + "epoch": 2.72, + "learning_rate": 3.79766390607897e-06, + "loss": 2.0961, + "step": 13417 + }, + { + "epoch": 2.72, + "learning_rate": 3.796309046496779e-06, + "loss": 2.0623, + "step": 13418 + }, + { + "epoch": 2.72, + "learning_rate": 3.7949543720135108e-06, + "loss": 2.1858, + "step": 13419 + }, + { + "epoch": 2.72, + "learning_rate": 3.7935998826695797e-06, + "loss": 2.0986, + "step": 13420 + }, + { + "epoch": 2.72, + "learning_rate": 3.7922455785054024e-06, + "loss": 2.1502, + "step": 13421 + }, + { + "epoch": 2.72, + "learning_rate": 3.7908914595613868e-06, + "loss": 2.1171, + "step": 13422 + }, + { + "epoch": 2.72, + "learning_rate": 3.7895375258779386e-06, + "loss": 2.0046, + "step": 13423 + }, + { + "epoch": 2.72, + "learning_rate": 3.78818377749545e-06, + "loss": 2.0922, + "step": 13424 + }, + { + "epoch": 2.73, + "learning_rate": 3.7868302144543146e-06, + "loss": 2.1593, + "step": 13425 + }, + { + "epoch": 2.73, + "learning_rate": 3.785476836794918e-06, + "loss": 2.0649, + "step": 13426 + }, + { + "epoch": 2.73, + "learning_rate": 3.7841236445576455e-06, + "loss": 2.1683, + "step": 13427 + }, + { + "epoch": 2.73, + "learning_rate": 3.7827706377828657e-06, + "loss": 2.0757, + "step": 13428 + }, + { + "epoch": 2.73, + "learning_rate": 3.781417816510953e-06, + "loss": 2.1187, + "step": 13429 + }, + { + "epoch": 2.73, + "learning_rate": 3.7800651807822632e-06, + "loss": 2.1168, + "step": 13430 + }, + { + "epoch": 2.73, + "learning_rate": 3.7787127306371663e-06, + "loss": 2.062, + "step": 13431 + }, + { + "epoch": 2.73, + "learning_rate": 3.7773604661160067e-06, + "loss": 2.1033, + "step": 13432 + }, + { + "epoch": 2.73, + "learning_rate": 3.776008387259139e-06, + "loss": 2.0655, + "step": 13433 + }, + { + "epoch": 2.73, + "learning_rate": 3.7746564941068964e-06, + "loss": 2.1405, + "step": 13434 + }, + { + "epoch": 2.73, + "learning_rate": 3.7733047866996197e-06, + "loss": 2.1334, + "step": 13435 + }, + { + "epoch": 2.73, + "learning_rate": 3.7719532650776393e-06, + "loss": 2.1096, + "step": 13436 + }, + { + "epoch": 2.73, + "learning_rate": 3.7706019292812845e-06, + "loss": 2.0875, + "step": 13437 + }, + { + "epoch": 2.73, + "learning_rate": 3.7692507793508672e-06, + "loss": 2.0412, + "step": 13438 + }, + { + "epoch": 2.73, + "learning_rate": 3.7678998153267055e-06, + "loss": 2.0693, + "step": 13439 + }, + { + "epoch": 2.73, + "learning_rate": 3.7665490372491088e-06, + "loss": 2.1556, + "step": 13440 + }, + { + "epoch": 2.73, + "learning_rate": 3.765198445158378e-06, + "loss": 2.1523, + "step": 13441 + }, + { + "epoch": 2.73, + "learning_rate": 3.763848039094815e-06, + "loss": 2.1098, + "step": 13442 + }, + { + "epoch": 2.73, + "learning_rate": 3.7624978190987062e-06, + "loss": 2.14, + "step": 13443 + }, + { + "epoch": 2.73, + "learning_rate": 3.761147785210344e-06, + "loss": 2.0485, + "step": 13444 + }, + { + "epoch": 2.73, + "learning_rate": 3.759797937469998e-06, + "loss": 2.0676, + "step": 13445 + }, + { + "epoch": 2.73, + "learning_rate": 3.7584482759179573e-06, + "loss": 2.1623, + "step": 13446 + }, + { + "epoch": 2.73, + "learning_rate": 3.757098800594483e-06, + "loss": 2.1585, + "step": 13447 + }, + { + "epoch": 2.73, + "learning_rate": 3.7557495115398446e-06, + "loss": 2.1868, + "step": 13448 + }, + { + "epoch": 2.73, + "learning_rate": 3.7544004087942943e-06, + "loss": 2.1057, + "step": 13449 + }, + { + "epoch": 2.73, + "learning_rate": 3.753051492398089e-06, + "loss": 2.2211, + "step": 13450 + }, + { + "epoch": 2.73, + "learning_rate": 3.751702762391475e-06, + "loss": 2.1448, + "step": 13451 + }, + { + "epoch": 2.73, + "learning_rate": 3.750354218814699e-06, + "loss": 2.1164, + "step": 13452 + }, + { + "epoch": 2.73, + "learning_rate": 3.7490058617079895e-06, + "loss": 2.1079, + "step": 13453 + }, + { + "epoch": 2.73, + "learning_rate": 3.7476576911115814e-06, + "loss": 2.1157, + "step": 13454 + }, + { + "epoch": 2.73, + "learning_rate": 3.7463097070656995e-06, + "loss": 2.1374, + "step": 13455 + }, + { + "epoch": 2.73, + "learning_rate": 3.744961909610567e-06, + "loss": 2.077, + "step": 13456 + }, + { + "epoch": 2.73, + "learning_rate": 3.743614298786391e-06, + "loss": 2.1154, + "step": 13457 + }, + { + "epoch": 2.73, + "learning_rate": 3.7422668746333846e-06, + "loss": 2.1676, + "step": 13458 + }, + { + "epoch": 2.73, + "learning_rate": 3.740919637191752e-06, + "loss": 2.1575, + "step": 13459 + }, + { + "epoch": 2.73, + "learning_rate": 3.739572586501686e-06, + "loss": 2.1409, + "step": 13460 + }, + { + "epoch": 2.73, + "learning_rate": 3.73822572260338e-06, + "loss": 2.1321, + "step": 13461 + }, + { + "epoch": 2.73, + "learning_rate": 3.7368790455370217e-06, + "loss": 2.0577, + "step": 13462 + }, + { + "epoch": 2.73, + "learning_rate": 3.7355325553427948e-06, + "loss": 2.1338, + "step": 13463 + }, + { + "epoch": 2.73, + "learning_rate": 3.7341862520608672e-06, + "loss": 2.1872, + "step": 13464 + }, + { + "epoch": 2.73, + "learning_rate": 3.7328401357314113e-06, + "loss": 2.0975, + "step": 13465 + }, + { + "epoch": 2.73, + "learning_rate": 3.7314942063945925e-06, + "loss": 2.1101, + "step": 13466 + }, + { + "epoch": 2.73, + "learning_rate": 3.7301484640905717e-06, + "loss": 2.0831, + "step": 13467 + }, + { + "epoch": 2.73, + "learning_rate": 3.7288029088594947e-06, + "loss": 2.1591, + "step": 13468 + }, + { + "epoch": 2.73, + "learning_rate": 3.727457540741516e-06, + "loss": 2.1489, + "step": 13469 + }, + { + "epoch": 2.73, + "learning_rate": 3.7261123597767667e-06, + "loss": 2.0961, + "step": 13470 + }, + { + "epoch": 2.73, + "learning_rate": 3.724767366005397e-06, + "loss": 2.1291, + "step": 13471 + }, + { + "epoch": 2.73, + "learning_rate": 3.7234225594675266e-06, + "loss": 2.1203, + "step": 13472 + }, + { + "epoch": 2.73, + "learning_rate": 3.7220779402032857e-06, + "loss": 2.1643, + "step": 13473 + }, + { + "epoch": 2.74, + "learning_rate": 3.7207335082527896e-06, + "loss": 2.0966, + "step": 13474 + }, + { + "epoch": 2.74, + "learning_rate": 3.7193892636561545e-06, + "loss": 2.0866, + "step": 13475 + }, + { + "epoch": 2.74, + "learning_rate": 3.7180452064534876e-06, + "loss": 2.1473, + "step": 13476 + }, + { + "epoch": 2.74, + "learning_rate": 3.716701336684895e-06, + "loss": 2.156, + "step": 13477 + }, + { + "epoch": 2.74, + "learning_rate": 3.715357654390467e-06, + "loss": 2.1284, + "step": 13478 + }, + { + "epoch": 2.74, + "learning_rate": 3.7140141596102984e-06, + "loss": 2.0779, + "step": 13479 + }, + { + "epoch": 2.74, + "learning_rate": 3.7126708523844756e-06, + "loss": 2.0944, + "step": 13480 + }, + { + "epoch": 2.74, + "learning_rate": 3.7113277327530815e-06, + "loss": 2.0753, + "step": 13481 + }, + { + "epoch": 2.74, + "learning_rate": 3.7099848007561836e-06, + "loss": 2.1041, + "step": 13482 + }, + { + "epoch": 2.74, + "learning_rate": 3.708642056433854e-06, + "loss": 2.0677, + "step": 13483 + }, + { + "epoch": 2.74, + "learning_rate": 3.7072994998261604e-06, + "loss": 2.1168, + "step": 13484 + }, + { + "epoch": 2.74, + "learning_rate": 3.7059571309731522e-06, + "loss": 2.1918, + "step": 13485 + }, + { + "epoch": 2.74, + "learning_rate": 3.7046149499148866e-06, + "loss": 2.119, + "step": 13486 + }, + { + "epoch": 2.74, + "learning_rate": 3.703272956691409e-06, + "loss": 2.1508, + "step": 13487 + }, + { + "epoch": 2.74, + "learning_rate": 3.7019311513427647e-06, + "loss": 2.1654, + "step": 13488 + }, + { + "epoch": 2.74, + "learning_rate": 3.7005895339089814e-06, + "loss": 2.1565, + "step": 13489 + }, + { + "epoch": 2.74, + "learning_rate": 3.6992481044300934e-06, + "loss": 2.1709, + "step": 13490 + }, + { + "epoch": 2.74, + "learning_rate": 3.697906862946122e-06, + "loss": 2.0894, + "step": 13491 + }, + { + "epoch": 2.74, + "learning_rate": 3.6965658094970926e-06, + "loss": 2.1356, + "step": 13492 + }, + { + "epoch": 2.74, + "learning_rate": 3.695224944123009e-06, + "loss": 2.1002, + "step": 13493 + }, + { + "epoch": 2.74, + "learning_rate": 3.693884266863883e-06, + "loss": 2.1273, + "step": 13494 + }, + { + "epoch": 2.74, + "learning_rate": 3.692543777759715e-06, + "loss": 2.0818, + "step": 13495 + }, + { + "epoch": 2.74, + "learning_rate": 3.6912034768505056e-06, + "loss": 2.2055, + "step": 13496 + }, + { + "epoch": 2.74, + "learning_rate": 3.6898633641762384e-06, + "loss": 2.1519, + "step": 13497 + }, + { + "epoch": 2.74, + "learning_rate": 3.688523439776901e-06, + "loss": 2.1139, + "step": 13498 + }, + { + "epoch": 2.74, + "learning_rate": 3.6871837036924764e-06, + "loss": 2.208, + "step": 13499 + }, + { + "epoch": 2.74, + "learning_rate": 3.685844155962931e-06, + "loss": 2.087, + "step": 13500 + }, + { + "epoch": 2.74, + "learning_rate": 3.6845047966282364e-06, + "loss": 2.0899, + "step": 13501 + }, + { + "epoch": 2.74, + "learning_rate": 3.6831656257283555e-06, + "loss": 2.1066, + "step": 13502 + }, + { + "epoch": 2.74, + "learning_rate": 3.681826643303247e-06, + "loss": 2.0676, + "step": 13503 + }, + { + "epoch": 2.74, + "learning_rate": 3.6804878493928565e-06, + "loss": 2.0682, + "step": 13504 + }, + { + "epoch": 2.74, + "learning_rate": 3.6791492440371314e-06, + "loss": 2.1154, + "step": 13505 + }, + { + "epoch": 2.74, + "learning_rate": 3.677810827276014e-06, + "loss": 2.1315, + "step": 13506 + }, + { + "epoch": 2.74, + "learning_rate": 3.6764725991494387e-06, + "loss": 2.1349, + "step": 13507 + }, + { + "epoch": 2.74, + "learning_rate": 3.6751345596973308e-06, + "loss": 2.1338, + "step": 13508 + }, + { + "epoch": 2.74, + "learning_rate": 3.673796708959614e-06, + "loss": 2.0849, + "step": 13509 + }, + { + "epoch": 2.74, + "learning_rate": 3.6724590469762066e-06, + "loss": 2.0935, + "step": 13510 + }, + { + "epoch": 2.74, + "learning_rate": 3.6711215737870233e-06, + "loss": 2.1453, + "step": 13511 + }, + { + "epoch": 2.74, + "learning_rate": 3.6697842894319636e-06, + "loss": 2.0956, + "step": 13512 + }, + { + "epoch": 2.74, + "learning_rate": 3.6684471939509345e-06, + "loss": 2.1137, + "step": 13513 + }, + { + "epoch": 2.74, + "learning_rate": 3.6671102873838217e-06, + "loss": 2.1811, + "step": 13514 + }, + { + "epoch": 2.74, + "learning_rate": 3.6657735697705267e-06, + "loss": 2.0707, + "step": 13515 + }, + { + "epoch": 2.74, + "learning_rate": 3.6644370411509235e-06, + "loss": 2.0873, + "step": 13516 + }, + { + "epoch": 2.74, + "learning_rate": 3.6631007015648956e-06, + "loss": 2.1593, + "step": 13517 + }, + { + "epoch": 2.74, + "learning_rate": 3.6617645510523103e-06, + "loss": 2.1104, + "step": 13518 + }, + { + "epoch": 2.74, + "learning_rate": 3.6604285896530365e-06, + "loss": 2.1377, + "step": 13519 + }, + { + "epoch": 2.74, + "learning_rate": 3.6590928174069353e-06, + "loss": 2.1945, + "step": 13520 + }, + { + "epoch": 2.74, + "learning_rate": 3.6577572343538646e-06, + "loss": 2.0417, + "step": 13521 + }, + { + "epoch": 2.74, + "learning_rate": 3.6564218405336683e-06, + "loss": 2.1637, + "step": 13522 + }, + { + "epoch": 2.74, + "learning_rate": 3.6550866359861935e-06, + "loss": 2.1631, + "step": 13523 + }, + { + "epoch": 2.75, + "learning_rate": 3.653751620751279e-06, + "loss": 2.0917, + "step": 13524 + }, + { + "epoch": 2.75, + "learning_rate": 3.6524167948687595e-06, + "loss": 2.1311, + "step": 13525 + }, + { + "epoch": 2.75, + "learning_rate": 3.6510821583784572e-06, + "loss": 2.1126, + "step": 13526 + }, + { + "epoch": 2.75, + "learning_rate": 3.649747711320196e-06, + "loss": 2.2147, + "step": 13527 + }, + { + "epoch": 2.75, + "learning_rate": 3.6484134537337945e-06, + "loss": 2.1742, + "step": 13528 + }, + { + "epoch": 2.75, + "learning_rate": 3.647079385659058e-06, + "loss": 2.1655, + "step": 13529 + }, + { + "epoch": 2.75, + "learning_rate": 3.6457455071357918e-06, + "loss": 2.0871, + "step": 13530 + }, + { + "epoch": 2.75, + "learning_rate": 3.644411818203796e-06, + "loss": 2.0603, + "step": 13531 + }, + { + "epoch": 2.75, + "learning_rate": 3.643078318902866e-06, + "loss": 2.1201, + "step": 13532 + }, + { + "epoch": 2.75, + "learning_rate": 3.6417450092727847e-06, + "loss": 2.0391, + "step": 13533 + }, + { + "epoch": 2.75, + "learning_rate": 3.6404118893533357e-06, + "loss": 2.1151, + "step": 13534 + }, + { + "epoch": 2.75, + "learning_rate": 3.6390789591842966e-06, + "loss": 2.1021, + "step": 13535 + }, + { + "epoch": 2.75, + "learning_rate": 3.637746218805439e-06, + "loss": 2.0981, + "step": 13536 + }, + { + "epoch": 2.75, + "learning_rate": 3.636413668256523e-06, + "loss": 2.128, + "step": 13537 + }, + { + "epoch": 2.75, + "learning_rate": 3.6350813075773137e-06, + "loss": 2.1836, + "step": 13538 + }, + { + "epoch": 2.75, + "learning_rate": 3.633749136807554e-06, + "loss": 2.161, + "step": 13539 + }, + { + "epoch": 2.75, + "learning_rate": 3.632417155987006e-06, + "loss": 2.0824, + "step": 13540 + }, + { + "epoch": 2.75, + "learning_rate": 3.631085365155402e-06, + "loss": 2.0778, + "step": 13541 + }, + { + "epoch": 2.75, + "learning_rate": 3.6297537643524817e-06, + "loss": 2.061, + "step": 13542 + }, + { + "epoch": 2.75, + "learning_rate": 3.6284223536179787e-06, + "loss": 2.0596, + "step": 13543 + }, + { + "epoch": 2.75, + "learning_rate": 3.6270911329916113e-06, + "loss": 2.0682, + "step": 13544 + }, + { + "epoch": 2.75, + "learning_rate": 3.625760102513103e-06, + "loss": 2.1451, + "step": 13545 + }, + { + "epoch": 2.75, + "learning_rate": 3.624429262222168e-06, + "loss": 2.0853, + "step": 13546 + }, + { + "epoch": 2.75, + "learning_rate": 3.6230986121585177e-06, + "loss": 2.1021, + "step": 13547 + }, + { + "epoch": 2.75, + "learning_rate": 3.6217681523618464e-06, + "loss": 2.1046, + "step": 13548 + }, + { + "epoch": 2.75, + "learning_rate": 3.6204378828718558e-06, + "loss": 2.1087, + "step": 13549 + }, + { + "epoch": 2.75, + "learning_rate": 3.619107803728237e-06, + "loss": 2.0786, + "step": 13550 + }, + { + "epoch": 2.75, + "learning_rate": 3.617777914970678e-06, + "loss": 2.1883, + "step": 13551 + }, + { + "epoch": 2.75, + "learning_rate": 3.6164482166388515e-06, + "loss": 2.0748, + "step": 13552 + }, + { + "epoch": 2.75, + "learning_rate": 3.6151187087724403e-06, + "loss": 2.0868, + "step": 13553 + }, + { + "epoch": 2.75, + "learning_rate": 3.6137893914111013e-06, + "loss": 2.1352, + "step": 13554 + }, + { + "epoch": 2.75, + "learning_rate": 3.6124602645945096e-06, + "loss": 2.1058, + "step": 13555 + }, + { + "epoch": 2.75, + "learning_rate": 3.611131328362314e-06, + "loss": 2.1819, + "step": 13556 + }, + { + "epoch": 2.75, + "learning_rate": 3.6098025827541714e-06, + "loss": 2.11, + "step": 13557 + }, + { + "epoch": 2.75, + "learning_rate": 3.6084740278097217e-06, + "loss": 2.0603, + "step": 13558 + }, + { + "epoch": 2.75, + "learning_rate": 3.6071456635686084e-06, + "loss": 2.1622, + "step": 13559 + }, + { + "epoch": 2.75, + "learning_rate": 3.6058174900704646e-06, + "loss": 2.085, + "step": 13560 + }, + { + "epoch": 2.75, + "learning_rate": 3.6044895073549226e-06, + "loss": 2.1715, + "step": 13561 + }, + { + "epoch": 2.75, + "learning_rate": 3.6031617154615984e-06, + "loss": 2.0801, + "step": 13562 + }, + { + "epoch": 2.75, + "learning_rate": 3.6018341144301126e-06, + "loss": 2.1339, + "step": 13563 + }, + { + "epoch": 2.75, + "learning_rate": 3.6005067043000786e-06, + "loss": 2.14, + "step": 13564 + }, + { + "epoch": 2.75, + "learning_rate": 3.599179485111103e-06, + "loss": 2.1921, + "step": 13565 + }, + { + "epoch": 2.75, + "learning_rate": 3.5978524569027794e-06, + "loss": 2.1181, + "step": 13566 + }, + { + "epoch": 2.75, + "learning_rate": 3.596525619714707e-06, + "loss": 2.0489, + "step": 13567 + }, + { + "epoch": 2.75, + "learning_rate": 3.5951989735864766e-06, + "loss": 2.1644, + "step": 13568 + }, + { + "epoch": 2.75, + "learning_rate": 3.5938725185576662e-06, + "loss": 2.1351, + "step": 13569 + }, + { + "epoch": 2.75, + "learning_rate": 3.5925462546678548e-06, + "loss": 2.1973, + "step": 13570 + }, + { + "epoch": 2.75, + "learning_rate": 3.591220181956614e-06, + "loss": 2.1768, + "step": 13571 + }, + { + "epoch": 2.75, + "learning_rate": 3.5898943004635144e-06, + "loss": 2.1732, + "step": 13572 + }, + { + "epoch": 2.76, + "learning_rate": 3.588568610228108e-06, + "loss": 2.0901, + "step": 13573 + }, + { + "epoch": 2.76, + "learning_rate": 3.587243111289954e-06, + "loss": 2.091, + "step": 13574 + }, + { + "epoch": 2.76, + "learning_rate": 3.5859178036886e-06, + "loss": 2.1301, + "step": 13575 + }, + { + "epoch": 2.76, + "learning_rate": 3.584592687463594e-06, + "loss": 2.1107, + "step": 13576 + }, + { + "epoch": 2.76, + "learning_rate": 3.5832677626544645e-06, + "loss": 2.108, + "step": 13577 + }, + { + "epoch": 2.76, + "learning_rate": 3.5819430293007506e-06, + "loss": 2.1518, + "step": 13578 + }, + { + "epoch": 2.76, + "learning_rate": 3.580618487441969e-06, + "loss": 2.1562, + "step": 13579 + }, + { + "epoch": 2.76, + "learning_rate": 3.5792941371176537e-06, + "loss": 2.1565, + "step": 13580 + }, + { + "epoch": 2.76, + "learning_rate": 3.5779699783673074e-06, + "loss": 2.1185, + "step": 13581 + }, + { + "epoch": 2.76, + "learning_rate": 3.5766460112304467e-06, + "loss": 2.076, + "step": 13582 + }, + { + "epoch": 2.76, + "learning_rate": 3.575322235746568e-06, + "loss": 2.1161, + "step": 13583 + }, + { + "epoch": 2.76, + "learning_rate": 3.573998651955172e-06, + "loss": 2.0638, + "step": 13584 + }, + { + "epoch": 2.76, + "learning_rate": 3.5726752598957495e-06, + "loss": 2.1812, + "step": 13585 + }, + { + "epoch": 2.76, + "learning_rate": 3.571352059607791e-06, + "loss": 2.1133, + "step": 13586 + }, + { + "epoch": 2.76, + "learning_rate": 3.570029051130769e-06, + "loss": 2.167, + "step": 13587 + }, + { + "epoch": 2.76, + "learning_rate": 3.5687062345041623e-06, + "loss": 2.1441, + "step": 13588 + }, + { + "epoch": 2.76, + "learning_rate": 3.5673836097674386e-06, + "loss": 2.1179, + "step": 13589 + }, + { + "epoch": 2.76, + "learning_rate": 3.566061176960065e-06, + "loss": 2.0662, + "step": 13590 + }, + { + "epoch": 2.76, + "learning_rate": 3.5647389361214914e-06, + "loss": 2.1997, + "step": 13591 + }, + { + "epoch": 2.76, + "learning_rate": 3.5634168872911735e-06, + "loss": 2.1144, + "step": 13592 + }, + { + "epoch": 2.76, + "learning_rate": 3.5620950305085566e-06, + "loss": 2.071, + "step": 13593 + }, + { + "epoch": 2.76, + "learning_rate": 3.5607733658130814e-06, + "loss": 2.0472, + "step": 13594 + }, + { + "epoch": 2.76, + "learning_rate": 3.559451893244186e-06, + "loss": 2.0841, + "step": 13595 + }, + { + "epoch": 2.76, + "learning_rate": 3.558130612841291e-06, + "loss": 2.0846, + "step": 13596 + }, + { + "epoch": 2.76, + "learning_rate": 3.556809524643826e-06, + "loss": 2.0755, + "step": 13597 + }, + { + "epoch": 2.76, + "learning_rate": 3.5554886286911983e-06, + "loss": 2.1813, + "step": 13598 + }, + { + "epoch": 2.76, + "learning_rate": 3.5541679250228345e-06, + "loss": 2.1277, + "step": 13599 + }, + { + "epoch": 2.76, + "learning_rate": 3.5528474136781287e-06, + "loss": 2.143, + "step": 13600 + }, + { + "epoch": 2.76, + "learning_rate": 3.5515270946964877e-06, + "loss": 2.1502, + "step": 13601 + }, + { + "epoch": 2.76, + "learning_rate": 3.550206968117299e-06, + "loss": 2.1318, + "step": 13602 + }, + { + "epoch": 2.76, + "learning_rate": 3.548887033979955e-06, + "loss": 2.0861, + "step": 13603 + }, + { + "epoch": 2.76, + "learning_rate": 3.547567292323838e-06, + "loss": 2.1119, + "step": 13604 + }, + { + "epoch": 2.76, + "learning_rate": 3.546247743188328e-06, + "loss": 2.0139, + "step": 13605 + }, + { + "epoch": 2.76, + "learning_rate": 3.54492838661279e-06, + "loss": 2.1411, + "step": 13606 + }, + { + "epoch": 2.76, + "learning_rate": 3.5436092226365936e-06, + "loss": 2.1341, + "step": 13607 + }, + { + "epoch": 2.76, + "learning_rate": 3.5422902512990965e-06, + "loss": 2.0874, + "step": 13608 + }, + { + "epoch": 2.76, + "learning_rate": 3.5409714726396584e-06, + "loss": 2.114, + "step": 13609 + }, + { + "epoch": 2.76, + "learning_rate": 3.53965288669762e-06, + "loss": 2.0947, + "step": 13610 + }, + { + "epoch": 2.76, + "learning_rate": 3.5383344935123255e-06, + "loss": 2.1796, + "step": 13611 + }, + { + "epoch": 2.76, + "learning_rate": 3.537016293123118e-06, + "loss": 2.2071, + "step": 13612 + }, + { + "epoch": 2.76, + "learning_rate": 3.53569828556932e-06, + "loss": 2.165, + "step": 13613 + }, + { + "epoch": 2.76, + "learning_rate": 3.5343804708902597e-06, + "loss": 2.112, + "step": 13614 + }, + { + "epoch": 2.76, + "learning_rate": 3.5330628491252583e-06, + "loss": 2.0851, + "step": 13615 + }, + { + "epoch": 2.76, + "learning_rate": 3.5317454203136313e-06, + "loss": 2.1768, + "step": 13616 + }, + { + "epoch": 2.76, + "learning_rate": 3.5304281844946807e-06, + "loss": 2.2001, + "step": 13617 + }, + { + "epoch": 2.76, + "learning_rate": 3.529111141707713e-06, + "loss": 2.126, + "step": 13618 + }, + { + "epoch": 2.76, + "learning_rate": 3.5277942919920226e-06, + "loss": 2.1235, + "step": 13619 + }, + { + "epoch": 2.76, + "learning_rate": 3.5264776353869046e-06, + "loss": 2.0759, + "step": 13620 + }, + { + "epoch": 2.76, + "learning_rate": 3.525161171931638e-06, + "loss": 2.0886, + "step": 13621 + }, + { + "epoch": 2.77, + "learning_rate": 3.5238449016655076e-06, + "loss": 2.16, + "step": 13622 + }, + { + "epoch": 2.77, + "learning_rate": 3.5225288246277777e-06, + "loss": 2.0845, + "step": 13623 + }, + { + "epoch": 2.77, + "learning_rate": 3.5212129408577278e-06, + "loss": 2.1375, + "step": 13624 + }, + { + "epoch": 2.77, + "learning_rate": 3.5198972503946114e-06, + "loss": 2.1335, + "step": 13625 + }, + { + "epoch": 2.77, + "learning_rate": 3.518581753277691e-06, + "loss": 2.1437, + "step": 13626 + }, + { + "epoch": 2.77, + "learning_rate": 3.5172664495462104e-06, + "loss": 2.074, + "step": 13627 + }, + { + "epoch": 2.77, + "learning_rate": 3.515951339239416e-06, + "loss": 2.1041, + "step": 13628 + }, + { + "epoch": 2.77, + "learning_rate": 3.514636422396549e-06, + "loss": 2.0719, + "step": 13629 + }, + { + "epoch": 2.77, + "learning_rate": 3.5133216990568443e-06, + "loss": 2.086, + "step": 13630 + }, + { + "epoch": 2.77, + "learning_rate": 3.5120071692595216e-06, + "loss": 2.1744, + "step": 13631 + }, + { + "epoch": 2.77, + "learning_rate": 3.510692833043807e-06, + "loss": 2.1548, + "step": 13632 + }, + { + "epoch": 2.77, + "learning_rate": 3.5093786904489167e-06, + "loss": 2.1013, + "step": 13633 + }, + { + "epoch": 2.77, + "learning_rate": 3.5080647415140633e-06, + "loss": 2.098, + "step": 13634 + }, + { + "epoch": 2.77, + "learning_rate": 3.5067509862784455e-06, + "loss": 2.1495, + "step": 13635 + }, + { + "epoch": 2.77, + "learning_rate": 3.505437424781262e-06, + "loss": 2.1592, + "step": 13636 + }, + { + "epoch": 2.77, + "learning_rate": 3.504124057061712e-06, + "loss": 2.0624, + "step": 13637 + }, + { + "epoch": 2.77, + "learning_rate": 3.5028108831589736e-06, + "loss": 2.0578, + "step": 13638 + }, + { + "epoch": 2.77, + "learning_rate": 3.501497903112232e-06, + "loss": 2.1395, + "step": 13639 + }, + { + "epoch": 2.77, + "learning_rate": 3.5001851169606618e-06, + "loss": 2.0873, + "step": 13640 + }, + { + "epoch": 2.77, + "learning_rate": 3.4988725247434365e-06, + "loss": 2.1177, + "step": 13641 + }, + { + "epoch": 2.77, + "learning_rate": 3.4975601264997128e-06, + "loss": 2.139, + "step": 13642 + }, + { + "epoch": 2.77, + "learning_rate": 3.496247922268653e-06, + "loss": 2.1533, + "step": 13643 + }, + { + "epoch": 2.77, + "learning_rate": 3.4949359120894077e-06, + "loss": 2.1742, + "step": 13644 + }, + { + "epoch": 2.77, + "learning_rate": 3.493624096001127e-06, + "loss": 2.0613, + "step": 13645 + }, + { + "epoch": 2.77, + "learning_rate": 3.4923124740429447e-06, + "loss": 2.1271, + "step": 13646 + }, + { + "epoch": 2.77, + "learning_rate": 3.491001046254e-06, + "loss": 2.1113, + "step": 13647 + }, + { + "epoch": 2.77, + "learning_rate": 3.48968981267342e-06, + "loss": 2.2304, + "step": 13648 + }, + { + "epoch": 2.77, + "learning_rate": 3.488378773340333e-06, + "loss": 2.144, + "step": 13649 + }, + { + "epoch": 2.77, + "learning_rate": 3.487067928293848e-06, + "loss": 2.1556, + "step": 13650 + }, + { + "epoch": 2.77, + "learning_rate": 3.485757277573081e-06, + "loss": 2.1313, + "step": 13651 + }, + { + "epoch": 2.77, + "learning_rate": 3.4844468212171402e-06, + "loss": 2.0767, + "step": 13652 + }, + { + "epoch": 2.77, + "learning_rate": 3.483136559265119e-06, + "loss": 2.0994, + "step": 13653 + }, + { + "epoch": 2.77, + "learning_rate": 3.481826491756116e-06, + "loss": 2.0834, + "step": 13654 + }, + { + "epoch": 2.77, + "learning_rate": 3.4805166187292195e-06, + "loss": 1.9673, + "step": 13655 + }, + { + "epoch": 2.77, + "learning_rate": 3.479206940223514e-06, + "loss": 2.1228, + "step": 13656 + }, + { + "epoch": 2.77, + "learning_rate": 3.477897456278071e-06, + "loss": 2.12, + "step": 13657 + }, + { + "epoch": 2.77, + "learning_rate": 3.4765881669319636e-06, + "loss": 2.1033, + "step": 13658 + }, + { + "epoch": 2.77, + "learning_rate": 3.475279072224258e-06, + "loss": 2.1031, + "step": 13659 + }, + { + "epoch": 2.77, + "learning_rate": 3.4739701721940153e-06, + "loss": 2.0829, + "step": 13660 + }, + { + "epoch": 2.77, + "learning_rate": 3.472661466880285e-06, + "loss": 2.0588, + "step": 13661 + }, + { + "epoch": 2.77, + "learning_rate": 3.471352956322118e-06, + "loss": 2.2005, + "step": 13662 + }, + { + "epoch": 2.77, + "learning_rate": 3.4700446405585496e-06, + "loss": 2.0615, + "step": 13663 + }, + { + "epoch": 2.77, + "learning_rate": 3.468736519628627e-06, + "loss": 2.1364, + "step": 13664 + }, + { + "epoch": 2.77, + "learning_rate": 3.4674285935713715e-06, + "loss": 2.1115, + "step": 13665 + }, + { + "epoch": 2.77, + "learning_rate": 3.4661208624258134e-06, + "loss": 2.1004, + "step": 13666 + }, + { + "epoch": 2.77, + "learning_rate": 3.4648133262309657e-06, + "loss": 2.1033, + "step": 13667 + }, + { + "epoch": 2.77, + "learning_rate": 3.4635059850258434e-06, + "loss": 2.1254, + "step": 13668 + }, + { + "epoch": 2.77, + "learning_rate": 3.4621988388494553e-06, + "loss": 2.0976, + "step": 13669 + }, + { + "epoch": 2.77, + "learning_rate": 3.4608918877408037e-06, + "loss": 2.0227, + "step": 13670 + }, + { + "epoch": 2.77, + "learning_rate": 3.4595851317388785e-06, + "loss": 2.1193, + "step": 13671 + }, + { + "epoch": 2.78, + "learning_rate": 3.4582785708826727e-06, + "loss": 2.1007, + "step": 13672 + }, + { + "epoch": 2.78, + "learning_rate": 3.45697220521117e-06, + "loss": 2.1045, + "step": 13673 + }, + { + "epoch": 2.78, + "learning_rate": 3.455666034763352e-06, + "loss": 2.0941, + "step": 13674 + }, + { + "epoch": 2.78, + "learning_rate": 3.454360059578182e-06, + "loss": 2.1136, + "step": 13675 + }, + { + "epoch": 2.78, + "learning_rate": 3.453054279694632e-06, + "loss": 2.1112, + "step": 13676 + }, + { + "epoch": 2.78, + "learning_rate": 3.4517486951516655e-06, + "loss": 2.1857, + "step": 13677 + }, + { + "epoch": 2.78, + "learning_rate": 3.450443305988229e-06, + "loss": 2.1095, + "step": 13678 + }, + { + "epoch": 2.78, + "learning_rate": 3.4491381122432754e-06, + "loss": 2.0314, + "step": 13679 + }, + { + "epoch": 2.78, + "learning_rate": 3.4478331139557475e-06, + "loss": 2.162, + "step": 13680 + }, + { + "epoch": 2.78, + "learning_rate": 3.4465283111645864e-06, + "loss": 2.0415, + "step": 13681 + }, + { + "epoch": 2.78, + "learning_rate": 3.4452237039087166e-06, + "loss": 2.1151, + "step": 13682 + }, + { + "epoch": 2.78, + "learning_rate": 3.443919292227066e-06, + "loss": 2.1515, + "step": 13683 + }, + { + "epoch": 2.78, + "learning_rate": 3.4426150761585543e-06, + "loss": 2.1307, + "step": 13684 + }, + { + "epoch": 2.78, + "learning_rate": 3.441311055742099e-06, + "loss": 2.0858, + "step": 13685 + }, + { + "epoch": 2.78, + "learning_rate": 3.440007231016602e-06, + "loss": 2.1001, + "step": 13686 + }, + { + "epoch": 2.78, + "learning_rate": 3.4387036020209687e-06, + "loss": 2.0563, + "step": 13687 + }, + { + "epoch": 2.78, + "learning_rate": 3.437400168794094e-06, + "loss": 2.0649, + "step": 13688 + }, + { + "epoch": 2.78, + "learning_rate": 3.4360969313748714e-06, + "loss": 2.1892, + "step": 13689 + }, + { + "epoch": 2.78, + "learning_rate": 3.4347938898021816e-06, + "loss": 2.1177, + "step": 13690 + }, + { + "epoch": 2.78, + "learning_rate": 3.433491044114907e-06, + "loss": 2.0856, + "step": 13691 + }, + { + "epoch": 2.78, + "learning_rate": 3.4321883943519117e-06, + "loss": 2.0956, + "step": 13692 + }, + { + "epoch": 2.78, + "learning_rate": 3.430885940552077e-06, + "loss": 2.1477, + "step": 13693 + }, + { + "epoch": 2.78, + "learning_rate": 3.4295836827542516e-06, + "loss": 2.1699, + "step": 13694 + }, + { + "epoch": 2.78, + "learning_rate": 3.428281620997296e-06, + "loss": 2.1303, + "step": 13695 + }, + { + "epoch": 2.78, + "learning_rate": 3.4269797553200636e-06, + "loss": 2.0516, + "step": 13696 + }, + { + "epoch": 2.78, + "learning_rate": 3.425678085761389e-06, + "loss": 2.0748, + "step": 13697 + }, + { + "epoch": 2.78, + "learning_rate": 3.424376612360116e-06, + "loss": 2.0987, + "step": 13698 + }, + { + "epoch": 2.78, + "learning_rate": 3.423075335155075e-06, + "loss": 2.2035, + "step": 13699 + }, + { + "epoch": 2.78, + "learning_rate": 3.421774254185096e-06, + "loss": 2.1348, + "step": 13700 + }, + { + "epoch": 2.78, + "learning_rate": 3.4204733694889925e-06, + "loss": 2.2063, + "step": 13701 + }, + { + "epoch": 2.78, + "learning_rate": 3.4191726811055815e-06, + "loss": 2.1042, + "step": 13702 + }, + { + "epoch": 2.78, + "learning_rate": 3.417872189073672e-06, + "loss": 2.1754, + "step": 13703 + }, + { + "epoch": 2.78, + "learning_rate": 3.4165718934320703e-06, + "loss": 2.197, + "step": 13704 + }, + { + "epoch": 2.78, + "learning_rate": 3.415271794219566e-06, + "loss": 2.0081, + "step": 13705 + }, + { + "epoch": 2.78, + "learning_rate": 3.4139718914749564e-06, + "loss": 2.1115, + "step": 13706 + }, + { + "epoch": 2.78, + "learning_rate": 3.412672185237018e-06, + "loss": 2.1901, + "step": 13707 + }, + { + "epoch": 2.78, + "learning_rate": 3.411372675544542e-06, + "loss": 2.0405, + "step": 13708 + }, + { + "epoch": 2.78, + "learning_rate": 3.4100733624362926e-06, + "loss": 2.0509, + "step": 13709 + }, + { + "epoch": 2.78, + "learning_rate": 3.408774245951043e-06, + "loss": 2.1259, + "step": 13710 + }, + { + "epoch": 2.78, + "learning_rate": 3.4074753261275484e-06, + "loss": 2.1052, + "step": 13711 + }, + { + "epoch": 2.78, + "learning_rate": 3.4061766030045694e-06, + "loss": 2.0961, + "step": 13712 + }, + { + "epoch": 2.78, + "learning_rate": 3.4048780766208534e-06, + "loss": 2.1972, + "step": 13713 + }, + { + "epoch": 2.78, + "learning_rate": 3.40357974701515e-06, + "loss": 2.1284, + "step": 13714 + }, + { + "epoch": 2.78, + "learning_rate": 3.4022816142261883e-06, + "loss": 2.0756, + "step": 13715 + }, + { + "epoch": 2.78, + "learning_rate": 3.4009836782927062e-06, + "loss": 2.1332, + "step": 13716 + }, + { + "epoch": 2.78, + "learning_rate": 3.3996859392534286e-06, + "loss": 2.1024, + "step": 13717 + }, + { + "epoch": 2.78, + "learning_rate": 3.3983883971470797e-06, + "loss": 2.1385, + "step": 13718 + }, + { + "epoch": 2.78, + "learning_rate": 3.397091052012369e-06, + "loss": 2.1549, + "step": 13719 + }, + { + "epoch": 2.78, + "learning_rate": 3.3957939038880062e-06, + "loss": 2.1357, + "step": 13720 + }, + { + "epoch": 2.79, + "learning_rate": 3.3944969528126983e-06, + "loss": 2.0551, + "step": 13721 + }, + { + "epoch": 2.79, + "learning_rate": 3.3932001988251364e-06, + "loss": 2.1117, + "step": 13722 + }, + { + "epoch": 2.79, + "learning_rate": 3.3919036419640138e-06, + "loss": 2.1348, + "step": 13723 + }, + { + "epoch": 2.79, + "learning_rate": 3.390607282268017e-06, + "loss": 2.1433, + "step": 13724 + }, + { + "epoch": 2.79, + "learning_rate": 3.3893111197758276e-06, + "loss": 2.1364, + "step": 13725 + }, + { + "epoch": 2.79, + "learning_rate": 3.388015154526113e-06, + "loss": 2.13, + "step": 13726 + }, + { + "epoch": 2.79, + "learning_rate": 3.386719386557544e-06, + "loss": 2.1349, + "step": 13727 + }, + { + "epoch": 2.79, + "learning_rate": 3.385423815908783e-06, + "loss": 2.1319, + "step": 13728 + }, + { + "epoch": 2.79, + "learning_rate": 3.384128442618488e-06, + "loss": 2.1385, + "step": 13729 + }, + { + "epoch": 2.79, + "learning_rate": 3.3828332667253027e-06, + "loss": 2.1161, + "step": 13730 + }, + { + "epoch": 2.79, + "learning_rate": 3.3815382882678773e-06, + "loss": 2.1516, + "step": 13731 + }, + { + "epoch": 2.79, + "learning_rate": 3.3802435072848405e-06, + "loss": 2.0019, + "step": 13732 + }, + { + "epoch": 2.79, + "learning_rate": 3.3789489238148386e-06, + "loss": 2.1155, + "step": 13733 + }, + { + "epoch": 2.79, + "learning_rate": 3.3776545378964866e-06, + "loss": 2.0855, + "step": 13734 + }, + { + "epoch": 2.79, + "learning_rate": 3.376360349568413e-06, + "loss": 2.0862, + "step": 13735 + }, + { + "epoch": 2.79, + "learning_rate": 3.3750663588692233e-06, + "loss": 2.1585, + "step": 13736 + }, + { + "epoch": 2.79, + "learning_rate": 3.373772565837532e-06, + "loss": 2.0993, + "step": 13737 + }, + { + "epoch": 2.79, + "learning_rate": 3.372478970511941e-06, + "loss": 2.1161, + "step": 13738 + }, + { + "epoch": 2.79, + "learning_rate": 3.3711855729310516e-06, + "loss": 2.1063, + "step": 13739 + }, + { + "epoch": 2.79, + "learning_rate": 3.3698923731334453e-06, + "loss": 2.1017, + "step": 13740 + }, + { + "epoch": 2.79, + "learning_rate": 3.3685993711577135e-06, + "loss": 2.1087, + "step": 13741 + }, + { + "epoch": 2.79, + "learning_rate": 3.367306567042433e-06, + "loss": 2.097, + "step": 13742 + }, + { + "epoch": 2.79, + "learning_rate": 3.3660139608261822e-06, + "loss": 2.055, + "step": 13743 + }, + { + "epoch": 2.79, + "learning_rate": 3.3647215525475217e-06, + "loss": 2.1504, + "step": 13744 + }, + { + "epoch": 2.79, + "learning_rate": 3.3634293422450146e-06, + "loss": 2.0636, + "step": 13745 + }, + { + "epoch": 2.79, + "learning_rate": 3.3621373299572215e-06, + "loss": 2.0344, + "step": 13746 + }, + { + "epoch": 2.79, + "learning_rate": 3.3608455157226814e-06, + "loss": 2.1077, + "step": 13747 + }, + { + "epoch": 2.79, + "learning_rate": 3.3595538995799513e-06, + "loss": 2.0599, + "step": 13748 + }, + { + "epoch": 2.79, + "learning_rate": 3.358262481567559e-06, + "loss": 2.0567, + "step": 13749 + }, + { + "epoch": 2.79, + "learning_rate": 3.3569712617240434e-06, + "loss": 2.1996, + "step": 13750 + }, + { + "epoch": 2.79, + "learning_rate": 3.355680240087924e-06, + "loss": 2.138, + "step": 13751 + }, + { + "epoch": 2.79, + "learning_rate": 3.354389416697724e-06, + "loss": 2.1611, + "step": 13752 + }, + { + "epoch": 2.79, + "learning_rate": 3.3530987915919567e-06, + "loss": 2.2199, + "step": 13753 + }, + { + "epoch": 2.79, + "learning_rate": 3.3518083648091347e-06, + "loss": 2.0734, + "step": 13754 + }, + { + "epoch": 2.79, + "learning_rate": 3.3505181363877536e-06, + "loss": 2.0841, + "step": 13755 + }, + { + "epoch": 2.79, + "learning_rate": 3.3492281063663123e-06, + "loss": 2.1152, + "step": 13756 + }, + { + "epoch": 2.79, + "learning_rate": 3.347938274783302e-06, + "loss": 2.085, + "step": 13757 + }, + { + "epoch": 2.79, + "learning_rate": 3.3466486416772113e-06, + "loss": 2.1269, + "step": 13758 + }, + { + "epoch": 2.79, + "learning_rate": 3.3453592070865114e-06, + "loss": 2.1096, + "step": 13759 + }, + { + "epoch": 2.79, + "learning_rate": 3.3440699710496784e-06, + "loss": 2.1625, + "step": 13760 + }, + { + "epoch": 2.79, + "learning_rate": 3.342780933605182e-06, + "loss": 2.0742, + "step": 13761 + }, + { + "epoch": 2.79, + "learning_rate": 3.3414920947914773e-06, + "loss": 2.0904, + "step": 13762 + }, + { + "epoch": 2.79, + "learning_rate": 3.340203454647022e-06, + "loss": 2.0704, + "step": 13763 + }, + { + "epoch": 2.79, + "learning_rate": 3.3389150132102654e-06, + "loss": 2.1312, + "step": 13764 + }, + { + "epoch": 2.79, + "learning_rate": 3.337626770519653e-06, + "loss": 2.2001, + "step": 13765 + }, + { + "epoch": 2.79, + "learning_rate": 3.336338726613617e-06, + "loss": 2.0637, + "step": 13766 + }, + { + "epoch": 2.79, + "learning_rate": 3.3350508815305917e-06, + "loss": 2.0299, + "step": 13767 + }, + { + "epoch": 2.79, + "learning_rate": 3.333763235309001e-06, + "loss": 2.1226, + "step": 13768 + }, + { + "epoch": 2.79, + "learning_rate": 3.3324757879872684e-06, + "loss": 2.1947, + "step": 13769 + }, + { + "epoch": 2.8, + "learning_rate": 3.3311885396038002e-06, + "loss": 2.2071, + "step": 13770 + }, + { + "epoch": 2.8, + "learning_rate": 3.3299014901970116e-06, + "loss": 2.1402, + "step": 13771 + }, + { + "epoch": 2.8, + "learning_rate": 3.328614639805294e-06, + "loss": 2.1312, + "step": 13772 + }, + { + "epoch": 2.8, + "learning_rate": 3.3273279884670563e-06, + "loss": 2.1469, + "step": 13773 + }, + { + "epoch": 2.8, + "learning_rate": 3.326041536220678e-06, + "loss": 2.0725, + "step": 13774 + }, + { + "epoch": 2.8, + "learning_rate": 3.324755283104548e-06, + "loss": 2.0565, + "step": 13775 + }, + { + "epoch": 2.8, + "learning_rate": 3.3234692291570358e-06, + "loss": 2.0531, + "step": 13776 + }, + { + "epoch": 2.8, + "learning_rate": 3.3221833744165278e-06, + "loss": 2.044, + "step": 13777 + }, + { + "epoch": 2.8, + "learning_rate": 3.3208977189213777e-06, + "loss": 2.1939, + "step": 13778 + }, + { + "epoch": 2.8, + "learning_rate": 3.3196122627099527e-06, + "loss": 2.1712, + "step": 13779 + }, + { + "epoch": 2.8, + "learning_rate": 3.3183270058205997e-06, + "loss": 2.0566, + "step": 13780 + }, + { + "epoch": 2.8, + "learning_rate": 3.3170419482916714e-06, + "loss": 2.132, + "step": 13781 + }, + { + "epoch": 2.8, + "learning_rate": 3.315757090161509e-06, + "loss": 2.068, + "step": 13782 + }, + { + "epoch": 2.8, + "learning_rate": 3.3144724314684527e-06, + "loss": 2.0833, + "step": 13783 + }, + { + "epoch": 2.8, + "learning_rate": 3.3131879722508254e-06, + "loss": 2.1094, + "step": 13784 + }, + { + "epoch": 2.8, + "learning_rate": 3.3119037125469553e-06, + "loss": 2.1111, + "step": 13785 + }, + { + "epoch": 2.8, + "learning_rate": 3.3106196523951606e-06, + "loss": 2.0251, + "step": 13786 + }, + { + "epoch": 2.8, + "learning_rate": 3.3093357918337567e-06, + "loss": 2.0481, + "step": 13787 + }, + { + "epoch": 2.8, + "learning_rate": 3.308052130901044e-06, + "loss": 2.1437, + "step": 13788 + }, + { + "epoch": 2.8, + "learning_rate": 3.306768669635325e-06, + "loss": 2.217, + "step": 13789 + }, + { + "epoch": 2.8, + "learning_rate": 3.3054854080748976e-06, + "loss": 2.0843, + "step": 13790 + }, + { + "epoch": 2.8, + "learning_rate": 3.3042023462580464e-06, + "loss": 2.0817, + "step": 13791 + }, + { + "epoch": 2.8, + "learning_rate": 3.302919484223054e-06, + "loss": 2.0744, + "step": 13792 + }, + { + "epoch": 2.8, + "learning_rate": 3.3016368220081987e-06, + "loss": 2.1399, + "step": 13793 + }, + { + "epoch": 2.8, + "learning_rate": 3.300354359651754e-06, + "loss": 2.0467, + "step": 13794 + }, + { + "epoch": 2.8, + "learning_rate": 3.2990720971919785e-06, + "loss": 2.104, + "step": 13795 + }, + { + "epoch": 2.8, + "learning_rate": 3.2977900346671342e-06, + "loss": 2.1096, + "step": 13796 + }, + { + "epoch": 2.8, + "learning_rate": 3.2965081721154736e-06, + "loss": 2.0542, + "step": 13797 + }, + { + "epoch": 2.8, + "learning_rate": 3.2952265095752456e-06, + "loss": 2.1425, + "step": 13798 + }, + { + "epoch": 2.8, + "learning_rate": 3.2939450470846867e-06, + "loss": 2.1889, + "step": 13799 + }, + { + "epoch": 2.8, + "learning_rate": 3.2926637846820332e-06, + "loss": 2.17, + "step": 13800 + }, + { + "epoch": 2.8, + "learning_rate": 3.291382722405515e-06, + "loss": 2.0674, + "step": 13801 + }, + { + "epoch": 2.8, + "learning_rate": 3.290101860293359e-06, + "loss": 2.1339, + "step": 13802 + }, + { + "epoch": 2.8, + "learning_rate": 3.2888211983837735e-06, + "loss": 2.1012, + "step": 13803 + }, + { + "epoch": 2.8, + "learning_rate": 3.287540736714975e-06, + "loss": 2.1315, + "step": 13804 + }, + { + "epoch": 2.8, + "learning_rate": 3.2862604753251705e-06, + "loss": 2.1559, + "step": 13805 + }, + { + "epoch": 2.8, + "learning_rate": 3.284980414252552e-06, + "loss": 2.0997, + "step": 13806 + }, + { + "epoch": 2.8, + "learning_rate": 3.2837005535353174e-06, + "loss": 2.1496, + "step": 13807 + }, + { + "epoch": 2.8, + "learning_rate": 3.282420893211652e-06, + "loss": 2.1282, + "step": 13808 + }, + { + "epoch": 2.8, + "learning_rate": 3.2811414333197423e-06, + "loss": 2.0584, + "step": 13809 + }, + { + "epoch": 2.8, + "learning_rate": 3.279862173897754e-06, + "loss": 2.1534, + "step": 13810 + }, + { + "epoch": 2.8, + "learning_rate": 3.2785831149838633e-06, + "loss": 2.109, + "step": 13811 + }, + { + "epoch": 2.8, + "learning_rate": 3.2773042566162293e-06, + "loss": 2.0183, + "step": 13812 + }, + { + "epoch": 2.8, + "learning_rate": 3.2760255988330158e-06, + "loss": 2.1527, + "step": 13813 + }, + { + "epoch": 2.8, + "learning_rate": 3.2747471416723654e-06, + "loss": 2.0241, + "step": 13814 + }, + { + "epoch": 2.8, + "learning_rate": 3.2734688851724307e-06, + "loss": 2.103, + "step": 13815 + }, + { + "epoch": 2.8, + "learning_rate": 3.2721908293713412e-06, + "loss": 2.1286, + "step": 13816 + }, + { + "epoch": 2.8, + "learning_rate": 3.270912974307242e-06, + "loss": 2.12, + "step": 13817 + }, + { + "epoch": 2.8, + "learning_rate": 3.269635320018253e-06, + "loss": 2.1094, + "step": 13818 + }, + { + "epoch": 2.81, + "learning_rate": 3.2683578665424985e-06, + "loss": 2.1392, + "step": 13819 + }, + { + "epoch": 2.81, + "learning_rate": 3.26708061391809e-06, + "loss": 2.1104, + "step": 13820 + }, + { + "epoch": 2.81, + "learning_rate": 3.2658035621831397e-06, + "loss": 2.1104, + "step": 13821 + }, + { + "epoch": 2.81, + "learning_rate": 3.2645267113757496e-06, + "loss": 2.0813, + "step": 13822 + }, + { + "epoch": 2.81, + "learning_rate": 3.2632500615340214e-06, + "loss": 2.1664, + "step": 13823 + }, + { + "epoch": 2.81, + "learning_rate": 3.261973612696039e-06, + "loss": 2.0902, + "step": 13824 + }, + { + "epoch": 2.81, + "learning_rate": 3.2606973648998918e-06, + "loss": 2.1318, + "step": 13825 + }, + { + "epoch": 2.81, + "learning_rate": 3.259421318183659e-06, + "loss": 2.2058, + "step": 13826 + }, + { + "epoch": 2.81, + "learning_rate": 3.2581454725854158e-06, + "loss": 2.0429, + "step": 13827 + }, + { + "epoch": 2.81, + "learning_rate": 3.2568698281432243e-06, + "loss": 2.021, + "step": 13828 + }, + { + "epoch": 2.81, + "learning_rate": 3.2555943848951476e-06, + "loss": 2.0801, + "step": 13829 + }, + { + "epoch": 2.81, + "learning_rate": 3.2543191428792466e-06, + "loss": 2.1091, + "step": 13830 + }, + { + "epoch": 2.81, + "learning_rate": 3.2530441021335624e-06, + "loss": 2.0837, + "step": 13831 + }, + { + "epoch": 2.81, + "learning_rate": 3.251769262696143e-06, + "loss": 2.111, + "step": 13832 + }, + { + "epoch": 2.81, + "learning_rate": 3.250494624605023e-06, + "loss": 2.1306, + "step": 13833 + }, + { + "epoch": 2.81, + "learning_rate": 3.249220187898239e-06, + "loss": 2.1379, + "step": 13834 + }, + { + "epoch": 2.81, + "learning_rate": 3.247945952613809e-06, + "loss": 2.1252, + "step": 13835 + }, + { + "epoch": 2.81, + "learning_rate": 3.2466719187897555e-06, + "loss": 2.0667, + "step": 13836 + }, + { + "epoch": 2.81, + "learning_rate": 3.245398086464093e-06, + "loss": 2.1186, + "step": 13837 + }, + { + "epoch": 2.81, + "learning_rate": 3.2441244556748297e-06, + "loss": 2.1331, + "step": 13838 + }, + { + "epoch": 2.81, + "learning_rate": 3.242851026459962e-06, + "loss": 2.1026, + "step": 13839 + }, + { + "epoch": 2.81, + "learning_rate": 3.2415777988574914e-06, + "loss": 2.1009, + "step": 13840 + }, + { + "epoch": 2.81, + "learning_rate": 3.240304772905396e-06, + "loss": 2.142, + "step": 13841 + }, + { + "epoch": 2.81, + "learning_rate": 3.2390319486416733e-06, + "loss": 2.1076, + "step": 13842 + }, + { + "epoch": 2.81, + "learning_rate": 3.2377593261042907e-06, + "loss": 2.2536, + "step": 13843 + }, + { + "epoch": 2.81, + "learning_rate": 3.236486905331225e-06, + "loss": 2.0942, + "step": 13844 + }, + { + "epoch": 2.81, + "learning_rate": 3.235214686360435e-06, + "loss": 2.084, + "step": 13845 + }, + { + "epoch": 2.81, + "learning_rate": 3.2339426692298836e-06, + "loss": 2.1077, + "step": 13846 + }, + { + "epoch": 2.81, + "learning_rate": 3.232670853977523e-06, + "loss": 2.0949, + "step": 13847 + }, + { + "epoch": 2.81, + "learning_rate": 3.231399240641302e-06, + "loss": 2.1522, + "step": 13848 + }, + { + "epoch": 2.81, + "learning_rate": 3.230127829259162e-06, + "loss": 2.1641, + "step": 13849 + }, + { + "epoch": 2.81, + "learning_rate": 3.228856619869034e-06, + "loss": 2.0988, + "step": 13850 + }, + { + "epoch": 2.81, + "learning_rate": 3.227585612508849e-06, + "loss": 2.1975, + "step": 13851 + }, + { + "epoch": 2.81, + "learning_rate": 3.2263148072165295e-06, + "loss": 2.2024, + "step": 13852 + }, + { + "epoch": 2.81, + "learning_rate": 3.2250442040299967e-06, + "loss": 2.12, + "step": 13853 + }, + { + "epoch": 2.81, + "learning_rate": 3.2237738029871546e-06, + "loss": 2.0406, + "step": 13854 + }, + { + "epoch": 2.81, + "learning_rate": 3.222503604125915e-06, + "loss": 2.1108, + "step": 13855 + }, + { + "epoch": 2.81, + "learning_rate": 3.2212336074841664e-06, + "loss": 2.0892, + "step": 13856 + }, + { + "epoch": 2.81, + "learning_rate": 3.2199638130998135e-06, + "loss": 2.1229, + "step": 13857 + }, + { + "epoch": 2.81, + "learning_rate": 3.218694221010735e-06, + "loss": 2.1685, + "step": 13858 + }, + { + "epoch": 2.81, + "learning_rate": 3.2174248312548183e-06, + "loss": 2.1598, + "step": 13859 + }, + { + "epoch": 2.81, + "learning_rate": 3.2161556438699303e-06, + "loss": 2.0989, + "step": 13860 + }, + { + "epoch": 2.81, + "learning_rate": 3.2148866588939433e-06, + "loss": 2.1663, + "step": 13861 + }, + { + "epoch": 2.81, + "learning_rate": 3.2136178763647185e-06, + "loss": 2.1921, + "step": 13862 + }, + { + "epoch": 2.81, + "learning_rate": 3.2123492963201186e-06, + "loss": 2.1512, + "step": 13863 + }, + { + "epoch": 2.81, + "learning_rate": 3.2110809187979864e-06, + "loss": 2.1159, + "step": 13864 + }, + { + "epoch": 2.81, + "learning_rate": 3.209812743836168e-06, + "loss": 2.0869, + "step": 13865 + }, + { + "epoch": 2.81, + "learning_rate": 3.208544771472504e-06, + "loss": 2.0783, + "step": 13866 + }, + { + "epoch": 2.81, + "learning_rate": 3.2072770017448283e-06, + "loss": 2.1866, + "step": 13867 + }, + { + "epoch": 2.81, + "learning_rate": 3.206009434690962e-06, + "loss": 2.1451, + "step": 13868 + }, + { + "epoch": 2.82, + "learning_rate": 3.2047420703487285e-06, + "loss": 2.1002, + "step": 13869 + }, + { + "epoch": 2.82, + "learning_rate": 3.2034749087559413e-06, + "loss": 2.1206, + "step": 13870 + }, + { + "epoch": 2.82, + "learning_rate": 3.202207949950412e-06, + "loss": 2.1347, + "step": 13871 + }, + { + "epoch": 2.82, + "learning_rate": 3.2009411939699374e-06, + "loss": 2.1035, + "step": 13872 + }, + { + "epoch": 2.82, + "learning_rate": 3.199674640852315e-06, + "loss": 2.1309, + "step": 13873 + }, + { + "epoch": 2.82, + "learning_rate": 3.1984082906353386e-06, + "loss": 2.1251, + "step": 13874 + }, + { + "epoch": 2.82, + "learning_rate": 3.197142143356787e-06, + "loss": 2.0686, + "step": 13875 + }, + { + "epoch": 2.82, + "learning_rate": 3.1958761990544396e-06, + "loss": 2.1188, + "step": 13876 + }, + { + "epoch": 2.82, + "learning_rate": 3.1946104577660697e-06, + "loss": 2.1585, + "step": 13877 + }, + { + "epoch": 2.82, + "learning_rate": 3.193344919529445e-06, + "loss": 2.1563, + "step": 13878 + }, + { + "epoch": 2.82, + "learning_rate": 3.1920795843823206e-06, + "loss": 2.1507, + "step": 13879 + }, + { + "epoch": 2.82, + "learning_rate": 3.190814452362451e-06, + "loss": 2.1584, + "step": 13880 + }, + { + "epoch": 2.82, + "learning_rate": 3.1895495235075856e-06, + "loss": 2.1085, + "step": 13881 + }, + { + "epoch": 2.82, + "learning_rate": 3.188284797855469e-06, + "loss": 2.0533, + "step": 13882 + }, + { + "epoch": 2.82, + "learning_rate": 3.1870202754438284e-06, + "loss": 2.1396, + "step": 13883 + }, + { + "epoch": 2.82, + "learning_rate": 3.1857559563104024e-06, + "loss": 2.0726, + "step": 13884 + }, + { + "epoch": 2.82, + "learning_rate": 3.1844918404929035e-06, + "loss": 2.1611, + "step": 13885 + }, + { + "epoch": 2.82, + "learning_rate": 3.1832279280290612e-06, + "loss": 2.1343, + "step": 13886 + }, + { + "epoch": 2.82, + "learning_rate": 3.1819642189565782e-06, + "loss": 2.0839, + "step": 13887 + }, + { + "epoch": 2.82, + "learning_rate": 3.1807007133131653e-06, + "loss": 2.1624, + "step": 13888 + }, + { + "epoch": 2.82, + "learning_rate": 3.1794374111365157e-06, + "loss": 2.1481, + "step": 13889 + }, + { + "epoch": 2.82, + "learning_rate": 3.178174312464326e-06, + "loss": 2.1155, + "step": 13890 + }, + { + "epoch": 2.82, + "learning_rate": 3.176911417334282e-06, + "loss": 2.1405, + "step": 13891 + }, + { + "epoch": 2.82, + "learning_rate": 3.1756487257840685e-06, + "loss": 2.0286, + "step": 13892 + }, + { + "epoch": 2.82, + "learning_rate": 3.1743862378513537e-06, + "loss": 2.1735, + "step": 13893 + }, + { + "epoch": 2.82, + "learning_rate": 3.173123953573809e-06, + "loss": 2.1061, + "step": 13894 + }, + { + "epoch": 2.82, + "learning_rate": 3.171861872989098e-06, + "loss": 2.0594, + "step": 13895 + }, + { + "epoch": 2.82, + "learning_rate": 3.170599996134881e-06, + "loss": 2.1289, + "step": 13896 + }, + { + "epoch": 2.82, + "learning_rate": 3.1693383230488006e-06, + "loss": 2.1289, + "step": 13897 + }, + { + "epoch": 2.82, + "learning_rate": 3.1680768537685045e-06, + "loss": 2.1788, + "step": 13898 + }, + { + "epoch": 2.82, + "learning_rate": 3.1668155883316355e-06, + "loss": 2.0757, + "step": 13899 + }, + { + "epoch": 2.82, + "learning_rate": 3.165554526775816e-06, + "loss": 2.0868, + "step": 13900 + }, + { + "epoch": 2.82, + "learning_rate": 3.164293669138684e-06, + "loss": 2.1386, + "step": 13901 + }, + { + "epoch": 2.82, + "learning_rate": 3.1630330154578493e-06, + "loss": 2.1865, + "step": 13902 + }, + { + "epoch": 2.82, + "learning_rate": 3.1617725657709354e-06, + "loss": 2.1071, + "step": 13903 + }, + { + "epoch": 2.82, + "learning_rate": 3.1605123201155407e-06, + "loss": 2.1184, + "step": 13904 + }, + { + "epoch": 2.82, + "learning_rate": 3.1592522785292714e-06, + "loss": 2.1593, + "step": 13905 + }, + { + "epoch": 2.82, + "learning_rate": 3.157992441049723e-06, + "loss": 2.1351, + "step": 13906 + }, + { + "epoch": 2.82, + "learning_rate": 3.15673280771449e-06, + "loss": 2.0895, + "step": 13907 + }, + { + "epoch": 2.82, + "learning_rate": 3.155473378561147e-06, + "loss": 2.109, + "step": 13908 + }, + { + "epoch": 2.82, + "learning_rate": 3.1542141536272764e-06, + "loss": 2.1033, + "step": 13909 + }, + { + "epoch": 2.82, + "learning_rate": 3.152955132950448e-06, + "loss": 2.1486, + "step": 13910 + }, + { + "epoch": 2.82, + "learning_rate": 3.1516963165682325e-06, + "loss": 2.0737, + "step": 13911 + }, + { + "epoch": 2.82, + "learning_rate": 3.1504377045181812e-06, + "loss": 2.1259, + "step": 13912 + }, + { + "epoch": 2.82, + "learning_rate": 3.14917929683785e-06, + "loss": 2.1692, + "step": 13913 + }, + { + "epoch": 2.82, + "learning_rate": 3.1479210935647907e-06, + "loss": 2.1064, + "step": 13914 + }, + { + "epoch": 2.82, + "learning_rate": 3.1466630947365363e-06, + "loss": 2.1906, + "step": 13915 + }, + { + "epoch": 2.82, + "learning_rate": 3.145405300390625e-06, + "loss": 2.0086, + "step": 13916 + }, + { + "epoch": 2.82, + "learning_rate": 3.1441477105645857e-06, + "loss": 2.1152, + "step": 13917 + }, + { + "epoch": 2.83, + "learning_rate": 3.1428903252959452e-06, + "loss": 2.1319, + "step": 13918 + }, + { + "epoch": 2.83, + "learning_rate": 3.1416331446222127e-06, + "loss": 2.1246, + "step": 13919 + }, + { + "epoch": 2.83, + "learning_rate": 3.1403761685809007e-06, + "loss": 2.0626, + "step": 13920 + }, + { + "epoch": 2.83, + "learning_rate": 3.1391193972095145e-06, + "loss": 2.1206, + "step": 13921 + }, + { + "epoch": 2.83, + "learning_rate": 3.1378628305455562e-06, + "loss": 2.0383, + "step": 13922 + }, + { + "epoch": 2.83, + "learning_rate": 3.136606468626511e-06, + "loss": 2.1176, + "step": 13923 + }, + { + "epoch": 2.83, + "learning_rate": 3.1353503114898696e-06, + "loss": 2.1364, + "step": 13924 + }, + { + "epoch": 2.83, + "learning_rate": 3.1340943591731045e-06, + "loss": 2.0686, + "step": 13925 + }, + { + "epoch": 2.83, + "learning_rate": 3.132838611713701e-06, + "loss": 2.089, + "step": 13926 + }, + { + "epoch": 2.83, + "learning_rate": 3.131583069149119e-06, + "loss": 2.1543, + "step": 13927 + }, + { + "epoch": 2.83, + "learning_rate": 3.1303277315168233e-06, + "loss": 2.1281, + "step": 13928 + }, + { + "epoch": 2.83, + "learning_rate": 3.1290725988542647e-06, + "loss": 2.0624, + "step": 13929 + }, + { + "epoch": 2.83, + "learning_rate": 3.127817671198895e-06, + "loss": 2.1347, + "step": 13930 + }, + { + "epoch": 2.83, + "learning_rate": 3.1265629485881586e-06, + "loss": 2.0824, + "step": 13931 + }, + { + "epoch": 2.83, + "learning_rate": 3.125308431059494e-06, + "loss": 2.1526, + "step": 13932 + }, + { + "epoch": 2.83, + "learning_rate": 3.124054118650327e-06, + "loss": 2.1639, + "step": 13933 + }, + { + "epoch": 2.83, + "learning_rate": 3.122800011398086e-06, + "loss": 2.138, + "step": 13934 + }, + { + "epoch": 2.83, + "learning_rate": 3.121546109340188e-06, + "loss": 2.145, + "step": 13935 + }, + { + "epoch": 2.83, + "learning_rate": 3.12029241251405e-06, + "loss": 2.1407, + "step": 13936 + }, + { + "epoch": 2.83, + "learning_rate": 3.1190389209570717e-06, + "loss": 2.1167, + "step": 13937 + }, + { + "epoch": 2.83, + "learning_rate": 3.117785634706656e-06, + "loss": 2.0882, + "step": 13938 + }, + { + "epoch": 2.83, + "learning_rate": 3.116532553800202e-06, + "loss": 2.1662, + "step": 13939 + }, + { + "epoch": 2.83, + "learning_rate": 3.1152796782750894e-06, + "loss": 2.1818, + "step": 13940 + }, + { + "epoch": 2.83, + "learning_rate": 3.114027008168704e-06, + "loss": 2.1341, + "step": 13941 + }, + { + "epoch": 2.83, + "learning_rate": 3.112774543518422e-06, + "loss": 2.1653, + "step": 13942 + }, + { + "epoch": 2.83, + "learning_rate": 3.1115222843616167e-06, + "loss": 2.0471, + "step": 13943 + }, + { + "epoch": 2.83, + "learning_rate": 3.1102702307356438e-06, + "loss": 2.1184, + "step": 13944 + }, + { + "epoch": 2.83, + "learning_rate": 3.109018382677864e-06, + "loss": 2.0817, + "step": 13945 + }, + { + "epoch": 2.83, + "learning_rate": 3.1077667402256294e-06, + "loss": 2.1386, + "step": 13946 + }, + { + "epoch": 2.83, + "learning_rate": 3.10651530341629e-06, + "loss": 2.1313, + "step": 13947 + }, + { + "epoch": 2.83, + "learning_rate": 3.105264072287174e-06, + "loss": 2.1025, + "step": 13948 + }, + { + "epoch": 2.83, + "learning_rate": 3.1040130468756245e-06, + "loss": 2.1427, + "step": 13949 + }, + { + "epoch": 2.83, + "learning_rate": 3.1027622272189572e-06, + "loss": 2.1608, + "step": 13950 + }, + { + "epoch": 2.83, + "learning_rate": 3.1015116133545053e-06, + "loss": 2.1219, + "step": 13951 + }, + { + "epoch": 2.83, + "learning_rate": 3.1002612053195745e-06, + "loss": 2.0692, + "step": 13952 + }, + { + "epoch": 2.83, + "learning_rate": 3.0990110031514754e-06, + "loss": 2.162, + "step": 13953 + }, + { + "epoch": 2.83, + "learning_rate": 3.097761006887514e-06, + "loss": 2.1086, + "step": 13954 + }, + { + "epoch": 2.83, + "learning_rate": 3.096511216564979e-06, + "loss": 2.1138, + "step": 13955 + }, + { + "epoch": 2.83, + "learning_rate": 3.095261632221165e-06, + "loss": 2.0849, + "step": 13956 + }, + { + "epoch": 2.83, + "learning_rate": 3.0940122538933536e-06, + "loss": 2.1006, + "step": 13957 + }, + { + "epoch": 2.83, + "learning_rate": 3.092763081618828e-06, + "loss": 2.1645, + "step": 13958 + }, + { + "epoch": 2.83, + "learning_rate": 3.0915141154348515e-06, + "loss": 2.1795, + "step": 13959 + }, + { + "epoch": 2.83, + "learning_rate": 3.0902653553786933e-06, + "loss": 2.1212, + "step": 13960 + }, + { + "epoch": 2.83, + "learning_rate": 3.089016801487612e-06, + "loss": 2.1334, + "step": 13961 + }, + { + "epoch": 2.83, + "learning_rate": 3.087768453798864e-06, + "loss": 2.1279, + "step": 13962 + }, + { + "epoch": 2.83, + "learning_rate": 3.08652031234969e-06, + "loss": 2.1012, + "step": 13963 + }, + { + "epoch": 2.83, + "learning_rate": 3.085272377177334e-06, + "loss": 2.131, + "step": 13964 + }, + { + "epoch": 2.83, + "learning_rate": 3.0840246483190304e-06, + "loss": 2.066, + "step": 13965 + }, + { + "epoch": 2.83, + "learning_rate": 3.0827771258120097e-06, + "loss": 2.1505, + "step": 13966 + }, + { + "epoch": 2.84, + "learning_rate": 3.0815298096934897e-06, + "loss": 2.117, + "step": 13967 + }, + { + "epoch": 2.84, + "learning_rate": 3.0802827000006906e-06, + "loss": 2.1, + "step": 13968 + }, + { + "epoch": 2.84, + "learning_rate": 3.079035796770815e-06, + "loss": 2.1012, + "step": 13969 + }, + { + "epoch": 2.84, + "learning_rate": 3.0777891000410775e-06, + "loss": 2.0535, + "step": 13970 + }, + { + "epoch": 2.84, + "learning_rate": 3.0765426098486673e-06, + "loss": 2.1335, + "step": 13971 + }, + { + "epoch": 2.84, + "learning_rate": 3.075296326230782e-06, + "loss": 2.1268, + "step": 13972 + }, + { + "epoch": 2.84, + "learning_rate": 3.0740502492246003e-06, + "loss": 2.0807, + "step": 13973 + }, + { + "epoch": 2.84, + "learning_rate": 3.0728043788673033e-06, + "loss": 2.0823, + "step": 13974 + }, + { + "epoch": 2.84, + "learning_rate": 3.071558715196066e-06, + "loss": 2.0823, + "step": 13975 + }, + { + "epoch": 2.84, + "learning_rate": 3.0703132582480567e-06, + "loss": 2.095, + "step": 13976 + }, + { + "epoch": 2.84, + "learning_rate": 3.0690680080604307e-06, + "loss": 2.069, + "step": 13977 + }, + { + "epoch": 2.84, + "learning_rate": 3.0678229646703452e-06, + "loss": 2.0787, + "step": 13978 + }, + { + "epoch": 2.84, + "learning_rate": 3.0665781281149477e-06, + "loss": 2.1171, + "step": 13979 + }, + { + "epoch": 2.84, + "learning_rate": 3.0653334984313844e-06, + "loss": 2.09, + "step": 13980 + }, + { + "epoch": 2.84, + "learning_rate": 3.0640890756567854e-06, + "loss": 2.1315, + "step": 13981 + }, + { + "epoch": 2.84, + "learning_rate": 3.0628448598282812e-06, + "loss": 2.125, + "step": 13982 + }, + { + "epoch": 2.84, + "learning_rate": 3.0616008509830007e-06, + "loss": 2.1491, + "step": 13983 + }, + { + "epoch": 2.84, + "learning_rate": 3.0603570491580546e-06, + "loss": 2.1273, + "step": 13984 + }, + { + "epoch": 2.84, + "learning_rate": 3.0591134543905567e-06, + "loss": 2.1409, + "step": 13985 + }, + { + "epoch": 2.84, + "learning_rate": 3.057870066717613e-06, + "loss": 2.1298, + "step": 13986 + }, + { + "epoch": 2.84, + "learning_rate": 3.056626886176324e-06, + "loss": 2.1185, + "step": 13987 + }, + { + "epoch": 2.84, + "learning_rate": 3.0553839128037768e-06, + "loss": 2.0595, + "step": 13988 + }, + { + "epoch": 2.84, + "learning_rate": 3.0541411466370608e-06, + "loss": 2.1259, + "step": 13989 + }, + { + "epoch": 2.84, + "learning_rate": 3.0528985877132565e-06, + "loss": 2.1396, + "step": 13990 + }, + { + "epoch": 2.84, + "learning_rate": 3.0516562360694423e-06, + "loss": 2.0697, + "step": 13991 + }, + { + "epoch": 2.84, + "learning_rate": 3.0504140917426785e-06, + "loss": 2.2028, + "step": 13992 + }, + { + "epoch": 2.84, + "learning_rate": 3.0491721547700325e-06, + "loss": 2.1911, + "step": 13993 + }, + { + "epoch": 2.84, + "learning_rate": 3.047930425188551e-06, + "loss": 2.1169, + "step": 13994 + }, + { + "epoch": 2.84, + "learning_rate": 3.0466889030352976e-06, + "loss": 2.0585, + "step": 13995 + }, + { + "epoch": 2.84, + "learning_rate": 3.0454475883473033e-06, + "loss": 2.0636, + "step": 13996 + }, + { + "epoch": 2.84, + "learning_rate": 3.0442064811616136e-06, + "loss": 2.0873, + "step": 13997 + }, + { + "epoch": 2.84, + "learning_rate": 3.0429655815152513e-06, + "loss": 2.1534, + "step": 13998 + }, + { + "epoch": 2.84, + "learning_rate": 3.0417248894452454e-06, + "loss": 2.1259, + "step": 13999 + }, + { + "epoch": 2.84, + "learning_rate": 3.0404844049886138e-06, + "loss": 2.1111, + "step": 14000 + }, + { + "epoch": 2.84, + "learning_rate": 3.039244128182369e-06, + "loss": 2.1023, + "step": 14001 + }, + { + "epoch": 2.84, + "learning_rate": 3.0380040590635207e-06, + "loss": 2.1418, + "step": 14002 + }, + { + "epoch": 2.84, + "learning_rate": 3.036764197669061e-06, + "loss": 2.152, + "step": 14003 + }, + { + "epoch": 2.84, + "learning_rate": 3.035524544035987e-06, + "loss": 2.0877, + "step": 14004 + }, + { + "epoch": 2.84, + "learning_rate": 3.034285098201286e-06, + "loss": 2.0906, + "step": 14005 + }, + { + "epoch": 2.84, + "learning_rate": 3.033045860201944e-06, + "loss": 2.0629, + "step": 14006 + }, + { + "epoch": 2.84, + "learning_rate": 3.031806830074928e-06, + "loss": 2.1067, + "step": 14007 + }, + { + "epoch": 2.84, + "learning_rate": 3.0305680078572142e-06, + "loss": 2.0815, + "step": 14008 + }, + { + "epoch": 2.84, + "learning_rate": 3.0293293935857548e-06, + "loss": 2.1712, + "step": 14009 + }, + { + "epoch": 2.84, + "learning_rate": 3.0280909872975194e-06, + "loss": 2.13, + "step": 14010 + }, + { + "epoch": 2.84, + "learning_rate": 3.0268527890294487e-06, + "loss": 2.1178, + "step": 14011 + }, + { + "epoch": 2.84, + "learning_rate": 3.0256147988184934e-06, + "loss": 2.1137, + "step": 14012 + }, + { + "epoch": 2.84, + "learning_rate": 3.024377016701584e-06, + "loss": 2.162, + "step": 14013 + }, + { + "epoch": 2.84, + "learning_rate": 3.0231394427156556e-06, + "loss": 2.0506, + "step": 14014 + }, + { + "epoch": 2.84, + "learning_rate": 3.0219020768976336e-06, + "loss": 2.1406, + "step": 14015 + }, + { + "epoch": 2.85, + "learning_rate": 3.0206649192844418e-06, + "loss": 2.0787, + "step": 14016 + }, + { + "epoch": 2.85, + "learning_rate": 3.0194279699129838e-06, + "loss": 2.1588, + "step": 14017 + }, + { + "epoch": 2.85, + "learning_rate": 3.018191228820172e-06, + "loss": 2.2188, + "step": 14018 + }, + { + "epoch": 2.85, + "learning_rate": 3.0169546960429065e-06, + "loss": 2.1342, + "step": 14019 + }, + { + "epoch": 2.85, + "learning_rate": 3.0157183716180838e-06, + "loss": 2.0723, + "step": 14020 + }, + { + "epoch": 2.85, + "learning_rate": 3.014482255582586e-06, + "loss": 2.0887, + "step": 14021 + }, + { + "epoch": 2.85, + "learning_rate": 3.0132463479732988e-06, + "loss": 2.135, + "step": 14022 + }, + { + "epoch": 2.85, + "learning_rate": 3.0120106488271008e-06, + "loss": 2.1204, + "step": 14023 + }, + { + "epoch": 2.85, + "learning_rate": 3.010775158180854e-06, + "loss": 2.1135, + "step": 14024 + }, + { + "epoch": 2.85, + "learning_rate": 3.009539876071427e-06, + "loss": 2.1444, + "step": 14025 + }, + { + "epoch": 2.85, + "learning_rate": 3.008304802535674e-06, + "loss": 2.0707, + "step": 14026 + }, + { + "epoch": 2.85, + "learning_rate": 3.0070699376104516e-06, + "loss": 2.1522, + "step": 14027 + }, + { + "epoch": 2.85, + "learning_rate": 3.0058352813325964e-06, + "loss": 2.1766, + "step": 14028 + }, + { + "epoch": 2.85, + "learning_rate": 3.00460083373895e-06, + "loss": 2.1012, + "step": 14029 + }, + { + "epoch": 2.85, + "learning_rate": 3.003366594866345e-06, + "loss": 2.1057, + "step": 14030 + }, + { + "epoch": 2.85, + "learning_rate": 3.002132564751611e-06, + "loss": 2.0926, + "step": 14031 + }, + { + "epoch": 2.85, + "learning_rate": 3.0008987434315596e-06, + "loss": 2.0621, + "step": 14032 + }, + { + "epoch": 2.85, + "learning_rate": 2.9996651309430113e-06, + "loss": 2.0828, + "step": 14033 + }, + { + "epoch": 2.85, + "learning_rate": 2.998431727322765e-06, + "loss": 2.1355, + "step": 14034 + }, + { + "epoch": 2.85, + "learning_rate": 2.9971985326076327e-06, + "loss": 2.0434, + "step": 14035 + }, + { + "epoch": 2.85, + "learning_rate": 2.9959655468343994e-06, + "loss": 2.159, + "step": 14036 + }, + { + "epoch": 2.85, + "learning_rate": 2.994732770039862e-06, + "loss": 2.1033, + "step": 14037 + }, + { + "epoch": 2.85, + "learning_rate": 2.993500202260795e-06, + "loss": 2.0728, + "step": 14038 + }, + { + "epoch": 2.85, + "learning_rate": 2.992267843533977e-06, + "loss": 2.143, + "step": 14039 + }, + { + "epoch": 2.85, + "learning_rate": 2.9910356938961782e-06, + "loss": 2.1231, + "step": 14040 + }, + { + "epoch": 2.85, + "learning_rate": 2.989803753384166e-06, + "loss": 2.1499, + "step": 14041 + }, + { + "epoch": 2.85, + "learning_rate": 2.9885720220346915e-06, + "loss": 2.1966, + "step": 14042 + }, + { + "epoch": 2.85, + "learning_rate": 2.9873404998845068e-06, + "loss": 2.0535, + "step": 14043 + }, + { + "epoch": 2.85, + "learning_rate": 2.9861091869703594e-06, + "loss": 2.0432, + "step": 14044 + }, + { + "epoch": 2.85, + "learning_rate": 2.984878083328989e-06, + "loss": 2.1703, + "step": 14045 + }, + { + "epoch": 2.85, + "learning_rate": 2.983647188997123e-06, + "loss": 2.1336, + "step": 14046 + }, + { + "epoch": 2.85, + "learning_rate": 2.9824165040114906e-06, + "loss": 2.0663, + "step": 14047 + }, + { + "epoch": 2.85, + "learning_rate": 2.98118602840881e-06, + "loss": 2.1222, + "step": 14048 + }, + { + "epoch": 2.85, + "learning_rate": 2.979955762225799e-06, + "loss": 2.1812, + "step": 14049 + }, + { + "epoch": 2.85, + "learning_rate": 2.978725705499159e-06, + "loss": 2.0254, + "step": 14050 + }, + { + "epoch": 2.85, + "learning_rate": 2.9774958582655943e-06, + "loss": 2.164, + "step": 14051 + }, + { + "epoch": 2.85, + "learning_rate": 2.976266220561802e-06, + "loss": 2.1425, + "step": 14052 + }, + { + "epoch": 2.85, + "learning_rate": 2.9750367924244617e-06, + "loss": 2.1732, + "step": 14053 + }, + { + "epoch": 2.85, + "learning_rate": 2.973807573890268e-06, + "loss": 2.1313, + "step": 14054 + }, + { + "epoch": 2.85, + "learning_rate": 2.9725785649958895e-06, + "loss": 2.0522, + "step": 14055 + }, + { + "epoch": 2.85, + "learning_rate": 2.9713497657779987e-06, + "loss": 2.0644, + "step": 14056 + }, + { + "epoch": 2.85, + "learning_rate": 2.9701211762732553e-06, + "loss": 2.0942, + "step": 14057 + }, + { + "epoch": 2.85, + "learning_rate": 2.96889279651832e-06, + "loss": 2.0996, + "step": 14058 + }, + { + "epoch": 2.85, + "learning_rate": 2.9676646265498423e-06, + "loss": 1.9832, + "step": 14059 + }, + { + "epoch": 2.85, + "learning_rate": 2.966436666404471e-06, + "loss": 2.1579, + "step": 14060 + }, + { + "epoch": 2.85, + "learning_rate": 2.9652089161188393e-06, + "loss": 2.0833, + "step": 14061 + }, + { + "epoch": 2.85, + "learning_rate": 2.9639813757295798e-06, + "loss": 2.0693, + "step": 14062 + }, + { + "epoch": 2.85, + "learning_rate": 2.9627540452733217e-06, + "loss": 2.1028, + "step": 14063 + }, + { + "epoch": 2.85, + "learning_rate": 2.9615269247866863e-06, + "loss": 2.1613, + "step": 14064 + }, + { + "epoch": 2.85, + "learning_rate": 2.960300014306281e-06, + "loss": 2.0753, + "step": 14065 + }, + { + "epoch": 2.86, + "learning_rate": 2.9590733138687155e-06, + "loss": 2.1032, + "step": 14066 + }, + { + "epoch": 2.86, + "learning_rate": 2.957846823510595e-06, + "loss": 2.1441, + "step": 14067 + }, + { + "epoch": 2.86, + "learning_rate": 2.956620543268508e-06, + "loss": 2.1534, + "step": 14068 + }, + { + "epoch": 2.86, + "learning_rate": 2.955394473179044e-06, + "loss": 2.1153, + "step": 14069 + }, + { + "epoch": 2.86, + "learning_rate": 2.9541686132787873e-06, + "loss": 2.107, + "step": 14070 + }, + { + "epoch": 2.86, + "learning_rate": 2.952942963604316e-06, + "loss": 2.0686, + "step": 14071 + }, + { + "epoch": 2.86, + "learning_rate": 2.951717524192195e-06, + "loss": 2.0492, + "step": 14072 + }, + { + "epoch": 2.86, + "learning_rate": 2.950492295078987e-06, + "loss": 2.1588, + "step": 14073 + }, + { + "epoch": 2.86, + "learning_rate": 2.949267276301253e-06, + "loss": 2.1788, + "step": 14074 + }, + { + "epoch": 2.86, + "learning_rate": 2.948042467895544e-06, + "loss": 2.1182, + "step": 14075 + }, + { + "epoch": 2.86, + "learning_rate": 2.9468178698984006e-06, + "loss": 2.1035, + "step": 14076 + }, + { + "epoch": 2.86, + "learning_rate": 2.9455934823463663e-06, + "loss": 2.2158, + "step": 14077 + }, + { + "epoch": 2.86, + "learning_rate": 2.9443693052759626e-06, + "loss": 2.1626, + "step": 14078 + }, + { + "epoch": 2.86, + "learning_rate": 2.9431453387237297e-06, + "loss": 2.0443, + "step": 14079 + }, + { + "epoch": 2.86, + "learning_rate": 2.941921582726176e-06, + "loss": 2.1473, + "step": 14080 + }, + { + "epoch": 2.86, + "learning_rate": 2.9406980373198215e-06, + "loss": 2.102, + "step": 14081 + }, + { + "epoch": 2.86, + "learning_rate": 2.939474702541165e-06, + "loss": 2.169, + "step": 14082 + }, + { + "epoch": 2.86, + "learning_rate": 2.9382515784267142e-06, + "loss": 2.1009, + "step": 14083 + }, + { + "epoch": 2.86, + "learning_rate": 2.937028665012959e-06, + "loss": 2.1075, + "step": 14084 + }, + { + "epoch": 2.86, + "learning_rate": 2.9358059623363934e-06, + "loss": 2.1396, + "step": 14085 + }, + { + "epoch": 2.86, + "learning_rate": 2.934583470433491e-06, + "loss": 2.1436, + "step": 14086 + }, + { + "epoch": 2.86, + "learning_rate": 2.933361189340731e-06, + "loss": 2.1447, + "step": 14087 + }, + { + "epoch": 2.86, + "learning_rate": 2.9321391190945826e-06, + "loss": 2.1245, + "step": 14088 + }, + { + "epoch": 2.86, + "learning_rate": 2.930917259731512e-06, + "loss": 1.9731, + "step": 14089 + }, + { + "epoch": 2.86, + "learning_rate": 2.929695611287969e-06, + "loss": 2.2063, + "step": 14090 + }, + { + "epoch": 2.86, + "learning_rate": 2.9284741738004074e-06, + "loss": 2.2003, + "step": 14091 + }, + { + "epoch": 2.86, + "learning_rate": 2.9272529473052724e-06, + "loss": 2.1032, + "step": 14092 + }, + { + "epoch": 2.86, + "learning_rate": 2.9260319318389983e-06, + "loss": 2.2221, + "step": 14093 + }, + { + "epoch": 2.86, + "learning_rate": 2.9248111274380174e-06, + "loss": 2.0652, + "step": 14094 + }, + { + "epoch": 2.86, + "learning_rate": 2.923590534138756e-06, + "loss": 2.0915, + "step": 14095 + }, + { + "epoch": 2.86, + "learning_rate": 2.922370151977635e-06, + "loss": 2.0975, + "step": 14096 + }, + { + "epoch": 2.86, + "learning_rate": 2.9211499809910602e-06, + "loss": 2.1291, + "step": 14097 + }, + { + "epoch": 2.86, + "learning_rate": 2.9199300212154413e-06, + "loss": 2.1103, + "step": 14098 + }, + { + "epoch": 2.86, + "learning_rate": 2.9187102726871797e-06, + "loss": 2.1285, + "step": 14099 + }, + { + "epoch": 2.86, + "learning_rate": 2.9174907354426696e-06, + "loss": 2.1175, + "step": 14100 + }, + { + "epoch": 2.86, + "learning_rate": 2.9162714095182942e-06, + "loss": 2.143, + "step": 14101 + }, + { + "epoch": 2.86, + "learning_rate": 2.915052294950439e-06, + "loss": 2.0493, + "step": 14102 + }, + { + "epoch": 2.86, + "learning_rate": 2.913833391775469e-06, + "loss": 2.1105, + "step": 14103 + }, + { + "epoch": 2.86, + "learning_rate": 2.9126147000297667e-06, + "loss": 2.092, + "step": 14104 + }, + { + "epoch": 2.86, + "learning_rate": 2.9113962197496836e-06, + "loss": 2.0633, + "step": 14105 + }, + { + "epoch": 2.86, + "learning_rate": 2.910177950971579e-06, + "loss": 2.1253, + "step": 14106 + }, + { + "epoch": 2.86, + "learning_rate": 2.9089598937318044e-06, + "loss": 2.165, + "step": 14107 + }, + { + "epoch": 2.86, + "learning_rate": 2.9077420480666973e-06, + "loss": 2.1183, + "step": 14108 + }, + { + "epoch": 2.86, + "learning_rate": 2.906524414012597e-06, + "loss": 2.1426, + "step": 14109 + }, + { + "epoch": 2.86, + "learning_rate": 2.9053069916058352e-06, + "loss": 2.0699, + "step": 14110 + }, + { + "epoch": 2.86, + "learning_rate": 2.9040897808827384e-06, + "loss": 2.0686, + "step": 14111 + }, + { + "epoch": 2.86, + "learning_rate": 2.9028727818796175e-06, + "loss": 2.1893, + "step": 14112 + }, + { + "epoch": 2.86, + "learning_rate": 2.901655994632788e-06, + "loss": 2.1569, + "step": 14113 + }, + { + "epoch": 2.86, + "learning_rate": 2.9004394191785556e-06, + "loss": 2.1943, + "step": 14114 + }, + { + "epoch": 2.87, + "learning_rate": 2.899223055553221e-06, + "loss": 2.123, + "step": 14115 + }, + { + "epoch": 2.87, + "learning_rate": 2.8980069037930712e-06, + "loss": 2.123, + "step": 14116 + }, + { + "epoch": 2.87, + "learning_rate": 2.8967909639343983e-06, + "loss": 2.0671, + "step": 14117 + }, + { + "epoch": 2.87, + "learning_rate": 2.8955752360134727e-06, + "loss": 2.1229, + "step": 14118 + }, + { + "epoch": 2.87, + "learning_rate": 2.8943597200665808e-06, + "loss": 2.1704, + "step": 14119 + }, + { + "epoch": 2.87, + "learning_rate": 2.8931444161299794e-06, + "loss": 2.0685, + "step": 14120 + }, + { + "epoch": 2.87, + "learning_rate": 2.891929324239937e-06, + "loss": 2.0674, + "step": 14121 + }, + { + "epoch": 2.87, + "learning_rate": 2.890714444432703e-06, + "loss": 2.0307, + "step": 14122 + }, + { + "epoch": 2.87, + "learning_rate": 2.8894997767445255e-06, + "loss": 2.1899, + "step": 14123 + }, + { + "epoch": 2.87, + "learning_rate": 2.888285321211649e-06, + "loss": 2.1265, + "step": 14124 + }, + { + "epoch": 2.87, + "learning_rate": 2.8870710778703105e-06, + "loss": 2.0783, + "step": 14125 + }, + { + "epoch": 2.87, + "learning_rate": 2.885857046756735e-06, + "loss": 2.1142, + "step": 14126 + }, + { + "epoch": 2.87, + "learning_rate": 2.884643227907147e-06, + "loss": 2.1483, + "step": 14127 + }, + { + "epoch": 2.87, + "learning_rate": 2.883429621357764e-06, + "loss": 2.124, + "step": 14128 + }, + { + "epoch": 2.87, + "learning_rate": 2.8822162271447984e-06, + "loss": 2.1724, + "step": 14129 + }, + { + "epoch": 2.87, + "learning_rate": 2.881003045304448e-06, + "loss": 2.12, + "step": 14130 + }, + { + "epoch": 2.87, + "learning_rate": 2.8797900758729145e-06, + "loss": 2.0874, + "step": 14131 + }, + { + "epoch": 2.87, + "learning_rate": 2.878577318886392e-06, + "loss": 2.0698, + "step": 14132 + }, + { + "epoch": 2.87, + "learning_rate": 2.8773647743810586e-06, + "loss": 2.0635, + "step": 14133 + }, + { + "epoch": 2.87, + "learning_rate": 2.8761524423930963e-06, + "loss": 2.0968, + "step": 14134 + }, + { + "epoch": 2.87, + "learning_rate": 2.874940322958678e-06, + "loss": 2.0988, + "step": 14135 + }, + { + "epoch": 2.87, + "learning_rate": 2.873728416113971e-06, + "loss": 2.0989, + "step": 14136 + }, + { + "epoch": 2.87, + "learning_rate": 2.87251672189513e-06, + "loss": 2.1497, + "step": 14137 + }, + { + "epoch": 2.87, + "learning_rate": 2.871305240338311e-06, + "loss": 2.1291, + "step": 14138 + }, + { + "epoch": 2.87, + "learning_rate": 2.8700939714796605e-06, + "loss": 2.0758, + "step": 14139 + }, + { + "epoch": 2.87, + "learning_rate": 2.868882915355322e-06, + "loss": 2.097, + "step": 14140 + }, + { + "epoch": 2.87, + "learning_rate": 2.867672072001425e-06, + "loss": 2.1193, + "step": 14141 + }, + { + "epoch": 2.87, + "learning_rate": 2.866461441454098e-06, + "loss": 2.1693, + "step": 14142 + }, + { + "epoch": 2.87, + "learning_rate": 2.8652510237494647e-06, + "loss": 2.1384, + "step": 14143 + }, + { + "epoch": 2.87, + "learning_rate": 2.8640408189236425e-06, + "loss": 2.1322, + "step": 14144 + }, + { + "epoch": 2.87, + "learning_rate": 2.8628308270127335e-06, + "loss": 2.0256, + "step": 14145 + }, + { + "epoch": 2.87, + "learning_rate": 2.861621048052846e-06, + "loss": 2.1414, + "step": 14146 + }, + { + "epoch": 2.87, + "learning_rate": 2.860411482080069e-06, + "loss": 2.0565, + "step": 14147 + }, + { + "epoch": 2.87, + "learning_rate": 2.8592021291305016e-06, + "loss": 2.1629, + "step": 14148 + }, + { + "epoch": 2.87, + "learning_rate": 2.8579929892402193e-06, + "loss": 2.0881, + "step": 14149 + }, + { + "epoch": 2.87, + "learning_rate": 2.856784062445306e-06, + "loss": 2.0995, + "step": 14150 + }, + { + "epoch": 2.87, + "learning_rate": 2.855575348781825e-06, + "loss": 2.1306, + "step": 14151 + }, + { + "epoch": 2.87, + "learning_rate": 2.8543668482858433e-06, + "loss": 2.1446, + "step": 14152 + }, + { + "epoch": 2.87, + "learning_rate": 2.853158560993421e-06, + "loss": 2.0411, + "step": 14153 + }, + { + "epoch": 2.87, + "learning_rate": 2.85195048694061e-06, + "loss": 2.1111, + "step": 14154 + }, + { + "epoch": 2.87, + "learning_rate": 2.8507426261634506e-06, + "loss": 2.1016, + "step": 14155 + }, + { + "epoch": 2.87, + "learning_rate": 2.8495349786979852e-06, + "loss": 2.1199, + "step": 14156 + }, + { + "epoch": 2.87, + "learning_rate": 2.8483275445802462e-06, + "loss": 2.0813, + "step": 14157 + }, + { + "epoch": 2.87, + "learning_rate": 2.8471203238462587e-06, + "loss": 2.089, + "step": 14158 + }, + { + "epoch": 2.87, + "learning_rate": 2.845913316532046e-06, + "loss": 2.156, + "step": 14159 + }, + { + "epoch": 2.87, + "learning_rate": 2.844706522673616e-06, + "loss": 2.1308, + "step": 14160 + }, + { + "epoch": 2.87, + "learning_rate": 2.843499942306981e-06, + "loss": 2.0632, + "step": 14161 + }, + { + "epoch": 2.87, + "learning_rate": 2.8422935754681326e-06, + "loss": 2.079, + "step": 14162 + }, + { + "epoch": 2.87, + "learning_rate": 2.841087422193076e-06, + "loss": 2.1746, + "step": 14163 + }, + { + "epoch": 2.88, + "learning_rate": 2.8398814825177934e-06, + "loss": 2.1, + "step": 14164 + }, + { + "epoch": 2.88, + "learning_rate": 2.8386757564782684e-06, + "loss": 1.9884, + "step": 14165 + }, + { + "epoch": 2.88, + "learning_rate": 2.837470244110473e-06, + "loss": 2.1178, + "step": 14166 + }, + { + "epoch": 2.88, + "learning_rate": 2.8362649454503776e-06, + "loss": 2.1207, + "step": 14167 + }, + { + "epoch": 2.88, + "learning_rate": 2.835059860533944e-06, + "loss": 2.1579, + "step": 14168 + }, + { + "epoch": 2.88, + "learning_rate": 2.8338549893971333e-06, + "loss": 2.1484, + "step": 14169 + }, + { + "epoch": 2.88, + "learning_rate": 2.832650332075888e-06, + "loss": 2.0295, + "step": 14170 + }, + { + "epoch": 2.88, + "learning_rate": 2.8314458886061534e-06, + "loss": 2.1696, + "step": 14171 + }, + { + "epoch": 2.88, + "learning_rate": 2.830241659023867e-06, + "loss": 2.1395, + "step": 14172 + }, + { + "epoch": 2.88, + "learning_rate": 2.8290376433649626e-06, + "loss": 2.1277, + "step": 14173 + }, + { + "epoch": 2.88, + "learning_rate": 2.8278338416653585e-06, + "loss": 2.0876, + "step": 14174 + }, + { + "epoch": 2.88, + "learning_rate": 2.8266302539609747e-06, + "loss": 2.0635, + "step": 14175 + }, + { + "epoch": 2.88, + "learning_rate": 2.825426880287727e-06, + "loss": 2.1174, + "step": 14176 + }, + { + "epoch": 2.88, + "learning_rate": 2.8242237206815113e-06, + "loss": 2.1778, + "step": 14177 + }, + { + "epoch": 2.88, + "learning_rate": 2.8230207751782322e-06, + "loss": 2.0962, + "step": 14178 + }, + { + "epoch": 2.88, + "learning_rate": 2.821818043813781e-06, + "loss": 2.065, + "step": 14179 + }, + { + "epoch": 2.88, + "learning_rate": 2.8206155266240464e-06, + "loss": 2.1466, + "step": 14180 + }, + { + "epoch": 2.88, + "learning_rate": 2.819413223644901e-06, + "loss": 2.0421, + "step": 14181 + }, + { + "epoch": 2.88, + "learning_rate": 2.818211134912222e-06, + "loss": 2.1814, + "step": 14182 + }, + { + "epoch": 2.88, + "learning_rate": 2.817009260461875e-06, + "loss": 2.1761, + "step": 14183 + }, + { + "epoch": 2.88, + "learning_rate": 2.815807600329724e-06, + "loss": 2.1502, + "step": 14184 + }, + { + "epoch": 2.88, + "learning_rate": 2.8146061545516156e-06, + "loss": 2.067, + "step": 14185 + }, + { + "epoch": 2.88, + "learning_rate": 2.813404923163405e-06, + "loss": 2.1277, + "step": 14186 + }, + { + "epoch": 2.88, + "learning_rate": 2.8122039062009233e-06, + "loss": 2.0669, + "step": 14187 + }, + { + "epoch": 2.88, + "learning_rate": 2.811003103700016e-06, + "loss": 2.1328, + "step": 14188 + }, + { + "epoch": 2.88, + "learning_rate": 2.8098025156965047e-06, + "loss": 2.1934, + "step": 14189 + }, + { + "epoch": 2.88, + "learning_rate": 2.8086021422262154e-06, + "loss": 2.2085, + "step": 14190 + }, + { + "epoch": 2.88, + "learning_rate": 2.8074019833249575e-06, + "loss": 2.1391, + "step": 14191 + }, + { + "epoch": 2.88, + "learning_rate": 2.806202039028545e-06, + "loss": 2.206, + "step": 14192 + }, + { + "epoch": 2.88, + "learning_rate": 2.8050023093727784e-06, + "loss": 2.088, + "step": 14193 + }, + { + "epoch": 2.88, + "learning_rate": 2.8038027943934577e-06, + "loss": 2.0936, + "step": 14194 + }, + { + "epoch": 2.88, + "learning_rate": 2.802603494126367e-06, + "loss": 2.1545, + "step": 14195 + }, + { + "epoch": 2.88, + "learning_rate": 2.8014044086072924e-06, + "loss": 2.0357, + "step": 14196 + }, + { + "epoch": 2.88, + "learning_rate": 2.8002055378720106e-06, + "loss": 2.1351, + "step": 14197 + }, + { + "epoch": 2.88, + "learning_rate": 2.7990068819562956e-06, + "loss": 2.1084, + "step": 14198 + }, + { + "epoch": 2.88, + "learning_rate": 2.797808440895905e-06, + "loss": 2.1272, + "step": 14199 + }, + { + "epoch": 2.88, + "learning_rate": 2.7966102147265993e-06, + "loss": 2.1491, + "step": 14200 + }, + { + "epoch": 2.88, + "learning_rate": 2.795412203484136e-06, + "loss": 2.1416, + "step": 14201 + }, + { + "epoch": 2.88, + "learning_rate": 2.79421440720425e-06, + "loss": 2.0944, + "step": 14202 + }, + { + "epoch": 2.88, + "learning_rate": 2.793016825922684e-06, + "loss": 2.1174, + "step": 14203 + }, + { + "epoch": 2.88, + "learning_rate": 2.7918194596751713e-06, + "loss": 2.2047, + "step": 14204 + }, + { + "epoch": 2.88, + "learning_rate": 2.7906223084974405e-06, + "loss": 2.0966, + "step": 14205 + }, + { + "epoch": 2.88, + "learning_rate": 2.789425372425203e-06, + "loss": 2.1134, + "step": 14206 + }, + { + "epoch": 2.88, + "learning_rate": 2.788228651494176e-06, + "loss": 2.1294, + "step": 14207 + }, + { + "epoch": 2.88, + "learning_rate": 2.787032145740066e-06, + "loss": 2.055, + "step": 14208 + }, + { + "epoch": 2.88, + "learning_rate": 2.7858358551985765e-06, + "loss": 2.1715, + "step": 14209 + }, + { + "epoch": 2.88, + "learning_rate": 2.7846397799053948e-06, + "loss": 2.1142, + "step": 14210 + }, + { + "epoch": 2.88, + "learning_rate": 2.7834439198962105e-06, + "loss": 2.2497, + "step": 14211 + }, + { + "epoch": 2.88, + "learning_rate": 2.7822482752067037e-06, + "loss": 2.1886, + "step": 14212 + }, + { + "epoch": 2.89, + "learning_rate": 2.7810528458725527e-06, + "loss": 2.0946, + "step": 14213 + }, + { + "epoch": 2.89, + "learning_rate": 2.77985763192942e-06, + "loss": 2.1537, + "step": 14214 + }, + { + "epoch": 2.89, + "learning_rate": 2.7786626334129697e-06, + "loss": 2.0983, + "step": 14215 + }, + { + "epoch": 2.89, + "learning_rate": 2.777467850358859e-06, + "loss": 2.0873, + "step": 14216 + }, + { + "epoch": 2.89, + "learning_rate": 2.7762732828027317e-06, + "loss": 2.1285, + "step": 14217 + }, + { + "epoch": 2.89, + "learning_rate": 2.7750789307802316e-06, + "loss": 2.0937, + "step": 14218 + }, + { + "epoch": 2.89, + "learning_rate": 2.7738847943269966e-06, + "loss": 2.2175, + "step": 14219 + }, + { + "epoch": 2.89, + "learning_rate": 2.772690873478656e-06, + "loss": 2.1946, + "step": 14220 + }, + { + "epoch": 2.89, + "learning_rate": 2.7714971682708303e-06, + "loss": 2.074, + "step": 14221 + }, + { + "epoch": 2.89, + "learning_rate": 2.7703036787391357e-06, + "loss": 2.0345, + "step": 14222 + }, + { + "epoch": 2.89, + "learning_rate": 2.769110404919184e-06, + "loss": 2.1664, + "step": 14223 + }, + { + "epoch": 2.89, + "learning_rate": 2.7679173468465813e-06, + "loss": 2.1533, + "step": 14224 + }, + { + "epoch": 2.89, + "learning_rate": 2.766724504556919e-06, + "loss": 2.126, + "step": 14225 + }, + { + "epoch": 2.89, + "learning_rate": 2.765531878085794e-06, + "loss": 2.0133, + "step": 14226 + }, + { + "epoch": 2.89, + "learning_rate": 2.7643394674687807e-06, + "loss": 2.0532, + "step": 14227 + }, + { + "epoch": 2.89, + "learning_rate": 2.76314727274147e-06, + "loss": 2.1192, + "step": 14228 + }, + { + "epoch": 2.89, + "learning_rate": 2.7619552939394234e-06, + "loss": 2.1492, + "step": 14229 + }, + { + "epoch": 2.89, + "learning_rate": 2.760763531098213e-06, + "loss": 2.1221, + "step": 14230 + }, + { + "epoch": 2.89, + "learning_rate": 2.7595719842533863e-06, + "loss": 2.1664, + "step": 14231 + }, + { + "epoch": 2.89, + "learning_rate": 2.75838065344051e-06, + "loss": 2.0472, + "step": 14232 + }, + { + "epoch": 2.89, + "learning_rate": 2.7571895386951184e-06, + "loss": 2.1307, + "step": 14233 + }, + { + "epoch": 2.89, + "learning_rate": 2.7559986400527584e-06, + "loss": 2.1497, + "step": 14234 + }, + { + "epoch": 2.89, + "learning_rate": 2.754807957548955e-06, + "loss": 2.0822, + "step": 14235 + }, + { + "epoch": 2.89, + "learning_rate": 2.7536174912192395e-06, + "loss": 2.1303, + "step": 14236 + }, + { + "epoch": 2.89, + "learning_rate": 2.7524272410991293e-06, + "loss": 2.097, + "step": 14237 + }, + { + "epoch": 2.89, + "learning_rate": 2.7512372072241435e-06, + "loss": 2.1256, + "step": 14238 + }, + { + "epoch": 2.89, + "learning_rate": 2.7500473896297806e-06, + "loss": 2.1745, + "step": 14239 + }, + { + "epoch": 2.89, + "learning_rate": 2.7488577883515456e-06, + "loss": 2.223, + "step": 14240 + }, + { + "epoch": 2.89, + "learning_rate": 2.7476684034249325e-06, + "loss": 2.0775, + "step": 14241 + }, + { + "epoch": 2.89, + "learning_rate": 2.746479234885431e-06, + "loss": 2.1482, + "step": 14242 + }, + { + "epoch": 2.89, + "learning_rate": 2.7452902827685167e-06, + "loss": 2.1376, + "step": 14243 + }, + { + "epoch": 2.89, + "learning_rate": 2.7441015471096665e-06, + "loss": 2.1506, + "step": 14244 + }, + { + "epoch": 2.89, + "learning_rate": 2.7429130279443528e-06, + "loss": 2.0648, + "step": 14245 + }, + { + "epoch": 2.89, + "learning_rate": 2.7417247253080313e-06, + "loss": 2.1628, + "step": 14246 + }, + { + "epoch": 2.89, + "learning_rate": 2.7405366392361586e-06, + "loss": 2.1281, + "step": 14247 + }, + { + "epoch": 2.89, + "learning_rate": 2.7393487697641863e-06, + "loss": 2.1318, + "step": 14248 + }, + { + "epoch": 2.89, + "learning_rate": 2.738161116927557e-06, + "loss": 2.1009, + "step": 14249 + }, + { + "epoch": 2.89, + "learning_rate": 2.736973680761702e-06, + "loss": 2.0885, + "step": 14250 + }, + { + "epoch": 2.89, + "learning_rate": 2.7357864613020524e-06, + "loss": 2.096, + "step": 14251 + }, + { + "epoch": 2.89, + "learning_rate": 2.734599458584033e-06, + "loss": 2.044, + "step": 14252 + }, + { + "epoch": 2.89, + "learning_rate": 2.7334126726430634e-06, + "loss": 2.0646, + "step": 14253 + }, + { + "epoch": 2.89, + "learning_rate": 2.7322261035145448e-06, + "loss": 2.1078, + "step": 14254 + }, + { + "epoch": 2.89, + "learning_rate": 2.7310397512338904e-06, + "loss": 2.0969, + "step": 14255 + }, + { + "epoch": 2.89, + "learning_rate": 2.729853615836485e-06, + "loss": 2.1233, + "step": 14256 + }, + { + "epoch": 2.89, + "learning_rate": 2.728667697357733e-06, + "loss": 2.1174, + "step": 14257 + }, + { + "epoch": 2.89, + "learning_rate": 2.72748199583301e-06, + "loss": 2.0947, + "step": 14258 + }, + { + "epoch": 2.89, + "learning_rate": 2.726296511297696e-06, + "loss": 2.0903, + "step": 14259 + }, + { + "epoch": 2.89, + "learning_rate": 2.725111243787164e-06, + "loss": 2.1748, + "step": 14260 + }, + { + "epoch": 2.89, + "learning_rate": 2.7239261933367743e-06, + "loss": 2.0501, + "step": 14261 + }, + { + "epoch": 2.89, + "learning_rate": 2.7227413599818874e-06, + "loss": 2.144, + "step": 14262 + }, + { + "epoch": 2.9, + "learning_rate": 2.721556743757856e-06, + "loss": 2.1822, + "step": 14263 + }, + { + "epoch": 2.9, + "learning_rate": 2.7203723447000264e-06, + "loss": 2.1741, + "step": 14264 + }, + { + "epoch": 2.9, + "learning_rate": 2.7191881628437335e-06, + "loss": 2.0931, + "step": 14265 + }, + { + "epoch": 2.9, + "learning_rate": 2.7180041982243124e-06, + "loss": 2.0769, + "step": 14266 + }, + { + "epoch": 2.9, + "learning_rate": 2.7168204508770868e-06, + "loss": 2.1387, + "step": 14267 + }, + { + "epoch": 2.9, + "learning_rate": 2.715636920837381e-06, + "loss": 2.1596, + "step": 14268 + }, + { + "epoch": 2.9, + "learning_rate": 2.7144536081405027e-06, + "loss": 2.1306, + "step": 14269 + }, + { + "epoch": 2.9, + "learning_rate": 2.713270512821762e-06, + "loss": 2.0578, + "step": 14270 + }, + { + "epoch": 2.9, + "learning_rate": 2.7120876349164503e-06, + "loss": 2.1569, + "step": 14271 + }, + { + "epoch": 2.9, + "learning_rate": 2.7109049744598735e-06, + "loss": 2.1605, + "step": 14272 + }, + { + "epoch": 2.9, + "learning_rate": 2.709722531487311e-06, + "loss": 2.1741, + "step": 14273 + }, + { + "epoch": 2.9, + "learning_rate": 2.7085403060340464e-06, + "loss": 2.0572, + "step": 14274 + }, + { + "epoch": 2.9, + "learning_rate": 2.7073582981353498e-06, + "loss": 2.0887, + "step": 14275 + }, + { + "epoch": 2.9, + "learning_rate": 2.706176507826491e-06, + "loss": 2.1608, + "step": 14276 + }, + { + "epoch": 2.9, + "learning_rate": 2.704994935142731e-06, + "loss": 2.0649, + "step": 14277 + }, + { + "epoch": 2.9, + "learning_rate": 2.7038135801193253e-06, + "loss": 2.1107, + "step": 14278 + }, + { + "epoch": 2.9, + "learning_rate": 2.7026324427915196e-06, + "loss": 2.0665, + "step": 14279 + }, + { + "epoch": 2.9, + "learning_rate": 2.7014515231945557e-06, + "loss": 2.162, + "step": 14280 + }, + { + "epoch": 2.9, + "learning_rate": 2.7002708213636695e-06, + "loss": 2.1158, + "step": 14281 + }, + { + "epoch": 2.9, + "learning_rate": 2.6990903373340928e-06, + "loss": 2.0263, + "step": 14282 + }, + { + "epoch": 2.9, + "learning_rate": 2.69791007114104e-06, + "loss": 2.1236, + "step": 14283 + }, + { + "epoch": 2.9, + "learning_rate": 2.696730022819731e-06, + "loss": 2.1663, + "step": 14284 + }, + { + "epoch": 2.9, + "learning_rate": 2.6955501924053784e-06, + "loss": 2.0943, + "step": 14285 + }, + { + "epoch": 2.9, + "learning_rate": 2.6943705799331776e-06, + "loss": 2.1417, + "step": 14286 + }, + { + "epoch": 2.9, + "learning_rate": 2.6931911854383273e-06, + "loss": 2.1483, + "step": 14287 + }, + { + "epoch": 2.9, + "learning_rate": 2.6920120089560175e-06, + "loss": 2.0349, + "step": 14288 + }, + { + "epoch": 2.9, + "learning_rate": 2.6908330505214343e-06, + "loss": 2.194, + "step": 14289 + }, + { + "epoch": 2.9, + "learning_rate": 2.6896543101697483e-06, + "loss": 2.1383, + "step": 14290 + }, + { + "epoch": 2.9, + "learning_rate": 2.688475787936131e-06, + "loss": 2.1004, + "step": 14291 + }, + { + "epoch": 2.9, + "learning_rate": 2.6872974838557476e-06, + "loss": 2.1225, + "step": 14292 + }, + { + "epoch": 2.9, + "learning_rate": 2.686119397963758e-06, + "loss": 2.0559, + "step": 14293 + }, + { + "epoch": 2.9, + "learning_rate": 2.6849415302953053e-06, + "loss": 2.08, + "step": 14294 + }, + { + "epoch": 2.9, + "learning_rate": 2.6837638808855402e-06, + "loss": 2.1001, + "step": 14295 + }, + { + "epoch": 2.9, + "learning_rate": 2.682586449769592e-06, + "loss": 2.1164, + "step": 14296 + }, + { + "epoch": 2.9, + "learning_rate": 2.6814092369826017e-06, + "loss": 2.1465, + "step": 14297 + }, + { + "epoch": 2.9, + "learning_rate": 2.680232242559686e-06, + "loss": 2.0921, + "step": 14298 + }, + { + "epoch": 2.9, + "learning_rate": 2.6790554665359693e-06, + "loss": 2.0957, + "step": 14299 + }, + { + "epoch": 2.9, + "learning_rate": 2.677878908946555e-06, + "loss": 2.2607, + "step": 14300 + }, + { + "epoch": 2.9, + "learning_rate": 2.6767025698265526e-06, + "loss": 2.1023, + "step": 14301 + }, + { + "epoch": 2.9, + "learning_rate": 2.67552644921106e-06, + "loss": 2.081, + "step": 14302 + }, + { + "epoch": 2.9, + "learning_rate": 2.6743505471351717e-06, + "loss": 2.1101, + "step": 14303 + }, + { + "epoch": 2.9, + "learning_rate": 2.673174863633966e-06, + "loss": 2.0702, + "step": 14304 + }, + { + "epoch": 2.9, + "learning_rate": 2.6719993987425274e-06, + "loss": 2.1445, + "step": 14305 + }, + { + "epoch": 2.9, + "learning_rate": 2.6708241524959254e-06, + "loss": 2.0693, + "step": 14306 + }, + { + "epoch": 2.9, + "learning_rate": 2.66964912492923e-06, + "loss": 2.0805, + "step": 14307 + }, + { + "epoch": 2.9, + "learning_rate": 2.668474316077495e-06, + "loss": 2.0317, + "step": 14308 + }, + { + "epoch": 2.9, + "learning_rate": 2.6672997259757748e-06, + "loss": 2.0706, + "step": 14309 + }, + { + "epoch": 2.9, + "learning_rate": 2.666125354659119e-06, + "loss": 2.0641, + "step": 14310 + }, + { + "epoch": 2.9, + "learning_rate": 2.664951202162558e-06, + "loss": 2.1256, + "step": 14311 + }, + { + "epoch": 2.91, + "learning_rate": 2.663777268521137e-06, + "loss": 2.1195, + "step": 14312 + }, + { + "epoch": 2.91, + "learning_rate": 2.6626035537698735e-06, + "loss": 2.1643, + "step": 14313 + }, + { + "epoch": 2.91, + "learning_rate": 2.6614300579437947e-06, + "loss": 2.1526, + "step": 14314 + }, + { + "epoch": 2.91, + "learning_rate": 2.6602567810779054e-06, + "loss": 2.108, + "step": 14315 + }, + { + "epoch": 2.91, + "learning_rate": 2.6590837232072186e-06, + "loss": 2.1346, + "step": 14316 + }, + { + "epoch": 2.91, + "learning_rate": 2.6579108843667323e-06, + "loss": 2.1497, + "step": 14317 + }, + { + "epoch": 2.91, + "learning_rate": 2.656738264591445e-06, + "loss": 2.1154, + "step": 14318 + }, + { + "epoch": 2.91, + "learning_rate": 2.6555658639163363e-06, + "loss": 2.1395, + "step": 14319 + }, + { + "epoch": 2.91, + "learning_rate": 2.6543936823763928e-06, + "loss": 2.1346, + "step": 14320 + }, + { + "epoch": 2.91, + "learning_rate": 2.6532217200065856e-06, + "loss": 2.1704, + "step": 14321 + }, + { + "epoch": 2.91, + "learning_rate": 2.6520499768418874e-06, + "loss": 2.1793, + "step": 14322 + }, + { + "epoch": 2.91, + "learning_rate": 2.6508784529172525e-06, + "loss": 2.1259, + "step": 14323 + }, + { + "epoch": 2.91, + "learning_rate": 2.6497071482676394e-06, + "loss": 2.1106, + "step": 14324 + }, + { + "epoch": 2.91, + "learning_rate": 2.6485360629279956e-06, + "loss": 2.128, + "step": 14325 + }, + { + "epoch": 2.91, + "learning_rate": 2.6473651969332658e-06, + "loss": 2.1104, + "step": 14326 + }, + { + "epoch": 2.91, + "learning_rate": 2.646194550318378e-06, + "loss": 2.0649, + "step": 14327 + }, + { + "epoch": 2.91, + "learning_rate": 2.645024123118266e-06, + "loss": 2.0848, + "step": 14328 + }, + { + "epoch": 2.91, + "learning_rate": 2.6438539153678533e-06, + "loss": 2.0978, + "step": 14329 + }, + { + "epoch": 2.91, + "learning_rate": 2.6426839271020488e-06, + "loss": 2.1051, + "step": 14330 + }, + { + "epoch": 2.91, + "learning_rate": 2.6415141583557648e-06, + "loss": 2.1153, + "step": 14331 + }, + { + "epoch": 2.91, + "learning_rate": 2.640344609163904e-06, + "loss": 2.0784, + "step": 14332 + }, + { + "epoch": 2.91, + "learning_rate": 2.6391752795613646e-06, + "loss": 2.1792, + "step": 14333 + }, + { + "epoch": 2.91, + "learning_rate": 2.6380061695830295e-06, + "loss": 2.0643, + "step": 14334 + }, + { + "epoch": 2.91, + "learning_rate": 2.6368372792637852e-06, + "loss": 2.0649, + "step": 14335 + }, + { + "epoch": 2.91, + "learning_rate": 2.6356686086385075e-06, + "loss": 2.1313, + "step": 14336 + }, + { + "epoch": 2.91, + "learning_rate": 2.6345001577420694e-06, + "loss": 2.127, + "step": 14337 + }, + { + "epoch": 2.91, + "learning_rate": 2.633331926609326e-06, + "loss": 2.0216, + "step": 14338 + }, + { + "epoch": 2.91, + "learning_rate": 2.632163915275141e-06, + "loss": 2.1112, + "step": 14339 + }, + { + "epoch": 2.91, + "learning_rate": 2.6309961237743553e-06, + "loss": 2.0904, + "step": 14340 + }, + { + "epoch": 2.91, + "learning_rate": 2.6298285521418243e-06, + "loss": 2.1355, + "step": 14341 + }, + { + "epoch": 2.91, + "learning_rate": 2.6286612004123757e-06, + "loss": 2.095, + "step": 14342 + }, + { + "epoch": 2.91, + "learning_rate": 2.627494068620845e-06, + "loss": 2.1274, + "step": 14343 + }, + { + "epoch": 2.91, + "learning_rate": 2.62632715680205e-06, + "loss": 2.1489, + "step": 14344 + }, + { + "epoch": 2.91, + "learning_rate": 2.6251604649908113e-06, + "loss": 2.0439, + "step": 14345 + }, + { + "epoch": 2.91, + "learning_rate": 2.623993993221938e-06, + "loss": 2.1244, + "step": 14346 + }, + { + "epoch": 2.91, + "learning_rate": 2.6228277415302395e-06, + "loss": 2.0852, + "step": 14347 + }, + { + "epoch": 2.91, + "learning_rate": 2.621661709950505e-06, + "loss": 2.1052, + "step": 14348 + }, + { + "epoch": 2.91, + "learning_rate": 2.620495898517529e-06, + "loss": 2.1762, + "step": 14349 + }, + { + "epoch": 2.91, + "learning_rate": 2.619330307266096e-06, + "loss": 2.1133, + "step": 14350 + }, + { + "epoch": 2.91, + "learning_rate": 2.6181649362309847e-06, + "loss": 2.1159, + "step": 14351 + }, + { + "epoch": 2.91, + "learning_rate": 2.6169997854469633e-06, + "loss": 2.0337, + "step": 14352 + }, + { + "epoch": 2.91, + "learning_rate": 2.6158348549487968e-06, + "loss": 2.1, + "step": 14353 + }, + { + "epoch": 2.91, + "learning_rate": 2.6146701447712475e-06, + "loss": 2.1012, + "step": 14354 + }, + { + "epoch": 2.91, + "learning_rate": 2.61350565494906e-06, + "loss": 2.1089, + "step": 14355 + }, + { + "epoch": 2.91, + "learning_rate": 2.6123413855169832e-06, + "loss": 2.0665, + "step": 14356 + }, + { + "epoch": 2.91, + "learning_rate": 2.6111773365097538e-06, + "loss": 2.153, + "step": 14357 + }, + { + "epoch": 2.91, + "learning_rate": 2.6100135079621057e-06, + "loss": 2.0601, + "step": 14358 + }, + { + "epoch": 2.91, + "learning_rate": 2.6088498999087607e-06, + "loss": 2.1222, + "step": 14359 + }, + { + "epoch": 2.91, + "learning_rate": 2.6076865123844384e-06, + "loss": 2.1995, + "step": 14360 + }, + { + "epoch": 2.92, + "learning_rate": 2.6065233454238526e-06, + "loss": 2.0115, + "step": 14361 + }, + { + "epoch": 2.92, + "learning_rate": 2.605360399061708e-06, + "loss": 2.1327, + "step": 14362 + }, + { + "epoch": 2.92, + "learning_rate": 2.6041976733327e-06, + "loss": 2.04, + "step": 14363 + }, + { + "epoch": 2.92, + "learning_rate": 2.6030351682715236e-06, + "loss": 2.1693, + "step": 14364 + }, + { + "epoch": 2.92, + "learning_rate": 2.601872883912864e-06, + "loss": 2.0826, + "step": 14365 + }, + { + "epoch": 2.92, + "learning_rate": 2.6007108202914033e-06, + "loss": 2.1169, + "step": 14366 + }, + { + "epoch": 2.92, + "learning_rate": 2.599548977441807e-06, + "loss": 2.0776, + "step": 14367 + }, + { + "epoch": 2.92, + "learning_rate": 2.598387355398745e-06, + "loss": 2.0992, + "step": 14368 + }, + { + "epoch": 2.92, + "learning_rate": 2.597225954196878e-06, + "loss": 2.1339, + "step": 14369 + }, + { + "epoch": 2.92, + "learning_rate": 2.5960647738708553e-06, + "loss": 2.1134, + "step": 14370 + }, + { + "epoch": 2.92, + "learning_rate": 2.594903814455323e-06, + "loss": 2.158, + "step": 14371 + }, + { + "epoch": 2.92, + "learning_rate": 2.5937430759849226e-06, + "loss": 2.1858, + "step": 14372 + }, + { + "epoch": 2.92, + "learning_rate": 2.5925825584942886e-06, + "loss": 2.0752, + "step": 14373 + }, + { + "epoch": 2.92, + "learning_rate": 2.5914222620180417e-06, + "loss": 2.1052, + "step": 14374 + }, + { + "epoch": 2.92, + "learning_rate": 2.590262186590805e-06, + "loss": 2.0946, + "step": 14375 + }, + { + "epoch": 2.92, + "learning_rate": 2.589102332247192e-06, + "loss": 2.1202, + "step": 14376 + }, + { + "epoch": 2.92, + "learning_rate": 2.5879426990218105e-06, + "loss": 2.1865, + "step": 14377 + }, + { + "epoch": 2.92, + "learning_rate": 2.5867832869492547e-06, + "loss": 2.174, + "step": 14378 + }, + { + "epoch": 2.92, + "learning_rate": 2.5856240960641255e-06, + "loss": 2.1283, + "step": 14379 + }, + { + "epoch": 2.92, + "learning_rate": 2.584465126400999e-06, + "loss": 2.1334, + "step": 14380 + }, + { + "epoch": 2.92, + "learning_rate": 2.583306377994468e-06, + "loss": 2.1372, + "step": 14381 + }, + { + "epoch": 2.92, + "learning_rate": 2.5821478508790965e-06, + "loss": 2.1362, + "step": 14382 + }, + { + "epoch": 2.92, + "learning_rate": 2.5809895450894573e-06, + "loss": 2.0902, + "step": 14383 + }, + { + "epoch": 2.92, + "learning_rate": 2.579831460660106e-06, + "loss": 2.1363, + "step": 14384 + }, + { + "epoch": 2.92, + "learning_rate": 2.578673597625597e-06, + "loss": 2.134, + "step": 14385 + }, + { + "epoch": 2.92, + "learning_rate": 2.577515956020479e-06, + "loss": 2.1405, + "step": 14386 + }, + { + "epoch": 2.92, + "learning_rate": 2.5763585358792944e-06, + "loss": 2.151, + "step": 14387 + }, + { + "epoch": 2.92, + "learning_rate": 2.5752013372365725e-06, + "loss": 2.1131, + "step": 14388 + }, + { + "epoch": 2.92, + "learning_rate": 2.5740443601268417e-06, + "loss": 2.0884, + "step": 14389 + }, + { + "epoch": 2.92, + "learning_rate": 2.572887604584624e-06, + "loss": 2.1043, + "step": 14390 + }, + { + "epoch": 2.92, + "learning_rate": 2.5717310706444364e-06, + "loss": 2.0391, + "step": 14391 + }, + { + "epoch": 2.92, + "learning_rate": 2.5705747583407793e-06, + "loss": 2.0631, + "step": 14392 + }, + { + "epoch": 2.92, + "learning_rate": 2.5694186677081568e-06, + "loss": 2.075, + "step": 14393 + }, + { + "epoch": 2.92, + "learning_rate": 2.568262798781066e-06, + "loss": 2.1067, + "step": 14394 + }, + { + "epoch": 2.92, + "learning_rate": 2.5671071515939892e-06, + "loss": 2.1586, + "step": 14395 + }, + { + "epoch": 2.92, + "learning_rate": 2.5659517261814104e-06, + "loss": 2.0385, + "step": 14396 + }, + { + "epoch": 2.92, + "learning_rate": 2.5647965225778025e-06, + "loss": 2.1663, + "step": 14397 + }, + { + "epoch": 2.92, + "learning_rate": 2.5636415408176373e-06, + "loss": 2.1344, + "step": 14398 + }, + { + "epoch": 2.92, + "learning_rate": 2.562486780935369e-06, + "loss": 2.1122, + "step": 14399 + }, + { + "epoch": 2.92, + "learning_rate": 2.5613322429654573e-06, + "loss": 2.1351, + "step": 14400 + }, + { + "epoch": 2.92, + "learning_rate": 2.5601779269423464e-06, + "loss": 2.1993, + "step": 14401 + }, + { + "epoch": 2.92, + "learning_rate": 2.5590238329004846e-06, + "loss": 2.0886, + "step": 14402 + }, + { + "epoch": 2.92, + "learning_rate": 2.5578699608742972e-06, + "loss": 2.0821, + "step": 14403 + }, + { + "epoch": 2.92, + "learning_rate": 2.5567163108982197e-06, + "loss": 2.1306, + "step": 14404 + }, + { + "epoch": 2.92, + "learning_rate": 2.5555628830066635e-06, + "loss": 2.162, + "step": 14405 + }, + { + "epoch": 2.92, + "learning_rate": 2.5544096772340567e-06, + "loss": 2.0819, + "step": 14406 + }, + { + "epoch": 2.92, + "learning_rate": 2.5532566936147974e-06, + "loss": 2.1143, + "step": 14407 + }, + { + "epoch": 2.92, + "learning_rate": 2.552103932183293e-06, + "loss": 2.1488, + "step": 14408 + }, + { + "epoch": 2.92, + "learning_rate": 2.5509513929739304e-06, + "loss": 2.0087, + "step": 14409 + }, + { + "epoch": 2.92, + "learning_rate": 2.549799076021109e-06, + "loss": 2.1046, + "step": 14410 + }, + { + "epoch": 2.93, + "learning_rate": 2.548646981359201e-06, + "loss": 2.2191, + "step": 14411 + }, + { + "epoch": 2.93, + "learning_rate": 2.547495109022584e-06, + "loss": 2.0887, + "step": 14412 + }, + { + "epoch": 2.93, + "learning_rate": 2.5463434590456304e-06, + "loss": 2.117, + "step": 14413 + }, + { + "epoch": 2.93, + "learning_rate": 2.5451920314626966e-06, + "loss": 2.1233, + "step": 14414 + }, + { + "epoch": 2.93, + "learning_rate": 2.5440408263081385e-06, + "loss": 2.1292, + "step": 14415 + }, + { + "epoch": 2.93, + "learning_rate": 2.542889843616306e-06, + "loss": 2.0427, + "step": 14416 + }, + { + "epoch": 2.93, + "learning_rate": 2.5417390834215426e-06, + "loss": 2.1354, + "step": 14417 + }, + { + "epoch": 2.93, + "learning_rate": 2.5405885457581793e-06, + "loss": 2.1168, + "step": 14418 + }, + { + "epoch": 2.93, + "learning_rate": 2.5394382306605457e-06, + "loss": 2.0957, + "step": 14419 + }, + { + "epoch": 2.93, + "learning_rate": 2.538288138162965e-06, + "loss": 2.1808, + "step": 14420 + }, + { + "epoch": 2.93, + "learning_rate": 2.537138268299755e-06, + "loss": 2.2176, + "step": 14421 + }, + { + "epoch": 2.93, + "learning_rate": 2.5359886211052177e-06, + "loss": 2.1514, + "step": 14422 + }, + { + "epoch": 2.93, + "learning_rate": 2.534839196613661e-06, + "loss": 2.1182, + "step": 14423 + }, + { + "epoch": 2.93, + "learning_rate": 2.533689994859372e-06, + "loss": 2.0693, + "step": 14424 + }, + { + "epoch": 2.93, + "learning_rate": 2.5325410158766504e-06, + "loss": 2.129, + "step": 14425 + }, + { + "epoch": 2.93, + "learning_rate": 2.53139225969977e-06, + "loss": 2.125, + "step": 14426 + }, + { + "epoch": 2.93, + "learning_rate": 2.530243726363012e-06, + "loss": 2.0807, + "step": 14427 + }, + { + "epoch": 2.93, + "learning_rate": 2.5290954159006388e-06, + "loss": 2.2368, + "step": 14428 + }, + { + "epoch": 2.93, + "learning_rate": 2.5279473283469148e-06, + "loss": 2.06, + "step": 14429 + }, + { + "epoch": 2.93, + "learning_rate": 2.526799463736097e-06, + "loss": 2.1259, + "step": 14430 + }, + { + "epoch": 2.93, + "learning_rate": 2.5256518221024352e-06, + "loss": 2.1749, + "step": 14431 + }, + { + "epoch": 2.93, + "learning_rate": 2.524504403480167e-06, + "loss": 2.0733, + "step": 14432 + }, + { + "epoch": 2.93, + "learning_rate": 2.5233572079035306e-06, + "loss": 2.1112, + "step": 14433 + }, + { + "epoch": 2.93, + "learning_rate": 2.5222102354067534e-06, + "loss": 2.1021, + "step": 14434 + }, + { + "epoch": 2.93, + "learning_rate": 2.5210634860240623e-06, + "loss": 2.0966, + "step": 14435 + }, + { + "epoch": 2.93, + "learning_rate": 2.519916959789667e-06, + "loss": 2.1368, + "step": 14436 + }, + { + "epoch": 2.93, + "learning_rate": 2.518770656737777e-06, + "loss": 2.0337, + "step": 14437 + }, + { + "epoch": 2.93, + "learning_rate": 2.5176245769025988e-06, + "loss": 2.173, + "step": 14438 + }, + { + "epoch": 2.93, + "learning_rate": 2.5164787203183216e-06, + "loss": 2.1058, + "step": 14439 + }, + { + "epoch": 2.93, + "learning_rate": 2.515333087019137e-06, + "loss": 2.0669, + "step": 14440 + }, + { + "epoch": 2.93, + "learning_rate": 2.5141876770392283e-06, + "loss": 2.1073, + "step": 14441 + }, + { + "epoch": 2.93, + "learning_rate": 2.5130424904127736e-06, + "loss": 2.029, + "step": 14442 + }, + { + "epoch": 2.93, + "learning_rate": 2.5118975271739345e-06, + "loss": 2.1153, + "step": 14443 + }, + { + "epoch": 2.93, + "learning_rate": 2.5107527873568784e-06, + "loss": 2.1251, + "step": 14444 + }, + { + "epoch": 2.93, + "learning_rate": 2.509608270995758e-06, + "loss": 2.0133, + "step": 14445 + }, + { + "epoch": 2.93, + "learning_rate": 2.5084639781247266e-06, + "loss": 2.1581, + "step": 14446 + }, + { + "epoch": 2.93, + "learning_rate": 2.507319908777921e-06, + "loss": 2.1155, + "step": 14447 + }, + { + "epoch": 2.93, + "learning_rate": 2.5061760629894815e-06, + "loss": 2.165, + "step": 14448 + }, + { + "epoch": 2.93, + "learning_rate": 2.5050324407935277e-06, + "loss": 2.1144, + "step": 14449 + }, + { + "epoch": 2.93, + "learning_rate": 2.5038890422241958e-06, + "loss": 2.1137, + "step": 14450 + }, + { + "epoch": 2.93, + "learning_rate": 2.5027458673155903e-06, + "loss": 2.1746, + "step": 14451 + }, + { + "epoch": 2.93, + "learning_rate": 2.5016029161018264e-06, + "loss": 2.0615, + "step": 14452 + }, + { + "epoch": 2.93, + "learning_rate": 2.500460188617e-06, + "loss": 2.1233, + "step": 14453 + }, + { + "epoch": 2.93, + "learning_rate": 2.4993176848952106e-06, + "loss": 2.1192, + "step": 14454 + }, + { + "epoch": 2.93, + "learning_rate": 2.4981754049705467e-06, + "loss": 2.1606, + "step": 14455 + }, + { + "epoch": 2.93, + "learning_rate": 2.4970333488770927e-06, + "loss": 2.0917, + "step": 14456 + }, + { + "epoch": 2.93, + "learning_rate": 2.4958915166489173e-06, + "loss": 2.1206, + "step": 14457 + }, + { + "epoch": 2.93, + "learning_rate": 2.4947499083200944e-06, + "loss": 2.0584, + "step": 14458 + }, + { + "epoch": 2.93, + "learning_rate": 2.4936085239246845e-06, + "loss": 2.0682, + "step": 14459 + }, + { + "epoch": 2.94, + "learning_rate": 2.492467363496747e-06, + "loss": 2.1378, + "step": 14460 + }, + { + "epoch": 2.94, + "learning_rate": 2.491326427070323e-06, + "loss": 2.1454, + "step": 14461 + }, + { + "epoch": 2.94, + "learning_rate": 2.4901857146794593e-06, + "loss": 2.164, + "step": 14462 + }, + { + "epoch": 2.94, + "learning_rate": 2.489045226358193e-06, + "loss": 2.1443, + "step": 14463 + }, + { + "epoch": 2.94, + "learning_rate": 2.4879049621405447e-06, + "loss": 2.1453, + "step": 14464 + }, + { + "epoch": 2.94, + "learning_rate": 2.4867649220605473e-06, + "loss": 2.094, + "step": 14465 + }, + { + "epoch": 2.94, + "learning_rate": 2.4856251061522086e-06, + "loss": 2.0088, + "step": 14466 + }, + { + "epoch": 2.94, + "learning_rate": 2.4844855144495406e-06, + "loss": 2.1047, + "step": 14467 + }, + { + "epoch": 2.94, + "learning_rate": 2.4833461469865417e-06, + "loss": 2.1286, + "step": 14468 + }, + { + "epoch": 2.94, + "learning_rate": 2.482207003797209e-06, + "loss": 2.1487, + "step": 14469 + }, + { + "epoch": 2.94, + "learning_rate": 2.481068084915531e-06, + "loss": 2.1229, + "step": 14470 + }, + { + "epoch": 2.94, + "learning_rate": 2.479929390375493e-06, + "loss": 2.1254, + "step": 14471 + }, + { + "epoch": 2.94, + "learning_rate": 2.4787909202110636e-06, + "loss": 2.1241, + "step": 14472 + }, + { + "epoch": 2.94, + "learning_rate": 2.4776526744562146e-06, + "loss": 2.1592, + "step": 14473 + }, + { + "epoch": 2.94, + "learning_rate": 2.476514653144907e-06, + "loss": 2.0432, + "step": 14474 + }, + { + "epoch": 2.94, + "learning_rate": 2.475376856311099e-06, + "loss": 2.1042, + "step": 14475 + }, + { + "epoch": 2.94, + "learning_rate": 2.474239283988734e-06, + "loss": 2.0793, + "step": 14476 + }, + { + "epoch": 2.94, + "learning_rate": 2.473101936211756e-06, + "loss": 2.1376, + "step": 14477 + }, + { + "epoch": 2.94, + "learning_rate": 2.4719648130141026e-06, + "loss": 2.058, + "step": 14478 + }, + { + "epoch": 2.94, + "learning_rate": 2.4708279144296964e-06, + "loss": 2.0991, + "step": 14479 + }, + { + "epoch": 2.94, + "learning_rate": 2.4696912404924622e-06, + "loss": 2.1485, + "step": 14480 + }, + { + "epoch": 2.94, + "learning_rate": 2.468554791236314e-06, + "loss": 2.074, + "step": 14481 + }, + { + "epoch": 2.94, + "learning_rate": 2.4674185666951634e-06, + "loss": 2.101, + "step": 14482 + }, + { + "epoch": 2.94, + "learning_rate": 2.4662825669029054e-06, + "loss": 2.017, + "step": 14483 + }, + { + "epoch": 2.94, + "learning_rate": 2.4651467918934393e-06, + "loss": 2.0796, + "step": 14484 + }, + { + "epoch": 2.94, + "learning_rate": 2.4640112417006513e-06, + "loss": 2.1075, + "step": 14485 + }, + { + "epoch": 2.94, + "learning_rate": 2.462875916358426e-06, + "loss": 2.0317, + "step": 14486 + }, + { + "epoch": 2.94, + "learning_rate": 2.4617408159006328e-06, + "loss": 2.2107, + "step": 14487 + }, + { + "epoch": 2.94, + "learning_rate": 2.460605940361145e-06, + "loss": 2.0909, + "step": 14488 + }, + { + "epoch": 2.94, + "learning_rate": 2.459471289773815e-06, + "loss": 2.0641, + "step": 14489 + }, + { + "epoch": 2.94, + "learning_rate": 2.458336864172508e-06, + "loss": 2.1482, + "step": 14490 + }, + { + "epoch": 2.94, + "learning_rate": 2.457202663591065e-06, + "loss": 2.1306, + "step": 14491 + }, + { + "epoch": 2.94, + "learning_rate": 2.456068688063331e-06, + "loss": 2.1485, + "step": 14492 + }, + { + "epoch": 2.94, + "learning_rate": 2.4549349376231367e-06, + "loss": 2.0996, + "step": 14493 + }, + { + "epoch": 2.94, + "learning_rate": 2.45380141230431e-06, + "loss": 2.1019, + "step": 14494 + }, + { + "epoch": 2.94, + "learning_rate": 2.4526681121406737e-06, + "loss": 2.1502, + "step": 14495 + }, + { + "epoch": 2.94, + "learning_rate": 2.451535037166044e-06, + "loss": 2.1824, + "step": 14496 + }, + { + "epoch": 2.94, + "learning_rate": 2.4504021874142227e-06, + "loss": 2.1178, + "step": 14497 + }, + { + "epoch": 2.94, + "learning_rate": 2.4492695629190133e-06, + "loss": 2.1043, + "step": 14498 + }, + { + "epoch": 2.94, + "learning_rate": 2.4481371637142094e-06, + "loss": 2.0658, + "step": 14499 + }, + { + "epoch": 2.94, + "learning_rate": 2.447004989833602e-06, + "loss": 2.1279, + "step": 14500 + }, + { + "epoch": 2.94, + "learning_rate": 2.4458730413109666e-06, + "loss": 2.1066, + "step": 14501 + }, + { + "epoch": 2.94, + "learning_rate": 2.444741318180078e-06, + "loss": 2.075, + "step": 14502 + }, + { + "epoch": 2.94, + "learning_rate": 2.443609820474705e-06, + "loss": 2.1454, + "step": 14503 + }, + { + "epoch": 2.94, + "learning_rate": 2.4424785482286096e-06, + "loss": 2.2577, + "step": 14504 + }, + { + "epoch": 2.94, + "learning_rate": 2.4413475014755396e-06, + "loss": 2.0958, + "step": 14505 + }, + { + "epoch": 2.94, + "learning_rate": 2.4402166802492465e-06, + "loss": 2.1387, + "step": 14506 + }, + { + "epoch": 2.94, + "learning_rate": 2.439086084583472e-06, + "loss": 2.1744, + "step": 14507 + }, + { + "epoch": 2.94, + "learning_rate": 2.4379557145119436e-06, + "loss": 2.082, + "step": 14508 + }, + { + "epoch": 2.95, + "learning_rate": 2.436825570068392e-06, + "loss": 2.0855, + "step": 14509 + }, + { + "epoch": 2.95, + "learning_rate": 2.435695651286536e-06, + "loss": 2.061, + "step": 14510 + }, + { + "epoch": 2.95, + "learning_rate": 2.4345659582000937e-06, + "loss": 2.1143, + "step": 14511 + }, + { + "epoch": 2.95, + "learning_rate": 2.4334364908427634e-06, + "loss": 2.2051, + "step": 14512 + }, + { + "epoch": 2.95, + "learning_rate": 2.432307249248249e-06, + "loss": 2.1745, + "step": 14513 + }, + { + "epoch": 2.95, + "learning_rate": 2.4311782334502444e-06, + "loss": 2.1425, + "step": 14514 + }, + { + "epoch": 2.95, + "learning_rate": 2.4300494434824373e-06, + "loss": 2.1104, + "step": 14515 + }, + { + "epoch": 2.95, + "learning_rate": 2.4289208793785024e-06, + "loss": 2.0623, + "step": 14516 + }, + { + "epoch": 2.95, + "learning_rate": 2.4277925411721148e-06, + "loss": 2.1589, + "step": 14517 + }, + { + "epoch": 2.95, + "learning_rate": 2.426664428896942e-06, + "loss": 2.1647, + "step": 14518 + }, + { + "epoch": 2.95, + "learning_rate": 2.425536542586645e-06, + "loss": 2.2029, + "step": 14519 + }, + { + "epoch": 2.95, + "learning_rate": 2.42440888227487e-06, + "loss": 2.1403, + "step": 14520 + }, + { + "epoch": 2.95, + "learning_rate": 2.4232814479952683e-06, + "loss": 2.1047, + "step": 14521 + }, + { + "epoch": 2.95, + "learning_rate": 2.422154239781479e-06, + "loss": 2.1674, + "step": 14522 + }, + { + "epoch": 2.95, + "learning_rate": 2.421027257667131e-06, + "loss": 2.0508, + "step": 14523 + }, + { + "epoch": 2.95, + "learning_rate": 2.4199005016858514e-06, + "loss": 2.1175, + "step": 14524 + }, + { + "epoch": 2.95, + "learning_rate": 2.41877397187126e-06, + "loss": 2.0945, + "step": 14525 + }, + { + "epoch": 2.95, + "learning_rate": 2.4176476682569716e-06, + "loss": 2.118, + "step": 14526 + }, + { + "epoch": 2.95, + "learning_rate": 2.416521590876586e-06, + "loss": 2.1164, + "step": 14527 + }, + { + "epoch": 2.95, + "learning_rate": 2.415395739763704e-06, + "loss": 2.0727, + "step": 14528 + }, + { + "epoch": 2.95, + "learning_rate": 2.4142701149519178e-06, + "loss": 2.0674, + "step": 14529 + }, + { + "epoch": 2.95, + "learning_rate": 2.4131447164748156e-06, + "loss": 2.153, + "step": 14530 + }, + { + "epoch": 2.95, + "learning_rate": 2.4120195443659713e-06, + "loss": 2.159, + "step": 14531 + }, + { + "epoch": 2.95, + "learning_rate": 2.410894598658959e-06, + "loss": 2.0791, + "step": 14532 + }, + { + "epoch": 2.95, + "learning_rate": 2.409769879387339e-06, + "loss": 2.0455, + "step": 14533 + }, + { + "epoch": 2.95, + "learning_rate": 2.4086453865846783e-06, + "loss": 2.1159, + "step": 14534 + }, + { + "epoch": 2.95, + "learning_rate": 2.4075211202845207e-06, + "loss": 2.1087, + "step": 14535 + }, + { + "epoch": 2.95, + "learning_rate": 2.4063970805204153e-06, + "loss": 2.1594, + "step": 14536 + }, + { + "epoch": 2.95, + "learning_rate": 2.405273267325896e-06, + "loss": 2.0688, + "step": 14537 + }, + { + "epoch": 2.95, + "learning_rate": 2.4041496807344955e-06, + "loss": 2.0854, + "step": 14538 + }, + { + "epoch": 2.95, + "learning_rate": 2.403026320779739e-06, + "loss": 2.0258, + "step": 14539 + }, + { + "epoch": 2.95, + "learning_rate": 2.401903187495146e-06, + "loss": 2.0246, + "step": 14540 + }, + { + "epoch": 2.95, + "learning_rate": 2.400780280914222e-06, + "loss": 2.1228, + "step": 14541 + }, + { + "epoch": 2.95, + "learning_rate": 2.3996576010704743e-06, + "loss": 2.1048, + "step": 14542 + }, + { + "epoch": 2.95, + "learning_rate": 2.3985351479973996e-06, + "loss": 2.1509, + "step": 14543 + }, + { + "epoch": 2.95, + "learning_rate": 2.3974129217284927e-06, + "loss": 2.1025, + "step": 14544 + }, + { + "epoch": 2.95, + "learning_rate": 2.3962909222972295e-06, + "loss": 2.1748, + "step": 14545 + }, + { + "epoch": 2.95, + "learning_rate": 2.395169149737091e-06, + "loss": 2.1282, + "step": 14546 + }, + { + "epoch": 2.95, + "learning_rate": 2.3940476040815495e-06, + "loss": 2.1164, + "step": 14547 + }, + { + "epoch": 2.95, + "learning_rate": 2.3929262853640644e-06, + "loss": 2.0723, + "step": 14548 + }, + { + "epoch": 2.95, + "learning_rate": 2.391805193618094e-06, + "loss": 2.0995, + "step": 14549 + }, + { + "epoch": 2.95, + "learning_rate": 2.390684328877089e-06, + "loss": 2.1014, + "step": 14550 + }, + { + "epoch": 2.95, + "learning_rate": 2.3895636911744935e-06, + "loss": 2.1386, + "step": 14551 + }, + { + "epoch": 2.95, + "learning_rate": 2.3884432805437407e-06, + "loss": 2.1802, + "step": 14552 + }, + { + "epoch": 2.95, + "learning_rate": 2.3873230970182614e-06, + "loss": 2.1276, + "step": 14553 + }, + { + "epoch": 2.95, + "learning_rate": 2.386203140631479e-06, + "loss": 2.1892, + "step": 14554 + }, + { + "epoch": 2.95, + "learning_rate": 2.385083411416811e-06, + "loss": 2.0434, + "step": 14555 + }, + { + "epoch": 2.95, + "learning_rate": 2.383963909407664e-06, + "loss": 2.0337, + "step": 14556 + }, + { + "epoch": 2.95, + "learning_rate": 2.3828446346374425e-06, + "loss": 2.0247, + "step": 14557 + }, + { + "epoch": 2.96, + "learning_rate": 2.381725587139536e-06, + "loss": 2.1382, + "step": 14558 + }, + { + "epoch": 2.96, + "learning_rate": 2.3806067669473453e-06, + "loss": 2.2155, + "step": 14559 + }, + { + "epoch": 2.96, + "learning_rate": 2.379488174094242e-06, + "loss": 2.1461, + "step": 14560 + }, + { + "epoch": 2.96, + "learning_rate": 2.3783698086136077e-06, + "loss": 2.1371, + "step": 14561 + }, + { + "epoch": 2.96, + "learning_rate": 2.377251670538806e-06, + "loss": 2.0656, + "step": 14562 + }, + { + "epoch": 2.96, + "learning_rate": 2.376133759903202e-06, + "loss": 2.0242, + "step": 14563 + }, + { + "epoch": 2.96, + "learning_rate": 2.375016076740149e-06, + "loss": 2.1731, + "step": 14564 + }, + { + "epoch": 2.96, + "learning_rate": 2.3738986210829963e-06, + "loss": 2.1877, + "step": 14565 + }, + { + "epoch": 2.96, + "learning_rate": 2.372781392965088e-06, + "loss": 2.111, + "step": 14566 + }, + { + "epoch": 2.96, + "learning_rate": 2.3716643924197524e-06, + "loss": 2.1811, + "step": 14567 + }, + { + "epoch": 2.96, + "learning_rate": 2.370547619480321e-06, + "loss": 2.0545, + "step": 14568 + }, + { + "epoch": 2.96, + "learning_rate": 2.369431074180115e-06, + "loss": 2.1159, + "step": 14569 + }, + { + "epoch": 2.96, + "learning_rate": 2.368314756552451e-06, + "loss": 2.1946, + "step": 14570 + }, + { + "epoch": 2.96, + "learning_rate": 2.3671986666306313e-06, + "loss": 2.0514, + "step": 14571 + }, + { + "epoch": 2.96, + "learning_rate": 2.366082804447961e-06, + "loss": 2.081, + "step": 14572 + }, + { + "epoch": 2.96, + "learning_rate": 2.3649671700377264e-06, + "loss": 2.0854, + "step": 14573 + }, + { + "epoch": 2.96, + "learning_rate": 2.363851763433226e-06, + "loss": 2.129, + "step": 14574 + }, + { + "epoch": 2.96, + "learning_rate": 2.362736584667731e-06, + "loss": 2.0836, + "step": 14575 + }, + { + "epoch": 2.96, + "learning_rate": 2.3616216337745214e-06, + "loss": 2.1011, + "step": 14576 + }, + { + "epoch": 2.96, + "learning_rate": 2.360506910786857e-06, + "loss": 2.0683, + "step": 14577 + }, + { + "epoch": 2.96, + "learning_rate": 2.359392415738001e-06, + "loss": 2.1404, + "step": 14578 + }, + { + "epoch": 2.96, + "learning_rate": 2.358278148661208e-06, + "loss": 2.1732, + "step": 14579 + }, + { + "epoch": 2.96, + "learning_rate": 2.3571641095897244e-06, + "loss": 2.1808, + "step": 14580 + }, + { + "epoch": 2.96, + "learning_rate": 2.3560502985567866e-06, + "loss": 2.0505, + "step": 14581 + }, + { + "epoch": 2.96, + "learning_rate": 2.354936715595628e-06, + "loss": 2.1171, + "step": 14582 + }, + { + "epoch": 2.96, + "learning_rate": 2.3538233607394765e-06, + "loss": 2.0869, + "step": 14583 + }, + { + "epoch": 2.96, + "learning_rate": 2.3527102340215523e-06, + "loss": 2.1319, + "step": 14584 + }, + { + "epoch": 2.96, + "learning_rate": 2.3515973354750633e-06, + "loss": 2.1024, + "step": 14585 + }, + { + "epoch": 2.96, + "learning_rate": 2.3504846651332158e-06, + "loss": 2.091, + "step": 14586 + }, + { + "epoch": 2.96, + "learning_rate": 2.3493722230292125e-06, + "loss": 2.2079, + "step": 14587 + }, + { + "epoch": 2.96, + "learning_rate": 2.3482600091962403e-06, + "loss": 2.061, + "step": 14588 + }, + { + "epoch": 2.96, + "learning_rate": 2.347148023667487e-06, + "loss": 2.1399, + "step": 14589 + }, + { + "epoch": 2.96, + "learning_rate": 2.346036266476129e-06, + "loss": 2.0581, + "step": 14590 + }, + { + "epoch": 2.96, + "learning_rate": 2.344924737655342e-06, + "loss": 2.1258, + "step": 14591 + }, + { + "epoch": 2.96, + "learning_rate": 2.3438134372382846e-06, + "loss": 2.1781, + "step": 14592 + }, + { + "epoch": 2.96, + "learning_rate": 2.3427023652581172e-06, + "loss": 2.1262, + "step": 14593 + }, + { + "epoch": 2.96, + "learning_rate": 2.3415915217479913e-06, + "loss": 2.1618, + "step": 14594 + }, + { + "epoch": 2.96, + "learning_rate": 2.340480906741053e-06, + "loss": 2.0827, + "step": 14595 + }, + { + "epoch": 2.96, + "learning_rate": 2.3393705202704354e-06, + "loss": 2.1413, + "step": 14596 + }, + { + "epoch": 2.96, + "learning_rate": 2.33826036236927e-06, + "loss": 2.1211, + "step": 14597 + }, + { + "epoch": 2.96, + "learning_rate": 2.3371504330706806e-06, + "loss": 2.1753, + "step": 14598 + }, + { + "epoch": 2.96, + "learning_rate": 2.3360407324077893e-06, + "loss": 2.0794, + "step": 14599 + }, + { + "epoch": 2.96, + "learning_rate": 2.3349312604136974e-06, + "loss": 2.1559, + "step": 14600 + }, + { + "epoch": 2.96, + "learning_rate": 2.3338220171215155e-06, + "loss": 2.1272, + "step": 14601 + }, + { + "epoch": 2.96, + "learning_rate": 2.332713002564332e-06, + "loss": 2.0754, + "step": 14602 + }, + { + "epoch": 2.96, + "learning_rate": 2.3316042167752464e-06, + "loss": 2.1788, + "step": 14603 + }, + { + "epoch": 2.96, + "learning_rate": 2.3304956597873328e-06, + "loss": 2.0901, + "step": 14604 + }, + { + "epoch": 2.96, + "learning_rate": 2.3293873316336734e-06, + "loss": 2.0686, + "step": 14605 + }, + { + "epoch": 2.96, + "learning_rate": 2.3282792323473325e-06, + "loss": 2.1733, + "step": 14606 + }, + { + "epoch": 2.96, + "learning_rate": 2.327171361961373e-06, + "loss": 2.0942, + "step": 14607 + }, + { + "epoch": 2.97, + "learning_rate": 2.326063720508853e-06, + "loss": 2.2398, + "step": 14608 + }, + { + "epoch": 2.97, + "learning_rate": 2.324956308022822e-06, + "loss": 2.1038, + "step": 14609 + }, + { + "epoch": 2.97, + "learning_rate": 2.323849124536317e-06, + "loss": 2.1259, + "step": 14610 + }, + { + "epoch": 2.97, + "learning_rate": 2.322742170082376e-06, + "loss": 2.1969, + "step": 14611 + }, + { + "epoch": 2.97, + "learning_rate": 2.3216354446940247e-06, + "loss": 2.1404, + "step": 14612 + }, + { + "epoch": 2.97, + "learning_rate": 2.32052894840429e-06, + "loss": 2.0841, + "step": 14613 + }, + { + "epoch": 2.97, + "learning_rate": 2.3194226812461806e-06, + "loss": 2.1154, + "step": 14614 + }, + { + "epoch": 2.97, + "learning_rate": 2.318316643252705e-06, + "loss": 2.1532, + "step": 14615 + }, + { + "epoch": 2.97, + "learning_rate": 2.3172108344568677e-06, + "loss": 2.0927, + "step": 14616 + }, + { + "epoch": 2.97, + "learning_rate": 2.316105254891654e-06, + "loss": 1.9616, + "step": 14617 + }, + { + "epoch": 2.97, + "learning_rate": 2.314999904590063e-06, + "loss": 2.2109, + "step": 14618 + }, + { + "epoch": 2.97, + "learning_rate": 2.313894783585066e-06, + "loss": 2.0637, + "step": 14619 + }, + { + "epoch": 2.97, + "learning_rate": 2.3127898919096414e-06, + "loss": 2.081, + "step": 14620 + }, + { + "epoch": 2.97, + "learning_rate": 2.3116852295967507e-06, + "loss": 2.0491, + "step": 14621 + }, + { + "epoch": 2.97, + "learning_rate": 2.3105807966793557e-06, + "loss": 2.0898, + "step": 14622 + }, + { + "epoch": 2.97, + "learning_rate": 2.3094765931904107e-06, + "loss": 2.1744, + "step": 14623 + }, + { + "epoch": 2.97, + "learning_rate": 2.308372619162864e-06, + "loss": 2.1788, + "step": 14624 + }, + { + "epoch": 2.97, + "learning_rate": 2.307268874629649e-06, + "loss": 2.0867, + "step": 14625 + }, + { + "epoch": 2.97, + "learning_rate": 2.3061653596237e-06, + "loss": 2.122, + "step": 14626 + }, + { + "epoch": 2.97, + "learning_rate": 2.305062074177943e-06, + "loss": 2.1804, + "step": 14627 + }, + { + "epoch": 2.97, + "learning_rate": 2.3039590183253e-06, + "loss": 2.0189, + "step": 14628 + }, + { + "epoch": 2.97, + "learning_rate": 2.302856192098677e-06, + "loss": 2.0534, + "step": 14629 + }, + { + "epoch": 2.97, + "learning_rate": 2.3017535955309813e-06, + "loss": 2.1181, + "step": 14630 + }, + { + "epoch": 2.97, + "learning_rate": 2.3006512286551143e-06, + "loss": 2.1564, + "step": 14631 + }, + { + "epoch": 2.97, + "learning_rate": 2.299549091503961e-06, + "loss": 2.1593, + "step": 14632 + }, + { + "epoch": 2.97, + "learning_rate": 2.298447184110408e-06, + "loss": 2.1466, + "step": 14633 + }, + { + "epoch": 2.97, + "learning_rate": 2.2973455065073347e-06, + "loss": 2.0927, + "step": 14634 + }, + { + "epoch": 2.97, + "learning_rate": 2.2962440587276137e-06, + "loss": 2.0739, + "step": 14635 + }, + { + "epoch": 2.97, + "learning_rate": 2.295142840804102e-06, + "loss": 2.1431, + "step": 14636 + }, + { + "epoch": 2.97, + "learning_rate": 2.2940418527696605e-06, + "loss": 2.1664, + "step": 14637 + }, + { + "epoch": 2.97, + "learning_rate": 2.2929410946571395e-06, + "loss": 2.104, + "step": 14638 + }, + { + "epoch": 2.97, + "learning_rate": 2.291840566499385e-06, + "loss": 2.0377, + "step": 14639 + }, + { + "epoch": 2.97, + "learning_rate": 2.2907402683292268e-06, + "loss": 2.0668, + "step": 14640 + }, + { + "epoch": 2.97, + "learning_rate": 2.2896402001795005e-06, + "loss": 2.1396, + "step": 14641 + }, + { + "epoch": 2.97, + "learning_rate": 2.2885403620830205e-06, + "loss": 2.0832, + "step": 14642 + }, + { + "epoch": 2.97, + "learning_rate": 2.287440754072613e-06, + "loss": 2.0833, + "step": 14643 + }, + { + "epoch": 2.97, + "learning_rate": 2.2863413761810793e-06, + "loss": 2.0282, + "step": 14644 + }, + { + "epoch": 2.97, + "learning_rate": 2.2852422284412267e-06, + "loss": 2.0808, + "step": 14645 + }, + { + "epoch": 2.97, + "learning_rate": 2.2841433108858458e-06, + "loss": 2.1671, + "step": 14646 + }, + { + "epoch": 2.97, + "learning_rate": 2.2830446235477264e-06, + "loss": 2.1128, + "step": 14647 + }, + { + "epoch": 2.97, + "learning_rate": 2.2819461664596497e-06, + "loss": 2.0105, + "step": 14648 + }, + { + "epoch": 2.97, + "learning_rate": 2.280847939654395e-06, + "loss": 2.095, + "step": 14649 + }, + { + "epoch": 2.97, + "learning_rate": 2.279749943164722e-06, + "loss": 2.1101, + "step": 14650 + }, + { + "epoch": 2.97, + "learning_rate": 2.278652177023395e-06, + "loss": 2.0951, + "step": 14651 + }, + { + "epoch": 2.97, + "learning_rate": 2.277554641263169e-06, + "loss": 2.1377, + "step": 14652 + }, + { + "epoch": 2.97, + "learning_rate": 2.276457335916793e-06, + "loss": 2.119, + "step": 14653 + }, + { + "epoch": 2.97, + "learning_rate": 2.275360261017002e-06, + "loss": 2.0604, + "step": 14654 + }, + { + "epoch": 2.97, + "learning_rate": 2.2742634165965317e-06, + "loss": 2.1331, + "step": 14655 + }, + { + "epoch": 2.97, + "learning_rate": 2.2731668026881115e-06, + "loss": 2.1546, + "step": 14656 + }, + { + "epoch": 2.98, + "learning_rate": 2.2720704193244557e-06, + "loss": 2.1195, + "step": 14657 + }, + { + "epoch": 2.98, + "learning_rate": 2.2709742665382792e-06, + "loss": 2.1659, + "step": 14658 + }, + { + "epoch": 2.98, + "learning_rate": 2.2698783443622886e-06, + "loss": 2.0462, + "step": 14659 + }, + { + "epoch": 2.98, + "learning_rate": 2.2687826528291846e-06, + "loss": 2.1408, + "step": 14660 + }, + { + "epoch": 2.98, + "learning_rate": 2.2676871919716536e-06, + "loss": 2.141, + "step": 14661 + }, + { + "epoch": 2.98, + "learning_rate": 2.266591961822385e-06, + "loss": 2.0937, + "step": 14662 + }, + { + "epoch": 2.98, + "learning_rate": 2.2654969624140555e-06, + "loss": 2.116, + "step": 14663 + }, + { + "epoch": 2.98, + "learning_rate": 2.2644021937793416e-06, + "loss": 2.0839, + "step": 14664 + }, + { + "epoch": 2.98, + "learning_rate": 2.263307655950898e-06, + "loss": 2.1394, + "step": 14665 + }, + { + "epoch": 2.98, + "learning_rate": 2.262213348961393e-06, + "loss": 2.0693, + "step": 14666 + }, + { + "epoch": 2.98, + "learning_rate": 2.261119272843465e-06, + "loss": 2.1815, + "step": 14667 + }, + { + "epoch": 2.98, + "learning_rate": 2.2600254276297707e-06, + "loss": 2.1431, + "step": 14668 + }, + { + "epoch": 2.98, + "learning_rate": 2.258931813352938e-06, + "loss": 2.1464, + "step": 14669 + }, + { + "epoch": 2.98, + "learning_rate": 2.2578384300456014e-06, + "loss": 2.0206, + "step": 14670 + }, + { + "epoch": 2.98, + "learning_rate": 2.256745277740384e-06, + "loss": 2.124, + "step": 14671 + }, + { + "epoch": 2.98, + "learning_rate": 2.2556523564698994e-06, + "loss": 2.1149, + "step": 14672 + }, + { + "epoch": 2.98, + "learning_rate": 2.2545596662667578e-06, + "loss": 2.1324, + "step": 14673 + }, + { + "epoch": 2.98, + "learning_rate": 2.253467207163563e-06, + "loss": 2.1275, + "step": 14674 + }, + { + "epoch": 2.98, + "learning_rate": 2.252374979192913e-06, + "loss": 2.0992, + "step": 14675 + }, + { + "epoch": 2.98, + "learning_rate": 2.2512829823873904e-06, + "loss": 2.0961, + "step": 14676 + }, + { + "epoch": 2.98, + "learning_rate": 2.2501912167795803e-06, + "loss": 2.175, + "step": 14677 + }, + { + "epoch": 2.98, + "learning_rate": 2.2490996824020574e-06, + "loss": 2.031, + "step": 14678 + }, + { + "epoch": 2.98, + "learning_rate": 2.248008379287392e-06, + "loss": 2.1771, + "step": 14679 + }, + { + "epoch": 2.98, + "learning_rate": 2.2469173074681393e-06, + "loss": 2.1212, + "step": 14680 + }, + { + "epoch": 2.98, + "learning_rate": 2.2458264669768613e-06, + "loss": 2.1684, + "step": 14681 + }, + { + "epoch": 2.98, + "learning_rate": 2.2447358578460955e-06, + "loss": 2.169, + "step": 14682 + }, + { + "epoch": 2.98, + "learning_rate": 2.243645480108393e-06, + "loss": 2.0608, + "step": 14683 + }, + { + "epoch": 2.98, + "learning_rate": 2.24255533379628e-06, + "loss": 2.063, + "step": 14684 + }, + { + "epoch": 2.98, + "learning_rate": 2.241465418942288e-06, + "loss": 2.096, + "step": 14685 + }, + { + "epoch": 2.98, + "learning_rate": 2.240375735578928e-06, + "loss": 2.107, + "step": 14686 + }, + { + "epoch": 2.98, + "learning_rate": 2.2392862837387243e-06, + "loss": 2.1819, + "step": 14687 + }, + { + "epoch": 2.98, + "learning_rate": 2.238197063454174e-06, + "loss": 2.1153, + "step": 14688 + }, + { + "epoch": 2.98, + "learning_rate": 2.237108074757783e-06, + "loss": 2.1318, + "step": 14689 + }, + { + "epoch": 2.98, + "learning_rate": 2.236019317682037e-06, + "loss": 2.1012, + "step": 14690 + }, + { + "epoch": 2.98, + "learning_rate": 2.234930792259423e-06, + "loss": 2.2217, + "step": 14691 + }, + { + "epoch": 2.98, + "learning_rate": 2.2338424985224206e-06, + "loss": 2.1183, + "step": 14692 + }, + { + "epoch": 2.98, + "learning_rate": 2.2327544365035026e-06, + "loss": 2.054, + "step": 14693 + }, + { + "epoch": 2.98, + "learning_rate": 2.2316666062351287e-06, + "loss": 2.1028, + "step": 14694 + }, + { + "epoch": 2.98, + "learning_rate": 2.2305790077497593e-06, + "loss": 2.1555, + "step": 14695 + }, + { + "epoch": 2.98, + "learning_rate": 2.229491641079845e-06, + "loss": 2.0408, + "step": 14696 + }, + { + "epoch": 2.98, + "learning_rate": 2.228404506257832e-06, + "loss": 2.1218, + "step": 14697 + }, + { + "epoch": 2.98, + "learning_rate": 2.2273176033161502e-06, + "loss": 2.0297, + "step": 14698 + }, + { + "epoch": 2.98, + "learning_rate": 2.2262309322872343e-06, + "loss": 2.1023, + "step": 14699 + }, + { + "epoch": 2.98, + "learning_rate": 2.2251444932035094e-06, + "loss": 2.1725, + "step": 14700 + }, + { + "epoch": 2.98, + "learning_rate": 2.2240582860973848e-06, + "loss": 2.0347, + "step": 14701 + }, + { + "epoch": 2.98, + "learning_rate": 2.2229723110012737e-06, + "loss": 2.0903, + "step": 14702 + }, + { + "epoch": 2.98, + "learning_rate": 2.2218865679475765e-06, + "loss": 2.1214, + "step": 14703 + }, + { + "epoch": 2.98, + "learning_rate": 2.220801056968693e-06, + "loss": 2.1721, + "step": 14704 + }, + { + "epoch": 2.98, + "learning_rate": 2.219715778097006e-06, + "loss": 2.1104, + "step": 14705 + }, + { + "epoch": 2.99, + "learning_rate": 2.218630731364898e-06, + "loss": 2.0442, + "step": 14706 + }, + { + "epoch": 2.99, + "learning_rate": 2.217545916804744e-06, + "loss": 2.0662, + "step": 14707 + }, + { + "epoch": 2.99, + "learning_rate": 2.216461334448916e-06, + "loss": 2.0306, + "step": 14708 + }, + { + "epoch": 2.99, + "learning_rate": 2.215376984329767e-06, + "loss": 2.0528, + "step": 14709 + }, + { + "epoch": 2.99, + "learning_rate": 2.214292866479656e-06, + "loss": 2.1619, + "step": 14710 + }, + { + "epoch": 2.99, + "learning_rate": 2.2132089809309233e-06, + "loss": 2.0879, + "step": 14711 + }, + { + "epoch": 2.99, + "learning_rate": 2.2121253277159182e-06, + "loss": 2.1369, + "step": 14712 + }, + { + "epoch": 2.99, + "learning_rate": 2.211041906866965e-06, + "loss": 2.0813, + "step": 14713 + }, + { + "epoch": 2.99, + "learning_rate": 2.209958718416397e-06, + "loss": 2.1173, + "step": 14714 + }, + { + "epoch": 2.99, + "learning_rate": 2.2088757623965263e-06, + "loss": 2.0937, + "step": 14715 + }, + { + "epoch": 2.99, + "learning_rate": 2.2077930388396686e-06, + "loss": 2.0555, + "step": 14716 + }, + { + "epoch": 2.99, + "learning_rate": 2.206710547778128e-06, + "loss": 2.0993, + "step": 14717 + }, + { + "epoch": 2.99, + "learning_rate": 2.205628289244205e-06, + "loss": 2.0843, + "step": 14718 + }, + { + "epoch": 2.99, + "learning_rate": 2.2045462632701874e-06, + "loss": 2.026, + "step": 14719 + }, + { + "epoch": 2.99, + "learning_rate": 2.20346446988836e-06, + "loss": 2.099, + "step": 14720 + }, + { + "epoch": 2.99, + "learning_rate": 2.202382909131002e-06, + "loss": 2.0352, + "step": 14721 + }, + { + "epoch": 2.99, + "learning_rate": 2.2013015810303828e-06, + "loss": 2.1346, + "step": 14722 + }, + { + "epoch": 2.99, + "learning_rate": 2.200220485618768e-06, + "loss": 2.1446, + "step": 14723 + }, + { + "epoch": 2.99, + "learning_rate": 2.19913962292841e-06, + "loss": 2.1068, + "step": 14724 + }, + { + "epoch": 2.99, + "learning_rate": 2.198058992991564e-06, + "loss": 2.1567, + "step": 14725 + }, + { + "epoch": 2.99, + "learning_rate": 2.196978595840463e-06, + "loss": 2.037, + "step": 14726 + }, + { + "epoch": 2.99, + "learning_rate": 2.195898431507355e-06, + "loss": 2.0746, + "step": 14727 + }, + { + "epoch": 2.99, + "learning_rate": 2.1948185000244605e-06, + "loss": 2.1439, + "step": 14728 + }, + { + "epoch": 2.99, + "learning_rate": 2.193738801424006e-06, + "loss": 2.1148, + "step": 14729 + }, + { + "epoch": 2.99, + "learning_rate": 2.1926593357382e-06, + "loss": 2.0379, + "step": 14730 + }, + { + "epoch": 2.99, + "learning_rate": 2.1915801029992544e-06, + "loss": 2.1841, + "step": 14731 + }, + { + "epoch": 2.99, + "learning_rate": 2.190501103239372e-06, + "loss": 2.1346, + "step": 14732 + }, + { + "epoch": 2.99, + "learning_rate": 2.189422336490746e-06, + "loss": 2.0943, + "step": 14733 + }, + { + "epoch": 2.99, + "learning_rate": 2.1883438027855597e-06, + "loss": 2.1047, + "step": 14734 + }, + { + "epoch": 2.99, + "learning_rate": 2.187265502155996e-06, + "loss": 2.1267, + "step": 14735 + }, + { + "epoch": 2.99, + "learning_rate": 2.186187434634228e-06, + "loss": 2.0719, + "step": 14736 + }, + { + "epoch": 2.99, + "learning_rate": 2.1851096002524253e-06, + "loss": 2.0877, + "step": 14737 + }, + { + "epoch": 2.99, + "learning_rate": 2.1840319990427395e-06, + "loss": 2.1625, + "step": 14738 + }, + { + "epoch": 2.99, + "learning_rate": 2.1829546310373283e-06, + "loss": 2.0974, + "step": 14739 + }, + { + "epoch": 2.99, + "learning_rate": 2.1818774962683374e-06, + "loss": 2.1546, + "step": 14740 + }, + { + "epoch": 2.99, + "learning_rate": 2.180800594767901e-06, + "loss": 2.1259, + "step": 14741 + }, + { + "epoch": 2.99, + "learning_rate": 2.1797239265681535e-06, + "loss": 2.1972, + "step": 14742 + }, + { + "epoch": 2.99, + "learning_rate": 2.178647491701219e-06, + "loss": 2.1083, + "step": 14743 + }, + { + "epoch": 2.99, + "learning_rate": 2.1775712901992174e-06, + "loss": 2.182, + "step": 14744 + }, + { + "epoch": 2.99, + "learning_rate": 2.176495322094254e-06, + "loss": 2.1075, + "step": 14745 + }, + { + "epoch": 2.99, + "learning_rate": 2.1754195874184347e-06, + "loss": 2.1715, + "step": 14746 + }, + { + "epoch": 2.99, + "learning_rate": 2.174344086203857e-06, + "loss": 2.2102, + "step": 14747 + }, + { + "epoch": 2.99, + "learning_rate": 2.173268818482611e-06, + "loss": 2.111, + "step": 14748 + }, + { + "epoch": 2.99, + "learning_rate": 2.172193784286777e-06, + "loss": 2.1951, + "step": 14749 + }, + { + "epoch": 2.99, + "learning_rate": 2.171118983648435e-06, + "loss": 2.1428, + "step": 14750 + }, + { + "epoch": 2.99, + "learning_rate": 2.1700444165996436e-06, + "loss": 2.0696, + "step": 14751 + }, + { + "epoch": 2.99, + "learning_rate": 2.168970083172479e-06, + "loss": 2.1253, + "step": 14752 + }, + { + "epoch": 2.99, + "learning_rate": 2.1678959833989842e-06, + "loss": 2.0965, + "step": 14753 + }, + { + "epoch": 2.99, + "learning_rate": 2.166822117311216e-06, + "loss": 2.2124, + "step": 14754 + }, + { + "epoch": 3.0, + "learning_rate": 2.165748484941207e-06, + "loss": 2.1494, + "step": 14755 + }, + { + "epoch": 3.0, + "learning_rate": 2.1646750863209944e-06, + "loss": 2.1242, + "step": 14756 + }, + { + "epoch": 3.0, + "learning_rate": 2.1636019214826055e-06, + "loss": 2.138, + "step": 14757 + }, + { + "epoch": 3.0, + "learning_rate": 2.162528990458064e-06, + "loss": 2.0846, + "step": 14758 + }, + { + "epoch": 3.0, + "learning_rate": 2.161456293279375e-06, + "loss": 2.1017, + "step": 14759 + }, + { + "epoch": 3.0, + "learning_rate": 2.1603838299785486e-06, + "loss": 2.1497, + "step": 14760 + }, + { + "epoch": 3.0, + "learning_rate": 2.1593116005875837e-06, + "loss": 2.134, + "step": 14761 + }, + { + "epoch": 3.0, + "learning_rate": 2.158239605138476e-06, + "loss": 2.113, + "step": 14762 + }, + { + "epoch": 3.0, + "learning_rate": 2.1571678436632024e-06, + "loss": 2.0214, + "step": 14763 + }, + { + "epoch": 3.0, + "learning_rate": 2.1560963161937467e-06, + "loss": 2.0964, + "step": 14764 + }, + { + "epoch": 3.0, + "learning_rate": 2.1550250227620807e-06, + "loss": 2.0699, + "step": 14765 + }, + { + "epoch": 3.0, + "learning_rate": 2.153953963400164e-06, + "loss": 2.1253, + "step": 14766 + }, + { + "epoch": 3.0, + "learning_rate": 2.152883138139955e-06, + "loss": 2.1644, + "step": 14767 + }, + { + "epoch": 3.0, + "learning_rate": 2.1518125470134053e-06, + "loss": 2.1019, + "step": 14768 + }, + { + "epoch": 3.0, + "learning_rate": 2.1507421900524605e-06, + "loss": 2.0925, + "step": 14769 + }, + { + "epoch": 3.0, + "learning_rate": 2.1496720672890516e-06, + "loss": 2.1227, + "step": 14770 + }, + { + "epoch": 3.0, + "learning_rate": 2.1486021787551092e-06, + "loss": 2.1656, + "step": 14771 + }, + { + "epoch": 3.0, + "learning_rate": 2.1475325244825564e-06, + "loss": 2.1942, + "step": 14772 + }, + { + "epoch": 3.0, + "learning_rate": 2.14646310450331e-06, + "loss": 2.0894, + "step": 14773 + }, + { + "epoch": 3.0, + "learning_rate": 2.1453939188492755e-06, + "loss": 2.0779, + "step": 14774 + }, + { + "epoch": 3.0, + "learning_rate": 2.1443249675523536e-06, + "loss": 2.0937, + "step": 14775 + }, + { + "epoch": 3.0, + "learning_rate": 2.14325625064444e-06, + "loss": 2.1542, + "step": 14776 + }, + { + "epoch": 3.0, + "learning_rate": 2.1421877681574254e-06, + "loss": 2.0984, + "step": 14777 + }, + { + "epoch": 3.0, + "learning_rate": 2.1411195201231825e-06, + "loss": 2.0979, + "step": 14778 + }, + { + "epoch": 3.0, + "learning_rate": 2.1400515065735884e-06, + "loss": 2.1272, + "step": 14779 + }, + { + "epoch": 3.0, + "learning_rate": 2.13898372754051e-06, + "loss": 2.122, + "step": 14780 + }, + { + "epoch": 3.0, + "learning_rate": 2.1379161830558083e-06, + "loss": 2.1669, + "step": 14781 + }, + { + "epoch": 3.0, + "learning_rate": 2.1368488731513302e-06, + "loss": 2.1829, + "step": 14782 + }, + { + "epoch": 3.0, + "learning_rate": 2.1357817978589235e-06, + "loss": 2.0819, + "step": 14783 + }, + { + "epoch": 3.0, + "learning_rate": 2.1347149572104297e-06, + "loss": 2.0329, + "step": 14784 + }, + { + "epoch": 3.0, + "learning_rate": 2.1336483512376736e-06, + "loss": 2.1933, + "step": 14785 + }, + { + "epoch": 3.0, + "learning_rate": 2.1325819799724833e-06, + "loss": 2.1515, + "step": 14786 + }, + { + "epoch": 3.0, + "learning_rate": 2.1315158434466767e-06, + "loss": 2.0986, + "step": 14787 + }, + { + "epoch": 3.0, + "learning_rate": 2.1304499416920642e-06, + "loss": 2.1167, + "step": 14788 + }, + { + "epoch": 3.0, + "learning_rate": 2.1293842747404447e-06, + "loss": 2.0934, + "step": 14789 + }, + { + "epoch": 3.0, + "learning_rate": 2.128318842623618e-06, + "loss": 2.0947, + "step": 14790 + }, + { + "epoch": 3.0, + "learning_rate": 2.1272536453733715e-06, + "loss": 2.1416, + "step": 14791 + }, + { + "epoch": 3.0, + "learning_rate": 2.126188683021493e-06, + "loss": 2.1315, + "step": 14792 + }, + { + "epoch": 3.0, + "learning_rate": 2.1251239555997493e-06, + "loss": 2.1767, + "step": 14793 + }, + { + "epoch": 3.0, + "learning_rate": 2.124059463139916e-06, + "loss": 2.1686, + "step": 14794 + }, + { + "epoch": 3.0, + "learning_rate": 2.1229952056737445e-06, + "loss": 2.0639, + "step": 14795 + }, + { + "epoch": 3.0, + "learning_rate": 2.1219311832330002e-06, + "loss": 2.1577, + "step": 14796 + }, + { + "epoch": 3.0, + "learning_rate": 2.120867395849423e-06, + "loss": 2.1231, + "step": 14797 + }, + { + "epoch": 3.0, + "learning_rate": 2.1198038435547584e-06, + "loss": 2.0717, + "step": 14798 + }, + { + "epoch": 3.0, + "learning_rate": 2.1187405263807337e-06, + "loss": 2.1597, + "step": 14799 + }, + { + "epoch": 3.0, + "learning_rate": 2.1176774443590763e-06, + "loss": 2.0939, + "step": 14800 + }, + { + "epoch": 3.0, + "learning_rate": 2.116614597521508e-06, + "loss": 2.2164, + "step": 14801 + }, + { + "epoch": 3.0, + "learning_rate": 2.1155519858997408e-06, + "loss": 2.1255, + "step": 14802 + }, + { + "epoch": 3.0, + "learning_rate": 2.1144896095254774e-06, + "loss": 2.205, + "step": 14803 + }, + { + "epoch": 3.0, + "learning_rate": 2.1134274684304156e-06, + "loss": 2.085, + "step": 14804 + }, + { + "epoch": 3.01, + "learning_rate": 2.112365562646248e-06, + "loss": 2.0876, + "step": 14805 + }, + { + "epoch": 3.01, + "learning_rate": 2.1113038922046603e-06, + "loss": 2.1165, + "step": 14806 + }, + { + "epoch": 3.01, + "learning_rate": 2.1102424571373247e-06, + "loss": 2.1234, + "step": 14807 + }, + { + "epoch": 3.01, + "learning_rate": 2.1091812574759154e-06, + "loss": 2.0504, + "step": 14808 + }, + { + "epoch": 3.01, + "learning_rate": 2.108120293252094e-06, + "loss": 2.0931, + "step": 14809 + }, + { + "epoch": 3.01, + "learning_rate": 2.1070595644975145e-06, + "loss": 2.1766, + "step": 14810 + }, + { + "epoch": 3.01, + "learning_rate": 2.1059990712438273e-06, + "loss": 2.0968, + "step": 14811 + }, + { + "epoch": 3.01, + "learning_rate": 2.1049388135226744e-06, + "loss": 2.2237, + "step": 14812 + }, + { + "epoch": 3.01, + "learning_rate": 2.1038787913656935e-06, + "loss": 2.0514, + "step": 14813 + }, + { + "epoch": 3.01, + "learning_rate": 2.102819004804506e-06, + "loss": 2.063, + "step": 14814 + }, + { + "epoch": 3.01, + "learning_rate": 2.1017594538707364e-06, + "loss": 2.0775, + "step": 14815 + }, + { + "epoch": 3.01, + "learning_rate": 2.100700138595998e-06, + "loss": 2.0269, + "step": 14816 + }, + { + "epoch": 3.01, + "learning_rate": 2.0996410590118997e-06, + "loss": 2.1338, + "step": 14817 + }, + { + "epoch": 3.01, + "learning_rate": 2.0985822151500367e-06, + "loss": 2.0712, + "step": 14818 + }, + { + "epoch": 3.01, + "learning_rate": 2.097523607042007e-06, + "loss": 2.0611, + "step": 14819 + }, + { + "epoch": 3.01, + "learning_rate": 2.096465234719386e-06, + "loss": 2.0831, + "step": 14820 + }, + { + "epoch": 3.01, + "learning_rate": 2.0954070982137666e-06, + "loss": 2.1346, + "step": 14821 + }, + { + "epoch": 3.01, + "learning_rate": 2.0943491975567087e-06, + "loss": 2.0965, + "step": 14822 + }, + { + "epoch": 3.01, + "learning_rate": 2.093291532779782e-06, + "loss": 2.062, + "step": 14823 + }, + { + "epoch": 3.01, + "learning_rate": 2.0922341039145468e-06, + "loss": 2.1809, + "step": 14824 + }, + { + "epoch": 3.01, + "learning_rate": 2.091176910992545e-06, + "loss": 2.125, + "step": 14825 + }, + { + "epoch": 3.01, + "learning_rate": 2.0901199540453254e-06, + "loss": 2.1044, + "step": 14826 + }, + { + "epoch": 3.01, + "learning_rate": 2.0890632331044247e-06, + "loss": 2.0795, + "step": 14827 + }, + { + "epoch": 3.01, + "learning_rate": 2.088006748201372e-06, + "loss": 2.1322, + "step": 14828 + }, + { + "epoch": 3.01, + "learning_rate": 2.086950499367687e-06, + "loss": 2.0118, + "step": 14829 + }, + { + "epoch": 3.01, + "learning_rate": 2.0858944866348875e-06, + "loss": 2.134, + "step": 14830 + }, + { + "epoch": 3.01, + "learning_rate": 2.0848387100344793e-06, + "loss": 2.0855, + "step": 14831 + }, + { + "epoch": 3.01, + "learning_rate": 2.083783169597968e-06, + "loss": 2.1235, + "step": 14832 + }, + { + "epoch": 3.01, + "learning_rate": 2.0827278653568416e-06, + "loss": 2.1147, + "step": 14833 + }, + { + "epoch": 3.01, + "learning_rate": 2.0816727973425933e-06, + "loss": 2.1175, + "step": 14834 + }, + { + "epoch": 3.01, + "learning_rate": 2.0806179655866943e-06, + "loss": 2.0827, + "step": 14835 + }, + { + "epoch": 3.01, + "learning_rate": 2.0795633701206275e-06, + "loss": 2.1695, + "step": 14836 + }, + { + "epoch": 3.01, + "learning_rate": 2.078509010975852e-06, + "loss": 2.1722, + "step": 14837 + }, + { + "epoch": 3.01, + "learning_rate": 2.077454888183832e-06, + "loss": 2.1979, + "step": 14838 + }, + { + "epoch": 3.01, + "learning_rate": 2.0764010017760138e-06, + "loss": 2.1108, + "step": 14839 + }, + { + "epoch": 3.01, + "learning_rate": 2.075347351783844e-06, + "loss": 2.079, + "step": 14840 + }, + { + "epoch": 3.01, + "learning_rate": 2.0742939382387604e-06, + "loss": 2.099, + "step": 14841 + }, + { + "epoch": 3.01, + "learning_rate": 2.073240761172197e-06, + "loss": 2.1083, + "step": 14842 + }, + { + "epoch": 3.01, + "learning_rate": 2.072187820615572e-06, + "loss": 2.103, + "step": 14843 + }, + { + "epoch": 3.01, + "learning_rate": 2.0711351166003035e-06, + "loss": 2.1423, + "step": 14844 + }, + { + "epoch": 3.01, + "learning_rate": 2.0700826491578018e-06, + "loss": 2.1763, + "step": 14845 + }, + { + "epoch": 3.01, + "learning_rate": 2.069030418319472e-06, + "loss": 2.113, + "step": 14846 + }, + { + "epoch": 3.01, + "learning_rate": 2.0679784241167034e-06, + "loss": 2.135, + "step": 14847 + }, + { + "epoch": 3.01, + "learning_rate": 2.066926666580887e-06, + "loss": 2.1671, + "step": 14848 + }, + { + "epoch": 3.01, + "learning_rate": 2.065875145743407e-06, + "loss": 2.1333, + "step": 14849 + }, + { + "epoch": 3.01, + "learning_rate": 2.064823861635633e-06, + "loss": 2.148, + "step": 14850 + }, + { + "epoch": 3.01, + "learning_rate": 2.0637728142889334e-06, + "loss": 2.1348, + "step": 14851 + }, + { + "epoch": 3.01, + "learning_rate": 2.0627220037346686e-06, + "loss": 2.1293, + "step": 14852 + }, + { + "epoch": 3.01, + "learning_rate": 2.061671430004194e-06, + "loss": 2.1427, + "step": 14853 + }, + { + "epoch": 3.02, + "learning_rate": 2.0606210931288506e-06, + "loss": 2.0569, + "step": 14854 + }, + { + "epoch": 3.02, + "learning_rate": 2.0595709931399797e-06, + "loss": 2.1135, + "step": 14855 + }, + { + "epoch": 3.02, + "learning_rate": 2.058521130068912e-06, + "loss": 2.1591, + "step": 14856 + }, + { + "epoch": 3.02, + "learning_rate": 2.057471503946977e-06, + "loss": 2.0667, + "step": 14857 + }, + { + "epoch": 3.02, + "learning_rate": 2.0564221148054863e-06, + "loss": 2.138, + "step": 14858 + }, + { + "epoch": 3.02, + "learning_rate": 2.0553729626757546e-06, + "loss": 2.1104, + "step": 14859 + }, + { + "epoch": 3.02, + "learning_rate": 2.054324047589078e-06, + "loss": 2.1266, + "step": 14860 + }, + { + "epoch": 3.02, + "learning_rate": 2.0532753695767627e-06, + "loss": 2.186, + "step": 14861 + }, + { + "epoch": 3.02, + "learning_rate": 2.0522269286700914e-06, + "loss": 2.1417, + "step": 14862 + }, + { + "epoch": 3.02, + "learning_rate": 2.0511787249003514e-06, + "loss": 2.1611, + "step": 14863 + }, + { + "epoch": 3.02, + "learning_rate": 2.0501307582988096e-06, + "loss": 2.0869, + "step": 14864 + }, + { + "epoch": 3.02, + "learning_rate": 2.0490830288967443e-06, + "loss": 2.0273, + "step": 14865 + }, + { + "epoch": 3.02, + "learning_rate": 2.048035536725409e-06, + "loss": 2.1176, + "step": 14866 + }, + { + "epoch": 3.02, + "learning_rate": 2.0469882818160627e-06, + "loss": 2.1405, + "step": 14867 + }, + { + "epoch": 3.02, + "learning_rate": 2.0459412641999476e-06, + "loss": 2.0991, + "step": 14868 + }, + { + "epoch": 3.02, + "learning_rate": 2.0448944839083053e-06, + "loss": 2.0826, + "step": 14869 + }, + { + "epoch": 3.02, + "learning_rate": 2.043847940972369e-06, + "loss": 2.1628, + "step": 14870 + }, + { + "epoch": 3.02, + "learning_rate": 2.042801635423366e-06, + "loss": 2.133, + "step": 14871 + }, + { + "epoch": 3.02, + "learning_rate": 2.0417555672925105e-06, + "loss": 2.0874, + "step": 14872 + }, + { + "epoch": 3.02, + "learning_rate": 2.040709736611016e-06, + "loss": 2.109, + "step": 14873 + }, + { + "epoch": 3.02, + "learning_rate": 2.0396641434100882e-06, + "loss": 2.116, + "step": 14874 + }, + { + "epoch": 3.02, + "learning_rate": 2.0386187877209228e-06, + "loss": 2.0713, + "step": 14875 + }, + { + "epoch": 3.02, + "learning_rate": 2.037573669574713e-06, + "loss": 2.0912, + "step": 14876 + }, + { + "epoch": 3.02, + "learning_rate": 2.0365287890026364e-06, + "loss": 2.1433, + "step": 14877 + }, + { + "epoch": 3.02, + "learning_rate": 2.0354841460358754e-06, + "loss": 2.0477, + "step": 14878 + }, + { + "epoch": 3.02, + "learning_rate": 2.0344397407055896e-06, + "loss": 2.0763, + "step": 14879 + }, + { + "epoch": 3.02, + "learning_rate": 2.033395573042952e-06, + "loss": 2.1635, + "step": 14880 + }, + { + "epoch": 3.02, + "learning_rate": 2.03235164307911e-06, + "loss": 2.1104, + "step": 14881 + }, + { + "epoch": 3.02, + "learning_rate": 2.0313079508452162e-06, + "loss": 2.1973, + "step": 14882 + }, + { + "epoch": 3.02, + "learning_rate": 2.0302644963724038e-06, + "loss": 2.1387, + "step": 14883 + }, + { + "epoch": 3.02, + "learning_rate": 2.0292212796918122e-06, + "loss": 2.066, + "step": 14884 + }, + { + "epoch": 3.02, + "learning_rate": 2.028178300834566e-06, + "loss": 2.111, + "step": 14885 + }, + { + "epoch": 3.02, + "learning_rate": 2.0271355598317876e-06, + "loss": 2.1565, + "step": 14886 + }, + { + "epoch": 3.02, + "learning_rate": 2.026093056714583e-06, + "loss": 2.0975, + "step": 14887 + }, + { + "epoch": 3.02, + "learning_rate": 2.0250507915140603e-06, + "loss": 2.071, + "step": 14888 + }, + { + "epoch": 3.02, + "learning_rate": 2.024008764261318e-06, + "loss": 2.1022, + "step": 14889 + }, + { + "epoch": 3.02, + "learning_rate": 2.022966974987449e-06, + "loss": 2.1464, + "step": 14890 + }, + { + "epoch": 3.02, + "learning_rate": 2.021925423723533e-06, + "loss": 2.1265, + "step": 14891 + }, + { + "epoch": 3.02, + "learning_rate": 2.020884110500647e-06, + "loss": 2.1107, + "step": 14892 + }, + { + "epoch": 3.02, + "learning_rate": 2.0198430353498653e-06, + "loss": 2.1058, + "step": 14893 + }, + { + "epoch": 3.02, + "learning_rate": 2.0188021983022453e-06, + "loss": 2.1261, + "step": 14894 + }, + { + "epoch": 3.02, + "learning_rate": 2.017761599388842e-06, + "loss": 2.1151, + "step": 14895 + }, + { + "epoch": 3.02, + "learning_rate": 2.0167212386407074e-06, + "loss": 2.0974, + "step": 14896 + }, + { + "epoch": 3.02, + "learning_rate": 2.0156811160888835e-06, + "loss": 2.1079, + "step": 14897 + }, + { + "epoch": 3.02, + "learning_rate": 2.014641231764398e-06, + "loss": 2.0975, + "step": 14898 + }, + { + "epoch": 3.02, + "learning_rate": 2.0136015856982827e-06, + "loss": 2.1983, + "step": 14899 + }, + { + "epoch": 3.02, + "learning_rate": 2.0125621779215552e-06, + "loss": 2.1504, + "step": 14900 + }, + { + "epoch": 3.02, + "learning_rate": 2.0115230084652326e-06, + "loss": 2.0882, + "step": 14901 + }, + { + "epoch": 3.02, + "learning_rate": 2.0104840773603153e-06, + "loss": 2.0858, + "step": 14902 + }, + { + "epoch": 3.03, + "learning_rate": 2.009445384637805e-06, + "loss": 2.0893, + "step": 14903 + }, + { + "epoch": 3.03, + "learning_rate": 2.008406930328687e-06, + "loss": 2.0765, + "step": 14904 + }, + { + "epoch": 3.03, + "learning_rate": 2.0073687144639554e-06, + "loss": 2.1522, + "step": 14905 + }, + { + "epoch": 3.03, + "learning_rate": 2.0063307370745797e-06, + "loss": 2.0862, + "step": 14906 + }, + { + "epoch": 3.03, + "learning_rate": 2.005292998191536e-06, + "loss": 2.1093, + "step": 14907 + }, + { + "epoch": 3.03, + "learning_rate": 2.004255497845781e-06, + "loss": 2.1776, + "step": 14908 + }, + { + "epoch": 3.03, + "learning_rate": 2.0032182360682715e-06, + "loss": 2.0894, + "step": 14909 + }, + { + "epoch": 3.03, + "learning_rate": 2.00218121288996e-06, + "loss": 2.1429, + "step": 14910 + }, + { + "epoch": 3.03, + "learning_rate": 2.0011444283417867e-06, + "loss": 2.0048, + "step": 14911 + }, + { + "epoch": 3.03, + "learning_rate": 2.0001078824546836e-06, + "loss": 2.0664, + "step": 14912 + }, + { + "epoch": 3.03, + "learning_rate": 1.99907157525958e-06, + "loss": 2.0657, + "step": 14913 + }, + { + "epoch": 3.03, + "learning_rate": 1.998035506787396e-06, + "loss": 2.1521, + "step": 14914 + }, + { + "epoch": 3.03, + "learning_rate": 1.996999677069046e-06, + "loss": 2.1635, + "step": 14915 + }, + { + "epoch": 3.03, + "learning_rate": 1.9959640861354325e-06, + "loss": 2.1167, + "step": 14916 + }, + { + "epoch": 3.03, + "learning_rate": 1.994928734017456e-06, + "loss": 2.0585, + "step": 14917 + }, + { + "epoch": 3.03, + "learning_rate": 1.9938936207460114e-06, + "loss": 2.1549, + "step": 14918 + }, + { + "epoch": 3.03, + "learning_rate": 1.992858746351978e-06, + "loss": 2.1374, + "step": 14919 + }, + { + "epoch": 3.03, + "learning_rate": 1.991824110866235e-06, + "loss": 2.1493, + "step": 14920 + }, + { + "epoch": 3.03, + "learning_rate": 1.990789714319653e-06, + "loss": 2.2047, + "step": 14921 + }, + { + "epoch": 3.03, + "learning_rate": 1.989755556743099e-06, + "loss": 2.0894, + "step": 14922 + }, + { + "epoch": 3.03, + "learning_rate": 1.9887216381674225e-06, + "loss": 2.1314, + "step": 14923 + }, + { + "epoch": 3.03, + "learning_rate": 1.9876879586234753e-06, + "loss": 2.025, + "step": 14924 + }, + { + "epoch": 3.03, + "learning_rate": 1.9866545181420995e-06, + "loss": 2.1354, + "step": 14925 + }, + { + "epoch": 3.03, + "learning_rate": 1.9856213167541316e-06, + "loss": 2.1566, + "step": 14926 + }, + { + "epoch": 3.03, + "learning_rate": 1.9845883544903956e-06, + "loss": 2.0241, + "step": 14927 + }, + { + "epoch": 3.03, + "learning_rate": 1.983555631381713e-06, + "loss": 2.1454, + "step": 14928 + }, + { + "epoch": 3.03, + "learning_rate": 1.9825231474588978e-06, + "loss": 2.111, + "step": 14929 + }, + { + "epoch": 3.03, + "learning_rate": 1.9814909027527584e-06, + "loss": 2.0598, + "step": 14930 + }, + { + "epoch": 3.03, + "learning_rate": 1.9804588972940884e-06, + "loss": 2.1623, + "step": 14931 + }, + { + "epoch": 3.03, + "learning_rate": 1.9794271311136827e-06, + "loss": 2.0758, + "step": 14932 + }, + { + "epoch": 3.03, + "learning_rate": 1.978395604242328e-06, + "loss": 2.1552, + "step": 14933 + }, + { + "epoch": 3.03, + "learning_rate": 1.977364316710797e-06, + "loss": 2.1433, + "step": 14934 + }, + { + "epoch": 3.03, + "learning_rate": 1.9763332685498625e-06, + "loss": 2.0272, + "step": 14935 + }, + { + "epoch": 3.03, + "learning_rate": 1.975302459790289e-06, + "loss": 2.1266, + "step": 14936 + }, + { + "epoch": 3.03, + "learning_rate": 1.9742718904628334e-06, + "loss": 2.1227, + "step": 14937 + }, + { + "epoch": 3.03, + "learning_rate": 1.973241560598241e-06, + "loss": 2.1578, + "step": 14938 + }, + { + "epoch": 3.03, + "learning_rate": 1.9722114702272554e-06, + "loss": 2.0664, + "step": 14939 + }, + { + "epoch": 3.03, + "learning_rate": 1.971181619380611e-06, + "loss": 2.0618, + "step": 14940 + }, + { + "epoch": 3.03, + "learning_rate": 1.970152008089039e-06, + "loss": 2.1347, + "step": 14941 + }, + { + "epoch": 3.03, + "learning_rate": 1.969122636383255e-06, + "loss": 2.1417, + "step": 14942 + }, + { + "epoch": 3.03, + "learning_rate": 1.9680935042939755e-06, + "loss": 2.1363, + "step": 14943 + }, + { + "epoch": 3.03, + "learning_rate": 1.9670646118519e-06, + "loss": 2.183, + "step": 14944 + }, + { + "epoch": 3.03, + "learning_rate": 1.966035959087738e-06, + "loss": 2.1507, + "step": 14945 + }, + { + "epoch": 3.03, + "learning_rate": 1.9650075460321727e-06, + "loss": 2.0845, + "step": 14946 + }, + { + "epoch": 3.03, + "learning_rate": 1.963979372715895e-06, + "loss": 2.1524, + "step": 14947 + }, + { + "epoch": 3.03, + "learning_rate": 1.962951439169577e-06, + "loss": 2.1949, + "step": 14948 + }, + { + "epoch": 3.03, + "learning_rate": 1.9619237454238903e-06, + "loss": 2.1271, + "step": 14949 + }, + { + "epoch": 3.03, + "learning_rate": 1.9608962915095e-06, + "loss": 2.0771, + "step": 14950 + }, + { + "epoch": 3.03, + "learning_rate": 1.959869077457064e-06, + "loss": 2.167, + "step": 14951 + }, + { + "epoch": 3.04, + "learning_rate": 1.958842103297226e-06, + "loss": 2.0398, + "step": 14952 + }, + { + "epoch": 3.04, + "learning_rate": 1.9578153690606292e-06, + "loss": 2.1548, + "step": 14953 + }, + { + "epoch": 3.04, + "learning_rate": 1.9567888747779105e-06, + "loss": 2.0937, + "step": 14954 + }, + { + "epoch": 3.04, + "learning_rate": 1.955762620479699e-06, + "loss": 2.0618, + "step": 14955 + }, + { + "epoch": 3.04, + "learning_rate": 1.954736606196608e-06, + "loss": 2.1256, + "step": 14956 + }, + { + "epoch": 3.04, + "learning_rate": 1.9537108319592547e-06, + "loss": 2.0505, + "step": 14957 + }, + { + "epoch": 3.04, + "learning_rate": 1.9526852977982446e-06, + "loss": 2.2141, + "step": 14958 + }, + { + "epoch": 3.04, + "learning_rate": 1.95166000374418e-06, + "loss": 2.0001, + "step": 14959 + }, + { + "epoch": 3.04, + "learning_rate": 1.9506349498276465e-06, + "loss": 2.1599, + "step": 14960 + }, + { + "epoch": 3.04, + "learning_rate": 1.9496101360792317e-06, + "loss": 2.1625, + "step": 14961 + }, + { + "epoch": 3.04, + "learning_rate": 1.948585562529515e-06, + "loss": 2.1283, + "step": 14962 + }, + { + "epoch": 3.04, + "learning_rate": 1.9475612292090605e-06, + "loss": 2.1959, + "step": 14963 + }, + { + "epoch": 3.04, + "learning_rate": 1.9465371361484353e-06, + "loss": 2.1228, + "step": 14964 + }, + { + "epoch": 3.04, + "learning_rate": 1.945513283378194e-06, + "loss": 2.2096, + "step": 14965 + }, + { + "epoch": 3.04, + "learning_rate": 1.9444896709288887e-06, + "loss": 2.1309, + "step": 14966 + }, + { + "epoch": 3.04, + "learning_rate": 1.943466298831055e-06, + "loss": 2.0708, + "step": 14967 + }, + { + "epoch": 3.04, + "learning_rate": 1.942443167115229e-06, + "loss": 2.1013, + "step": 14968 + }, + { + "epoch": 3.04, + "learning_rate": 1.9414202758119392e-06, + "loss": 2.1325, + "step": 14969 + }, + { + "epoch": 3.04, + "learning_rate": 1.940397624951709e-06, + "loss": 2.0882, + "step": 14970 + }, + { + "epoch": 3.04, + "learning_rate": 1.9393752145650422e-06, + "loss": 2.1725, + "step": 14971 + }, + { + "epoch": 3.04, + "learning_rate": 1.938353044682454e-06, + "loss": 2.0476, + "step": 14972 + }, + { + "epoch": 3.04, + "learning_rate": 1.937331115334431e-06, + "loss": 2.1071, + "step": 14973 + }, + { + "epoch": 3.04, + "learning_rate": 1.9363094265514783e-06, + "loss": 2.0597, + "step": 14974 + }, + { + "epoch": 3.04, + "learning_rate": 1.9352879783640697e-06, + "loss": 2.107, + "step": 14975 + }, + { + "epoch": 3.04, + "learning_rate": 1.934266770802685e-06, + "loss": 2.1287, + "step": 14976 + }, + { + "epoch": 3.04, + "learning_rate": 1.9332458038977975e-06, + "loss": 2.1668, + "step": 14977 + }, + { + "epoch": 3.04, + "learning_rate": 1.9322250776798648e-06, + "loss": 2.1585, + "step": 14978 + }, + { + "epoch": 3.04, + "learning_rate": 1.9312045921793422e-06, + "loss": 2.0699, + "step": 14979 + }, + { + "epoch": 3.04, + "learning_rate": 1.9301843474266812e-06, + "loss": 2.2169, + "step": 14980 + }, + { + "epoch": 3.04, + "learning_rate": 1.929164343452322e-06, + "loss": 2.0893, + "step": 14981 + }, + { + "epoch": 3.04, + "learning_rate": 1.9281445802866962e-06, + "loss": 2.0537, + "step": 14982 + }, + { + "epoch": 3.04, + "learning_rate": 1.9271250579602307e-06, + "loss": 2.1847, + "step": 14983 + }, + { + "epoch": 3.04, + "learning_rate": 1.9261057765033462e-06, + "loss": 2.1194, + "step": 14984 + }, + { + "epoch": 3.04, + "learning_rate": 1.9250867359464575e-06, + "loss": 2.0531, + "step": 14985 + }, + { + "epoch": 3.04, + "learning_rate": 1.9240679363199642e-06, + "loss": 2.1447, + "step": 14986 + }, + { + "epoch": 3.04, + "learning_rate": 1.923049377654268e-06, + "loss": 2.1093, + "step": 14987 + }, + { + "epoch": 3.04, + "learning_rate": 1.922031059979754e-06, + "loss": 2.1465, + "step": 14988 + }, + { + "epoch": 3.04, + "learning_rate": 1.921012983326814e-06, + "loss": 2.1859, + "step": 14989 + }, + { + "epoch": 3.04, + "learning_rate": 1.9199951477258184e-06, + "loss": 2.1011, + "step": 14990 + }, + { + "epoch": 3.04, + "learning_rate": 1.9189775532071397e-06, + "loss": 2.1602, + "step": 14991 + }, + { + "epoch": 3.04, + "learning_rate": 1.9179601998011354e-06, + "loss": 2.1435, + "step": 14992 + }, + { + "epoch": 3.04, + "learning_rate": 1.9169430875381636e-06, + "loss": 2.1487, + "step": 14993 + }, + { + "epoch": 3.04, + "learning_rate": 1.915926216448569e-06, + "loss": 2.0969, + "step": 14994 + }, + { + "epoch": 3.04, + "learning_rate": 1.9149095865626975e-06, + "loss": 2.0387, + "step": 14995 + }, + { + "epoch": 3.04, + "learning_rate": 1.9138931979108766e-06, + "loss": 2.0908, + "step": 14996 + }, + { + "epoch": 3.04, + "learning_rate": 1.912877050523433e-06, + "loss": 2.039, + "step": 14997 + }, + { + "epoch": 3.04, + "learning_rate": 1.9118611444306865e-06, + "loss": 2.1243, + "step": 14998 + }, + { + "epoch": 3.04, + "learning_rate": 1.910845479662952e-06, + "loss": 2.1866, + "step": 14999 + }, + { + "epoch": 3.04, + "learning_rate": 1.9098300562505266e-06, + "loss": 1.98, + "step": 15000 + }, + { + "epoch": 3.04, + "step": 15000, + "total_flos": 1.946614537125888e+19, + "train_loss": 2.1418474456548693, + "train_runtime": 225797.9012, + "train_samples_per_second": 8.503, + "train_steps_per_second": 0.066 + } + ], + "max_steps": 15000, + "num_train_epochs": 4, + "total_flos": 1.946614537125888e+19, + "trial_name": null, + "trial_params": null +}