{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9974450689831375, "eval_steps": 500, "global_step": 244, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4999999999999999e-05, "loss": 2.0754, "step": 1 }, { "epoch": 0.01, "learning_rate": 2.9999999999999997e-05, "loss": 2.1342, "step": 2 }, { "epoch": 0.01, "learning_rate": 4.4999999999999996e-05, "loss": 2.1035, "step": 3 }, { "epoch": 0.02, "learning_rate": 5.9999999999999995e-05, "loss": 2.09, "step": 4 }, { "epoch": 0.02, "learning_rate": 7.5e-05, "loss": 2.0643, "step": 5 }, { "epoch": 0.02, "learning_rate": 8.999999999999999e-05, "loss": 2.124, "step": 6 }, { "epoch": 0.03, "learning_rate": 0.00010499999999999999, "loss": 2.1867, "step": 7 }, { "epoch": 0.03, "learning_rate": 0.00011999999999999999, "loss": 2.175, "step": 8 }, { "epoch": 0.04, "learning_rate": 0.000135, "loss": 2.0837, "step": 9 }, { "epoch": 0.04, "learning_rate": 0.00015, "loss": 2.0431, "step": 10 }, { "epoch": 0.04, "learning_rate": 0.00014999838015426563, "loss": 2.0665, "step": 11 }, { "epoch": 0.05, "learning_rate": 0.00014999352068703324, "loss": 2.0668, "step": 12 }, { "epoch": 0.05, "learning_rate": 0.0001499854218082118, "loss": 2.1312, "step": 13 }, { "epoch": 0.06, "learning_rate": 0.00014997408386763957, "loss": 2.0758, "step": 14 }, { "epoch": 0.06, "learning_rate": 0.00014995950735506895, "loss": 2.1173, "step": 15 }, { "epoch": 0.07, "learning_rate": 0.00014994169290014528, "loss": 2.0044, "step": 16 }, { "epoch": 0.07, "learning_rate": 0.00014992064127237976, "loss": 2.0054, "step": 17 }, { "epoch": 0.07, "learning_rate": 0.00014989635338111612, "loss": 2.0509, "step": 18 }, { "epoch": 0.08, "learning_rate": 0.0001498688302754913, "loss": 2.1071, "step": 19 }, { "epoch": 0.08, "learning_rate": 0.0001498380731443903, "loss": 1.9549, "step": 20 }, { "epoch": 0.09, "learning_rate": 0.00014980408331639463, "loss": 1.895, "step": 21 }, { "epoch": 0.09, "learning_rate": 0.000149766862259725, "loss": 1.9979, "step": 22 }, { "epoch": 0.09, "learning_rate": 0.000149726411582178, "loss": 2.0151, "step": 23 }, { "epoch": 0.1, "learning_rate": 0.00014968273303105645, "loss": 1.9236, "step": 24 }, { "epoch": 0.1, "learning_rate": 0.0001496358284930941, "loss": 1.9613, "step": 25 }, { "epoch": 0.11, "learning_rate": 0.00014958569999437403, "loss": 1.9975, "step": 26 }, { "epoch": 0.11, "learning_rate": 0.00014953234970024114, "loss": 1.9804, "step": 27 }, { "epoch": 0.11, "learning_rate": 0.00014947577991520874, "loss": 1.9211, "step": 28 }, { "epoch": 0.12, "learning_rate": 0.00014941599308285872, "loss": 2.0278, "step": 29 }, { "epoch": 0.12, "learning_rate": 0.0001493529917857364, "loss": 1.9682, "step": 30 }, { "epoch": 0.13, "learning_rate": 0.0001492867787452386, "loss": 1.8728, "step": 31 }, { "epoch": 0.13, "learning_rate": 0.00014921735682149628, "loss": 2.0241, "step": 32 }, { "epoch": 0.13, "learning_rate": 0.00014914472901325095, "loss": 1.9737, "step": 33 }, { "epoch": 0.14, "learning_rate": 0.00014906889845772516, "loss": 1.9329, "step": 34 }, { "epoch": 0.14, "learning_rate": 0.00014898986843048698, "loss": 1.9861, "step": 35 }, { "epoch": 0.15, "learning_rate": 0.00014890764234530847, "loss": 1.8872, "step": 36 }, { "epoch": 0.15, "learning_rate": 0.00014882222375401822, "loss": 1.9282, "step": 37 }, { "epoch": 0.16, "learning_rate": 0.00014873361634634805, "loss": 2.0487, "step": 38 }, { "epoch": 0.16, "learning_rate": 0.00014864182394977337, "loss": 1.9907, "step": 39 }, { "epoch": 0.16, "learning_rate": 0.0001485468505293482, "loss": 2.0665, "step": 40 }, { "epoch": 0.17, "learning_rate": 0.00014844870018753355, "loss": 2.0757, "step": 41 }, { "epoch": 0.17, "learning_rate": 0.00014834737716402043, "loss": 1.9599, "step": 42 }, { "epoch": 0.18, "learning_rate": 0.0001482428858355466, "loss": 1.9655, "step": 43 }, { "epoch": 0.18, "learning_rate": 0.0001481352307157077, "loss": 1.9234, "step": 44 }, { "epoch": 0.18, "learning_rate": 0.00014802441645476192, "loss": 2.0089, "step": 45 }, { "epoch": 0.19, "learning_rate": 0.00014791044783942956, "loss": 1.94, "step": 46 }, { "epoch": 0.19, "learning_rate": 0.0001477933297926859, "loss": 1.9925, "step": 47 }, { "epoch": 0.2, "learning_rate": 0.00014767306737354885, "loss": 1.9783, "step": 48 }, { "epoch": 0.2, "learning_rate": 0.00014754966577686007, "loss": 1.97, "step": 49 }, { "epoch": 0.2, "learning_rate": 0.000147423130333061, "loss": 1.9398, "step": 50 }, { "epoch": 0.21, "learning_rate": 0.00014729346650796223, "loss": 1.963, "step": 51 }, { "epoch": 0.21, "learning_rate": 0.00014716067990250758, "loss": 1.9428, "step": 52 }, { "epoch": 0.22, "learning_rate": 0.0001470247762525322, "loss": 1.8047, "step": 53 }, { "epoch": 0.22, "learning_rate": 0.00014688576142851467, "loss": 1.9507, "step": 54 }, { "epoch": 0.22, "learning_rate": 0.00014674364143532352, "loss": 1.9653, "step": 55 }, { "epoch": 0.23, "learning_rate": 0.0001465984224119578, "loss": 1.9433, "step": 56 }, { "epoch": 0.23, "learning_rate": 0.00014645011063128192, "loss": 1.9655, "step": 57 }, { "epoch": 0.24, "learning_rate": 0.0001462987124997547, "loss": 2.0388, "step": 58 }, { "epoch": 0.24, "learning_rate": 0.00014614423455715263, "loss": 1.8811, "step": 59 }, { "epoch": 0.25, "learning_rate": 0.00014598668347628733, "loss": 1.956, "step": 60 }, { "epoch": 0.25, "learning_rate": 0.00014582606606271736, "loss": 1.9236, "step": 61 }, { "epoch": 0.25, "learning_rate": 0.0001456623892544542, "loss": 1.9662, "step": 62 }, { "epoch": 0.26, "learning_rate": 0.00014549566012166275, "loss": 1.9648, "step": 63 }, { "epoch": 0.26, "learning_rate": 0.00014532588586635558, "loss": 1.913, "step": 64 }, { "epoch": 0.27, "learning_rate": 0.00014515307382208215, "loss": 2.0282, "step": 65 }, { "epoch": 0.27, "learning_rate": 0.00014497723145361183, "loss": 2.0331, "step": 66 }, { "epoch": 0.27, "learning_rate": 0.0001447983663566116, "loss": 2.1089, "step": 67 }, { "epoch": 0.28, "learning_rate": 0.00014461648625731783, "loss": 2.0694, "step": 68 }, { "epoch": 0.28, "learning_rate": 0.00014443159901220262, "loss": 1.9098, "step": 69 }, { "epoch": 0.29, "learning_rate": 0.0001442437126076343, "loss": 1.8582, "step": 70 }, { "epoch": 0.29, "learning_rate": 0.00014405283515953277, "loss": 1.8942, "step": 71 }, { "epoch": 0.29, "learning_rate": 0.00014385897491301844, "loss": 1.9396, "step": 72 }, { "epoch": 0.3, "learning_rate": 0.00014366214024205654, "loss": 1.9624, "step": 73 }, { "epoch": 0.3, "learning_rate": 0.00014346233964909508, "loss": 1.9123, "step": 74 }, { "epoch": 0.31, "learning_rate": 0.00014325958176469777, "loss": 1.9146, "step": 75 }, { "epoch": 0.31, "learning_rate": 0.0001430538753471711, "loss": 1.9646, "step": 76 }, { "epoch": 0.31, "learning_rate": 0.00014284522928218612, "loss": 1.8241, "step": 77 }, { "epoch": 0.32, "learning_rate": 0.0001426336525823945, "loss": 1.968, "step": 78 }, { "epoch": 0.32, "learning_rate": 0.00014241915438703928, "loss": 1.8584, "step": 79 }, { "epoch": 0.33, "learning_rate": 0.00014220174396156014, "loss": 1.9979, "step": 80 }, { "epoch": 0.33, "learning_rate": 0.00014198143069719306, "loss": 1.9972, "step": 81 }, { "epoch": 0.34, "learning_rate": 0.00014175822411056476, "loss": 1.8241, "step": 82 }, { "epoch": 0.34, "learning_rate": 0.00014153213384328158, "loss": 1.9249, "step": 83 }, { "epoch": 0.34, "learning_rate": 0.00014130316966151296, "loss": 2.076, "step": 84 }, { "epoch": 0.35, "learning_rate": 0.00014107134145556968, "loss": 1.9242, "step": 85 }, { "epoch": 0.35, "learning_rate": 0.00014083665923947652, "loss": 1.9075, "step": 86 }, { "epoch": 0.36, "learning_rate": 0.0001405991331505398, "loss": 1.9701, "step": 87 }, { "epoch": 0.36, "learning_rate": 0.00014035877344890945, "loss": 1.9521, "step": 88 }, { "epoch": 0.36, "learning_rate": 0.0001401155905171358, "loss": 1.9354, "step": 89 }, { "epoch": 0.37, "learning_rate": 0.00013986959485972112, "loss": 2.0025, "step": 90 }, { "epoch": 0.37, "learning_rate": 0.0001396207971026658, "loss": 1.9459, "step": 91 }, { "epoch": 0.38, "learning_rate": 0.0001393692079930095, "loss": 1.9798, "step": 92 }, { "epoch": 0.38, "learning_rate": 0.00013911483839836676, "loss": 1.8871, "step": 93 }, { "epoch": 0.38, "learning_rate": 0.00013885769930645767, "loss": 1.9457, "step": 94 }, { "epoch": 0.39, "learning_rate": 0.0001385978018246332, "loss": 1.9094, "step": 95 }, { "epoch": 0.39, "learning_rate": 0.00013833515717939538, "loss": 1.9651, "step": 96 }, { "epoch": 0.4, "learning_rate": 0.00013806977671591245, "loss": 2.0097, "step": 97 }, { "epoch": 0.4, "learning_rate": 0.00013780167189752872, "loss": 1.9944, "step": 98 }, { "epoch": 0.4, "learning_rate": 0.00013753085430526945, "loss": 1.8378, "step": 99 }, { "epoch": 0.41, "learning_rate": 0.0001372573356373405, "loss": 1.9796, "step": 100 }, { "epoch": 0.41, "learning_rate": 0.00013698112770862319, "loss": 1.8866, "step": 101 }, { "epoch": 0.42, "learning_rate": 0.00013670224245016375, "loss": 2.0077, "step": 102 }, { "epoch": 0.42, "learning_rate": 0.00013642069190865808, "loss": 1.8807, "step": 103 }, { "epoch": 0.43, "learning_rate": 0.00013613648824593137, "loss": 2.0118, "step": 104 }, { "epoch": 0.43, "learning_rate": 0.0001358496437384127, "loss": 1.9495, "step": 105 }, { "epoch": 0.43, "learning_rate": 0.0001355601707766048, "loss": 1.927, "step": 106 }, { "epoch": 0.44, "learning_rate": 0.0001352680818645488, "loss": 1.9418, "step": 107 }, { "epoch": 0.44, "learning_rate": 0.00013497338961928406, "loss": 1.9557, "step": 108 }, { "epoch": 0.45, "learning_rate": 0.00013467610677030337, "loss": 1.9239, "step": 109 }, { "epoch": 0.45, "learning_rate": 0.0001343762461590028, "loss": 2.0298, "step": 110 }, { "epoch": 0.45, "learning_rate": 0.00013407382073812724, "loss": 1.9778, "step": 111 }, { "epoch": 0.46, "learning_rate": 0.00013376884357121075, "loss": 1.8697, "step": 112 }, { "epoch": 0.46, "learning_rate": 0.00013346132783201233, "loss": 1.8659, "step": 113 }, { "epoch": 0.47, "learning_rate": 0.0001331512868039469, "loss": 1.9608, "step": 114 }, { "epoch": 0.47, "learning_rate": 0.00013283873387951142, "loss": 1.9881, "step": 115 }, { "epoch": 0.47, "learning_rate": 0.0001325236825597065, "loss": 1.8963, "step": 116 }, { "epoch": 0.48, "learning_rate": 0.00013220614645345304, "loss": 1.9821, "step": 117 }, { "epoch": 0.48, "learning_rate": 0.00013188613927700462, "loss": 1.8786, "step": 118 }, { "epoch": 0.49, "learning_rate": 0.00013156367485335483, "loss": 1.9391, "step": 119 }, { "epoch": 0.49, "learning_rate": 0.0001312387671116402, "loss": 1.8749, "step": 120 }, { "epoch": 0.49, "learning_rate": 0.00013091143008653864, "loss": 1.9711, "step": 121 }, { "epoch": 0.5, "learning_rate": 0.000130581677917663, "loss": 1.9973, "step": 122 }, { "epoch": 0.5, "learning_rate": 0.00013024952484895047, "loss": 1.9736, "step": 123 }, { "epoch": 0.51, "learning_rate": 0.00012991498522804725, "loss": 1.9482, "step": 124 }, { "epoch": 0.51, "learning_rate": 0.0001295780735056887, "loss": 1.8969, "step": 125 }, { "epoch": 0.52, "learning_rate": 0.0001292388042350753, "loss": 1.8856, "step": 126 }, { "epoch": 0.52, "learning_rate": 0.00012889719207124386, "loss": 1.896, "step": 127 }, { "epoch": 0.52, "learning_rate": 0.00012855325177043455, "loss": 1.9585, "step": 128 }, { "epoch": 0.53, "learning_rate": 0.00012820699818945344, "loss": 2.0194, "step": 129 }, { "epoch": 0.53, "learning_rate": 0.00012785844628503088, "loss": 2.0445, "step": 130 }, { "epoch": 0.54, "learning_rate": 0.00012750761111317527, "loss": 1.9695, "step": 131 }, { "epoch": 0.54, "learning_rate": 0.00012715450782852281, "loss": 1.9849, "step": 132 }, { "epoch": 0.54, "learning_rate": 0.00012679915168368276, "loss": 1.9426, "step": 133 }, { "epoch": 0.55, "learning_rate": 0.00012644155802857878, "loss": 1.8934, "step": 134 }, { "epoch": 0.55, "learning_rate": 0.00012608174230978572, "loss": 1.9616, "step": 135 }, { "epoch": 0.56, "learning_rate": 0.00012571972006986237, "loss": 1.926, "step": 136 }, { "epoch": 0.56, "learning_rate": 0.00012535550694668034, "loss": 1.9333, "step": 137 }, { "epoch": 0.56, "learning_rate": 0.00012498911867274816, "loss": 1.953, "step": 138 }, { "epoch": 0.57, "learning_rate": 0.0001246205710745321, "loss": 1.9753, "step": 139 }, { "epoch": 0.57, "learning_rate": 0.0001242498800717723, "loss": 1.8901, "step": 140 }, { "epoch": 0.58, "learning_rate": 0.00012387706167679507, "loss": 1.8817, "step": 141 }, { "epoch": 0.58, "learning_rate": 0.00012350213199382147, "loss": 1.939, "step": 142 }, { "epoch": 0.58, "learning_rate": 0.0001231251072182714, "loss": 1.9321, "step": 143 }, { "epoch": 0.59, "learning_rate": 0.00012274600363606418, "loss": 2.0088, "step": 144 }, { "epoch": 0.59, "learning_rate": 0.00012236483762291504, "loss": 1.9488, "step": 145 }, { "epoch": 0.6, "learning_rate": 0.00012198162564362771, "loss": 2.0998, "step": 146 }, { "epoch": 0.6, "learning_rate": 0.00012159638425138327, "loss": 1.8385, "step": 147 }, { "epoch": 0.61, "learning_rate": 0.00012120913008702508, "loss": 1.9502, "step": 148 }, { "epoch": 0.61, "learning_rate": 0.00012081987987833996, "loss": 1.9521, "step": 149 }, { "epoch": 0.61, "learning_rate": 0.00012042865043933565, "loss": 2.01, "step": 150 }, { "epoch": 0.62, "learning_rate": 0.00012003545866951448, "loss": 1.945, "step": 151 }, { "epoch": 0.62, "learning_rate": 0.00011964032155314345, "loss": 1.9757, "step": 152 }, { "epoch": 0.63, "learning_rate": 0.00011924325615852046, "loss": 1.9674, "step": 153 }, { "epoch": 0.63, "learning_rate": 0.00011884427963723716, "loss": 1.938, "step": 154 }, { "epoch": 0.63, "learning_rate": 0.00011844340922343792, "loss": 2.0233, "step": 155 }, { "epoch": 0.64, "learning_rate": 0.0001180406622330756, "loss": 2.0303, "step": 156 }, { "epoch": 0.64, "learning_rate": 0.00011763605606316337, "loss": 1.9139, "step": 157 }, { "epoch": 0.65, "learning_rate": 0.0001172296081910233, "loss": 1.8695, "step": 158 }, { "epoch": 0.65, "learning_rate": 0.00011682133617353145, "loss": 1.9399, "step": 159 }, { "epoch": 0.65, "learning_rate": 0.00011641125764635947, "loss": 1.9003, "step": 160 }, { "epoch": 0.66, "learning_rate": 0.00011599939032321271, "loss": 1.9971, "step": 161 }, { "epoch": 0.66, "learning_rate": 0.00011558575199506527, "loss": 2.0124, "step": 162 }, { "epoch": 0.67, "learning_rate": 0.00011517036052939132, "loss": 1.9316, "step": 163 }, { "epoch": 0.67, "learning_rate": 0.00011475323386939331, "loss": 2.0054, "step": 164 }, { "epoch": 0.67, "learning_rate": 0.00011433439003322706, "loss": 1.7991, "step": 165 }, { "epoch": 0.68, "learning_rate": 0.00011391384711322323, "loss": 1.9603, "step": 166 }, { "epoch": 0.68, "learning_rate": 0.00011349162327510597, "loss": 1.8788, "step": 167 }, { "epoch": 0.69, "learning_rate": 0.00011306773675720816, "loss": 1.8611, "step": 168 }, { "epoch": 0.69, "learning_rate": 0.00011264220586968362, "loss": 2.0317, "step": 169 }, { "epoch": 0.69, "learning_rate": 0.00011221504899371616, "loss": 1.8378, "step": 170 }, { "epoch": 0.7, "learning_rate": 0.00011178628458072557, "loss": 1.9995, "step": 171 }, { "epoch": 0.7, "learning_rate": 0.00011135593115157072, "loss": 1.8996, "step": 172 }, { "epoch": 0.71, "learning_rate": 0.00011092400729574934, "loss": 1.8958, "step": 173 }, { "epoch": 0.71, "learning_rate": 0.00011049053167059518, "loss": 2.0009, "step": 174 }, { "epoch": 0.72, "learning_rate": 0.00011005552300047206, "loss": 1.9154, "step": 175 }, { "epoch": 0.72, "learning_rate": 0.00010961900007596499, "loss": 1.9606, "step": 176 }, { "epoch": 0.72, "learning_rate": 0.00010918098175306857, "loss": 1.8051, "step": 177 }, { "epoch": 0.73, "learning_rate": 0.00010874148695237245, "loss": 1.9606, "step": 178 }, { "epoch": 0.73, "learning_rate": 0.00010830053465824405, "loss": 2.0241, "step": 179 }, { "epoch": 0.74, "learning_rate": 0.0001078581439180085, "loss": 1.8999, "step": 180 }, { "epoch": 0.74, "learning_rate": 0.0001074143338411259, "loss": 1.8892, "step": 181 }, { "epoch": 0.74, "learning_rate": 0.00010696912359836585, "loss": 1.9695, "step": 182 }, { "epoch": 0.75, "learning_rate": 0.00010652253242097937, "loss": 1.9828, "step": 183 }, { "epoch": 0.75, "learning_rate": 0.00010607457959986809, "loss": 1.899, "step": 184 }, { "epoch": 0.76, "learning_rate": 0.0001056252844847512, "loss": 2.0682, "step": 185 }, { "epoch": 0.76, "learning_rate": 0.00010517466648332935, "loss": 1.8665, "step": 186 }, { "epoch": 0.76, "learning_rate": 0.00010472274506044646, "loss": 1.9554, "step": 187 }, { "epoch": 0.77, "learning_rate": 0.00010426953973724894, "loss": 2.0293, "step": 188 }, { "epoch": 0.77, "learning_rate": 0.00010381507009034231, "loss": 1.9689, "step": 189 }, { "epoch": 0.78, "learning_rate": 0.00010335935575094584, "loss": 1.9393, "step": 190 }, { "epoch": 0.78, "learning_rate": 0.0001029024164040442, "loss": 1.8898, "step": 191 }, { "epoch": 0.78, "learning_rate": 0.00010244427178753751, "loss": 1.9101, "step": 192 }, { "epoch": 0.79, "learning_rate": 0.00010198494169138847, "loss": 2.0164, "step": 193 }, { "epoch": 0.79, "learning_rate": 0.0001015244459567677, "loss": 1.8392, "step": 194 }, { "epoch": 0.8, "learning_rate": 0.00010106280447519656, "loss": 1.9871, "step": 195 }, { "epoch": 0.8, "learning_rate": 0.00010060003718768793, "loss": 1.9528, "step": 196 }, { "epoch": 0.81, "learning_rate": 0.000100136164083885, "loss": 2.0022, "step": 197 }, { "epoch": 0.81, "learning_rate": 9.967120520119751e-05, "loss": 2.0728, "step": 198 }, { "epoch": 0.81, "learning_rate": 9.920518062393657e-05, "loss": 1.9121, "step": 199 }, { "epoch": 0.82, "learning_rate": 9.873811048244671e-05, "loss": 1.8309, "step": 200 }, { "epoch": 0.82, "learning_rate": 9.82700149522367e-05, "loss": 1.8731, "step": 201 }, { "epoch": 0.83, "learning_rate": 9.780091425310777e-05, "loss": 1.9089, "step": 202 }, { "epoch": 0.83, "learning_rate": 9.733082864828036e-05, "loss": 1.8871, "step": 203 }, { "epoch": 0.83, "learning_rate": 9.685977844351884e-05, "loss": 1.932, "step": 204 }, { "epoch": 0.84, "learning_rate": 9.638778398625421e-05, "loss": 1.8818, "step": 205 }, { "epoch": 0.84, "learning_rate": 9.59148656647054e-05, "loss": 2.0086, "step": 206 }, { "epoch": 0.85, "learning_rate": 9.54410439069984e-05, "loss": 1.9073, "step": 207 }, { "epoch": 0.85, "learning_rate": 9.496633918028397e-05, "loss": 1.868, "step": 208 }, { "epoch": 0.85, "learning_rate": 9.44907719898535e-05, "loss": 1.9662, "step": 209 }, { "epoch": 0.86, "learning_rate": 9.401436287825326e-05, "loss": 1.8944, "step": 210 }, { "epoch": 0.86, "learning_rate": 9.3537132424397e-05, "loss": 1.9545, "step": 211 }, { "epoch": 0.87, "learning_rate": 9.305910124267716e-05, "loss": 2.0606, "step": 212 }, { "epoch": 0.87, "learning_rate": 9.258028998207425e-05, "loss": 1.9157, "step": 213 }, { "epoch": 0.87, "learning_rate": 9.210071932526506e-05, "loss": 1.965, "step": 214 }, { "epoch": 0.88, "learning_rate": 9.162040998772913e-05, "loss": 1.9563, "step": 215 }, { "epoch": 0.88, "learning_rate": 9.113938271685392e-05, "loss": 1.8179, "step": 216 }, { "epoch": 0.89, "learning_rate": 9.065765829103874e-05, "loss": 1.9514, "step": 217 }, { "epoch": 0.89, "learning_rate": 9.01752575187971e-05, "loss": 1.9616, "step": 218 }, { "epoch": 0.9, "learning_rate": 8.969220123785786e-05, "loss": 1.9105, "step": 219 }, { "epoch": 0.9, "learning_rate": 8.920851031426521e-05, "loss": 1.9349, "step": 220 }, { "epoch": 0.9, "learning_rate": 8.872420564147722e-05, "loss": 1.902, "step": 221 }, { "epoch": 0.91, "learning_rate": 8.823930813946351e-05, "loss": 2.0122, "step": 222 }, { "epoch": 0.91, "learning_rate": 8.775383875380138e-05, "loss": 1.9486, "step": 223 }, { "epoch": 0.92, "learning_rate": 8.72678184547712e-05, "loss": 1.9728, "step": 224 }, { "epoch": 0.92, "learning_rate": 8.678126823645052e-05, "loss": 1.919, "step": 225 }, { "epoch": 0.92, "learning_rate": 8.62942091158072e-05, "loss": 1.8079, "step": 226 }, { "epoch": 0.93, "learning_rate": 8.580666213179165e-05, "loss": 1.9733, "step": 227 }, { "epoch": 0.93, "learning_rate": 8.531864834442792e-05, "loss": 1.9641, "step": 228 }, { "epoch": 0.94, "learning_rate": 8.483018883390405e-05, "loss": 2.0346, "step": 229 }, { "epoch": 0.94, "learning_rate": 8.434130469966152e-05, "loss": 1.919, "step": 230 }, { "epoch": 0.94, "learning_rate": 8.385201705948375e-05, "loss": 1.9831, "step": 231 }, { "epoch": 0.95, "learning_rate": 8.336234704858398e-05, "loss": 2.0287, "step": 232 }, { "epoch": 0.95, "learning_rate": 8.287231581869235e-05, "loss": 2.0557, "step": 233 }, { "epoch": 0.96, "learning_rate": 8.238194453714209e-05, "loss": 1.8527, "step": 234 }, { "epoch": 0.96, "learning_rate": 8.189125438595531e-05, "loss": 1.8578, "step": 235 }, { "epoch": 0.96, "learning_rate": 8.140026656092794e-05, "loss": 1.9493, "step": 236 }, { "epoch": 0.97, "learning_rate": 8.090900227071425e-05, "loss": 1.9758, "step": 237 }, { "epoch": 0.97, "learning_rate": 8.04174827359106e-05, "loss": 2.0026, "step": 238 }, { "epoch": 0.98, "learning_rate": 7.992572918813897e-05, "loss": 1.9079, "step": 239 }, { "epoch": 0.98, "learning_rate": 7.943376286912961e-05, "loss": 2.0169, "step": 240 }, { "epoch": 0.99, "learning_rate": 7.894160502980368e-05, "loss": 1.937, "step": 241 }, { "epoch": 0.99, "learning_rate": 7.844927692935528e-05, "loss": 1.912, "step": 242 }, { "epoch": 0.99, "learning_rate": 7.795679983433301e-05, "loss": 1.9314, "step": 243 }, { "epoch": 1.0, "learning_rate": 7.74641950177214e-05, "loss": 1.8367, "step": 244 } ], "logging_steps": 1, "max_steps": 488, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 244, "total_flos": 3.2149768164228465e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }