|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.687523259161477, |
|
"global_step": 810000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9824459158667857e-05, |
|
"loss": 1.0594, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.964891831733571e-05, |
|
"loss": 1.0381, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.947337747600357e-05, |
|
"loss": 1.014, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9297836634671426e-05, |
|
"loss": 0.9997, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.912229579333928e-05, |
|
"loss": 0.9936, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8946754952007134e-05, |
|
"loss": 0.9765, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.877121411067499e-05, |
|
"loss": 0.975, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.859567326934284e-05, |
|
"loss": 0.9627, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.84201324280107e-05, |
|
"loss": 0.9641, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.8244591586678564e-05, |
|
"loss": 0.9562, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.806905074534642e-05, |
|
"loss": 0.9634, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.789350990401427e-05, |
|
"loss": 0.9532, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.771796906268213e-05, |
|
"loss": 0.9539, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.754242822134998e-05, |
|
"loss": 0.9599, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.7366887380017835e-05, |
|
"loss": 0.9437, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.7191346538685696e-05, |
|
"loss": 0.9395, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.701580569735355e-05, |
|
"loss": 0.9385, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.6840264856021404e-05, |
|
"loss": 0.947, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.666472401468926e-05, |
|
"loss": 0.9402, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.648918317335711e-05, |
|
"loss": 0.9317, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.631364233202497e-05, |
|
"loss": 0.9316, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.613810149069283e-05, |
|
"loss": 0.9399, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.596256064936068e-05, |
|
"loss": 0.9395, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.5787019808028536e-05, |
|
"loss": 0.9303, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.561147896669639e-05, |
|
"loss": 0.9304, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.5435938125364244e-05, |
|
"loss": 0.9126, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.5260397284032105e-05, |
|
"loss": 0.9165, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.5084856442699966e-05, |
|
"loss": 0.9311, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 4.490931560136782e-05, |
|
"loss": 0.9139, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 4.4733774760035675e-05, |
|
"loss": 0.9097, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.455823391870353e-05, |
|
"loss": 0.9086, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.438269307737138e-05, |
|
"loss": 0.9172, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 4.420715223603924e-05, |
|
"loss": 0.9106, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.40316113947071e-05, |
|
"loss": 0.9001, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.385607055337495e-05, |
|
"loss": 0.9108, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.3680529712042806e-05, |
|
"loss": 0.911, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.350498887071066e-05, |
|
"loss": 0.9007, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.3329448029378515e-05, |
|
"loss": 0.8908, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.315390718804637e-05, |
|
"loss": 0.897, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.297836634671423e-05, |
|
"loss": 0.902, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.2802825505382084e-05, |
|
"loss": 0.8918, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.262728466404994e-05, |
|
"loss": 0.889, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.245174382271779e-05, |
|
"loss": 0.8929, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.2276202981385646e-05, |
|
"loss": 0.8974, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.210066214005351e-05, |
|
"loss": 0.8932, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.192512129872136e-05, |
|
"loss": 0.8901, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.174958045738922e-05, |
|
"loss": 0.8849, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.1574039616057077e-05, |
|
"loss": 0.8801, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.139849877472493e-05, |
|
"loss": 0.8807, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.1222957933392785e-05, |
|
"loss": 0.8847, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.104741709206064e-05, |
|
"loss": 0.8753, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.08718762507285e-05, |
|
"loss": 0.8764, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.0696335409396354e-05, |
|
"loss": 0.8748, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.052079456806421e-05, |
|
"loss": 0.8789, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 4.034525372673206e-05, |
|
"loss": 0.875, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.016971288539992e-05, |
|
"loss": 0.8711, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.999417204406777e-05, |
|
"loss": 0.8688, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.981863120273563e-05, |
|
"loss": 0.8594, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.9643090361403486e-05, |
|
"loss": 0.8595, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.946754952007134e-05, |
|
"loss": 0.8536, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.9292008678739194e-05, |
|
"loss": 0.8618, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 3.911646783740705e-05, |
|
"loss": 0.8511, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.894092699607491e-05, |
|
"loss": 0.8543, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.8765386154742763e-05, |
|
"loss": 0.856, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.8589845313410624e-05, |
|
"loss": 0.8608, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.841430447207848e-05, |
|
"loss": 0.8449, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.823876363074633e-05, |
|
"loss": 0.8527, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.806322278941419e-05, |
|
"loss": 0.8441, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.788768194808204e-05, |
|
"loss": 0.8436, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.77121411067499e-05, |
|
"loss": 0.8448, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.7536600265417756e-05, |
|
"loss": 0.8348, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.736105942408561e-05, |
|
"loss": 0.8466, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 3.7185518582753464e-05, |
|
"loss": 0.8354, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.700997774142132e-05, |
|
"loss": 0.8421, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.683443690008917e-05, |
|
"loss": 0.8354, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.6658896058757034e-05, |
|
"loss": 0.8358, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.648335521742489e-05, |
|
"loss": 0.8414, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3.630781437609274e-05, |
|
"loss": 0.8428, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.6132273534760596e-05, |
|
"loss": 0.8303, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.595673269342845e-05, |
|
"loss": 0.834, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.578119185209631e-05, |
|
"loss": 0.8311, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.5605651010764165e-05, |
|
"loss": 0.8245, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 3.5430110169432026e-05, |
|
"loss": 0.8211, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.525456932809988e-05, |
|
"loss": 0.8154, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.5079028486767735e-05, |
|
"loss": 0.8196, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 3.490348764543559e-05, |
|
"loss": 0.8188, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.472794680410344e-05, |
|
"loss": 0.81, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.45524059627713e-05, |
|
"loss": 0.8054, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.437686512143916e-05, |
|
"loss": 0.8156, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.420132428010701e-05, |
|
"loss": 0.8032, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3.4025783438774866e-05, |
|
"loss": 0.8109, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.385024259744272e-05, |
|
"loss": 0.812, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.3674701756110575e-05, |
|
"loss": 0.8102, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.3499160914778436e-05, |
|
"loss": 0.8032, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 3.332362007344629e-05, |
|
"loss": 0.8098, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.3148079232114144e-05, |
|
"loss": 0.8057, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 3.2972538390782e-05, |
|
"loss": 0.8026, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 3.279699754944985e-05, |
|
"loss": 0.8039, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 3.262145670811771e-05, |
|
"loss": 0.8024, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.244591586678557e-05, |
|
"loss": 0.8018, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 3.227037502545343e-05, |
|
"loss": 0.7953, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 3.209483418412128e-05, |
|
"loss": 0.7895, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 3.191929334278914e-05, |
|
"loss": 0.7878, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.174375250145699e-05, |
|
"loss": 0.794, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.1568211660124845e-05, |
|
"loss": 0.7998, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 3.13926708187927e-05, |
|
"loss": 0.7819, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 3.121712997746056e-05, |
|
"loss": 0.7917, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 3.1041589136128414e-05, |
|
"loss": 0.7827, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 3.086604829479627e-05, |
|
"loss": 0.7874, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 3.069050745346412e-05, |
|
"loss": 0.7788, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 3.051496661213198e-05, |
|
"loss": 0.7821, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 3.0339425770799834e-05, |
|
"loss": 0.7828, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 3.016388492946769e-05, |
|
"loss": 0.7769, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 2.9988344088135546e-05, |
|
"loss": 0.7779, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 2.98128032468034e-05, |
|
"loss": 0.7633, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 2.963726240547126e-05, |
|
"loss": 0.765, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 2.9461721564139115e-05, |
|
"loss": 0.7614, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.9286180722806973e-05, |
|
"loss": 0.7669, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 2.9110639881474827e-05, |
|
"loss": 0.7721, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 2.893509904014268e-05, |
|
"loss": 0.7633, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 2.875955819881054e-05, |
|
"loss": 0.7618, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 2.8584017357478393e-05, |
|
"loss": 0.7603, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.8408476516146247e-05, |
|
"loss": 0.7594, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.8232935674814104e-05, |
|
"loss": 0.7612, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 2.805739483348196e-05, |
|
"loss": 0.7616, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 2.7881853992149816e-05, |
|
"loss": 0.7628, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 2.770631315081767e-05, |
|
"loss": 0.7638, |
|
"step": 635000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 2.7530772309485525e-05, |
|
"loss": 0.7469, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 2.7355231468153382e-05, |
|
"loss": 0.7477, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 2.7179690626821236e-05, |
|
"loss": 0.7501, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 2.700414978548909e-05, |
|
"loss": 0.7513, |
|
"step": 655000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 2.6828608944156948e-05, |
|
"loss": 0.751, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 2.6653068102824802e-05, |
|
"loss": 0.7491, |
|
"step": 665000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 2.6477527261492663e-05, |
|
"loss": 0.7516, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.6301986420160517e-05, |
|
"loss": 0.7511, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"eval_loss": 0.6953830122947693, |
|
"eval_runtime": 12.1778, |
|
"eval_samples_per_second": 82.116, |
|
"eval_steps_per_second": 10.265, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.6126445578828375e-05, |
|
"loss": 0.7395, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"eval_loss": 0.6852219104766846, |
|
"eval_runtime": 12.2181, |
|
"eval_samples_per_second": 81.846, |
|
"eval_steps_per_second": 10.231, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 2.595090473749623e-05, |
|
"loss": 0.7445, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_loss": 0.7140026688575745, |
|
"eval_runtime": 12.2246, |
|
"eval_samples_per_second": 81.802, |
|
"eval_steps_per_second": 10.225, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 2.5775363896164083e-05, |
|
"loss": 0.7448, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"eval_loss": 0.678001880645752, |
|
"eval_runtime": 12.1996, |
|
"eval_samples_per_second": 81.97, |
|
"eval_steps_per_second": 10.246, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 2.559982305483194e-05, |
|
"loss": 0.7392, |
|
"step": 695000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_loss": 0.6525120139122009, |
|
"eval_runtime": 12.2116, |
|
"eval_samples_per_second": 81.889, |
|
"eval_steps_per_second": 10.236, |
|
"step": 695000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.5424282213499795e-05, |
|
"loss": 0.7499, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 0.6543171405792236, |
|
"eval_runtime": 12.2207, |
|
"eval_samples_per_second": 81.828, |
|
"eval_steps_per_second": 10.229, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.524874137216765e-05, |
|
"loss": 0.7393, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 0.665242075920105, |
|
"eval_runtime": 5.3758, |
|
"eval_samples_per_second": 186.02, |
|
"eval_steps_per_second": 23.253, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 2.5073200530835506e-05, |
|
"loss": 0.7324, |
|
"step": 710000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 0.6618428826332092, |
|
"eval_runtime": 5.3906, |
|
"eval_samples_per_second": 185.507, |
|
"eval_steps_per_second": 23.188, |
|
"step": 710000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"learning_rate": 2.489765968950336e-05, |
|
"loss": 0.7261, |
|
"step": 715000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 0.6423526406288147, |
|
"eval_runtime": 5.3892, |
|
"eval_samples_per_second": 185.555, |
|
"eval_steps_per_second": 23.194, |
|
"step": 715000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.4722118848171215e-05, |
|
"loss": 0.7327, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"eval_loss": 0.6585870385169983, |
|
"eval_runtime": 5.3853, |
|
"eval_samples_per_second": 185.69, |
|
"eval_steps_per_second": 23.211, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.4546578006839072e-05, |
|
"loss": 0.7265, |
|
"step": 725000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 0.6187921762466431, |
|
"eval_runtime": 5.3825, |
|
"eval_samples_per_second": 185.787, |
|
"eval_steps_per_second": 23.223, |
|
"step": 725000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 2.4371037165506926e-05, |
|
"loss": 0.7247, |
|
"step": 730000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_loss": 0.6582339406013489, |
|
"eval_runtime": 5.3823, |
|
"eval_samples_per_second": 185.796, |
|
"eval_steps_per_second": 23.224, |
|
"step": 730000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 2.419549632417478e-05, |
|
"loss": 0.7265, |
|
"step": 735000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"eval_loss": 0.7226254940032959, |
|
"eval_runtime": 5.3797, |
|
"eval_samples_per_second": 185.883, |
|
"eval_steps_per_second": 23.235, |
|
"step": 735000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 2.401995548284264e-05, |
|
"loss": 0.7166, |
|
"step": 740000 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_loss": 0.6698991656303406, |
|
"eval_runtime": 5.3768, |
|
"eval_samples_per_second": 185.986, |
|
"eval_steps_per_second": 23.248, |
|
"step": 740000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 2.3844414641510496e-05, |
|
"loss": 0.7214, |
|
"step": 745000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"eval_loss": 0.6653444170951843, |
|
"eval_runtime": 5.3777, |
|
"eval_samples_per_second": 185.954, |
|
"eval_steps_per_second": 23.244, |
|
"step": 745000 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 2.366887380017835e-05, |
|
"loss": 0.7268, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"eval_loss": 0.6490678787231445, |
|
"eval_runtime": 5.3962, |
|
"eval_samples_per_second": 185.314, |
|
"eval_steps_per_second": 23.164, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 2.3493332958846207e-05, |
|
"loss": 0.7177, |
|
"step": 755000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"eval_loss": 0.6720253825187683, |
|
"eval_runtime": 5.3813, |
|
"eval_samples_per_second": 185.828, |
|
"eval_steps_per_second": 23.229, |
|
"step": 755000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 2.331779211751406e-05, |
|
"loss": 0.7173, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_loss": 0.636309027671814, |
|
"eval_runtime": 5.3741, |
|
"eval_samples_per_second": 186.079, |
|
"eval_steps_per_second": 23.26, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 2.3142251276181916e-05, |
|
"loss": 0.7222, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"eval_loss": 0.6736326813697815, |
|
"eval_runtime": 5.3844, |
|
"eval_samples_per_second": 185.723, |
|
"eval_steps_per_second": 23.215, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 2.2966710434849773e-05, |
|
"loss": 0.7189, |
|
"step": 770000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 0.6502253413200378, |
|
"eval_runtime": 5.3808, |
|
"eval_samples_per_second": 185.846, |
|
"eval_steps_per_second": 23.231, |
|
"step": 770000 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 2.2791169593517627e-05, |
|
"loss": 0.7142, |
|
"step": 775000 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"eval_loss": 0.6675522327423096, |
|
"eval_runtime": 5.3769, |
|
"eval_samples_per_second": 185.982, |
|
"eval_steps_per_second": 23.248, |
|
"step": 775000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 2.2615628752185485e-05, |
|
"loss": 0.7123, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"eval_loss": 0.7307547330856323, |
|
"eval_runtime": 5.3752, |
|
"eval_samples_per_second": 186.041, |
|
"eval_steps_per_second": 23.255, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 2.2440087910853343e-05, |
|
"loss": 0.7149, |
|
"step": 785000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"eval_loss": 0.6528046727180481, |
|
"eval_runtime": 5.3874, |
|
"eval_samples_per_second": 185.618, |
|
"eval_steps_per_second": 23.202, |
|
"step": 785000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 2.2264547069521197e-05, |
|
"loss": 0.7111, |
|
"step": 790000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_loss": 0.6415424942970276, |
|
"eval_runtime": 5.3848, |
|
"eval_samples_per_second": 185.708, |
|
"eval_steps_per_second": 23.213, |
|
"step": 790000 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 2.208900622818905e-05, |
|
"loss": 0.7126, |
|
"step": 795000 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"eval_loss": 0.664243221282959, |
|
"eval_runtime": 5.3818, |
|
"eval_samples_per_second": 185.812, |
|
"eval_steps_per_second": 23.226, |
|
"step": 795000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 2.191346538685691e-05, |
|
"loss": 0.7075, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"eval_loss": 0.6190058588981628, |
|
"eval_runtime": 5.3768, |
|
"eval_samples_per_second": 185.985, |
|
"eval_steps_per_second": 23.248, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 2.1737924545524763e-05, |
|
"loss": 0.7047, |
|
"step": 805000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_loss": 0.645745038986206, |
|
"eval_runtime": 5.3814, |
|
"eval_samples_per_second": 185.824, |
|
"eval_steps_per_second": 23.228, |
|
"step": 805000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 2.1562383704192617e-05, |
|
"loss": 0.7002, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_loss": 0.6156101226806641, |
|
"eval_runtime": 5.3698, |
|
"eval_samples_per_second": 186.226, |
|
"eval_steps_per_second": 23.278, |
|
"step": 810000 |
|
} |
|
], |
|
"max_steps": 1424170, |
|
"num_train_epochs": 10, |
|
"total_flos": 3.3150958919481446e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|