{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21150219433526624, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9823745064861824e-05, "loss": 1.064, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.964749012972363e-05, "loss": 0.9951, "step": 100 }, { "epoch": 0.01, "learning_rate": 4.9471235194585454e-05, "loss": 1.0051, "step": 150 }, { "epoch": 0.01, "learning_rate": 4.929498025944726e-05, "loss": 1.0086, "step": 200 }, { "epoch": 0.02, "learning_rate": 4.9118725324309084e-05, "loss": 1.1256, "step": 250 }, { "epoch": 0.02, "learning_rate": 4.89424703891709e-05, "loss": 1.0419, "step": 300 }, { "epoch": 0.02, "learning_rate": 4.8766215454032714e-05, "loss": 1.0368, "step": 350 }, { "epoch": 0.03, "learning_rate": 4.8589960518894536e-05, "loss": 0.9884, "step": 400 }, { "epoch": 0.03, "learning_rate": 4.8413705583756344e-05, "loss": 1.01, "step": 450 }, { "epoch": 0.04, "learning_rate": 4.8237450648618166e-05, "loss": 1.0734, "step": 500 }, { "epoch": 0.04, "learning_rate": 4.806119571347998e-05, "loss": 1.0388, "step": 550 }, { "epoch": 0.04, "learning_rate": 4.7884940778341796e-05, "loss": 1.0576, "step": 600 }, { "epoch": 0.05, "learning_rate": 4.770868584320361e-05, "loss": 1.0449, "step": 650 }, { "epoch": 0.05, "learning_rate": 4.7532430908065426e-05, "loss": 1.0809, "step": 700 }, { "epoch": 0.05, "learning_rate": 4.735617597292725e-05, "loss": 1.0888, "step": 750 }, { "epoch": 0.06, "learning_rate": 4.717992103778906e-05, "loss": 1.031, "step": 800 }, { "epoch": 0.06, "learning_rate": 4.700366610265088e-05, "loss": 1.0546, "step": 850 }, { "epoch": 0.06, "learning_rate": 4.682741116751269e-05, "loss": 1.093, "step": 900 }, { "epoch": 0.07, "learning_rate": 4.665468133107727e-05, "loss": 1.0729, "step": 950 }, { "epoch": 0.07, "learning_rate": 4.647842639593909e-05, "loss": 1.0771, "step": 1000 }, { "epoch": 0.07, "learning_rate": 4.6302171460800905e-05, "loss": 1.1318, "step": 1050 }, { "epoch": 0.08, "learning_rate": 4.612591652566272e-05, "loss": 1.0114, "step": 1100 }, { "epoch": 0.08, "learning_rate": 4.5949661590524535e-05, "loss": 1.0272, "step": 1150 }, { "epoch": 0.08, "learning_rate": 4.577340665538635e-05, "loss": 1.0503, "step": 1200 }, { "epoch": 0.09, "learning_rate": 4.559715172024817e-05, "loss": 0.9816, "step": 1250 }, { "epoch": 0.09, "learning_rate": 4.542794698251551e-05, "loss": 1.001, "step": 1300 }, { "epoch": 0.1, "learning_rate": 4.525169204737733e-05, "loss": 1.1184, "step": 1350 }, { "epoch": 0.1, "learning_rate": 4.507543711223914e-05, "loss": 1.0958, "step": 1400 }, { "epoch": 0.1, "learning_rate": 4.4899182177100965e-05, "loss": 1.0912, "step": 1450 }, { "epoch": 0.11, "learning_rate": 4.472292724196277e-05, "loss": 1.0226, "step": 1500 }, { "epoch": 0.11, "learning_rate": 4.4546672306824595e-05, "loss": 1.0038, "step": 1550 }, { "epoch": 0.11, "learning_rate": 4.437041737168641e-05, "loss": 0.9825, "step": 1600 }, { "epoch": 0.12, "learning_rate": 4.4194162436548225e-05, "loss": 1.0476, "step": 1650 }, { "epoch": 0.12, "learning_rate": 4.401790750141005e-05, "loss": 1.0642, "step": 1700 }, { "epoch": 0.12, "learning_rate": 4.3841652566271855e-05, "loss": 1.0254, "step": 1750 }, { "epoch": 0.13, "learning_rate": 4.366539763113368e-05, "loss": 1.0941, "step": 1800 }, { "epoch": 0.13, "learning_rate": 4.3489142695995485e-05, "loss": 0.9739, "step": 1850 }, { "epoch": 0.13, "learning_rate": 4.331288776085731e-05, "loss": 1.0468, "step": 1900 }, { "epoch": 0.14, "learning_rate": 4.313663282571912e-05, "loss": 1.0706, "step": 1950 }, { "epoch": 0.14, "learning_rate": 4.296037789058094e-05, "loss": 0.9796, "step": 2000 }, { "epoch": 0.14, "learning_rate": 4.278412295544276e-05, "loss": 1.0202, "step": 2050 }, { "epoch": 0.15, "learning_rate": 4.260786802030457e-05, "loss": 1.0263, "step": 2100 }, { "epoch": 0.15, "learning_rate": 4.243161308516639e-05, "loss": 0.9823, "step": 2150 }, { "epoch": 0.16, "learning_rate": 4.2255358150028204e-05, "loss": 1.0187, "step": 2200 }, { "epoch": 0.16, "learning_rate": 4.207910321489002e-05, "loss": 1.0219, "step": 2250 }, { "epoch": 0.16, "learning_rate": 4.1902848279751834e-05, "loss": 1.0641, "step": 2300 }, { "epoch": 0.17, "learning_rate": 4.172659334461365e-05, "loss": 0.9979, "step": 2350 }, { "epoch": 0.17, "learning_rate": 4.155033840947547e-05, "loss": 0.9762, "step": 2400 }, { "epoch": 0.17, "learning_rate": 4.1374083474337286e-05, "loss": 0.983, "step": 2450 }, { "epoch": 0.18, "learning_rate": 4.11978285391991e-05, "loss": 1.0849, "step": 2500 }, { "epoch": 0.18, "learning_rate": 4.1021573604060916e-05, "loss": 1.0281, "step": 2550 }, { "epoch": 0.18, "learning_rate": 4.084531866892273e-05, "loss": 1.0041, "step": 2600 }, { "epoch": 0.19, "learning_rate": 4.0669063733784546e-05, "loss": 1.0055, "step": 2650 }, { "epoch": 0.19, "learning_rate": 4.049280879864636e-05, "loss": 1.014, "step": 2700 }, { "epoch": 0.19, "learning_rate": 4.0316553863508176e-05, "loss": 1.0347, "step": 2750 }, { "epoch": 0.2, "learning_rate": 4.014029892837e-05, "loss": 0.9675, "step": 2800 }, { "epoch": 0.2, "learning_rate": 3.996404399323181e-05, "loss": 1.0086, "step": 2850 }, { "epoch": 0.2, "learning_rate": 3.978778905809363e-05, "loss": 0.9836, "step": 2900 }, { "epoch": 0.21, "learning_rate": 3.961153412295544e-05, "loss": 1.0441, "step": 2950 }, { "epoch": 0.21, "learning_rate": 3.943527918781726e-05, "loss": 1.0613, "step": 3000 } ], "logging_steps": 50, "max_steps": 14184, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1.0058830184448e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }