{
  "best_metric": 0.20319828391075134,
  "best_model_checkpoint": "./results/checkpoint-3500",
  "epoch": 2.708978328173375,
  "eval_steps": 500,
  "global_step": 3500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15,
      "learning_rate": 0.00019999868960045492,
      "loss": 0.7358,
      "step": 200
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00019845929936213215,
      "loss": 0.7562,
      "step": 400
    },
    {
      "epoch": 0.39,
      "eval_loss": 0.481257826089859,
      "eval_runtime": 49.5763,
      "eval_samples_per_second": 12.183,
      "eval_steps_per_second": 3.046,
      "step": 500
    },
    {
      "epoch": 0.46,
      "learning_rate": 0.00019405971991583108,
      "loss": 0.7465,
      "step": 600
    },
    {
      "epoch": 0.62,
      "learning_rate": 0.000186927756656608,
      "loss": 0.71,
      "step": 800
    },
    {
      "epoch": 0.77,
      "learning_rate": 0.00017727058924629164,
      "loss": 0.71,
      "step": 1000
    },
    {
      "epoch": 0.77,
      "eval_loss": 0.40068519115448,
      "eval_runtime": 49.5272,
      "eval_samples_per_second": 12.195,
      "eval_steps_per_second": 3.049,
      "step": 1000
    },
    {
      "epoch": 0.93,
      "learning_rate": 0.00016536875315675275,
      "loss": 0.6628,
      "step": 1200
    },
    {
      "epoch": 1.08,
      "learning_rate": 0.00015156799026670633,
      "loss": 0.6006,
      "step": 1400
    },
    {
      "epoch": 1.16,
      "eval_loss": 0.3466373383998871,
      "eval_runtime": 49.4923,
      "eval_samples_per_second": 12.204,
      "eval_steps_per_second": 3.051,
      "step": 1500
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.00013626920524778533,
      "loss": 0.5302,
      "step": 1600
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.00011991681950141926,
      "loss": 0.5161,
      "step": 1800
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.00010298586095833151,
      "loss": 0.4935,
      "step": 2000
    },
    {
      "epoch": 1.55,
      "eval_loss": 0.28296753764152527,
      "eval_runtime": 49.4347,
      "eval_samples_per_second": 12.218,
      "eval_steps_per_second": 3.055,
      "step": 2000
    },
    {
      "epoch": 1.7,
      "learning_rate": 8.596816477497136e-05,
      "loss": 0.5046,
      "step": 2200
    },
    {
      "epoch": 1.86,
      "learning_rate": 6.93580857891615e-05,
      "loss": 0.5042,
      "step": 2400
    },
    {
      "epoch": 1.93,
      "eval_loss": 0.2403416633605957,
      "eval_runtime": 49.3424,
      "eval_samples_per_second": 12.241,
      "eval_steps_per_second": 3.06,
      "step": 2500
    },
    {
      "epoch": 2.01,
      "learning_rate": 5.3638137780368736e-05,
      "loss": 0.4561,
      "step": 2600
    },
    {
      "epoch": 2.17,
      "learning_rate": 3.9264976706293624e-05,
      "loss": 0.3455,
      "step": 2800
    },
    {
      "epoch": 2.32,
      "learning_rate": 2.6656135095147604e-05,
      "loss": 0.356,
      "step": 3000
    },
    {
      "epoch": 2.32,
      "eval_loss": 0.2102939337491989,
      "eval_runtime": 49.4972,
      "eval_samples_per_second": 12.203,
      "eval_steps_per_second": 3.051,
      "step": 3000
    },
    {
      "epoch": 2.48,
      "learning_rate": 1.6177892952323237e-05,
      "loss": 0.3659,
      "step": 3200
    },
    {
      "epoch": 2.63,
      "learning_rate": 8.134637525034839e-06,
      "loss": 0.3393,
      "step": 3400
    },
    {
      "epoch": 2.71,
      "eval_loss": 0.20319828391075134,
      "eval_runtime": 49.6088,
      "eval_samples_per_second": 12.175,
      "eval_steps_per_second": 3.044,
      "step": 3500
    }
  ],
  "logging_steps": 200,
  "max_steps": 3876,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 7247048796733440.0,
  "trial_name": null,
  "trial_params": null
}
|