{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 195,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15384615384615385,
      "grad_norm": 0.3267529010772705,
      "learning_rate": 3.984759689788058e-05,
      "loss": 3.9964,
      "step": 10
    },
    {
      "epoch": 0.3076923076923077,
      "grad_norm": 0.26488664746284485,
      "learning_rate": 3.923287969397442e-05,
      "loss": 3.6742,
      "step": 20
    },
    {
      "epoch": 0.46153846153846156,
      "grad_norm": 0.3106290102005005,
      "learning_rate": 3.816256236058198e-05,
      "loss": 3.333,
      "step": 30
    },
    {
      "epoch": 0.6153846153846154,
      "grad_norm": 0.2936558425426483,
      "learning_rate": 3.6664941776944834e-05,
      "loss": 3.1105,
      "step": 40
    },
    {
      "epoch": 0.7692307692307693,
      "grad_norm": 0.29774364829063416,
      "learning_rate": 3.477961179759822e-05,
      "loss": 2.8481,
      "step": 50
    },
    {
      "epoch": 0.9230769230769231,
      "grad_norm": 0.2768242657184601,
      "learning_rate": 3.255641647634906e-05,
      "loss": 2.5763,
      "step": 60
    },
    {
      "epoch": 1.0769230769230769,
      "grad_norm": 0.20273493230342865,
      "learning_rate": 3.005413229711771e-05,
      "loss": 2.3194,
      "step": 70
    },
    {
      "epoch": 1.2307692307692308,
      "grad_norm": 0.17480742931365967,
      "learning_rate": 2.7338914250614635e-05,
      "loss": 2.161,
      "step": 80
    },
    {
      "epoch": 1.3846153846153846,
      "grad_norm": 0.16872847080230713,
      "learning_rate": 2.4482546839229335e-05,
      "loss": 2.1031,
      "step": 90
    },
    {
      "epoch": 1.5384615384615383,
      "grad_norm": 0.15632902085781097,
      "learning_rate": 2.1560546249785927e-05,
      "loss": 2.0517,
      "step": 100
    },
    {
      "epoch": 1.6923076923076923,
      "grad_norm": 0.15166400372982025,
      "learning_rate": 1.8650163868619736e-05,
      "loss": 2.0657,
      "step": 110
    },
    {
      "epoch": 1.8461538461538463,
      "grad_norm": 0.15363118052482605,
      "learning_rate": 1.582834392172553e-05,
      "loss": 2.0917,
      "step": 120
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.1388818472623825,
      "learning_rate": 1.3169689235561926e-05,
      "loss": 1.9709,
      "step": 130
    },
    {
      "epoch": 2.1538461538461537,
      "grad_norm": 0.13172218203544617,
      "learning_rate": 1.074448889940397e-05,
      "loss": 2.0095,
      "step": 140
    },
    {
      "epoch": 2.3076923076923075,
      "grad_norm": 0.13791440427303314,
      "learning_rate": 8.616859973592406e-06,
      "loss": 1.9759,
      "step": 150
    },
    {
      "epoch": 2.4615384615384617,
      "grad_norm": 0.1420830637216568,
      "learning_rate": 6.843052372901353e-06,
      "loss": 2.0029,
      "step": 160
    },
    {
      "epoch": 2.6153846153846154,
      "grad_norm": 0.13697461783885956,
      "learning_rate": 5.469961740247712e-06,
      "loss": 2.0174,
      "step": 170
    },
    {
      "epoch": 2.769230769230769,
      "grad_norm": 0.13703729212284088,
      "learning_rate": 4.533889627150585e-06,
      "loss": 2.0217,
      "step": 180
    },
    {
      "epoch": 2.9230769230769234,
      "grad_norm": 0.1383812576532364,
      "learning_rate": 4.059583759092255e-06,
      "loss": 1.9305,
      "step": 190
    }
  ],
  "logging_steps": 10,
  "max_steps": 195,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8825076824813568.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}