{
"best_metric": 1.9378653764724731,
"best_model_checkpoint": "./output/training_results/C020_Meta-Llama-3-8B_pretrain_20240726_033210/checkpoint-32778",
"epoch": 4.0,
"eval_steps": 3642,
"global_step": 36412,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00010985389432055367,
"grad_norm": 0.0,
"learning_rate": 0.0,
"loss": 2.0915,
"step": 1
},
{
"epoch": 0.20004394155772823,
"grad_norm": 1.880800447163735,
"learning_rate": 1.9904796777737092e-06,
"loss": 2.0663,
"step": 1821
},
{
"epoch": 0.40008788311545646,
"grad_norm": 1.91582729652135,
"learning_rate": 2.239936089625888e-06,
"loss": 2.007,
"step": 3642
},
{
"epoch": 0.40008788311545646,
"eval_loss": 1.9830611944198608,
"eval_runtime": 322.5516,
"eval_samples_per_second": 200.684,
"eval_steps_per_second": 1.569,
"step": 3642
},
{
"epoch": 0.6001318246731847,
"grad_norm": 1.8799884350488187,
"learning_rate": 1.2178786981582618e-06,
"loss": 1.9789,
"step": 5463
},
{
"epoch": 0.8001757662309129,
"grad_norm": 2.167713398694089,
"learning_rate": 6.502057578924368e-07,
"loss": 1.9635,
"step": 7284
},
{
"epoch": 0.8001757662309129,
"eval_loss": 1.9486902952194214,
"eval_runtime": 315.8318,
"eval_samples_per_second": 204.954,
"eval_steps_per_second": 1.602,
"step": 7284
},
{
"epoch": 1.000219707788641,
"grad_norm": 1.9185025050401976,
"learning_rate": 3.4539250265217177e-07,
"loss": 1.9518,
"step": 9105
},
{
"epoch": 1.2002636493463694,
"grad_norm": 1.8537277337784435,
"learning_rate": 1.8838570953925226e-07,
"loss": 1.8917,
"step": 10926
},
{
"epoch": 1.2002636493463694,
"eval_loss": 1.9423131942749023,
"eval_runtime": 315.8691,
"eval_samples_per_second": 204.93,
"eval_steps_per_second": 1.602,
"step": 10926
},
{
"epoch": 1.4003075909040976,
"grad_norm": 1.8472997131423734,
"learning_rate": 1.113186862393777e-07,
"loss": 1.8886,
"step": 12747
},
{
"epoch": 1.6003515324618258,
"grad_norm": 1.895662682121472,
"learning_rate": 7.547123756350748e-08,
"loss": 1.8884,
"step": 14568
},
{
"epoch": 1.6003515324618258,
"eval_loss": 1.9401631355285645,
"eval_runtime": 315.8338,
"eval_samples_per_second": 204.953,
"eval_steps_per_second": 1.602,
"step": 14568
},
{
"epoch": 1.800395474019554,
"grad_norm": 1.8596122602328744,
"learning_rate": 5.97978448731285e-08,
"loss": 1.891,
"step": 16389
},
{
"epoch": 2.000439415577282,
"grad_norm": 1.752487380981475,
"learning_rate": 5.343917594361068e-08,
"loss": 1.8872,
"step": 18210
},
{
"epoch": 2.000439415577282,
"eval_loss": 1.9390411376953125,
"eval_runtime": 316.1594,
"eval_samples_per_second": 204.742,
"eval_steps_per_second": 1.6,
"step": 18210
},
{
"epoch": 2.2004833571350106,
"grad_norm": 1.8941258580557327,
"learning_rate": 5.108265610728981e-08,
"loss": 1.8788,
"step": 20031
},
{
"epoch": 2.400527298692739,
"grad_norm": 1.8532314600998243,
"learning_rate": 5.0297076317689476e-08,
"loss": 1.8811,
"step": 21852
},
{
"epoch": 2.400527298692739,
"eval_loss": 1.9393320083618164,
"eval_runtime": 316.5674,
"eval_samples_per_second": 204.478,
"eval_steps_per_second": 1.598,
"step": 21852
},
{
"epoch": 2.600571240250467,
"grad_norm": 1.856636820022415,
"learning_rate": 5.006877574024932e-08,
"loss": 1.8807,
"step": 23673
},
{
"epoch": 2.8006151818081952,
"grad_norm": 1.8975919928137213,
"learning_rate": 5.001268969632882e-08,
"loss": 1.8782,
"step": 25494
},
{
"epoch": 2.8006151818081952,
"eval_loss": 1.938640832901001,
"eval_runtime": 316.1355,
"eval_samples_per_second": 204.757,
"eval_steps_per_second": 1.601,
"step": 25494
},
{
"epoch": 3.0006591233659234,
"grad_norm": 1.8638851197626498,
"learning_rate": 5.000172130703981e-08,
"loss": 1.8764,
"step": 27315
},
{
"epoch": 3.2007030649236516,
"grad_norm": 1.9182088121869934,
"learning_rate": 5.000014937976813e-08,
"loss": 1.8742,
"step": 29136
},
{
"epoch": 3.2007030649236516,
"eval_loss": 1.9384987354278564,
"eval_runtime": 316.0484,
"eval_samples_per_second": 204.814,
"eval_steps_per_second": 1.601,
"step": 29136
},
{
"epoch": 3.40074700648138,
"grad_norm": 1.9601402086429087,
"learning_rate": 5.000000644319432e-08,
"loss": 1.8775,
"step": 30957
},
{
"epoch": 3.600790948039108,
"grad_norm": 1.8139760695838174,
"learning_rate": 5.000000007721787e-08,
"loss": 1.8756,
"step": 32778
},
{
"epoch": 3.600790948039108,
"eval_loss": 1.9378653764724731,
"eval_runtime": 316.2121,
"eval_samples_per_second": 204.708,
"eval_steps_per_second": 1.6,
"step": 32778
},
{
"epoch": 3.8008348895968362,
"grad_norm": 1.9502862375669288,
"learning_rate": 5.000000000004247e-08,
"loss": 1.871,
"step": 34599
},
{
"epoch": 4.0,
"step": 36412,
"total_flos": 3807150279229440.0,
"train_loss": 1.9091134535458327,
"train_runtime": 51834.1311,
"train_samples_per_second": 44.957,
"train_steps_per_second": 0.702
}
],
"logging_steps": 1821,
"max_steps": 36412,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 3642,
"total_flos": 3807150279229440.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}