|
{ |
|
"best_metric": 0.05124881863594055, |
|
"best_model_checkpoint": "my_Pytorch_pii_model/checkpoint-20926", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 20926, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04778744146038421, |
|
"grad_norm": 1.7837579250335693, |
|
"learning_rate": 1.997195242749148e-05, |
|
"loss": 0.8081, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09557488292076842, |
|
"grad_norm": 2.2397186756134033, |
|
"learning_rate": 1.9887742837017725e-05, |
|
"loss": 0.2477, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.14336232438115262, |
|
"grad_norm": 1.895089030265808, |
|
"learning_rate": 1.9747845385097936e-05, |
|
"loss": 0.1609, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.19114976584153684, |
|
"grad_norm": 1.4316082000732422, |
|
"learning_rate": 1.9553047975508295e-05, |
|
"loss": 0.1269, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.23893720730192106, |
|
"grad_norm": 0.5299736857414246, |
|
"learning_rate": 1.9304447709107316e-05, |
|
"loss": 0.1052, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.28672464876230525, |
|
"grad_norm": 1.0913931131362915, |
|
"learning_rate": 1.9003444704953408e-05, |
|
"loss": 0.0967, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.33451209022268946, |
|
"grad_norm": 1.8749414682388306, |
|
"learning_rate": 1.865248697176191e-05, |
|
"loss": 0.0889, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.3822995316830737, |
|
"grad_norm": 2.014411211013794, |
|
"learning_rate": 1.825214512447262e-05, |
|
"loss": 0.0843, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.4300869731434579, |
|
"grad_norm": 0.6492653489112854, |
|
"learning_rate": 1.780532711756845e-05, |
|
"loss": 0.0812, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.4778744146038421, |
|
"grad_norm": 1.5026785135269165, |
|
"learning_rate": 1.7314549434302465e-05, |
|
"loss": 0.0779, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.5256618560642263, |
|
"grad_norm": 0.5166222453117371, |
|
"learning_rate": 1.6782576139241983e-05, |
|
"loss": 0.0719, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.5734492975246105, |
|
"grad_norm": 0.9983482956886292, |
|
"learning_rate": 1.62124033110307e-05, |
|
"loss": 0.0746, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.6212367389849948, |
|
"grad_norm": 0.8525393605232239, |
|
"learning_rate": 1.5608485174626132e-05, |
|
"loss": 0.0713, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.6690241804453789, |
|
"grad_norm": 1.3193893432617188, |
|
"learning_rate": 1.4971803628070286e-05, |
|
"loss": 0.067, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.7168116219057632, |
|
"grad_norm": 1.7481768131256104, |
|
"learning_rate": 1.4307120836322233e-05, |
|
"loss": 0.0677, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.7645990633661474, |
|
"grad_norm": 0.7293727993965149, |
|
"learning_rate": 1.3618180299169794e-05, |
|
"loss": 0.0649, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.8123865048265316, |
|
"grad_norm": 0.4748859405517578, |
|
"learning_rate": 1.2908862136252995e-05, |
|
"loss": 0.0675, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.8601739462869158, |
|
"grad_norm": 0.9915094971656799, |
|
"learning_rate": 1.218462628244987e-05, |
|
"loss": 0.0648, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.9079613877473001, |
|
"grad_norm": 2.1113200187683105, |
|
"learning_rate": 1.1446650257897558e-05, |
|
"loss": 0.0638, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.9557488292076842, |
|
"grad_norm": 0.7165895700454712, |
|
"learning_rate": 1.0700526685370893e-05, |
|
"loss": 0.0615, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9751704843504109, |
|
"eval_f1": 0.9315204370483973, |
|
"eval_loss": 0.05543896183371544, |
|
"eval_precision": 0.9214454994188416, |
|
"eval_recall": 0.9418181257135804, |
|
"eval_runtime": 190.6513, |
|
"eval_samples_per_second": 109.766, |
|
"eval_steps_per_second": 6.861, |
|
"step": 10463 |
|
}, |
|
{ |
|
"epoch": 1.0035362706680684, |
|
"grad_norm": 0.7908827066421509, |
|
"learning_rate": 9.950457739907535e-06, |
|
"loss": 0.0591, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.0513237121284527, |
|
"grad_norm": 0.8281264901161194, |
|
"learning_rate": 9.200667816922936e-06, |
|
"loss": 0.0559, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.099111153588837, |
|
"grad_norm": 0.6375882625579834, |
|
"learning_rate": 8.456863027173128e-06, |
|
"loss": 0.0528, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.146898595049221, |
|
"grad_norm": 0.8089446425437927, |
|
"learning_rate": 7.720252707524596e-06, |
|
"loss": 0.0561, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.1946860365096053, |
|
"grad_norm": 0.5335150957107544, |
|
"learning_rate": 6.9964819462513174e-06, |
|
"loss": 0.0563, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.2424734779699895, |
|
"grad_norm": 0.7552749514579773, |
|
"learning_rate": 6.289627027145937e-06, |
|
"loss": 0.053, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.2902609194303736, |
|
"grad_norm": 1.121811866760254, |
|
"learning_rate": 5.603668963817577e-06, |
|
"loss": 0.0537, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.3380483608907578, |
|
"grad_norm": 1.5103745460510254, |
|
"learning_rate": 4.942471078585398e-06, |
|
"loss": 0.0529, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.3858358023511421, |
|
"grad_norm": 1.125802755355835, |
|
"learning_rate": 4.309757244210676e-06, |
|
"loss": 0.0558, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.4336232438115264, |
|
"grad_norm": 2.0246198177337646, |
|
"learning_rate": 3.709090911010067e-06, |
|
"loss": 0.051, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.4814106852719107, |
|
"grad_norm": 0.31454652547836304, |
|
"learning_rate": 3.1449479986513345e-06, |
|
"loss": 0.0503, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.5291981267322947, |
|
"grad_norm": 1.1729323863983154, |
|
"learning_rate": 2.618245735460083e-06, |
|
"loss": 0.0496, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.576985568192679, |
|
"grad_norm": 1.942895531654358, |
|
"learning_rate": 2.1331175823046777e-06, |
|
"loss": 0.0508, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.624773009653063, |
|
"grad_norm": 0.7527912259101868, |
|
"learning_rate": 1.692295785545267e-06, |
|
"loss": 0.0474, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.6725604511134473, |
|
"grad_norm": 0.9754999279975891, |
|
"learning_rate": 1.299002907484831e-06, |
|
"loss": 0.0505, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.7203478925738316, |
|
"grad_norm": 1.4431662559509277, |
|
"learning_rate": 9.538783949020436e-07, |
|
"loss": 0.0507, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.7681353340342159, |
|
"grad_norm": 0.9095632433891296, |
|
"learning_rate": 6.602380824095455e-07, |
|
"loss": 0.0484, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.8159227754946001, |
|
"grad_norm": 0.47108370065689087, |
|
"learning_rate": 4.185593937350141e-07, |
|
"loss": 0.0503, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.8637102169549842, |
|
"grad_norm": 1.9777565002441406, |
|
"learning_rate": 2.3084346943755388e-07, |
|
"loss": 0.0492, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.9114976584153685, |
|
"grad_norm": 0.7806472182273865, |
|
"learning_rate": 9.814752738334654e-08, |
|
"loss": 0.0493, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.9592850998757525, |
|
"grad_norm": 0.42741918563842773, |
|
"learning_rate": 2.1218912376697043e-08, |
|
"loss": 0.051, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9768446160123027, |
|
"eval_f1": 0.9388393674827523, |
|
"eval_loss": 0.05124881863594055, |
|
"eval_precision": 0.9288864055090761, |
|
"eval_recall": 0.9490079303854105, |
|
"eval_runtime": 115.4429, |
|
"eval_samples_per_second": 181.276, |
|
"eval_steps_per_second": 11.33, |
|
"step": 20926 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 20926, |
|
"total_flos": 1.64925784065024e+16, |
|
"train_loss": 0.0881471913733378, |
|
"train_runtime": 3285.9718, |
|
"train_samples_per_second": 101.893, |
|
"train_steps_per_second": 6.368 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9768446160123027, |
|
"eval_f1": 0.9388393674827523, |
|
"eval_loss": 0.05124881863594055, |
|
"eval_precision": 0.9288864055090761, |
|
"eval_recall": 0.9490079303854105, |
|
"eval_runtime": 122.2255, |
|
"eval_samples_per_second": 171.216, |
|
"eval_steps_per_second": 10.702, |
|
"step": 20926 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 20926, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 1.64925784065024e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|