{ "best_metric": 0.05124881863594055, "best_model_checkpoint": "my_Pytorch_pii_model/checkpoint-20926", "epoch": 2.0, "eval_steps": 500, "global_step": 20926, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04778744146038421, "grad_norm": 1.7837579250335693, "learning_rate": 1.997195242749148e-05, "loss": 0.8081, "step": 500 }, { "epoch": 0.09557488292076842, "grad_norm": 2.2397186756134033, "learning_rate": 1.9887742837017725e-05, "loss": 0.2477, "step": 1000 }, { "epoch": 0.14336232438115262, "grad_norm": 1.895089030265808, "learning_rate": 1.9747845385097936e-05, "loss": 0.1609, "step": 1500 }, { "epoch": 0.19114976584153684, "grad_norm": 1.4316082000732422, "learning_rate": 1.9553047975508295e-05, "loss": 0.1269, "step": 2000 }, { "epoch": 0.23893720730192106, "grad_norm": 0.5299736857414246, "learning_rate": 1.9304447709107316e-05, "loss": 0.1052, "step": 2500 }, { "epoch": 0.28672464876230525, "grad_norm": 1.0913931131362915, "learning_rate": 1.9003444704953408e-05, "loss": 0.0967, "step": 3000 }, { "epoch": 0.33451209022268946, "grad_norm": 1.8749414682388306, "learning_rate": 1.865248697176191e-05, "loss": 0.0889, "step": 3500 }, { "epoch": 0.3822995316830737, "grad_norm": 2.014411211013794, "learning_rate": 1.825214512447262e-05, "loss": 0.0843, "step": 4000 }, { "epoch": 0.4300869731434579, "grad_norm": 0.6492653489112854, "learning_rate": 1.780532711756845e-05, "loss": 0.0812, "step": 4500 }, { "epoch": 0.4778744146038421, "grad_norm": 1.5026785135269165, "learning_rate": 1.7314549434302465e-05, "loss": 0.0779, "step": 5000 }, { "epoch": 0.5256618560642263, "grad_norm": 0.5166222453117371, "learning_rate": 1.6782576139241983e-05, "loss": 0.0719, "step": 5500 }, { "epoch": 0.5734492975246105, "grad_norm": 0.9983482956886292, "learning_rate": 1.62124033110307e-05, "loss": 0.0746, "step": 6000 }, { "epoch": 0.6212367389849948, "grad_norm": 0.8525393605232239, "learning_rate": 1.5608485174626132e-05, "loss": 0.0713, "step": 6500 }, { "epoch": 0.6690241804453789, "grad_norm": 1.3193893432617188, "learning_rate": 1.4971803628070286e-05, "loss": 0.067, "step": 7000 }, { "epoch": 0.7168116219057632, "grad_norm": 1.7481768131256104, "learning_rate": 1.4307120836322233e-05, "loss": 0.0677, "step": 7500 }, { "epoch": 0.7645990633661474, "grad_norm": 0.7293727993965149, "learning_rate": 1.3618180299169794e-05, "loss": 0.0649, "step": 8000 }, { "epoch": 0.8123865048265316, "grad_norm": 0.4748859405517578, "learning_rate": 1.2908862136252995e-05, "loss": 0.0675, "step": 8500 }, { "epoch": 0.8601739462869158, "grad_norm": 0.9915094971656799, "learning_rate": 1.218462628244987e-05, "loss": 0.0648, "step": 9000 }, { "epoch": 0.9079613877473001, "grad_norm": 2.1113200187683105, "learning_rate": 1.1446650257897558e-05, "loss": 0.0638, "step": 9500 }, { "epoch": 0.9557488292076842, "grad_norm": 0.7165895700454712, "learning_rate": 1.0700526685370893e-05, "loss": 0.0615, "step": 10000 }, { "epoch": 1.0, "eval_accuracy": 0.9751704843504109, "eval_f1": 0.9315204370483973, "eval_loss": 0.05543896183371544, "eval_precision": 0.9214454994188416, "eval_recall": 0.9418181257135804, "eval_runtime": 190.6513, "eval_samples_per_second": 109.766, "eval_steps_per_second": 6.861, "step": 10463 }, { "epoch": 1.0035362706680684, "grad_norm": 0.7908827066421509, "learning_rate": 9.950457739907535e-06, "loss": 0.0591, "step": 10500 }, { "epoch": 1.0513237121284527, "grad_norm": 0.8281264901161194, "learning_rate": 9.200667816922936e-06, "loss": 0.0559, "step": 11000 }, { "epoch": 1.099111153588837, "grad_norm": 0.6375882625579834, "learning_rate": 8.456863027173128e-06, "loss": 0.0528, "step": 11500 }, { "epoch": 1.146898595049221, "grad_norm": 0.8089446425437927, "learning_rate": 7.720252707524596e-06, "loss": 0.0561, "step": 12000 }, { "epoch": 1.1946860365096053, "grad_norm": 0.5335150957107544, "learning_rate": 6.9964819462513174e-06, "loss": 0.0563, "step": 12500 }, { "epoch": 1.2424734779699895, "grad_norm": 0.7552749514579773, "learning_rate": 6.289627027145937e-06, "loss": 0.053, "step": 13000 }, { "epoch": 1.2902609194303736, "grad_norm": 1.121811866760254, "learning_rate": 5.603668963817577e-06, "loss": 0.0537, "step": 13500 }, { "epoch": 1.3380483608907578, "grad_norm": 1.5103745460510254, "learning_rate": 4.942471078585398e-06, "loss": 0.0529, "step": 14000 }, { "epoch": 1.3858358023511421, "grad_norm": 1.125802755355835, "learning_rate": 4.309757244210676e-06, "loss": 0.0558, "step": 14500 }, { "epoch": 1.4336232438115264, "grad_norm": 2.0246198177337646, "learning_rate": 3.709090911010067e-06, "loss": 0.051, "step": 15000 }, { "epoch": 1.4814106852719107, "grad_norm": 0.31454652547836304, "learning_rate": 3.1449479986513345e-06, "loss": 0.0503, "step": 15500 }, { "epoch": 1.5291981267322947, "grad_norm": 1.1729323863983154, "learning_rate": 2.618245735460083e-06, "loss": 0.0496, "step": 16000 }, { "epoch": 1.576985568192679, "grad_norm": 1.942895531654358, "learning_rate": 2.1331175823046777e-06, "loss": 0.0508, "step": 16500 }, { "epoch": 1.624773009653063, "grad_norm": 0.7527912259101868, "learning_rate": 1.692295785545267e-06, "loss": 0.0474, "step": 17000 }, { "epoch": 1.6725604511134473, "grad_norm": 0.9754999279975891, "learning_rate": 1.299002907484831e-06, "loss": 0.0505, "step": 17500 }, { "epoch": 1.7203478925738316, "grad_norm": 1.4431662559509277, "learning_rate": 9.538783949020436e-07, "loss": 0.0507, "step": 18000 }, { "epoch": 1.7681353340342159, "grad_norm": 0.9095632433891296, "learning_rate": 6.602380824095455e-07, "loss": 0.0484, "step": 18500 }, { "epoch": 1.8159227754946001, "grad_norm": 0.47108370065689087, "learning_rate": 4.185593937350141e-07, "loss": 0.0503, "step": 19000 }, { "epoch": 1.8637102169549842, "grad_norm": 1.9777565002441406, "learning_rate": 2.3084346943755388e-07, "loss": 0.0492, "step": 19500 }, { "epoch": 1.9114976584153685, "grad_norm": 0.7806472182273865, "learning_rate": 9.814752738334654e-08, "loss": 0.0493, "step": 20000 }, { "epoch": 1.9592850998757525, "grad_norm": 0.42741918563842773, "learning_rate": 2.1218912376697043e-08, "loss": 0.051, "step": 20500 }, { "epoch": 2.0, "eval_accuracy": 0.9768446160123027, "eval_f1": 0.9388393674827523, "eval_loss": 0.05124881863594055, "eval_precision": 0.9288864055090761, "eval_recall": 0.9490079303854105, "eval_runtime": 115.4429, "eval_samples_per_second": 181.276, "eval_steps_per_second": 11.33, "step": 20926 }, { "epoch": 2.0, "step": 20926, "total_flos": 1.64925784065024e+16, "train_loss": 0.0881471913733378, "train_runtime": 3285.9718, "train_samples_per_second": 101.893, "train_steps_per_second": 6.368 }, { "epoch": 2.0, "eval_accuracy": 0.9768446160123027, "eval_f1": 0.9388393674827523, "eval_loss": 0.05124881863594055, "eval_precision": 0.9288864055090761, "eval_recall": 0.9490079303854105, "eval_runtime": 122.2255, "eval_samples_per_second": 171.216, "eval_steps_per_second": 10.702, "step": 20926 } ], "logging_steps": 500, "max_steps": 20926, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 1.64925784065024e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }