language-perceiver-title2genre / trainer_state.json
pszemraj's picture
End of training
756c99a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.0,
"eval_steps": 500,
"global_step": 496,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"learning_rate": 1.9596774193548388e-05,
"loss": 0.4886,
"step": 10
},
{
"epoch": 0.32,
"learning_rate": 1.9193548387096777e-05,
"loss": 0.3236,
"step": 20
},
{
"epoch": 0.48,
"learning_rate": 1.8790322580645163e-05,
"loss": 0.3137,
"step": 30
},
{
"epoch": 0.65,
"learning_rate": 1.838709677419355e-05,
"loss": 0.3073,
"step": 40
},
{
"epoch": 0.81,
"learning_rate": 1.7983870967741936e-05,
"loss": 0.3068,
"step": 50
},
{
"epoch": 0.97,
"learning_rate": 1.7580645161290325e-05,
"loss": 0.3059,
"step": 60
},
{
"epoch": 1.0,
"eval_f1": 0.32627646326276466,
"eval_loss": 0.2893124222755432,
"eval_runtime": 3.6709,
"eval_samples_per_second": 269.418,
"eval_steps_per_second": 8.445,
"step": 62
},
{
"epoch": 1.13,
"learning_rate": 1.717741935483871e-05,
"loss": 0.3038,
"step": 70
},
{
"epoch": 1.29,
"learning_rate": 1.6774193548387098e-05,
"loss": 0.2959,
"step": 80
},
{
"epoch": 1.45,
"learning_rate": 1.6370967741935487e-05,
"loss": 0.2953,
"step": 90
},
{
"epoch": 1.61,
"learning_rate": 1.596774193548387e-05,
"loss": 0.2908,
"step": 100
},
{
"epoch": 1.77,
"learning_rate": 1.556451612903226e-05,
"loss": 0.2902,
"step": 110
},
{
"epoch": 1.94,
"learning_rate": 1.5161290322580646e-05,
"loss": 0.2879,
"step": 120
},
{
"epoch": 2.0,
"eval_f1": 0.429018492176387,
"eval_loss": 0.2794504165649414,
"eval_runtime": 3.6769,
"eval_samples_per_second": 268.977,
"eval_steps_per_second": 8.431,
"step": 124
},
{
"epoch": 2.1,
"learning_rate": 1.4758064516129033e-05,
"loss": 0.2774,
"step": 130
},
{
"epoch": 2.26,
"learning_rate": 1.4354838709677421e-05,
"loss": 0.2762,
"step": 140
},
{
"epoch": 2.42,
"learning_rate": 1.3951612903225809e-05,
"loss": 0.2811,
"step": 150
},
{
"epoch": 2.58,
"learning_rate": 1.3548387096774194e-05,
"loss": 0.2734,
"step": 160
},
{
"epoch": 2.74,
"learning_rate": 1.3145161290322581e-05,
"loss": 0.279,
"step": 170
},
{
"epoch": 2.9,
"learning_rate": 1.274193548387097e-05,
"loss": 0.2729,
"step": 180
},
{
"epoch": 3.0,
"eval_f1": 0.4356266057664859,
"eval_loss": 0.27300506830215454,
"eval_runtime": 3.6734,
"eval_samples_per_second": 269.233,
"eval_steps_per_second": 8.439,
"step": 186
},
{
"epoch": 3.06,
"learning_rate": 1.2338709677419355e-05,
"loss": 0.2722,
"step": 190
},
{
"epoch": 3.23,
"learning_rate": 1.1935483870967743e-05,
"loss": 0.2605,
"step": 200
},
{
"epoch": 3.39,
"learning_rate": 1.1532258064516131e-05,
"loss": 0.2564,
"step": 210
},
{
"epoch": 3.55,
"learning_rate": 1.1129032258064516e-05,
"loss": 0.264,
"step": 220
},
{
"epoch": 3.71,
"learning_rate": 1.0725806451612903e-05,
"loss": 0.2627,
"step": 230
},
{
"epoch": 3.87,
"learning_rate": 1.0322580645161291e-05,
"loss": 0.2606,
"step": 240
},
{
"epoch": 4.0,
"eval_f1": 0.458980044345898,
"eval_loss": 0.272247850894928,
"eval_runtime": 3.6724,
"eval_samples_per_second": 269.307,
"eval_steps_per_second": 8.441,
"step": 248
},
{
"epoch": 4.03,
"learning_rate": 9.919354838709679e-06,
"loss": 0.2523,
"step": 250
},
{
"epoch": 4.19,
"learning_rate": 9.516129032258065e-06,
"loss": 0.2437,
"step": 260
},
{
"epoch": 4.35,
"learning_rate": 9.112903225806451e-06,
"loss": 0.2451,
"step": 270
},
{
"epoch": 4.52,
"learning_rate": 8.70967741935484e-06,
"loss": 0.2514,
"step": 280
},
{
"epoch": 4.68,
"learning_rate": 8.306451612903227e-06,
"loss": 0.2439,
"step": 290
},
{
"epoch": 4.84,
"learning_rate": 7.903225806451613e-06,
"loss": 0.2378,
"step": 300
},
{
"epoch": 5.0,
"learning_rate": 7.500000000000001e-06,
"loss": 0.2433,
"step": 310
},
{
"epoch": 5.0,
"eval_f1": 0.4775401069518716,
"eval_loss": 0.2747083902359009,
"eval_runtime": 3.674,
"eval_samples_per_second": 269.186,
"eval_steps_per_second": 8.438,
"step": 310
},
{
"epoch": 5.16,
"learning_rate": 7.096774193548388e-06,
"loss": 0.2302,
"step": 320
},
{
"epoch": 5.32,
"learning_rate": 6.693548387096774e-06,
"loss": 0.2292,
"step": 330
},
{
"epoch": 5.48,
"learning_rate": 6.290322580645162e-06,
"loss": 0.223,
"step": 340
},
{
"epoch": 5.65,
"learning_rate": 5.887096774193549e-06,
"loss": 0.2281,
"step": 350
},
{
"epoch": 5.81,
"learning_rate": 5.483870967741935e-06,
"loss": 0.2301,
"step": 360
},
{
"epoch": 5.97,
"learning_rate": 5.080645161290323e-06,
"loss": 0.227,
"step": 370
},
{
"epoch": 6.0,
"eval_f1": 0.49764027267960154,
"eval_loss": 0.2776886522769928,
"eval_runtime": 3.6732,
"eval_samples_per_second": 269.25,
"eval_steps_per_second": 8.44,
"step": 372
},
{
"epoch": 6.13,
"learning_rate": 4.67741935483871e-06,
"loss": 0.2188,
"step": 380
},
{
"epoch": 6.29,
"learning_rate": 4.274193548387097e-06,
"loss": 0.2195,
"step": 390
},
{
"epoch": 6.45,
"learning_rate": 3.870967741935484e-06,
"loss": 0.2123,
"step": 400
},
{
"epoch": 6.61,
"learning_rate": 3.4677419354838714e-06,
"loss": 0.2121,
"step": 410
},
{
"epoch": 6.77,
"learning_rate": 3.0645161290322584e-06,
"loss": 0.2136,
"step": 420
},
{
"epoch": 6.94,
"learning_rate": 2.6612903225806454e-06,
"loss": 0.207,
"step": 430
},
{
"epoch": 7.0,
"eval_f1": 0.5087719298245615,
"eval_loss": 0.28140273690223694,
"eval_runtime": 3.6742,
"eval_samples_per_second": 269.173,
"eval_steps_per_second": 8.437,
"step": 434
},
{
"epoch": 7.1,
"learning_rate": 2.2580645161290324e-06,
"loss": 0.2085,
"step": 440
},
{
"epoch": 7.26,
"learning_rate": 1.8548387096774196e-06,
"loss": 0.2071,
"step": 450
},
{
"epoch": 7.42,
"learning_rate": 1.4516129032258066e-06,
"loss": 0.2027,
"step": 460
},
{
"epoch": 7.58,
"learning_rate": 1.0483870967741936e-06,
"loss": 0.2017,
"step": 470
},
{
"epoch": 7.74,
"learning_rate": 6.451612903225807e-07,
"loss": 0.2017,
"step": 480
},
{
"epoch": 7.9,
"learning_rate": 2.4193548387096775e-07,
"loss": 0.1969,
"step": 490
},
{
"epoch": 8.0,
"eval_f1": 0.5107802874743327,
"eval_loss": 0.283179372549057,
"eval_runtime": 3.6742,
"eval_samples_per_second": 269.174,
"eval_steps_per_second": 8.437,
"step": 496
},
{
"epoch": 8.0,
"step": 496,
"total_flos": 0.0,
"train_loss": 0.2570930659290283,
"train_runtime": 822.741,
"train_samples_per_second": 76.953,
"train_steps_per_second": 0.603
}
],
"logging_steps": 10,
"max_steps": 496,
"num_train_epochs": 8,
"save_steps": 500,
"total_flos": 0.0,
"trial_name": null,
"trial_params": null
}