FinQwen2-0.5B / trainer_state.json
DavidNguyen's picture
Upload 13 files
edbad27 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.7748934521503293,
"eval_steps": 500,
"global_step": 6000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012914890869172156,
"grad_norm": 0.9891569018363953,
"learning_rate": 1.974170218261656e-05,
"loss": 1.6595,
"step": 100
},
{
"epoch": 0.025829781738344312,
"grad_norm": 0.24901358783245087,
"learning_rate": 1.9483404365233117e-05,
"loss": 1.4216,
"step": 200
},
{
"epoch": 0.03874467260751647,
"grad_norm": 1.108109951019287,
"learning_rate": 1.922510654784967e-05,
"loss": 1.383,
"step": 300
},
{
"epoch": 0.051659563476688625,
"grad_norm": 0.7391151785850525,
"learning_rate": 1.896680873046623e-05,
"loss": 1.408,
"step": 400
},
{
"epoch": 0.06457445434586077,
"grad_norm": 0.9414256811141968,
"learning_rate": 1.8708510913082787e-05,
"loss": 1.3439,
"step": 500
},
{
"epoch": 0.07748934521503294,
"grad_norm": 0.7522476315498352,
"learning_rate": 1.845021309569934e-05,
"loss": 1.272,
"step": 600
},
{
"epoch": 0.09040423608420509,
"grad_norm": 0.32411837577819824,
"learning_rate": 1.81919152783159e-05,
"loss": 1.16,
"step": 700
},
{
"epoch": 0.10331912695337725,
"grad_norm": 0.8460651636123657,
"learning_rate": 1.7933617460932457e-05,
"loss": 1.1753,
"step": 800
},
{
"epoch": 0.1162340178225494,
"grad_norm": 1.0593210458755493,
"learning_rate": 1.7675319643549015e-05,
"loss": 1.2417,
"step": 900
},
{
"epoch": 0.12914890869172155,
"grad_norm": 1.2058868408203125,
"learning_rate": 1.741702182616557e-05,
"loss": 1.381,
"step": 1000
},
{
"epoch": 0.1420637995608937,
"grad_norm": 0.36752256751060486,
"learning_rate": 1.7158724008782127e-05,
"loss": 1.2031,
"step": 1100
},
{
"epoch": 0.15497869043006587,
"grad_norm": 0.3617095351219177,
"learning_rate": 1.6900426191398685e-05,
"loss": 1.2186,
"step": 1200
},
{
"epoch": 0.167893581299238,
"grad_norm": 1.2740339040756226,
"learning_rate": 1.664212837401524e-05,
"loss": 1.2354,
"step": 1300
},
{
"epoch": 0.18080847216841017,
"grad_norm": 1.0895512104034424,
"learning_rate": 1.6383830556631797e-05,
"loss": 1.1308,
"step": 1400
},
{
"epoch": 0.19372336303758234,
"grad_norm": 0.28641125559806824,
"learning_rate": 1.6125532739248355e-05,
"loss": 1.1459,
"step": 1500
},
{
"epoch": 0.2066382539067545,
"grad_norm": 0.7633489966392517,
"learning_rate": 1.5867234921864912e-05,
"loss": 1.083,
"step": 1600
},
{
"epoch": 0.21955314477592663,
"grad_norm": 0.9854117035865784,
"learning_rate": 1.5608937104481467e-05,
"loss": 1.1526,
"step": 1700
},
{
"epoch": 0.2324680356450988,
"grad_norm": 1.0833749771118164,
"learning_rate": 1.5350639287098025e-05,
"loss": 1.0866,
"step": 1800
},
{
"epoch": 0.24538292651427096,
"grad_norm": 1.2186298370361328,
"learning_rate": 1.5092341469714582e-05,
"loss": 1.0898,
"step": 1900
},
{
"epoch": 0.2582978173834431,
"grad_norm": 0.976441502571106,
"learning_rate": 1.4834043652331138e-05,
"loss": 1.2018,
"step": 2000
},
{
"epoch": 0.2712127082526153,
"grad_norm": 1.0837169885635376,
"learning_rate": 1.4575745834947696e-05,
"loss": 1.1264,
"step": 2100
},
{
"epoch": 0.2841275991217874,
"grad_norm": 0.8157379031181335,
"learning_rate": 1.4317448017564252e-05,
"loss": 1.2216,
"step": 2200
},
{
"epoch": 0.29704248999095956,
"grad_norm": 0.4470981955528259,
"learning_rate": 1.4059150200180808e-05,
"loss": 1.1543,
"step": 2300
},
{
"epoch": 0.30995738086013175,
"grad_norm": 0.9872229695320129,
"learning_rate": 1.3800852382797368e-05,
"loss": 1.1293,
"step": 2400
},
{
"epoch": 0.3228722717293039,
"grad_norm": 1.2851194143295288,
"learning_rate": 1.3542554565413924e-05,
"loss": 1.1619,
"step": 2500
},
{
"epoch": 0.335787162598476,
"grad_norm": 1.0529409646987915,
"learning_rate": 1.328425674803048e-05,
"loss": 1.0468,
"step": 2600
},
{
"epoch": 0.3487020534676482,
"grad_norm": 1.2335134744644165,
"learning_rate": 1.3025958930647038e-05,
"loss": 1.1292,
"step": 2700
},
{
"epoch": 0.36161694433682035,
"grad_norm": 1.165204405784607,
"learning_rate": 1.2767661113263594e-05,
"loss": 1.0324,
"step": 2800
},
{
"epoch": 0.37453183520599254,
"grad_norm": 0.4545508623123169,
"learning_rate": 1.250936329588015e-05,
"loss": 1.0776,
"step": 2900
},
{
"epoch": 0.38744672607516467,
"grad_norm": 1.1217703819274902,
"learning_rate": 1.225106547849671e-05,
"loss": 1.0673,
"step": 3000
},
{
"epoch": 0.4003616169443368,
"grad_norm": 1.204528570175171,
"learning_rate": 1.1992767661113265e-05,
"loss": 1.1664,
"step": 3100
},
{
"epoch": 0.413276507813509,
"grad_norm": 1.8890794515609741,
"learning_rate": 1.1734469843729821e-05,
"loss": 1.0947,
"step": 3200
},
{
"epoch": 0.42619139868268113,
"grad_norm": 1.4399609565734863,
"learning_rate": 1.147617202634638e-05,
"loss": 1.115,
"step": 3300
},
{
"epoch": 0.43910628955185327,
"grad_norm": 0.8998225331306458,
"learning_rate": 1.1217874208962935e-05,
"loss": 1.0569,
"step": 3400
},
{
"epoch": 0.45202118042102546,
"grad_norm": 1.2747713327407837,
"learning_rate": 1.0959576391579491e-05,
"loss": 1.0758,
"step": 3500
},
{
"epoch": 0.4649360712901976,
"grad_norm": 1.2398267984390259,
"learning_rate": 1.0701278574196047e-05,
"loss": 1.0722,
"step": 3600
},
{
"epoch": 0.47785096215936973,
"grad_norm": 0.7766038179397583,
"learning_rate": 1.0442980756812607e-05,
"loss": 1.1556,
"step": 3700
},
{
"epoch": 0.4907658530285419,
"grad_norm": 1.2244646549224854,
"learning_rate": 1.0184682939429163e-05,
"loss": 1.1066,
"step": 3800
},
{
"epoch": 0.5036807438977141,
"grad_norm": 1.1265727281570435,
"learning_rate": 9.926385122045719e-06,
"loss": 1.0832,
"step": 3900
},
{
"epoch": 0.5165956347668862,
"grad_norm": 1.4965732097625732,
"learning_rate": 9.668087304662275e-06,
"loss": 1.1945,
"step": 4000
},
{
"epoch": 0.5295105256360584,
"grad_norm": 1.170291543006897,
"learning_rate": 9.409789487278833e-06,
"loss": 1.079,
"step": 4100
},
{
"epoch": 0.5424254165052306,
"grad_norm": 0.47167250514030457,
"learning_rate": 9.15149166989539e-06,
"loss": 1.0811,
"step": 4200
},
{
"epoch": 0.5553403073744027,
"grad_norm": 1.5530108213424683,
"learning_rate": 8.893193852511947e-06,
"loss": 1.0456,
"step": 4300
},
{
"epoch": 0.5682551982435748,
"grad_norm": 1.426579475402832,
"learning_rate": 8.634896035128504e-06,
"loss": 1.0797,
"step": 4400
},
{
"epoch": 0.581170089112747,
"grad_norm": 1.656485676765442,
"learning_rate": 8.37659821774506e-06,
"loss": 1.1384,
"step": 4500
},
{
"epoch": 0.5940849799819191,
"grad_norm": 1.8213322162628174,
"learning_rate": 8.118300400361617e-06,
"loss": 1.0484,
"step": 4600
},
{
"epoch": 0.6069998708510913,
"grad_norm": 0.9146257042884827,
"learning_rate": 7.860002582978174e-06,
"loss": 1.0798,
"step": 4700
},
{
"epoch": 0.6199147617202635,
"grad_norm": 0.8420510292053223,
"learning_rate": 7.601704765594732e-06,
"loss": 1.1371,
"step": 4800
},
{
"epoch": 0.6328296525894356,
"grad_norm": 1.728549599647522,
"learning_rate": 7.343406948211288e-06,
"loss": 1.1104,
"step": 4900
},
{
"epoch": 0.6457445434586078,
"grad_norm": 0.6792052388191223,
"learning_rate": 7.085109130827845e-06,
"loss": 1.1497,
"step": 5000
},
{
"epoch": 0.65865943432778,
"grad_norm": 1.167297124862671,
"learning_rate": 6.826811313444401e-06,
"loss": 1.1146,
"step": 5100
},
{
"epoch": 0.671574325196952,
"grad_norm": 0.5329355001449585,
"learning_rate": 6.568513496060959e-06,
"loss": 1.0569,
"step": 5200
},
{
"epoch": 0.6844892160661242,
"grad_norm": 1.3186862468719482,
"learning_rate": 6.310215678677516e-06,
"loss": 1.023,
"step": 5300
},
{
"epoch": 0.6974041069352964,
"grad_norm": 1.4624109268188477,
"learning_rate": 6.051917861294072e-06,
"loss": 1.1174,
"step": 5400
},
{
"epoch": 0.7103189978044685,
"grad_norm": 1.1120996475219727,
"learning_rate": 5.79362004391063e-06,
"loss": 1.048,
"step": 5500
},
{
"epoch": 0.7232338886736407,
"grad_norm": 1.1070384979248047,
"learning_rate": 5.535322226527187e-06,
"loss": 1.1,
"step": 5600
},
{
"epoch": 0.7361487795428129,
"grad_norm": 0.46000921726226807,
"learning_rate": 5.277024409143743e-06,
"loss": 1.0562,
"step": 5700
},
{
"epoch": 0.7490636704119851,
"grad_norm": 1.4388511180877686,
"learning_rate": 5.0187265917603005e-06,
"loss": 1.0263,
"step": 5800
},
{
"epoch": 0.7619785612811572,
"grad_norm": 1.0894064903259277,
"learning_rate": 4.7604287743768566e-06,
"loss": 1.0688,
"step": 5900
},
{
"epoch": 0.7748934521503293,
"grad_norm": 0.36538398265838623,
"learning_rate": 4.5021309569934135e-06,
"loss": 1.0626,
"step": 6000
}
],
"logging_steps": 100,
"max_steps": 7743,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 9.988164550656e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}