|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.7748934521503293, |
|
"eval_steps": 500, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012914890869172156, |
|
"grad_norm": 0.9891569018363953, |
|
"learning_rate": 1.974170218261656e-05, |
|
"loss": 1.6595, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.025829781738344312, |
|
"grad_norm": 0.24901358783245087, |
|
"learning_rate": 1.9483404365233117e-05, |
|
"loss": 1.4216, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03874467260751647, |
|
"grad_norm": 1.108109951019287, |
|
"learning_rate": 1.922510654784967e-05, |
|
"loss": 1.383, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.051659563476688625, |
|
"grad_norm": 0.7391151785850525, |
|
"learning_rate": 1.896680873046623e-05, |
|
"loss": 1.408, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.06457445434586077, |
|
"grad_norm": 0.9414256811141968, |
|
"learning_rate": 1.8708510913082787e-05, |
|
"loss": 1.3439, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07748934521503294, |
|
"grad_norm": 0.7522476315498352, |
|
"learning_rate": 1.845021309569934e-05, |
|
"loss": 1.272, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.09040423608420509, |
|
"grad_norm": 0.32411837577819824, |
|
"learning_rate": 1.81919152783159e-05, |
|
"loss": 1.16, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.10331912695337725, |
|
"grad_norm": 0.8460651636123657, |
|
"learning_rate": 1.7933617460932457e-05, |
|
"loss": 1.1753, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.1162340178225494, |
|
"grad_norm": 1.0593210458755493, |
|
"learning_rate": 1.7675319643549015e-05, |
|
"loss": 1.2417, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.12914890869172155, |
|
"grad_norm": 1.2058868408203125, |
|
"learning_rate": 1.741702182616557e-05, |
|
"loss": 1.381, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.1420637995608937, |
|
"grad_norm": 0.36752256751060486, |
|
"learning_rate": 1.7158724008782127e-05, |
|
"loss": 1.2031, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.15497869043006587, |
|
"grad_norm": 0.3617095351219177, |
|
"learning_rate": 1.6900426191398685e-05, |
|
"loss": 1.2186, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.167893581299238, |
|
"grad_norm": 1.2740339040756226, |
|
"learning_rate": 1.664212837401524e-05, |
|
"loss": 1.2354, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.18080847216841017, |
|
"grad_norm": 1.0895512104034424, |
|
"learning_rate": 1.6383830556631797e-05, |
|
"loss": 1.1308, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.19372336303758234, |
|
"grad_norm": 0.28641125559806824, |
|
"learning_rate": 1.6125532739248355e-05, |
|
"loss": 1.1459, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.2066382539067545, |
|
"grad_norm": 0.7633489966392517, |
|
"learning_rate": 1.5867234921864912e-05, |
|
"loss": 1.083, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.21955314477592663, |
|
"grad_norm": 0.9854117035865784, |
|
"learning_rate": 1.5608937104481467e-05, |
|
"loss": 1.1526, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.2324680356450988, |
|
"grad_norm": 1.0833749771118164, |
|
"learning_rate": 1.5350639287098025e-05, |
|
"loss": 1.0866, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.24538292651427096, |
|
"grad_norm": 1.2186298370361328, |
|
"learning_rate": 1.5092341469714582e-05, |
|
"loss": 1.0898, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.2582978173834431, |
|
"grad_norm": 0.976441502571106, |
|
"learning_rate": 1.4834043652331138e-05, |
|
"loss": 1.2018, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.2712127082526153, |
|
"grad_norm": 1.0837169885635376, |
|
"learning_rate": 1.4575745834947696e-05, |
|
"loss": 1.1264, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.2841275991217874, |
|
"grad_norm": 0.8157379031181335, |
|
"learning_rate": 1.4317448017564252e-05, |
|
"loss": 1.2216, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.29704248999095956, |
|
"grad_norm": 0.4470981955528259, |
|
"learning_rate": 1.4059150200180808e-05, |
|
"loss": 1.1543, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.30995738086013175, |
|
"grad_norm": 0.9872229695320129, |
|
"learning_rate": 1.3800852382797368e-05, |
|
"loss": 1.1293, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3228722717293039, |
|
"grad_norm": 1.2851194143295288, |
|
"learning_rate": 1.3542554565413924e-05, |
|
"loss": 1.1619, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.335787162598476, |
|
"grad_norm": 1.0529409646987915, |
|
"learning_rate": 1.328425674803048e-05, |
|
"loss": 1.0468, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.3487020534676482, |
|
"grad_norm": 1.2335134744644165, |
|
"learning_rate": 1.3025958930647038e-05, |
|
"loss": 1.1292, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.36161694433682035, |
|
"grad_norm": 1.165204405784607, |
|
"learning_rate": 1.2767661113263594e-05, |
|
"loss": 1.0324, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.37453183520599254, |
|
"grad_norm": 0.4545508623123169, |
|
"learning_rate": 1.250936329588015e-05, |
|
"loss": 1.0776, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.38744672607516467, |
|
"grad_norm": 1.1217703819274902, |
|
"learning_rate": 1.225106547849671e-05, |
|
"loss": 1.0673, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.4003616169443368, |
|
"grad_norm": 1.204528570175171, |
|
"learning_rate": 1.1992767661113265e-05, |
|
"loss": 1.1664, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.413276507813509, |
|
"grad_norm": 1.8890794515609741, |
|
"learning_rate": 1.1734469843729821e-05, |
|
"loss": 1.0947, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.42619139868268113, |
|
"grad_norm": 1.4399609565734863, |
|
"learning_rate": 1.147617202634638e-05, |
|
"loss": 1.115, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.43910628955185327, |
|
"grad_norm": 0.8998225331306458, |
|
"learning_rate": 1.1217874208962935e-05, |
|
"loss": 1.0569, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.45202118042102546, |
|
"grad_norm": 1.2747713327407837, |
|
"learning_rate": 1.0959576391579491e-05, |
|
"loss": 1.0758, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.4649360712901976, |
|
"grad_norm": 1.2398267984390259, |
|
"learning_rate": 1.0701278574196047e-05, |
|
"loss": 1.0722, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.47785096215936973, |
|
"grad_norm": 0.7766038179397583, |
|
"learning_rate": 1.0442980756812607e-05, |
|
"loss": 1.1556, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.4907658530285419, |
|
"grad_norm": 1.2244646549224854, |
|
"learning_rate": 1.0184682939429163e-05, |
|
"loss": 1.1066, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.5036807438977141, |
|
"grad_norm": 1.1265727281570435, |
|
"learning_rate": 9.926385122045719e-06, |
|
"loss": 1.0832, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.5165956347668862, |
|
"grad_norm": 1.4965732097625732, |
|
"learning_rate": 9.668087304662275e-06, |
|
"loss": 1.1945, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.5295105256360584, |
|
"grad_norm": 1.170291543006897, |
|
"learning_rate": 9.409789487278833e-06, |
|
"loss": 1.079, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.5424254165052306, |
|
"grad_norm": 0.47167250514030457, |
|
"learning_rate": 9.15149166989539e-06, |
|
"loss": 1.0811, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.5553403073744027, |
|
"grad_norm": 1.5530108213424683, |
|
"learning_rate": 8.893193852511947e-06, |
|
"loss": 1.0456, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.5682551982435748, |
|
"grad_norm": 1.426579475402832, |
|
"learning_rate": 8.634896035128504e-06, |
|
"loss": 1.0797, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.581170089112747, |
|
"grad_norm": 1.656485676765442, |
|
"learning_rate": 8.37659821774506e-06, |
|
"loss": 1.1384, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5940849799819191, |
|
"grad_norm": 1.8213322162628174, |
|
"learning_rate": 8.118300400361617e-06, |
|
"loss": 1.0484, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.6069998708510913, |
|
"grad_norm": 0.9146257042884827, |
|
"learning_rate": 7.860002582978174e-06, |
|
"loss": 1.0798, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.6199147617202635, |
|
"grad_norm": 0.8420510292053223, |
|
"learning_rate": 7.601704765594732e-06, |
|
"loss": 1.1371, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.6328296525894356, |
|
"grad_norm": 1.728549599647522, |
|
"learning_rate": 7.343406948211288e-06, |
|
"loss": 1.1104, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.6457445434586078, |
|
"grad_norm": 0.6792052388191223, |
|
"learning_rate": 7.085109130827845e-06, |
|
"loss": 1.1497, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.65865943432778, |
|
"grad_norm": 1.167297124862671, |
|
"learning_rate": 6.826811313444401e-06, |
|
"loss": 1.1146, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.671574325196952, |
|
"grad_norm": 0.5329355001449585, |
|
"learning_rate": 6.568513496060959e-06, |
|
"loss": 1.0569, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.6844892160661242, |
|
"grad_norm": 1.3186862468719482, |
|
"learning_rate": 6.310215678677516e-06, |
|
"loss": 1.023, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.6974041069352964, |
|
"grad_norm": 1.4624109268188477, |
|
"learning_rate": 6.051917861294072e-06, |
|
"loss": 1.1174, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.7103189978044685, |
|
"grad_norm": 1.1120996475219727, |
|
"learning_rate": 5.79362004391063e-06, |
|
"loss": 1.048, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.7232338886736407, |
|
"grad_norm": 1.1070384979248047, |
|
"learning_rate": 5.535322226527187e-06, |
|
"loss": 1.1, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.7361487795428129, |
|
"grad_norm": 0.46000921726226807, |
|
"learning_rate": 5.277024409143743e-06, |
|
"loss": 1.0562, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.7490636704119851, |
|
"grad_norm": 1.4388511180877686, |
|
"learning_rate": 5.0187265917603005e-06, |
|
"loss": 1.0263, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.7619785612811572, |
|
"grad_norm": 1.0894064903259277, |
|
"learning_rate": 4.7604287743768566e-06, |
|
"loss": 1.0688, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.7748934521503293, |
|
"grad_norm": 0.36538398265838623, |
|
"learning_rate": 4.5021309569934135e-06, |
|
"loss": 1.0626, |
|
"step": 6000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 7743, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 9.988164550656e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|