MohamedAhmedAE
committed on
Commit
•
fe43866
1
Parent(s):
fd59336
Training in progress, step 95000, checkpoint
Browse files
- last-checkpoint/adapter_config.json +3 -3
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/tokenizer_config.json +0 -1
- last-checkpoint/trainer_state.json +430 -3
- last-checkpoint/training_args.bin +2 -2
last-checkpoint/adapter_config.json
CHANGED
@@ -20,13 +20,13 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
-
"down_proj",
|
24 |
"up_proj",
|
25 |
-
"q_proj",
|
26 |
"k_proj",
|
27 |
"gate_proj",
|
|
|
|
|
28 |
"v_proj",
|
29 |
-
"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
32 |
"use_dora": false,
|
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
|
|
23 |
"up_proj",
|
|
|
24 |
"k_proj",
|
25 |
"gate_proj",
|
26 |
+
"down_proj",
|
27 |
+
"o_proj",
|
28 |
"v_proj",
|
29 |
+
"q_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
32 |
"use_dora": false,
|
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5544997664
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c8c2bc861c786b2bf9d8ddb8858babedad8fc42c6e26fb00fe13b35096c6de7
|
3 |
size 5544997664
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 674093138
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:36ebe5440e2bcb412e4131df2efca8e8fc88b5200168c85a419cb901604336b6
|
3 |
size 674093138
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0610f4aa7ed2f34398fce8dc77c3d7b14d52dfb0bc17dc7f64e8f6c2438e189b
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17feec1222485652df46ab05d04d0cb1b6896f1f053ea3ae8ca19c7cd689e6b7
|
3 |
size 1064
|
last-checkpoint/tokenizer_config.json
CHANGED
@@ -2072,7 +2072,6 @@
|
|
2072 |
"bos_token": "<|im_start|>",
|
2073 |
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
2074 |
"clean_up_tokenization_spaces": true,
|
2075 |
-
"device_map": "auto",
|
2076 |
"eos_token": "<|im_end|>",
|
2077 |
"max_length": 4096,
|
2078 |
"model_input_names": [
|
|
|
2072 |
"bos_token": "<|im_start|>",
|
2073 |
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
2074 |
"clean_up_tokenization_spaces": true,
|
|
|
2075 |
"eos_token": "<|im_end|>",
|
2076 |
"max_length": 4096,
|
2077 |
"model_input_names": [
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 200,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -6230,6 +6230,433 @@
|
|
6230 |
"learning_rate": 1.9991374234676826e-05,
|
6231 |
"loss": 1.5551,
|
6232 |
"step": 88900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6233 |
}
|
6234 |
],
|
6235 |
"logging_steps": 100,
|
@@ -6249,7 +6676,7 @@
|
|
6249 |
"attributes": {}
|
6250 |
}
|
6251 |
},
|
6252 |
-
"total_flos": 1.
|
6253 |
"train_batch_size": 1,
|
6254 |
"trial_name": null,
|
6255 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.07064982969672631,
|
5 |
"eval_steps": 200,
|
6 |
+
"global_step": 95000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
6230 |
"learning_rate": 1.9991374234676826e-05,
|
6231 |
"loss": 1.5551,
|
6232 |
"step": 88900
|
6233 |
+
},
|
6234 |
+
{
|
6235 |
+
"epoch": 0.06618773518956465,
|
6236 |
+
"grad_norm": 0.6733551621437073,
|
6237 |
+
"learning_rate": 1.999135481992186e-05,
|
6238 |
+
"loss": 1.4334,
|
6239 |
+
"step": 89000
|
6240 |
+
},
|
6241 |
+
{
|
6242 |
+
"epoch": 0.06626210343135068,
|
6243 |
+
"grad_norm": 0.8035016059875488,
|
6244 |
+
"learning_rate": 1.999133538335166e-05,
|
6245 |
+
"loss": 1.4872,
|
6246 |
+
"step": 89100
|
6247 |
+
},
|
6248 |
+
{
|
6249 |
+
"epoch": 0.06633647167313671,
|
6250 |
+
"grad_norm": 0.4339046776294708,
|
6251 |
+
"learning_rate": 1.9991315924966277e-05,
|
6252 |
+
"loss": 1.4869,
|
6253 |
+
"step": 89200
|
6254 |
+
},
|
6255 |
+
{
|
6256 |
+
"epoch": 0.06641083991492273,
|
6257 |
+
"grad_norm": 0.6680594086647034,
|
6258 |
+
"learning_rate": 1.9991296444765747e-05,
|
6259 |
+
"loss": 1.5103,
|
6260 |
+
"step": 89300
|
6261 |
+
},
|
6262 |
+
{
|
6263 |
+
"epoch": 0.06648520815670876,
|
6264 |
+
"grad_norm": 0.697487473487854,
|
6265 |
+
"learning_rate": 1.9991276942750117e-05,
|
6266 |
+
"loss": 1.4239,
|
6267 |
+
"step": 89400
|
6268 |
+
},
|
6269 |
+
{
|
6270 |
+
"epoch": 0.06655957639849479,
|
6271 |
+
"grad_norm": 0.587734043598175,
|
6272 |
+
"learning_rate": 1.9991257418919424e-05,
|
6273 |
+
"loss": 1.5856,
|
6274 |
+
"step": 89500
|
6275 |
+
},
|
6276 |
+
{
|
6277 |
+
"epoch": 0.06663394464028081,
|
6278 |
+
"grad_norm": 0.8574571013450623,
|
6279 |
+
"learning_rate": 1.999123787327372e-05,
|
6280 |
+
"loss": 1.4818,
|
6281 |
+
"step": 89600
|
6282 |
+
},
|
6283 |
+
{
|
6284 |
+
"epoch": 0.06670831288206684,
|
6285 |
+
"grad_norm": 1.0861676931381226,
|
6286 |
+
"learning_rate": 1.9991218305813035e-05,
|
6287 |
+
"loss": 1.4883,
|
6288 |
+
"step": 89700
|
6289 |
+
},
|
6290 |
+
{
|
6291 |
+
"epoch": 0.06678268112385287,
|
6292 |
+
"grad_norm": 1.0139306783676147,
|
6293 |
+
"learning_rate": 1.9991198716537422e-05,
|
6294 |
+
"loss": 1.5099,
|
6295 |
+
"step": 89800
|
6296 |
+
},
|
6297 |
+
{
|
6298 |
+
"epoch": 0.0668570493656389,
|
6299 |
+
"grad_norm": 0.6741511225700378,
|
6300 |
+
"learning_rate": 1.999117910544692e-05,
|
6301 |
+
"loss": 1.4746,
|
6302 |
+
"step": 89900
|
6303 |
+
},
|
6304 |
+
{
|
6305 |
+
"epoch": 0.06693141760742492,
|
6306 |
+
"grad_norm": 0.9702801704406738,
|
6307 |
+
"learning_rate": 1.999115947254157e-05,
|
6308 |
+
"loss": 1.5166,
|
6309 |
+
"step": 90000
|
6310 |
+
},
|
6311 |
+
{
|
6312 |
+
"epoch": 0.06700578584921095,
|
6313 |
+
"grad_norm": 0.7757803797721863,
|
6314 |
+
"learning_rate": 1.9991139817821416e-05,
|
6315 |
+
"loss": 1.5031,
|
6316 |
+
"step": 90100
|
6317 |
+
},
|
6318 |
+
{
|
6319 |
+
"epoch": 0.06708015409099698,
|
6320 |
+
"grad_norm": 0.7200698256492615,
|
6321 |
+
"learning_rate": 1.9991120141286502e-05,
|
6322 |
+
"loss": 1.5834,
|
6323 |
+
"step": 90200
|
6324 |
+
},
|
6325 |
+
{
|
6326 |
+
"epoch": 0.067154522332783,
|
6327 |
+
"grad_norm": 0.7415780425071716,
|
6328 |
+
"learning_rate": 1.999110044293687e-05,
|
6329 |
+
"loss": 1.5689,
|
6330 |
+
"step": 90300
|
6331 |
+
},
|
6332 |
+
{
|
6333 |
+
"epoch": 0.06722889057456903,
|
6334 |
+
"grad_norm": 0.5777677297592163,
|
6335 |
+
"learning_rate": 1.9991080722772564e-05,
|
6336 |
+
"loss": 1.5139,
|
6337 |
+
"step": 90400
|
6338 |
+
},
|
6339 |
+
{
|
6340 |
+
"epoch": 0.06730325881635506,
|
6341 |
+
"grad_norm": 0.6991866827011108,
|
6342 |
+
"learning_rate": 1.999106098079363e-05,
|
6343 |
+
"loss": 1.5073,
|
6344 |
+
"step": 90500
|
6345 |
+
},
|
6346 |
+
{
|
6347 |
+
"epoch": 0.06737762705814108,
|
6348 |
+
"grad_norm": 0.6112390160560608,
|
6349 |
+
"learning_rate": 1.9991041217000105e-05,
|
6350 |
+
"loss": 1.4773,
|
6351 |
+
"step": 90600
|
6352 |
+
},
|
6353 |
+
{
|
6354 |
+
"epoch": 0.06745199529992713,
|
6355 |
+
"grad_norm": 0.8287676572799683,
|
6356 |
+
"learning_rate": 1.9991021431392033e-05,
|
6357 |
+
"loss": 1.5425,
|
6358 |
+
"step": 90700
|
6359 |
+
},
|
6360 |
+
{
|
6361 |
+
"epoch": 0.06752636354171315,
|
6362 |
+
"grad_norm": 0.8582881689071655,
|
6363 |
+
"learning_rate": 1.999100162396946e-05,
|
6364 |
+
"loss": 1.5581,
|
6365 |
+
"step": 90800
|
6366 |
+
},
|
6367 |
+
{
|
6368 |
+
"epoch": 0.06760073178349918,
|
6369 |
+
"grad_norm": 0.5585276484489441,
|
6370 |
+
"learning_rate": 1.999098179473243e-05,
|
6371 |
+
"loss": 1.5015,
|
6372 |
+
"step": 90900
|
6373 |
+
},
|
6374 |
+
{
|
6375 |
+
"epoch": 0.0676751000252852,
|
6376 |
+
"grad_norm": 0.4237435460090637,
|
6377 |
+
"learning_rate": 1.9990961943680984e-05,
|
6378 |
+
"loss": 1.523,
|
6379 |
+
"step": 91000
|
6380 |
+
},
|
6381 |
+
{
|
6382 |
+
"epoch": 0.06774946826707123,
|
6383 |
+
"grad_norm": 0.5455594658851624,
|
6384 |
+
"learning_rate": 1.999094207081517e-05,
|
6385 |
+
"loss": 1.5448,
|
6386 |
+
"step": 91100
|
6387 |
+
},
|
6388 |
+
{
|
6389 |
+
"epoch": 0.06782383650885726,
|
6390 |
+
"grad_norm": 0.48855817317962646,
|
6391 |
+
"learning_rate": 1.999092217613502e-05,
|
6392 |
+
"loss": 1.4535,
|
6393 |
+
"step": 91200
|
6394 |
+
},
|
6395 |
+
{
|
6396 |
+
"epoch": 0.06789820475064329,
|
6397 |
+
"grad_norm": 0.5199916958808899,
|
6398 |
+
"learning_rate": 1.999090225964059e-05,
|
6399 |
+
"loss": 1.4921,
|
6400 |
+
"step": 91300
|
6401 |
+
},
|
6402 |
+
{
|
6403 |
+
"epoch": 0.06797257299242931,
|
6404 |
+
"grad_norm": 0.5790271162986755,
|
6405 |
+
"learning_rate": 1.9990882321331916e-05,
|
6406 |
+
"loss": 1.5773,
|
6407 |
+
"step": 91400
|
6408 |
+
},
|
6409 |
+
{
|
6410 |
+
"epoch": 0.06804694123421534,
|
6411 |
+
"grad_norm": 0.5524342656135559,
|
6412 |
+
"learning_rate": 1.9990862361209043e-05,
|
6413 |
+
"loss": 1.4619,
|
6414 |
+
"step": 91500
|
6415 |
+
},
|
6416 |
+
{
|
6417 |
+
"epoch": 0.06812130947600137,
|
6418 |
+
"grad_norm": 0.7153291702270508,
|
6419 |
+
"learning_rate": 1.999084237927202e-05,
|
6420 |
+
"loss": 1.6042,
|
6421 |
+
"step": 91600
|
6422 |
+
},
|
6423 |
+
{
|
6424 |
+
"epoch": 0.0681956777177874,
|
6425 |
+
"grad_norm": 0.957635223865509,
|
6426 |
+
"learning_rate": 1.9990822375520882e-05,
|
6427 |
+
"loss": 1.538,
|
6428 |
+
"step": 91700
|
6429 |
+
},
|
6430 |
+
{
|
6431 |
+
"epoch": 0.06827004595957342,
|
6432 |
+
"grad_norm": 0.38240477442741394,
|
6433 |
+
"learning_rate": 1.9990802349955678e-05,
|
6434 |
+
"loss": 1.5937,
|
6435 |
+
"step": 91800
|
6436 |
+
},
|
6437 |
+
{
|
6438 |
+
"epoch": 0.06834441420135945,
|
6439 |
+
"grad_norm": 0.8961233496665955,
|
6440 |
+
"learning_rate": 1.999078230257645e-05,
|
6441 |
+
"loss": 1.5119,
|
6442 |
+
"step": 91900
|
6443 |
+
},
|
6444 |
+
{
|
6445 |
+
"epoch": 0.06841878244314548,
|
6446 |
+
"grad_norm": 0.47433900833129883,
|
6447 |
+
"learning_rate": 1.999076223338324e-05,
|
6448 |
+
"loss": 1.5449,
|
6449 |
+
"step": 92000
|
6450 |
+
},
|
6451 |
+
{
|
6452 |
+
"epoch": 0.0684931506849315,
|
6453 |
+
"grad_norm": 0.8222399353981018,
|
6454 |
+
"learning_rate": 1.9990742142376098e-05,
|
6455 |
+
"loss": 1.5334,
|
6456 |
+
"step": 92100
|
6457 |
+
},
|
6458 |
+
{
|
6459 |
+
"epoch": 0.06856751892671753,
|
6460 |
+
"grad_norm": 0.464373916387558,
|
6461 |
+
"learning_rate": 1.999072202955506e-05,
|
6462 |
+
"loss": 1.5003,
|
6463 |
+
"step": 92200
|
6464 |
+
},
|
6465 |
+
{
|
6466 |
+
"epoch": 0.06864188716850356,
|
6467 |
+
"grad_norm": 0.8799763321876526,
|
6468 |
+
"learning_rate": 1.9990701894920176e-05,
|
6469 |
+
"loss": 1.581,
|
6470 |
+
"step": 92300
|
6471 |
+
},
|
6472 |
+
{
|
6473 |
+
"epoch": 0.06871625541028958,
|
6474 |
+
"grad_norm": 0.9567086100578308,
|
6475 |
+
"learning_rate": 1.999068173847149e-05,
|
6476 |
+
"loss": 1.4373,
|
6477 |
+
"step": 92400
|
6478 |
+
},
|
6479 |
+
{
|
6480 |
+
"epoch": 0.06879062365207561,
|
6481 |
+
"grad_norm": 0.440479576587677,
|
6482 |
+
"learning_rate": 1.999066156020904e-05,
|
6483 |
+
"loss": 1.5571,
|
6484 |
+
"step": 92500
|
6485 |
+
},
|
6486 |
+
{
|
6487 |
+
"epoch": 0.06886499189386165,
|
6488 |
+
"grad_norm": 0.7486180663108826,
|
6489 |
+
"learning_rate": 1.9990641360132876e-05,
|
6490 |
+
"loss": 1.4437,
|
6491 |
+
"step": 92600
|
6492 |
+
},
|
6493 |
+
{
|
6494 |
+
"epoch": 0.06893936013564768,
|
6495 |
+
"grad_norm": 0.7576742172241211,
|
6496 |
+
"learning_rate": 1.9990621138243037e-05,
|
6497 |
+
"loss": 1.5306,
|
6498 |
+
"step": 92700
|
6499 |
+
},
|
6500 |
+
{
|
6501 |
+
"epoch": 0.0690137283774337,
|
6502 |
+
"grad_norm": 0.6755186915397644,
|
6503 |
+
"learning_rate": 1.9990600894539574e-05,
|
6504 |
+
"loss": 1.5769,
|
6505 |
+
"step": 92800
|
6506 |
+
},
|
6507 |
+
{
|
6508 |
+
"epoch": 0.06908809661921973,
|
6509 |
+
"grad_norm": 0.6093853712081909,
|
6510 |
+
"learning_rate": 1.9990580629022526e-05,
|
6511 |
+
"loss": 1.5777,
|
6512 |
+
"step": 92900
|
6513 |
+
},
|
6514 |
+
{
|
6515 |
+
"epoch": 0.06916246486100576,
|
6516 |
+
"grad_norm": 0.5788242220878601,
|
6517 |
+
"learning_rate": 1.9990560341691938e-05,
|
6518 |
+
"loss": 1.494,
|
6519 |
+
"step": 93000
|
6520 |
+
},
|
6521 |
+
{
|
6522 |
+
"epoch": 0.06923683310279179,
|
6523 |
+
"grad_norm": 0.828676700592041,
|
6524 |
+
"learning_rate": 1.9990540032547855e-05,
|
6525 |
+
"loss": 1.5651,
|
6526 |
+
"step": 93100
|
6527 |
+
},
|
6528 |
+
{
|
6529 |
+
"epoch": 0.06931120134457781,
|
6530 |
+
"grad_norm": 0.5612863302230835,
|
6531 |
+
"learning_rate": 1.9990519701590322e-05,
|
6532 |
+
"loss": 1.5584,
|
6533 |
+
"step": 93200
|
6534 |
+
},
|
6535 |
+
{
|
6536 |
+
"epoch": 0.06938556958636384,
|
6537 |
+
"grad_norm": 0.965107262134552,
|
6538 |
+
"learning_rate": 1.999049934881938e-05,
|
6539 |
+
"loss": 1.497,
|
6540 |
+
"step": 93300
|
6541 |
+
},
|
6542 |
+
{
|
6543 |
+
"epoch": 0.06945993782814987,
|
6544 |
+
"grad_norm": 0.46939852833747864,
|
6545 |
+
"learning_rate": 1.9990478974235078e-05,
|
6546 |
+
"loss": 1.5716,
|
6547 |
+
"step": 93400
|
6548 |
+
},
|
6549 |
+
{
|
6550 |
+
"epoch": 0.0695343060699359,
|
6551 |
+
"grad_norm": 0.4986964464187622,
|
6552 |
+
"learning_rate": 1.999045857783746e-05,
|
6553 |
+
"loss": 1.5762,
|
6554 |
+
"step": 93500
|
6555 |
+
},
|
6556 |
+
{
|
6557 |
+
"epoch": 0.06960867431172192,
|
6558 |
+
"grad_norm": 0.4267128109931946,
|
6559 |
+
"learning_rate": 1.9990438159626566e-05,
|
6560 |
+
"loss": 1.5101,
|
6561 |
+
"step": 93600
|
6562 |
+
},
|
6563 |
+
{
|
6564 |
+
"epoch": 0.06968304255350795,
|
6565 |
+
"grad_norm": 0.411811888217926,
|
6566 |
+
"learning_rate": 1.9990417719602445e-05,
|
6567 |
+
"loss": 1.5623,
|
6568 |
+
"step": 93700
|
6569 |
+
},
|
6570 |
+
{
|
6571 |
+
"epoch": 0.06975741079529398,
|
6572 |
+
"grad_norm": 0.8761053681373596,
|
6573 |
+
"learning_rate": 1.999039725776514e-05,
|
6574 |
+
"loss": 1.4294,
|
6575 |
+
"step": 93800
|
6576 |
+
},
|
6577 |
+
{
|
6578 |
+
"epoch": 0.06983177903708,
|
6579 |
+
"grad_norm": 0.9531000852584839,
|
6580 |
+
"learning_rate": 1.99903767741147e-05,
|
6581 |
+
"loss": 1.4925,
|
6582 |
+
"step": 93900
|
6583 |
+
},
|
6584 |
+
{
|
6585 |
+
"epoch": 0.06990614727886603,
|
6586 |
+
"grad_norm": 0.516830325126648,
|
6587 |
+
"learning_rate": 1.999035626865116e-05,
|
6588 |
+
"loss": 1.5802,
|
6589 |
+
"step": 94000
|
6590 |
+
},
|
6591 |
+
{
|
6592 |
+
"epoch": 0.06998051552065206,
|
6593 |
+
"grad_norm": 0.47061294317245483,
|
6594 |
+
"learning_rate": 1.9990335741374572e-05,
|
6595 |
+
"loss": 1.5668,
|
6596 |
+
"step": 94100
|
6597 |
+
},
|
6598 |
+
{
|
6599 |
+
"epoch": 0.07005488376243808,
|
6600 |
+
"grad_norm": 0.7790777683258057,
|
6601 |
+
"learning_rate": 1.9990315192284978e-05,
|
6602 |
+
"loss": 1.5568,
|
6603 |
+
"step": 94200
|
6604 |
+
},
|
6605 |
+
{
|
6606 |
+
"epoch": 0.07012925200422411,
|
6607 |
+
"grad_norm": 0.75156170129776,
|
6608 |
+
"learning_rate": 1.9990294621382426e-05,
|
6609 |
+
"loss": 1.5217,
|
6610 |
+
"step": 94300
|
6611 |
+
},
|
6612 |
+
{
|
6613 |
+
"epoch": 0.07020362024601014,
|
6614 |
+
"grad_norm": 1.195028305053711,
|
6615 |
+
"learning_rate": 1.999027402866696e-05,
|
6616 |
+
"loss": 1.5662,
|
6617 |
+
"step": 94400
|
6618 |
+
},
|
6619 |
+
{
|
6620 |
+
"epoch": 0.07027798848779618,
|
6621 |
+
"grad_norm": 0.6215851306915283,
|
6622 |
+
"learning_rate": 1.999025341413862e-05,
|
6623 |
+
"loss": 1.5208,
|
6624 |
+
"step": 94500
|
6625 |
+
},
|
6626 |
+
{
|
6627 |
+
"epoch": 0.0703523567295822,
|
6628 |
+
"grad_norm": 0.509843647480011,
|
6629 |
+
"learning_rate": 1.9990232777797458e-05,
|
6630 |
+
"loss": 1.489,
|
6631 |
+
"step": 94600
|
6632 |
+
},
|
6633 |
+
{
|
6634 |
+
"epoch": 0.07042672497136823,
|
6635 |
+
"grad_norm": 1.2951029539108276,
|
6636 |
+
"learning_rate": 1.9990212119643516e-05,
|
6637 |
+
"loss": 1.4729,
|
6638 |
+
"step": 94700
|
6639 |
+
},
|
6640 |
+
{
|
6641 |
+
"epoch": 0.07050109321315426,
|
6642 |
+
"grad_norm": 0.5028135776519775,
|
6643 |
+
"learning_rate": 1.9990191439676838e-05,
|
6644 |
+
"loss": 1.5579,
|
6645 |
+
"step": 94800
|
6646 |
+
},
|
6647 |
+
{
|
6648 |
+
"epoch": 0.07057546145494029,
|
6649 |
+
"grad_norm": 0.7202877998352051,
|
6650 |
+
"learning_rate": 1.9990170737897473e-05,
|
6651 |
+
"loss": 1.5282,
|
6652 |
+
"step": 94900
|
6653 |
+
},
|
6654 |
+
{
|
6655 |
+
"epoch": 0.07064982969672631,
|
6656 |
+
"grad_norm": 0.9731516242027283,
|
6657 |
+
"learning_rate": 1.9990150014305462e-05,
|
6658 |
+
"loss": 1.5194,
|
6659 |
+
"step": 95000
|
6660 |
}
|
6661 |
],
|
6662 |
"logging_steps": 100,
|
|
|
6676 |
"attributes": {}
|
6677 |
}
|
6678 |
},
|
6679 |
+
"total_flos": 1.2945898144897352e+18,
|
6680 |
"train_batch_size": 1,
|
6681 |
"trial_name": null,
|
6682 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccc6594b62fe53f0b1bfeab5cb36a3d9d52c3d027d521d24a54039f0b55f3bd6
|
3 |
+
size 5560
|