Training in progress, step 176500

Browse files

Files changed (7) hide show

adapter_model.safetensors +1 -1
last-checkpoint/adapter_config.json +3 -3
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +3 -283

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bda92cbabe2b4ff0c8f4a3151d1b0b35846b1a3a5142626e4d9aba5a3bf59c49
 size 5544997664

 version https://git-lfs.github.com/spec/v1
+oid sha256:4829c8d97387ccb4c56f85fac2fec1891c54be6e06f2c40b8e1a50a7553e2513
 size 5544997664

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -22,11 +22,11 @@
   "target_modules": [
     "gate_proj",
     "up_proj",
     "k_proj",
     "o_proj",
-    "q_proj",
-    "v_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "target_modules": [
     "gate_proj",
     "up_proj",
+    "down_proj",
+    "q_proj",
     "k_proj",
     "o_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bda92cbabe2b4ff0c8f4a3151d1b0b35846b1a3a5142626e4d9aba5a3bf59c49
 size 5544997664

 version https://git-lfs.github.com/spec/v1
+oid sha256:827134f8bc4269f2b2e4de9a603020b794835578e2bbfc7f2c4377e360c08169
 size 5544997664

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16c3ec516bc58ccca31d6446ef83c6bfc154d7b679d6546b15e06cb842890989
 size 674093138

 version https://git-lfs.github.com/spec/v1
+oid sha256:33445f1bf4eea592138ef02c9d3845698e8dbe97f6d897cf7c712fd7f8bc406b
 size 674093138

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1411261aab03cbf996e6c77c2b97c974b11ae1fafb084e2bfcd7821ac26c5b4
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3e1519b3525ee3b89552a8c064cc15f5a192f59a80e19fcabc9854e1e8b4732
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6354b98e7a016c3da277fd46231595c7fa8312a8d3c92f9ab1a816df91ac117
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:244846527155197c1f1306848f61d350db80de94a16e52843db6a3c6063d2b04
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.13440747561578664,
   "eval_steps": 200,
-  "global_step": 176400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12075,286 +12075,6 @@
       "learning_rate": 1.996596052275932e-05,
       "loss": 1.5559,
       "step": 172400
-    },
-    {
-      "epoch": 0.13143588176713827,
-      "grad_norm": 0.708541989326477,
-      "learning_rate": 1.9965921043659414e-05,
-      "loss": 1.5405,
-      "step": 172500
-    },
-    {
-      "epoch": 0.1315120764812062,
-      "grad_norm": 0.5723733305931091,
-      "learning_rate": 1.9965881541717857e-05,
-      "loss": 1.5467,
-      "step": 172600
-    },
-    {
-      "epoch": 0.1315882711952741,
-      "grad_norm": 0.5531366467475891,
-      "learning_rate": 1.996584201693474e-05,
-      "loss": 1.5528,
-      "step": 172700
-    },
-    {
-      "epoch": 0.131664465909342,
-      "grad_norm": 0.5077796578407288,
-      "learning_rate": 1.9965802469310148e-05,
-      "loss": 1.5144,
-      "step": 172800
-    },
-    {
-      "epoch": 0.13174066062340992,
-      "grad_norm": 0.6950669884681702,
-      "learning_rate": 1.996576289884417e-05,
-      "loss": 1.4381,
-      "step": 172900
-    },
-    {
-      "epoch": 0.1318168553374778,
-      "grad_norm": 0.6486429572105408,
-      "learning_rate": 1.996572330553691e-05,
-      "loss": 1.5391,
-      "step": 173000
-    },
-    {
-      "epoch": 0.13189305005154572,
-      "grad_norm": 0.707037627696991,
-      "learning_rate": 1.9965683689388448e-05,
-      "loss": 1.5175,
-      "step": 173100
-    },
-    {
-      "epoch": 0.13196924476561364,
-      "grad_norm": 0.8228338956832886,
-      "learning_rate": 1.9965644050398875e-05,
-      "loss": 1.5695,
-      "step": 173200
-    },
-    {
-      "epoch": 0.13204543947968153,
-      "grad_norm": 0.8611860871315002,
-      "learning_rate": 1.9965604388568286e-05,
-      "loss": 1.5804,
-      "step": 173300
-    },
-    {
-      "epoch": 0.13212163419374945,
-      "grad_norm": 0.593332827091217,
-      "learning_rate": 1.996556470389677e-05,
-      "loss": 1.5022,
-      "step": 173400
-    },
-    {
-      "epoch": 0.13219782890781734,
-      "grad_norm": 0.4934659004211426,
-      "learning_rate": 1.9965524996384415e-05,
-      "loss": 1.5131,
-      "step": 173500
-    },
-    {
-      "epoch": 0.13227402362188526,
-      "grad_norm": 0.461875319480896,
-      "learning_rate": 1.9965485266031317e-05,
-      "loss": 1.4976,
-      "step": 173600
-    },
-    {
-      "epoch": 0.13235021833595317,
-      "grad_norm": 0.7695358991622925,
-      "learning_rate": 1.996544551283757e-05,
-      "loss": 1.4759,
-      "step": 173700
-    },
-    {
-      "epoch": 0.13242641305002106,
-      "grad_norm": 0.23193073272705078,
-      "learning_rate": 1.9965405736803248e-05,
-      "loss": 1.5739,
-      "step": 173800
-    },
-    {
-      "epoch": 0.13250260776408898,
-      "grad_norm": 0.5889491438865662,
-      "learning_rate": 1.9965365937928464e-05,
-      "loss": 1.6115,
-      "step": 173900
-    },
-    {
-      "epoch": 0.13257880247815687,
-      "grad_norm": 0.17042429745197296,
-      "learning_rate": 1.9965326116213294e-05,
-      "loss": 1.6021,
-      "step": 174000
-    },
-    {
-      "epoch": 0.1326549971922248,
-      "grad_norm": 0.7887442708015442,
-      "learning_rate": 1.9965286271657837e-05,
-      "loss": 1.5276,
-      "step": 174100
-    },
-    {
-      "epoch": 0.1327311919062927,
-      "grad_norm": 0.522075355052948,
-      "learning_rate": 1.996524640426218e-05,
-      "loss": 1.6293,
-      "step": 174200
-    },
-    {
-      "epoch": 0.1328073866203606,
-      "grad_norm": 0.8914234042167664,
-      "learning_rate": 1.996520651402642e-05,
-      "loss": 1.5579,
-      "step": 174300
-    },
-    {
-      "epoch": 0.1328835813344285,
-      "grad_norm": 0.5836375951766968,
-      "learning_rate": 1.9965166600950642e-05,
-      "loss": 1.5017,
-      "step": 174400
-    },
-    {
-      "epoch": 0.1329597760484964,
-      "grad_norm": 0.8345896005630493,
-      "learning_rate": 1.9965126665034943e-05,
-      "loss": 1.5726,
-      "step": 174500
-    },
-    {
-      "epoch": 0.13303597076256432,
-      "grad_norm": 0.5659860968589783,
-      "learning_rate": 1.9965086706279407e-05,
-      "loss": 1.5298,
-      "step": 174600
-    },
-    {
-      "epoch": 0.1331121654766322,
-      "grad_norm": 0.4374712109565735,
-      "learning_rate": 1.9965046724684136e-05,
-      "loss": 1.5165,
-      "step": 174700
-    },
-    {
-      "epoch": 0.13318836019070013,
-      "grad_norm": 0.5508346557617188,
-      "learning_rate": 1.9965006720249214e-05,
-      "loss": 1.5273,
-      "step": 174800
-    },
-    {
-      "epoch": 0.13326455490476805,
-      "grad_norm": 1.0864572525024414,
-      "learning_rate": 1.9964966692974733e-05,
-      "loss": 1.5108,
-      "step": 174900
-    },
-    {
-      "epoch": 0.13334074961883594,
-      "grad_norm": 0.3920418322086334,
-      "learning_rate": 1.9964926642860785e-05,
-      "loss": 1.5229,
-      "step": 175000
-    },
-    {
-      "epoch": 0.13341694433290385,
-      "grad_norm": 0.845400869846344,
-      "learning_rate": 1.9964886569907468e-05,
-      "loss": 1.4647,
-      "step": 175100
-    },
-    {
-      "epoch": 0.13349313904697174,
-      "grad_norm": 0.717110276222229,
-      "learning_rate": 1.9964846474114866e-05,
-      "loss": 1.5466,
-      "step": 175200
-    },
-    {
-      "epoch": 0.13356933376103966,
-      "grad_norm": 0.5370656847953796,
-      "learning_rate": 1.9964806355483074e-05,
-      "loss": 1.5762,
-      "step": 175300
-    },
-    {
-      "epoch": 0.13364552847510758,
-      "grad_norm": 1.161123514175415,
-      "learning_rate": 1.9964766214012184e-05,
-      "loss": 1.5829,
-      "step": 175400
-    },
-    {
-      "epoch": 0.13372172318917547,
-      "grad_norm": 0.6493296027183533,
-      "learning_rate": 1.996472604970229e-05,
-      "loss": 1.5684,
-      "step": 175500
-    },
-    {
-      "epoch": 0.13379791790324339,
-      "grad_norm": 1.104617714881897,
-      "learning_rate": 1.9964685862553482e-05,
-      "loss": 1.5545,
-      "step": 175600
-    },
-    {
-      "epoch": 0.13387411261731127,
-      "grad_norm": 1.4800457954406738,
-      "learning_rate": 1.996464565256585e-05,
-      "loss": 1.5038,
-      "step": 175700
-    },
-    {
-      "epoch": 0.1339503073313792,
-      "grad_norm": 0.966915488243103,
-      "learning_rate": 1.9964605419739488e-05,
-      "loss": 1.4949,
-      "step": 175800
-    },
-    {
-      "epoch": 0.1340265020454471,
-      "grad_norm": 0.9788563847541809,
-      "learning_rate": 1.9964565164074493e-05,
-      "loss": 1.5273,
-      "step": 175900
-    },
-    {
-      "epoch": 0.134102696759515,
-      "grad_norm": 0.5662705898284912,
-      "learning_rate": 1.9964524885570945e-05,
-      "loss": 1.495,
-      "step": 176000
-    },
-    {
-      "epoch": 0.13417889147358292,
-      "grad_norm": 1.1160712242126465,
-      "learning_rate": 1.9964484584228952e-05,
-      "loss": 1.5652,
-      "step": 176100
-    },
-    {
-      "epoch": 0.1342550861876508,
-      "grad_norm": 0.8404836058616638,
-      "learning_rate": 1.9964444260048593e-05,
-      "loss": 1.533,
-      "step": 176200
-    },
-    {
-      "epoch": 0.13433128090171872,
-      "grad_norm": 0.6309127807617188,
-      "learning_rate": 1.996440391302997e-05,
-      "loss": 1.5509,
-      "step": 176300
-    },
-    {
-      "epoch": 0.13440747561578664,
-      "grad_norm": 0.8548604249954224,
-      "learning_rate": 1.9964363543173166e-05,
-      "loss": 1.4678,
-      "step": 176400
     }
   ],
   "logging_steps": 100,
@@ -12374,7 +12094,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.4041619301059625e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.13135968705307038,
   "eval_steps": 200,
+  "global_step": 172400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.996596052275932e-05,
       "loss": 1.5559,
       "step": 172400
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 2.3486437306994196e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null