Training in progress, step 100900, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_config.json +4 -4
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +416 -3

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
     "k_proj",
     "gate_proj",
     "down_proj",
-    "o_proj",
-    "v_proj",
-    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
     "k_proj",
     "gate_proj",
     "down_proj",
+    "q_proj",
+    "up_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c8c2bc861c786b2bf9d8ddb8858babedad8fc42c6e26fb00fe13b35096c6de7
 size 5544997664

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ff50e9a0eef14c00f32c5e550257295427f2d666e009aac32472aef43b0c78f
 size 5544997664

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36ebe5440e2bcb412e4131df2efca8e8fc88b5200168c85a419cb901604336b6
 size 674093138

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b1f3c8b9d1c8514057a760685418307853b2172387d395e371d81516debcc99
 size 674093138

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0610f4aa7ed2f34398fce8dc77c3d7b14d52dfb0bc17dc7f64e8f6c2438e189b
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d52e9778ae961a843d4efe5adba669832146332ec663eac9df46d71427724e3
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17feec1222485652df46ab05d04d0cb1b6896f1f053ea3ae8ca19c7cd689e6b7
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1dd725a3e5295711459643d6e1204a1d04a7f905cc6416544fa87ecdfb18228
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.07064982969672631,
   "eval_steps": 200,
-  "global_step": 95000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -6657,6 +6657,419 @@
       "learning_rate": 1.9990150014305462e-05,
       "loss": 1.5194,
       "step": 95000
     }
   ],
   "logging_steps": 100,
@@ -6676,7 +7089,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2945898144897352e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.07503755596210195,
   "eval_steps": 200,
+  "global_step": 100900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9990150014305462e-05,
       "loss": 1.5194,
       "step": 95000
+    },
+    {
+      "epoch": 0.07072419793851234,
+      "grad_norm": 0.7444689273834229,
+      "learning_rate": 1.9990129268900848e-05,
+      "loss": 1.5198,
+      "step": 95100
+    },
+    {
+      "epoch": 0.07079856618029837,
+      "grad_norm": 0.9299377202987671,
+      "learning_rate": 1.9990108501683685e-05,
+      "loss": 1.5393,
+      "step": 95200
+    },
+    {
+      "epoch": 0.0708729344220844,
+      "grad_norm": 0.6611402630805969,
+      "learning_rate": 1.999008771265401e-05,
+      "loss": 1.5351,
+      "step": 95300
+    },
+    {
+      "epoch": 0.07094730266387042,
+      "grad_norm": 0.4772530496120453,
+      "learning_rate": 1.9990066901811876e-05,
+      "loss": 1.5243,
+      "step": 95400
+    },
+    {
+      "epoch": 0.07102167090565645,
+      "grad_norm": 0.42998188734054565,
+      "learning_rate": 1.9990046069157322e-05,
+      "loss": 1.5877,
+      "step": 95500
+    },
+    {
+      "epoch": 0.07109603914744247,
+      "grad_norm": 0.7415347099304199,
+      "learning_rate": 1.9990025214690396e-05,
+      "loss": 1.5633,
+      "step": 95600
+    },
+    {
+      "epoch": 0.0711704073892285,
+      "grad_norm": 0.657112717628479,
+      "learning_rate": 1.999000433841114e-05,
+      "loss": 1.4555,
+      "step": 95700
+    },
+    {
+      "epoch": 0.07124477563101453,
+      "grad_norm": 0.9188429713249207,
+      "learning_rate": 1.998998344031961e-05,
+      "loss": 1.4329,
+      "step": 95800
+    },
+    {
+      "epoch": 0.07131914387280056,
+      "grad_norm": 0.8823667168617249,
+      "learning_rate": 1.9989962520415836e-05,
+      "loss": 1.4754,
+      "step": 95900
+    },
+    {
+      "epoch": 0.07139351211458658,
+      "grad_norm": 0.7276200652122498,
+      "learning_rate": 1.9989941578699878e-05,
+      "loss": 1.5286,
+      "step": 96000
+    },
+    {
+      "epoch": 0.07146788035637261,
+      "grad_norm": 0.941512405872345,
+      "learning_rate": 1.998992061517177e-05,
+      "loss": 1.5087,
+      "step": 96100
+    },
+    {
+      "epoch": 0.07154224859815864,
+      "grad_norm": 1.0310442447662354,
+      "learning_rate": 1.998989962983157e-05,
+      "loss": 1.5895,
+      "step": 96200
+    },
+    {
+      "epoch": 0.07161661683994466,
+      "grad_norm": 1.3620883226394653,
+      "learning_rate": 1.9989878622679317e-05,
+      "loss": 1.474,
+      "step": 96300
+    },
+    {
+      "epoch": 0.0716909850817307,
+      "grad_norm": 0.5119801163673401,
+      "learning_rate": 1.998985759371505e-05,
+      "loss": 1.5112,
+      "step": 96400
+    },
+    {
+      "epoch": 0.07176535332351673,
+      "grad_norm": 0.8966123461723328,
+      "learning_rate": 1.998983654293883e-05,
+      "loss": 1.4903,
+      "step": 96500
+    },
+    {
+      "epoch": 0.07183972156530276,
+      "grad_norm": 0.5336944460868835,
+      "learning_rate": 1.998981547035069e-05,
+      "loss": 1.5673,
+      "step": 96600
+    },
+    {
+      "epoch": 0.07191408980708879,
+      "grad_norm": 1.2533961534500122,
+      "learning_rate": 1.9989794375950688e-05,
+      "loss": 1.5039,
+      "step": 96700
+    },
+    {
+      "epoch": 0.07198845804887481,
+      "grad_norm": 1.3317081928253174,
+      "learning_rate": 1.9989773259738858e-05,
+      "loss": 1.567,
+      "step": 96800
+    },
+    {
+      "epoch": 0.07206282629066084,
+      "grad_norm": 0.49700722098350525,
+      "learning_rate": 1.998975212171525e-05,
+      "loss": 1.542,
+      "step": 96900
+    },
+    {
+      "epoch": 0.07213719453244687,
+      "grad_norm": 0.5809246301651001,
+      "learning_rate": 1.9989730961879913e-05,
+      "loss": 1.5097,
+      "step": 97000
+    },
+    {
+      "epoch": 0.07221156277423289,
+      "grad_norm": 0.6107625365257263,
+      "learning_rate": 1.9989709780232894e-05,
+      "loss": 1.536,
+      "step": 97100
+    },
+    {
+      "epoch": 0.07228593101601892,
+      "grad_norm": 0.5271338820457458,
+      "learning_rate": 1.9989688576774234e-05,
+      "loss": 1.5819,
+      "step": 97200
+    },
+    {
+      "epoch": 0.07236029925780495,
+      "grad_norm": 0.6692411303520203,
+      "learning_rate": 1.9989667351503988e-05,
+      "loss": 1.4833,
+      "step": 97300
+    },
+    {
+      "epoch": 0.07243466749959097,
+      "grad_norm": 1.0627728700637817,
+      "learning_rate": 1.998964610442219e-05,
+      "loss": 1.5404,
+      "step": 97400
+    },
+    {
+      "epoch": 0.072509035741377,
+      "grad_norm": 0.5696298480033875,
+      "learning_rate": 1.9989624835528896e-05,
+      "loss": 1.4491,
+      "step": 97500
+    },
+    {
+      "epoch": 0.07258340398316303,
+      "grad_norm": 0.5105301141738892,
+      "learning_rate": 1.998960354482415e-05,
+      "loss": 1.5188,
+      "step": 97600
+    },
+    {
+      "epoch": 0.07265777222494905,
+      "grad_norm": 0.53251713514328,
+      "learning_rate": 1.9989582232307998e-05,
+      "loss": 1.5367,
+      "step": 97700
+    },
+    {
+      "epoch": 0.07273214046673508,
+      "grad_norm": 0.6559078693389893,
+      "learning_rate": 1.9989560897980485e-05,
+      "loss": 1.4773,
+      "step": 97800
+    },
+    {
+      "epoch": 0.07280650870852111,
+      "grad_norm": 0.39833974838256836,
+      "learning_rate": 1.998953954184166e-05,
+      "loss": 1.6063,
+      "step": 97900
+    },
+    {
+      "epoch": 0.07288087695030714,
+      "grad_norm": 1.0479645729064941,
+      "learning_rate": 1.9989518163891566e-05,
+      "loss": 1.565,
+      "step": 98000
+    },
+    {
+      "epoch": 0.07295524519209316,
+      "grad_norm": 0.7905478477478027,
+      "learning_rate": 1.9989496764130253e-05,
+      "loss": 1.5266,
+      "step": 98100
+    },
+    {
+      "epoch": 0.07302961343387919,
+      "grad_norm": 0.4569951295852661,
+      "learning_rate": 1.998947534255777e-05,
+      "loss": 1.5295,
+      "step": 98200
+    },
+    {
+      "epoch": 0.07310398167566523,
+      "grad_norm": 0.5308849215507507,
+      "learning_rate": 1.9989453899174158e-05,
+      "loss": 1.5203,
+      "step": 98300
+    },
+    {
+      "epoch": 0.07317834991745126,
+      "grad_norm": 0.906802773475647,
+      "learning_rate": 1.998943243397947e-05,
+      "loss": 1.556,
+      "step": 98400
+    },
+    {
+      "epoch": 0.07325271815923728,
+      "grad_norm": 0.5071494579315186,
+      "learning_rate": 1.9989410946973747e-05,
+      "loss": 1.5627,
+      "step": 98500
+    },
+    {
+      "epoch": 0.07332708640102331,
+      "grad_norm": 0.5252199172973633,
+      "learning_rate": 1.9989389438157037e-05,
+      "loss": 1.5181,
+      "step": 98600
+    },
+    {
+      "epoch": 0.07340145464280934,
+      "grad_norm": 0.5738980174064636,
+      "learning_rate": 1.9989367907529394e-05,
+      "loss": 1.6101,
+      "step": 98700
+    },
+    {
+      "epoch": 0.07347582288459537,
+      "grad_norm": 0.6898683309555054,
+      "learning_rate": 1.9989346355090853e-05,
+      "loss": 1.579,
+      "step": 98800
+    },
+    {
+      "epoch": 0.07355019112638139,
+      "grad_norm": 0.5396860241889954,
+      "learning_rate": 1.998932478084147e-05,
+      "loss": 1.5645,
+      "step": 98900
+    },
+    {
+      "epoch": 0.07362455936816742,
+      "grad_norm": 0.5482293367385864,
+      "learning_rate": 1.998930318478129e-05,
+      "loss": 1.5453,
+      "step": 99000
+    },
+    {
+      "epoch": 0.07369892760995345,
+      "grad_norm": 0.8394240736961365,
+      "learning_rate": 1.9989281566910363e-05,
+      "loss": 1.5025,
+      "step": 99100
+    },
+    {
+      "epoch": 0.07377329585173947,
+      "grad_norm": 0.9409950971603394,
+      "learning_rate": 1.9989259927228725e-05,
+      "loss": 1.5489,
+      "step": 99200
+    },
+    {
+      "epoch": 0.0738476640935255,
+      "grad_norm": 0.5597321391105652,
+      "learning_rate": 1.9989238265736437e-05,
+      "loss": 1.5994,
+      "step": 99300
+    },
+    {
+      "epoch": 0.07392203233531153,
+      "grad_norm": 0.5139235258102417,
+      "learning_rate": 1.9989216582433538e-05,
+      "loss": 1.5478,
+      "step": 99400
+    },
+    {
+      "epoch": 0.07399640057709755,
+      "grad_norm": 0.6312362551689148,
+      "learning_rate": 1.998919487732008e-05,
+      "loss": 1.4989,
+      "step": 99500
+    },
+    {
+      "epoch": 0.07407076881888358,
+      "grad_norm": 0.6924223303794861,
+      "learning_rate": 1.9989173150396105e-05,
+      "loss": 1.4491,
+      "step": 99600
+    },
+    {
+      "epoch": 0.07414513706066961,
+      "grad_norm": 0.5490585565567017,
+      "learning_rate": 1.9989151401661666e-05,
+      "loss": 1.538,
+      "step": 99700
+    },
+    {
+      "epoch": 0.07421950530245564,
+      "grad_norm": 0.630455732345581,
+      "learning_rate": 1.998912963111681e-05,
+      "loss": 1.5286,
+      "step": 99800
+    },
+    {
+      "epoch": 0.07429387354424166,
+      "grad_norm": 0.8591504693031311,
+      "learning_rate": 1.998910783876158e-05,
+      "loss": 1.5612,
+      "step": 99900
+    },
+    {
+      "epoch": 0.07436824178602769,
+      "grad_norm": 1.0016669034957886,
+      "learning_rate": 1.9989086024596027e-05,
+      "loss": 1.5154,
+      "step": 100000
+    },
+    {
+      "epoch": 0.07444261002781372,
+      "grad_norm": 0.6513885259628296,
+      "learning_rate": 1.9989064188620197e-05,
+      "loss": 1.5446,
+      "step": 100100
+    },
+    {
+      "epoch": 0.07451697826959976,
+      "grad_norm": 0.6838514804840088,
+      "learning_rate": 1.998904233083414e-05,
+      "loss": 1.5336,
+      "step": 100200
+    },
+    {
+      "epoch": 0.07459134651138578,
+      "grad_norm": 0.46571242809295654,
+      "learning_rate": 1.9989020451237903e-05,
+      "loss": 1.4838,
+      "step": 100300
+    },
+    {
+      "epoch": 0.07466571475317181,
+      "grad_norm": 0.9936356544494629,
+      "learning_rate": 1.998899854983153e-05,
+      "loss": 1.5929,
+      "step": 100400
+    },
+    {
+      "epoch": 0.07474008299495784,
+      "grad_norm": 0.6591018438339233,
+      "learning_rate": 1.9988976626615075e-05,
+      "loss": 1.54,
+      "step": 100500
+    },
+    {
+      "epoch": 0.07481445123674386,
+      "grad_norm": 0.8453909754753113,
+      "learning_rate": 1.998895468158858e-05,
+      "loss": 1.5191,
+      "step": 100600
+    },
+    {
+      "epoch": 0.07488881947852989,
+      "grad_norm": 0.6555935144424438,
+      "learning_rate": 1.9988932714752095e-05,
+      "loss": 1.5734,
+      "step": 100700
+    },
+    {
+      "epoch": 0.07496318772031592,
+      "grad_norm": 0.6445733308792114,
+      "learning_rate": 1.998891072610567e-05,
+      "loss": 1.5516,
+      "step": 100800
+    },
+    {
+      "epoch": 0.07503755596210195,
+      "grad_norm": 0.534389078617096,
+      "learning_rate": 1.9988888715649357e-05,
+      "loss": 1.5441,
+      "step": 100900
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 1.3747108667853128e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null