MohamedAhmedAE commited on
Commit
f41837a
1 Parent(s): 5767298

Training in progress, step 176500

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bda92cbabe2b4ff0c8f4a3151d1b0b35846b1a3a5142626e4d9aba5a3bf59c49
3
  size 5544997664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4829c8d97387ccb4c56f85fac2fec1891c54be6e06f2c40b8e1a50a7553e2513
3
  size 5544997664
last-checkpoint/adapter_config.json CHANGED
@@ -22,11 +22,11 @@
22
  "target_modules": [
23
  "gate_proj",
24
  "up_proj",
 
 
25
  "k_proj",
26
  "o_proj",
27
- "q_proj",
28
- "v_proj",
29
- "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
22
  "target_modules": [
23
  "gate_proj",
24
  "up_proj",
25
+ "down_proj",
26
+ "q_proj",
27
  "k_proj",
28
  "o_proj",
29
+ "v_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bda92cbabe2b4ff0c8f4a3151d1b0b35846b1a3a5142626e4d9aba5a3bf59c49
3
  size 5544997664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:827134f8bc4269f2b2e4de9a603020b794835578e2bbfc7f2c4377e360c08169
3
  size 5544997664
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16c3ec516bc58ccca31d6446ef83c6bfc154d7b679d6546b15e06cb842890989
3
  size 674093138
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33445f1bf4eea592138ef02c9d3845698e8dbe97f6d897cf7c712fd7f8bc406b
3
  size 674093138
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1411261aab03cbf996e6c77c2b97c974b11ae1fafb084e2bfcd7821ac26c5b4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3e1519b3525ee3b89552a8c064cc15f5a192f59a80e19fcabc9854e1e8b4732
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6354b98e7a016c3da277fd46231595c7fa8312a8d3c92f9ab1a816df91ac117
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:244846527155197c1f1306848f61d350db80de94a16e52843db6a3c6063d2b04
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.13440747561578664,
5
  "eval_steps": 200,
6
- "global_step": 176400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -12075,286 +12075,6 @@
12075
  "learning_rate": 1.996596052275932e-05,
12076
  "loss": 1.5559,
12077
  "step": 172400
12078
- },
12079
- {
12080
- "epoch": 0.13143588176713827,
12081
- "grad_norm": 0.708541989326477,
12082
- "learning_rate": 1.9965921043659414e-05,
12083
- "loss": 1.5405,
12084
- "step": 172500
12085
- },
12086
- {
12087
- "epoch": 0.1315120764812062,
12088
- "grad_norm": 0.5723733305931091,
12089
- "learning_rate": 1.9965881541717857e-05,
12090
- "loss": 1.5467,
12091
- "step": 172600
12092
- },
12093
- {
12094
- "epoch": 0.1315882711952741,
12095
- "grad_norm": 0.5531366467475891,
12096
- "learning_rate": 1.996584201693474e-05,
12097
- "loss": 1.5528,
12098
- "step": 172700
12099
- },
12100
- {
12101
- "epoch": 0.131664465909342,
12102
- "grad_norm": 0.5077796578407288,
12103
- "learning_rate": 1.9965802469310148e-05,
12104
- "loss": 1.5144,
12105
- "step": 172800
12106
- },
12107
- {
12108
- "epoch": 0.13174066062340992,
12109
- "grad_norm": 0.6950669884681702,
12110
- "learning_rate": 1.996576289884417e-05,
12111
- "loss": 1.4381,
12112
- "step": 172900
12113
- },
12114
- {
12115
- "epoch": 0.1318168553374778,
12116
- "grad_norm": 0.6486429572105408,
12117
- "learning_rate": 1.996572330553691e-05,
12118
- "loss": 1.5391,
12119
- "step": 173000
12120
- },
12121
- {
12122
- "epoch": 0.13189305005154572,
12123
- "grad_norm": 0.707037627696991,
12124
- "learning_rate": 1.9965683689388448e-05,
12125
- "loss": 1.5175,
12126
- "step": 173100
12127
- },
12128
- {
12129
- "epoch": 0.13196924476561364,
12130
- "grad_norm": 0.8228338956832886,
12131
- "learning_rate": 1.9965644050398875e-05,
12132
- "loss": 1.5695,
12133
- "step": 173200
12134
- },
12135
- {
12136
- "epoch": 0.13204543947968153,
12137
- "grad_norm": 0.8611860871315002,
12138
- "learning_rate": 1.9965604388568286e-05,
12139
- "loss": 1.5804,
12140
- "step": 173300
12141
- },
12142
- {
12143
- "epoch": 0.13212163419374945,
12144
- "grad_norm": 0.593332827091217,
12145
- "learning_rate": 1.996556470389677e-05,
12146
- "loss": 1.5022,
12147
- "step": 173400
12148
- },
12149
- {
12150
- "epoch": 0.13219782890781734,
12151
- "grad_norm": 0.4934659004211426,
12152
- "learning_rate": 1.9965524996384415e-05,
12153
- "loss": 1.5131,
12154
- "step": 173500
12155
- },
12156
- {
12157
- "epoch": 0.13227402362188526,
12158
- "grad_norm": 0.461875319480896,
12159
- "learning_rate": 1.9965485266031317e-05,
12160
- "loss": 1.4976,
12161
- "step": 173600
12162
- },
12163
- {
12164
- "epoch": 0.13235021833595317,
12165
- "grad_norm": 0.7695358991622925,
12166
- "learning_rate": 1.996544551283757e-05,
12167
- "loss": 1.4759,
12168
- "step": 173700
12169
- },
12170
- {
12171
- "epoch": 0.13242641305002106,
12172
- "grad_norm": 0.23193073272705078,
12173
- "learning_rate": 1.9965405736803248e-05,
12174
- "loss": 1.5739,
12175
- "step": 173800
12176
- },
12177
- {
12178
- "epoch": 0.13250260776408898,
12179
- "grad_norm": 0.5889491438865662,
12180
- "learning_rate": 1.9965365937928464e-05,
12181
- "loss": 1.6115,
12182
- "step": 173900
12183
- },
12184
- {
12185
- "epoch": 0.13257880247815687,
12186
- "grad_norm": 0.17042429745197296,
12187
- "learning_rate": 1.9965326116213294e-05,
12188
- "loss": 1.6021,
12189
- "step": 174000
12190
- },
12191
- {
12192
- "epoch": 0.1326549971922248,
12193
- "grad_norm": 0.7887442708015442,
12194
- "learning_rate": 1.9965286271657837e-05,
12195
- "loss": 1.5276,
12196
- "step": 174100
12197
- },
12198
- {
12199
- "epoch": 0.1327311919062927,
12200
- "grad_norm": 0.522075355052948,
12201
- "learning_rate": 1.996524640426218e-05,
12202
- "loss": 1.6293,
12203
- "step": 174200
12204
- },
12205
- {
12206
- "epoch": 0.1328073866203606,
12207
- "grad_norm": 0.8914234042167664,
12208
- "learning_rate": 1.996520651402642e-05,
12209
- "loss": 1.5579,
12210
- "step": 174300
12211
- },
12212
- {
12213
- "epoch": 0.1328835813344285,
12214
- "grad_norm": 0.5836375951766968,
12215
- "learning_rate": 1.9965166600950642e-05,
12216
- "loss": 1.5017,
12217
- "step": 174400
12218
- },
12219
- {
12220
- "epoch": 0.1329597760484964,
12221
- "grad_norm": 0.8345896005630493,
12222
- "learning_rate": 1.9965126665034943e-05,
12223
- "loss": 1.5726,
12224
- "step": 174500
12225
- },
12226
- {
12227
- "epoch": 0.13303597076256432,
12228
- "grad_norm": 0.5659860968589783,
12229
- "learning_rate": 1.9965086706279407e-05,
12230
- "loss": 1.5298,
12231
- "step": 174600
12232
- },
12233
- {
12234
- "epoch": 0.1331121654766322,
12235
- "grad_norm": 0.4374712109565735,
12236
- "learning_rate": 1.9965046724684136e-05,
12237
- "loss": 1.5165,
12238
- "step": 174700
12239
- },
12240
- {
12241
- "epoch": 0.13318836019070013,
12242
- "grad_norm": 0.5508346557617188,
12243
- "learning_rate": 1.9965006720249214e-05,
12244
- "loss": 1.5273,
12245
- "step": 174800
12246
- },
12247
- {
12248
- "epoch": 0.13326455490476805,
12249
- "grad_norm": 1.0864572525024414,
12250
- "learning_rate": 1.9964966692974733e-05,
12251
- "loss": 1.5108,
12252
- "step": 174900
12253
- },
12254
- {
12255
- "epoch": 0.13334074961883594,
12256
- "grad_norm": 0.3920418322086334,
12257
- "learning_rate": 1.9964926642860785e-05,
12258
- "loss": 1.5229,
12259
- "step": 175000
12260
- },
12261
- {
12262
- "epoch": 0.13341694433290385,
12263
- "grad_norm": 0.845400869846344,
12264
- "learning_rate": 1.9964886569907468e-05,
12265
- "loss": 1.4647,
12266
- "step": 175100
12267
- },
12268
- {
12269
- "epoch": 0.13349313904697174,
12270
- "grad_norm": 0.717110276222229,
12271
- "learning_rate": 1.9964846474114866e-05,
12272
- "loss": 1.5466,
12273
- "step": 175200
12274
- },
12275
- {
12276
- "epoch": 0.13356933376103966,
12277
- "grad_norm": 0.5370656847953796,
12278
- "learning_rate": 1.9964806355483074e-05,
12279
- "loss": 1.5762,
12280
- "step": 175300
12281
- },
12282
- {
12283
- "epoch": 0.13364552847510758,
12284
- "grad_norm": 1.161123514175415,
12285
- "learning_rate": 1.9964766214012184e-05,
12286
- "loss": 1.5829,
12287
- "step": 175400
12288
- },
12289
- {
12290
- "epoch": 0.13372172318917547,
12291
- "grad_norm": 0.6493296027183533,
12292
- "learning_rate": 1.996472604970229e-05,
12293
- "loss": 1.5684,
12294
- "step": 175500
12295
- },
12296
- {
12297
- "epoch": 0.13379791790324339,
12298
- "grad_norm": 1.104617714881897,
12299
- "learning_rate": 1.9964685862553482e-05,
12300
- "loss": 1.5545,
12301
- "step": 175600
12302
- },
12303
- {
12304
- "epoch": 0.13387411261731127,
12305
- "grad_norm": 1.4800457954406738,
12306
- "learning_rate": 1.996464565256585e-05,
12307
- "loss": 1.5038,
12308
- "step": 175700
12309
- },
12310
- {
12311
- "epoch": 0.1339503073313792,
12312
- "grad_norm": 0.966915488243103,
12313
- "learning_rate": 1.9964605419739488e-05,
12314
- "loss": 1.4949,
12315
- "step": 175800
12316
- },
12317
- {
12318
- "epoch": 0.1340265020454471,
12319
- "grad_norm": 0.9788563847541809,
12320
- "learning_rate": 1.9964565164074493e-05,
12321
- "loss": 1.5273,
12322
- "step": 175900
12323
- },
12324
- {
12325
- "epoch": 0.134102696759515,
12326
- "grad_norm": 0.5662705898284912,
12327
- "learning_rate": 1.9964524885570945e-05,
12328
- "loss": 1.495,
12329
- "step": 176000
12330
- },
12331
- {
12332
- "epoch": 0.13417889147358292,
12333
- "grad_norm": 1.1160712242126465,
12334
- "learning_rate": 1.9964484584228952e-05,
12335
- "loss": 1.5652,
12336
- "step": 176100
12337
- },
12338
- {
12339
- "epoch": 0.1342550861876508,
12340
- "grad_norm": 0.8404836058616638,
12341
- "learning_rate": 1.9964444260048593e-05,
12342
- "loss": 1.533,
12343
- "step": 176200
12344
- },
12345
- {
12346
- "epoch": 0.13433128090171872,
12347
- "grad_norm": 0.6309127807617188,
12348
- "learning_rate": 1.996440391302997e-05,
12349
- "loss": 1.5509,
12350
- "step": 176300
12351
- },
12352
- {
12353
- "epoch": 0.13440747561578664,
12354
- "grad_norm": 0.8548604249954224,
12355
- "learning_rate": 1.9964363543173166e-05,
12356
- "loss": 1.4678,
12357
- "step": 176400
12358
  }
12359
  ],
12360
  "logging_steps": 100,
@@ -12374,7 +12094,7 @@
12374
  "attributes": {}
12375
  }
12376
  },
12377
- "total_flos": 2.4041619301059625e+18,
12378
  "train_batch_size": 1,
12379
  "trial_name": null,
12380
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.13135968705307038,
5
  "eval_steps": 200,
6
+ "global_step": 172400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
12075
  "learning_rate": 1.996596052275932e-05,
12076
  "loss": 1.5559,
12077
  "step": 172400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12078
  }
12079
  ],
12080
  "logging_steps": 100,
 
12094
  "attributes": {}
12095
  }
12096
  },
12097
+ "total_flos": 2.3486437306994196e+18,
12098
  "train_batch_size": 1,
12099
  "trial_name": null,
12100
  "trial_params": null