MohamedAhmedAE
commited on
Commit
•
f41837a
1
Parent(s):
5767298
Training in progress, step 176500
Browse files
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5544997664
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4829c8d97387ccb4c56f85fac2fec1891c54be6e06f2c40b8e1a50a7553e2513
|
3 |
size 5544997664
|
last-checkpoint/adapter_config.json
CHANGED
@@ -22,11 +22,11 @@
|
|
22 |
"target_modules": [
|
23 |
"gate_proj",
|
24 |
"up_proj",
|
|
|
|
|
25 |
"k_proj",
|
26 |
"o_proj",
|
27 |
-
"
|
28 |
-
"v_proj",
|
29 |
-
"down_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
32 |
"use_dora": false,
|
|
|
22 |
"target_modules": [
|
23 |
"gate_proj",
|
24 |
"up_proj",
|
25 |
+
"down_proj",
|
26 |
+
"q_proj",
|
27 |
"k_proj",
|
28 |
"o_proj",
|
29 |
+
"v_proj"
|
|
|
|
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
32 |
"use_dora": false,
|
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5544997664
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:827134f8bc4269f2b2e4de9a603020b794835578e2bbfc7f2c4377e360c08169
|
3 |
size 5544997664
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 674093138
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33445f1bf4eea592138ef02c9d3845698e8dbe97f6d897cf7c712fd7f8bc406b
|
3 |
size 674093138
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3e1519b3525ee3b89552a8c064cc15f5a192f59a80e19fcabc9854e1e8b4732
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:244846527155197c1f1306848f61d350db80de94a16e52843db6a3c6063d2b04
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 200,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -12075,286 +12075,6 @@
|
|
12075 |
"learning_rate": 1.996596052275932e-05,
|
12076 |
"loss": 1.5559,
|
12077 |
"step": 172400
|
12078 |
-
},
|
12079 |
-
{
|
12080 |
-
"epoch": 0.13143588176713827,
|
12081 |
-
"grad_norm": 0.708541989326477,
|
12082 |
-
"learning_rate": 1.9965921043659414e-05,
|
12083 |
-
"loss": 1.5405,
|
12084 |
-
"step": 172500
|
12085 |
-
},
|
12086 |
-
{
|
12087 |
-
"epoch": 0.1315120764812062,
|
12088 |
-
"grad_norm": 0.5723733305931091,
|
12089 |
-
"learning_rate": 1.9965881541717857e-05,
|
12090 |
-
"loss": 1.5467,
|
12091 |
-
"step": 172600
|
12092 |
-
},
|
12093 |
-
{
|
12094 |
-
"epoch": 0.1315882711952741,
|
12095 |
-
"grad_norm": 0.5531366467475891,
|
12096 |
-
"learning_rate": 1.996584201693474e-05,
|
12097 |
-
"loss": 1.5528,
|
12098 |
-
"step": 172700
|
12099 |
-
},
|
12100 |
-
{
|
12101 |
-
"epoch": 0.131664465909342,
|
12102 |
-
"grad_norm": 0.5077796578407288,
|
12103 |
-
"learning_rate": 1.9965802469310148e-05,
|
12104 |
-
"loss": 1.5144,
|
12105 |
-
"step": 172800
|
12106 |
-
},
|
12107 |
-
{
|
12108 |
-
"epoch": 0.13174066062340992,
|
12109 |
-
"grad_norm": 0.6950669884681702,
|
12110 |
-
"learning_rate": 1.996576289884417e-05,
|
12111 |
-
"loss": 1.4381,
|
12112 |
-
"step": 172900
|
12113 |
-
},
|
12114 |
-
{
|
12115 |
-
"epoch": 0.1318168553374778,
|
12116 |
-
"grad_norm": 0.6486429572105408,
|
12117 |
-
"learning_rate": 1.996572330553691e-05,
|
12118 |
-
"loss": 1.5391,
|
12119 |
-
"step": 173000
|
12120 |
-
},
|
12121 |
-
{
|
12122 |
-
"epoch": 0.13189305005154572,
|
12123 |
-
"grad_norm": 0.707037627696991,
|
12124 |
-
"learning_rate": 1.9965683689388448e-05,
|
12125 |
-
"loss": 1.5175,
|
12126 |
-
"step": 173100
|
12127 |
-
},
|
12128 |
-
{
|
12129 |
-
"epoch": 0.13196924476561364,
|
12130 |
-
"grad_norm": 0.8228338956832886,
|
12131 |
-
"learning_rate": 1.9965644050398875e-05,
|
12132 |
-
"loss": 1.5695,
|
12133 |
-
"step": 173200
|
12134 |
-
},
|
12135 |
-
{
|
12136 |
-
"epoch": 0.13204543947968153,
|
12137 |
-
"grad_norm": 0.8611860871315002,
|
12138 |
-
"learning_rate": 1.9965604388568286e-05,
|
12139 |
-
"loss": 1.5804,
|
12140 |
-
"step": 173300
|
12141 |
-
},
|
12142 |
-
{
|
12143 |
-
"epoch": 0.13212163419374945,
|
12144 |
-
"grad_norm": 0.593332827091217,
|
12145 |
-
"learning_rate": 1.996556470389677e-05,
|
12146 |
-
"loss": 1.5022,
|
12147 |
-
"step": 173400
|
12148 |
-
},
|
12149 |
-
{
|
12150 |
-
"epoch": 0.13219782890781734,
|
12151 |
-
"grad_norm": 0.4934659004211426,
|
12152 |
-
"learning_rate": 1.9965524996384415e-05,
|
12153 |
-
"loss": 1.5131,
|
12154 |
-
"step": 173500
|
12155 |
-
},
|
12156 |
-
{
|
12157 |
-
"epoch": 0.13227402362188526,
|
12158 |
-
"grad_norm": 0.461875319480896,
|
12159 |
-
"learning_rate": 1.9965485266031317e-05,
|
12160 |
-
"loss": 1.4976,
|
12161 |
-
"step": 173600
|
12162 |
-
},
|
12163 |
-
{
|
12164 |
-
"epoch": 0.13235021833595317,
|
12165 |
-
"grad_norm": 0.7695358991622925,
|
12166 |
-
"learning_rate": 1.996544551283757e-05,
|
12167 |
-
"loss": 1.4759,
|
12168 |
-
"step": 173700
|
12169 |
-
},
|
12170 |
-
{
|
12171 |
-
"epoch": 0.13242641305002106,
|
12172 |
-
"grad_norm": 0.23193073272705078,
|
12173 |
-
"learning_rate": 1.9965405736803248e-05,
|
12174 |
-
"loss": 1.5739,
|
12175 |
-
"step": 173800
|
12176 |
-
},
|
12177 |
-
{
|
12178 |
-
"epoch": 0.13250260776408898,
|
12179 |
-
"grad_norm": 0.5889491438865662,
|
12180 |
-
"learning_rate": 1.9965365937928464e-05,
|
12181 |
-
"loss": 1.6115,
|
12182 |
-
"step": 173900
|
12183 |
-
},
|
12184 |
-
{
|
12185 |
-
"epoch": 0.13257880247815687,
|
12186 |
-
"grad_norm": 0.17042429745197296,
|
12187 |
-
"learning_rate": 1.9965326116213294e-05,
|
12188 |
-
"loss": 1.6021,
|
12189 |
-
"step": 174000
|
12190 |
-
},
|
12191 |
-
{
|
12192 |
-
"epoch": 0.1326549971922248,
|
12193 |
-
"grad_norm": 0.7887442708015442,
|
12194 |
-
"learning_rate": 1.9965286271657837e-05,
|
12195 |
-
"loss": 1.5276,
|
12196 |
-
"step": 174100
|
12197 |
-
},
|
12198 |
-
{
|
12199 |
-
"epoch": 0.1327311919062927,
|
12200 |
-
"grad_norm": 0.522075355052948,
|
12201 |
-
"learning_rate": 1.996524640426218e-05,
|
12202 |
-
"loss": 1.6293,
|
12203 |
-
"step": 174200
|
12204 |
-
},
|
12205 |
-
{
|
12206 |
-
"epoch": 0.1328073866203606,
|
12207 |
-
"grad_norm": 0.8914234042167664,
|
12208 |
-
"learning_rate": 1.996520651402642e-05,
|
12209 |
-
"loss": 1.5579,
|
12210 |
-
"step": 174300
|
12211 |
-
},
|
12212 |
-
{
|
12213 |
-
"epoch": 0.1328835813344285,
|
12214 |
-
"grad_norm": 0.5836375951766968,
|
12215 |
-
"learning_rate": 1.9965166600950642e-05,
|
12216 |
-
"loss": 1.5017,
|
12217 |
-
"step": 174400
|
12218 |
-
},
|
12219 |
-
{
|
12220 |
-
"epoch": 0.1329597760484964,
|
12221 |
-
"grad_norm": 0.8345896005630493,
|
12222 |
-
"learning_rate": 1.9965126665034943e-05,
|
12223 |
-
"loss": 1.5726,
|
12224 |
-
"step": 174500
|
12225 |
-
},
|
12226 |
-
{
|
12227 |
-
"epoch": 0.13303597076256432,
|
12228 |
-
"grad_norm": 0.5659860968589783,
|
12229 |
-
"learning_rate": 1.9965086706279407e-05,
|
12230 |
-
"loss": 1.5298,
|
12231 |
-
"step": 174600
|
12232 |
-
},
|
12233 |
-
{
|
12234 |
-
"epoch": 0.1331121654766322,
|
12235 |
-
"grad_norm": 0.4374712109565735,
|
12236 |
-
"learning_rate": 1.9965046724684136e-05,
|
12237 |
-
"loss": 1.5165,
|
12238 |
-
"step": 174700
|
12239 |
-
},
|
12240 |
-
{
|
12241 |
-
"epoch": 0.13318836019070013,
|
12242 |
-
"grad_norm": 0.5508346557617188,
|
12243 |
-
"learning_rate": 1.9965006720249214e-05,
|
12244 |
-
"loss": 1.5273,
|
12245 |
-
"step": 174800
|
12246 |
-
},
|
12247 |
-
{
|
12248 |
-
"epoch": 0.13326455490476805,
|
12249 |
-
"grad_norm": 1.0864572525024414,
|
12250 |
-
"learning_rate": 1.9964966692974733e-05,
|
12251 |
-
"loss": 1.5108,
|
12252 |
-
"step": 174900
|
12253 |
-
},
|
12254 |
-
{
|
12255 |
-
"epoch": 0.13334074961883594,
|
12256 |
-
"grad_norm": 0.3920418322086334,
|
12257 |
-
"learning_rate": 1.9964926642860785e-05,
|
12258 |
-
"loss": 1.5229,
|
12259 |
-
"step": 175000
|
12260 |
-
},
|
12261 |
-
{
|
12262 |
-
"epoch": 0.13341694433290385,
|
12263 |
-
"grad_norm": 0.845400869846344,
|
12264 |
-
"learning_rate": 1.9964886569907468e-05,
|
12265 |
-
"loss": 1.4647,
|
12266 |
-
"step": 175100
|
12267 |
-
},
|
12268 |
-
{
|
12269 |
-
"epoch": 0.13349313904697174,
|
12270 |
-
"grad_norm": 0.717110276222229,
|
12271 |
-
"learning_rate": 1.9964846474114866e-05,
|
12272 |
-
"loss": 1.5466,
|
12273 |
-
"step": 175200
|
12274 |
-
},
|
12275 |
-
{
|
12276 |
-
"epoch": 0.13356933376103966,
|
12277 |
-
"grad_norm": 0.5370656847953796,
|
12278 |
-
"learning_rate": 1.9964806355483074e-05,
|
12279 |
-
"loss": 1.5762,
|
12280 |
-
"step": 175300
|
12281 |
-
},
|
12282 |
-
{
|
12283 |
-
"epoch": 0.13364552847510758,
|
12284 |
-
"grad_norm": 1.161123514175415,
|
12285 |
-
"learning_rate": 1.9964766214012184e-05,
|
12286 |
-
"loss": 1.5829,
|
12287 |
-
"step": 175400
|
12288 |
-
},
|
12289 |
-
{
|
12290 |
-
"epoch": 0.13372172318917547,
|
12291 |
-
"grad_norm": 0.6493296027183533,
|
12292 |
-
"learning_rate": 1.996472604970229e-05,
|
12293 |
-
"loss": 1.5684,
|
12294 |
-
"step": 175500
|
12295 |
-
},
|
12296 |
-
{
|
12297 |
-
"epoch": 0.13379791790324339,
|
12298 |
-
"grad_norm": 1.104617714881897,
|
12299 |
-
"learning_rate": 1.9964685862553482e-05,
|
12300 |
-
"loss": 1.5545,
|
12301 |
-
"step": 175600
|
12302 |
-
},
|
12303 |
-
{
|
12304 |
-
"epoch": 0.13387411261731127,
|
12305 |
-
"grad_norm": 1.4800457954406738,
|
12306 |
-
"learning_rate": 1.996464565256585e-05,
|
12307 |
-
"loss": 1.5038,
|
12308 |
-
"step": 175700
|
12309 |
-
},
|
12310 |
-
{
|
12311 |
-
"epoch": 0.1339503073313792,
|
12312 |
-
"grad_norm": 0.966915488243103,
|
12313 |
-
"learning_rate": 1.9964605419739488e-05,
|
12314 |
-
"loss": 1.4949,
|
12315 |
-
"step": 175800
|
12316 |
-
},
|
12317 |
-
{
|
12318 |
-
"epoch": 0.1340265020454471,
|
12319 |
-
"grad_norm": 0.9788563847541809,
|
12320 |
-
"learning_rate": 1.9964565164074493e-05,
|
12321 |
-
"loss": 1.5273,
|
12322 |
-
"step": 175900
|
12323 |
-
},
|
12324 |
-
{
|
12325 |
-
"epoch": 0.134102696759515,
|
12326 |
-
"grad_norm": 0.5662705898284912,
|
12327 |
-
"learning_rate": 1.9964524885570945e-05,
|
12328 |
-
"loss": 1.495,
|
12329 |
-
"step": 176000
|
12330 |
-
},
|
12331 |
-
{
|
12332 |
-
"epoch": 0.13417889147358292,
|
12333 |
-
"grad_norm": 1.1160712242126465,
|
12334 |
-
"learning_rate": 1.9964484584228952e-05,
|
12335 |
-
"loss": 1.5652,
|
12336 |
-
"step": 176100
|
12337 |
-
},
|
12338 |
-
{
|
12339 |
-
"epoch": 0.1342550861876508,
|
12340 |
-
"grad_norm": 0.8404836058616638,
|
12341 |
-
"learning_rate": 1.9964444260048593e-05,
|
12342 |
-
"loss": 1.533,
|
12343 |
-
"step": 176200
|
12344 |
-
},
|
12345 |
-
{
|
12346 |
-
"epoch": 0.13433128090171872,
|
12347 |
-
"grad_norm": 0.6309127807617188,
|
12348 |
-
"learning_rate": 1.996440391302997e-05,
|
12349 |
-
"loss": 1.5509,
|
12350 |
-
"step": 176300
|
12351 |
-
},
|
12352 |
-
{
|
12353 |
-
"epoch": 0.13440747561578664,
|
12354 |
-
"grad_norm": 0.8548604249954224,
|
12355 |
-
"learning_rate": 1.9964363543173166e-05,
|
12356 |
-
"loss": 1.4678,
|
12357 |
-
"step": 176400
|
12358 |
}
|
12359 |
],
|
12360 |
"logging_steps": 100,
|
@@ -12374,7 +12094,7 @@
|
|
12374 |
"attributes": {}
|
12375 |
}
|
12376 |
},
|
12377 |
-
"total_flos": 2.
|
12378 |
"train_batch_size": 1,
|
12379 |
"trial_name": null,
|
12380 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.13135968705307038,
|
5 |
"eval_steps": 200,
|
6 |
+
"global_step": 172400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
12075 |
"learning_rate": 1.996596052275932e-05,
|
12076 |
"loss": 1.5559,
|
12077 |
"step": 172400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12078 |
}
|
12079 |
],
|
12080 |
"logging_steps": 100,
|
|
|
12094 |
"attributes": {}
|
12095 |
}
|
12096 |
},
|
12097 |
+
"total_flos": 2.3486437306994196e+18,
|
12098 |
"train_batch_size": 1,
|
12099 |
"trial_name": null,
|
12100 |
"trial_params": null
|