diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..e36863df2bc13b20909d6711019409e777802fb5 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,4 @@ +{ + "<|im_end|>": 32000, + "<|im_start|>": 32001 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..83d1345ae079449678e9057df5b0c9e249700cc9 --- /dev/null +++ b/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "v2ray/Mixtral-8x22B-v0.1", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 32000, + "hidden_act": "silu", + "hidden_size": 6144, + "initializer_range": 0.02, + "intermediate_size": 16384, + "max_position_embeddings": 65536, + "model_type": "mixtral", + "num_attention_heads": 48, + "num_experts_per_tok": 2, + "num_hidden_layers": 56, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.40.0.dev0", + "use_cache": false, + "vocab_size": 32002 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..16dd90acbcc482b30661bf1c48c719fec177f4a8 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "do_sample": true, + "eos_token_id": 2, + "transformers_version": "4.40.0.dev0" +} diff --git a/model-00001-of-00117.safetensors b/model-00001-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4956eb9efe706d414fc14b5e1053db973bd9667 --- /dev/null +++ b/model-00001-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc5002691f06c27ac489e643fd3b9fc284146dece90ab388081b72c0a063f25 +size 4762879840 diff --git a/model-00002-of-00117.safetensors b/model-00002-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a4b5931ac2d07c54675dcdf55452ad7a9e22f33 --- /dev/null +++ b/model-00002-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1b5c31dea55980a9b646ce2494097e35e691e4295f21ff4ffeccd66b59ed7d1 +size 4831839800 diff --git a/model-00003-of-00117.safetensors b/model-00003-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30fbc4287c29ac3145060a2a74db4d1ddf8ecdf1 --- /dev/null +++ b/model-00003-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ddbfaf55fb5e2415cab20829d5de5229556de6626b32f4d3d96bbc460be783 +size 4781754592 diff --git a/model-00004-of-00117.safetensors b/model-00004-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9442fbb28ce919adb867a52acfee4dfd164d9555 --- /dev/null +++ b/model-00004-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f14147450632a545cb09c344640e6838ea893bb63484500c0827c36e58ed58d +size 4831839800 diff --git a/model-00005-of-00117.safetensors b/model-00005-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..809434a7d04f40a3210b4910b05cf2e184c71ebd --- /dev/null +++ b/model-00005-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ded3ad7edcbe6edda24506d4d388bf081603e7bd8a12841a0959b1112eb26af +size 4781754592 diff --git a/model-00006-of-00117.safetensors b/model-00006-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b8c59ecb51a5008890b5734fb866bd11bed4988c --- /dev/null +++ b/model-00006-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b17c4d67b9454b7a10efb53d0163ee761408ff334f58af68aaa9db84972c949e +size 4831839800 diff --git a/model-00007-of-00117.safetensors b/model-00007-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3ee8374dcd2d98ea98d9382a06a9bd31b3e49d2 --- /dev/null +++ b/model-00007-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7753c82d8db058eed516df7ff10e511f0ae1a222a4882b9d15c209f9ecc73c +size 4781754592 diff --git a/model-00008-of-00117.safetensors b/model-00008-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..02b6e7cea30a1b1e2527682a80a7d83ae6ac2d50 --- /dev/null +++ b/model-00008-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b453fe7ec3831d08208c1bf8047665dad82cea0f14ae28fe5ce006f5ddec60b +size 4831839800 diff --git a/model-00009-of-00117.safetensors b/model-00009-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a17c3c1685ad8b4fd0098d6aaaa1a31830366e9 --- /dev/null +++ b/model-00009-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95207c5c18d23dddcc16d2399fd42f0cb4b4046b9abed4e6212eadace55ff1b7 +size 4781754592 diff --git a/model-00010-of-00117.safetensors b/model-00010-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5beb5cfdb59da2179cbcb55084ce6d15974703c --- /dev/null +++ b/model-00010-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e7438ce11ba78f8aa53522b69df108c46756f606710549db1800e01f2557e99 +size 4831839800 diff --git a/model-00011-of-00117.safetensors b/model-00011-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d6483b7218b22b9ac350d2d7d56629ab4c9c45fa --- /dev/null +++ b/model-00011-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36d3bc159acdf00f775ba1caa94ff6a6011b4176de0321377474c2c7ab57531f +size 4781754592 diff --git a/model-00012-of-00117.safetensors b/model-00012-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0b2765492beb560e7f66feb6fcd76b2d668ef43 --- /dev/null +++ b/model-00012-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:007fa45d011278e4f4ec26afcd2be2832dbdc4f5eeb9de4a951db8f2d8857cb3 +size 4831839800 diff --git a/model-00013-of-00117.safetensors b/model-00013-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..636cf5e2857313dcd346073feebd3a10b9112f87 --- /dev/null +++ b/model-00013-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1958ca6d10b3c677786c9c9bd19e2ea417be9b102ed85d45c04ca55c0646b53 +size 4781754592 diff --git a/model-00014-of-00117.safetensors b/model-00014-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..230fbf3ee729e466fd4792668eea00a347dd3b24 --- /dev/null +++ b/model-00014-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc1064d5796463c81675cc1eacd2ea24425c0291e2610b074ccad46ac0edabe5 +size 4831839800 diff --git a/model-00015-of-00117.safetensors b/model-00015-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea689a4492016abcddf15722fd1cba880ca84819 --- /dev/null +++ b/model-00015-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38cf3fb19d18aa781b4779226c7a9e5026a09b001a04667ceb8f51c70903abd8 +size 4781754592 diff --git a/model-00016-of-00117.safetensors b/model-00016-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..848e5ff73fee0a36839690a124f5bd2fafa40f27 --- /dev/null +++ b/model-00016-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7df301d9acde7f8f821e5992181a56ab2fe6283c42e0ded5d9b8c7a33289a245 +size 4831839800 diff --git a/model-00017-of-00117.safetensors b/model-00017-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..72f6585224299417314f23bc1a213779d9dc23ce --- /dev/null +++ b/model-00017-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2918efe7c741c9f3081e6d74ed0bf9a01c0cceee8e7275a32e18ea1a76f4e507 +size 4781754592 diff --git a/model-00018-of-00117.safetensors b/model-00018-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ecb2494dc5b114648b9203ae3477c44406a64615 --- /dev/null +++ b/model-00018-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a71f6ed97aa70216934380dda6f5146880ed2aaef3caa0fd901ae07090ee2232 +size 4831839800 diff --git a/model-00019-of-00117.safetensors b/model-00019-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7814a6812fb261b9ec4018ae785c4b2f4bcce15 --- /dev/null +++ b/model-00019-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23d513bdefe37af4b6fda7a57c85e19456a6401ab50126d53337842ab79c7931 +size 4781754592 diff --git a/model-00020-of-00117.safetensors b/model-00020-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e4b8b34a948265b29781d05cf5a067b0c0b9f03 --- /dev/null +++ b/model-00020-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f519c89e124efe8b2dffffddf6df07ea3031e7df20410f58507d37a14211ddb +size 4831839800 diff --git a/model-00021-of-00117.safetensors b/model-00021-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16ff7d6585b0e943157079c358a1c3b50620df94 --- /dev/null +++ b/model-00021-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ae2674cce96785abe0c3f9b94a772d2aff88aa80eb103f0da30359aa12d7c1 +size 4982884240 diff --git a/model-00022-of-00117.safetensors b/model-00022-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d8f4595ad7521f0e635bdccb6f6cb59c4cbd420 --- /dev/null +++ b/model-00022-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6731e496eb59b312aa94d70eff938ac8e3a6a9fcbe0fd47fb90280f1e837345f +size 4630710168 diff --git a/model-00023-of-00117.safetensors b/model-00023-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b3ec23a9a34bef574249f9cd29de417363431ad --- /dev/null +++ b/model-00023-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad451806e84e57a11437ab1cb62a3946ff82df6e24392a78ae12ee1b99da90f +size 4831839808 diff --git a/model-00024-of-00117.safetensors b/model-00024-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e2deb31017d10170d64c981a8700627a97f362e4 --- /dev/null +++ b/model-00024-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8c919f3dfe5375cf24d277bb1162c1e307fe67872dfff8e35829b8c5f8a898b +size 4781754608 diff --git a/model-00025-of-00117.safetensors b/model-00025-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a48a9d93534855c042a1ed4df1939bf60acfaa4e --- /dev/null +++ b/model-00025-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5726ff602cffd3c27bb930a5eaf1b2a7d33d382c6265ee532e9dc5ac21f7d77d +size 4831839808 diff --git a/model-00026-of-00117.safetensors b/model-00026-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2bbceaf3d0d762682e3b3ce22ee4f42f63741c7e --- /dev/null +++ b/model-00026-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a512f7d05c5433d3f2f194f0467a142c93fad0b9647ceda79d43b349835729a +size 4781754608 diff --git a/model-00027-of-00117.safetensors b/model-00027-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3a270b1689929866f626c0877d122fd373d486e --- /dev/null +++ b/model-00027-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ebb1a3f8e8ee13b5c726865e8deec1c0251b4ef31fc4dd94bd564b836ce5d8d +size 4831839808 diff --git a/model-00028-of-00117.safetensors b/model-00028-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6804907eb5904ce93d16630035184e2a1994e328 --- /dev/null +++ b/model-00028-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2942fb7ca1738c4bc87e8074064db4ba897d08ae5b4f0fab3d9651773474d7 +size 4781754608 diff --git a/model-00029-of-00117.safetensors b/model-00029-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c7b2d2c639e6564c0e256034cab2e626e061a02 --- /dev/null +++ b/model-00029-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:660bdb970d14f121685ec8ffc4470d2f40435f73f9cbc2f2dfe73abe5cecd84f +size 4831839808 diff --git a/model-00030-of-00117.safetensors b/model-00030-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fce66b903a8adc58766f6412b6821c4512c84496 --- /dev/null +++ b/model-00030-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6aadff68ad38925d9c5d2406aef4a212cc809864dbeb18569efc24347c34878 +size 4781754608 diff --git a/model-00031-of-00117.safetensors b/model-00031-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7de78a22c651f607e9796b7c82b517ce102d7ee --- /dev/null +++ b/model-00031-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef00d619f88d0a58c4a41cf18cc502eaf92c93fb869d07cebb060c042bc10490 +size 4831839808 diff --git a/model-00032-of-00117.safetensors b/model-00032-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..18dca8743ec4e77b65bae4623b2a81caf1b148c0 --- /dev/null +++ b/model-00032-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090ab543c34b05951c3c224c01b84847dc9d962d2f038917ae8ee0c87f6a699a +size 4781754608 diff --git a/model-00033-of-00117.safetensors b/model-00033-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..257baad6962be7210b99607976285fff0f2ac021 --- /dev/null +++ b/model-00033-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85a67db67088d7c47539b1de9d840a483e75e9da57c3ee83fba31be32c4a8cb9 +size 4831839808 diff --git a/model-00034-of-00117.safetensors b/model-00034-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9dce5e156b24fd569506ac9a2b70b527074ef4a5 --- /dev/null +++ b/model-00034-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12416253fbb2f2f04c73f06778d10358c64309ae4ca71f58f2c99d07af3be8c2 +size 4781754608 diff --git a/model-00035-of-00117.safetensors b/model-00035-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..381ffe092401ebd5633753b4372a596ef4854fa8 --- /dev/null +++ b/model-00035-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ef54fdc7dfb252ed1c96fef3b5cd11bff6b0346c628a9a7f2f34f30ef2d3397 +size 4831839808 diff --git a/model-00036-of-00117.safetensors b/model-00036-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..568d74312b6ef9aa21e4f9f1a130405850cd9516 --- /dev/null +++ b/model-00036-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf215bac1467d9faf3bdc6ef5e121a265f22385dfb855067a73b218df957005f +size 4781754608 diff --git a/model-00037-of-00117.safetensors b/model-00037-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b8ff6c26973106ba52eaa603495e6096b5672bc --- /dev/null +++ b/model-00037-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64b3d8dd4fad431d1a3bd56aed46967ab8f87bf3e7915e7168abead5c7004ac7 +size 4831839808 diff --git a/model-00038-of-00117.safetensors b/model-00038-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..972bed08ff05729a3872e787608064a3707c409e --- /dev/null +++ b/model-00038-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02cc26fb2c65c4b75f54768682527df9c74b5381e6265370d3eb131ee02bcc18 +size 4781754608 diff --git a/model-00039-of-00117.safetensors b/model-00039-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97896fd4143992fd34d0047c609687a9f16c673d --- /dev/null +++ b/model-00039-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cbecf08d21981d722f39679f66e7e8c3feaeec6b3bda3eb32cd08ed981e088f +size 4831839808 diff --git a/model-00040-of-00117.safetensors b/model-00040-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd31ea72964cb6e64406a6b642ea4e19b9ca4a75 --- /dev/null +++ b/model-00040-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b48ac25c6be5f7dfc2eaa8beda016f89533ea1acd585935303268fd6bf184ad5 +size 4781754608 diff --git a/model-00041-of-00117.safetensors b/model-00041-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdc472adbcc821ac501b25bda16f4d238723fb5f --- /dev/null +++ b/model-00041-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ae76284fd788bf98c43a432ba1966db37850371411abff57adc23d58398135 +size 4831839808 diff --git a/model-00042-of-00117.safetensors b/model-00042-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4db3d243c9273e4f91b45489177f61b553407e6c --- /dev/null +++ b/model-00042-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c5832f523a3c1560ae7d6681a41847647c13ed7636110faaa9dbb268194d243 +size 4781754608 diff --git a/model-00043-of-00117.safetensors b/model-00043-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..af5e943c4de6e316ffc8aaf30956edba749aa924 --- /dev/null +++ b/model-00043-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03924b0a0c80b7f536f2bb18a734afff74d55181edcdf858fc8441fab11d4319 +size 4831839808 diff --git a/model-00044-of-00117.safetensors b/model-00044-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fed5142c87f5a90b84c7125eb24c9317c12863fd --- /dev/null +++ b/model-00044-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1060b553bb346b9435012441f213a57c9e3f1c405d0ecff45f623cb270a566c7 +size 4781754608 diff --git a/model-00045-of-00117.safetensors b/model-00045-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..13c8823bc718aff84ce928c40adac0288239e501 --- /dev/null +++ b/model-00045-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd18421da6e09c9c49493888ebad34ea7fd62cdc25067a4e5bbd4ab89f7eca9 +size 4831839808 diff --git a/model-00046-of-00117.safetensors b/model-00046-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f39e8f5b4ddbeb96dd960f4887629e67d908e158 --- /dev/null +++ b/model-00046-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e571cbeda4d7d3874da257e24accbf984e8428c1d44a38271ae7a9f962d4b5f +size 4982884256 diff --git a/model-00047-of-00117.safetensors b/model-00047-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e927ad527e769de21f6559ae6076aaba2314025c --- /dev/null +++ b/model-00047-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7343d02ed90f31e2c88b74d711426e13d7bdb3fd42cbebd40e646240c4ff2d94 +size 4630710168 diff --git a/model-00048-of-00117.safetensors b/model-00048-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca1bac8675e9bcc39e1c5eba4db05fedc6788651 --- /dev/null +++ b/model-00048-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3dec44dc3f731e57cff0d1434e914a8a00eceaf9f6f55db603678696d65dd02 +size 4831839808 diff --git a/model-00049-of-00117.safetensors b/model-00049-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f0055d394f618fa341a5efa53258d4bbf64de789 --- /dev/null +++ b/model-00049-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79abc128f41c2c3455e0ddf1ccec3986c19cfc92b546c2eb9baeeee2ab2e3abf +size 4781754608 diff --git a/model-00050-of-00117.safetensors b/model-00050-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a64d498ffece68d96826a4262dc4298ec59dc417 --- /dev/null +++ b/model-00050-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84a3467239e67ac5f4f2cbf249edcdd3d2d11a1ff288a6f1f8c2ee8e9772d9fb +size 4831839808 diff --git a/model-00051-of-00117.safetensors b/model-00051-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a1aab660cc7d8a2d9ab37ed3f7dd9a848d29616 --- /dev/null +++ b/model-00051-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00e4e70e3ed01654be0022cc707b5e1e53836e248afb5814ff07d311789873ed +size 4781754608 diff --git a/model-00052-of-00117.safetensors b/model-00052-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7afc006ef4b866493f24f6aee32b759d3a4c191b --- /dev/null +++ b/model-00052-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0e6097470d0aabfe0445c5d5910fa691aec464510a7a7555f38e9c309acb83b +size 4831839808 diff --git a/model-00053-of-00117.safetensors b/model-00053-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..151daacdee7d610bef2490576a803aa0aed6d9c7 --- /dev/null +++ b/model-00053-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf23040b83d62cd9b5693c1828208e88f0fd5de6ab170bc6aaef1d2bf73fc856 +size 4781754608 diff --git a/model-00054-of-00117.safetensors b/model-00054-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b68b91c80bb322651e4657f513aef35c5475ff7 --- /dev/null +++ b/model-00054-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a57535ada831d9f5327354648a293740ac49f459a9a134513f8113a951c8835a +size 4831839808 diff --git a/model-00055-of-00117.safetensors b/model-00055-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..df859828ca25a8815798381cf6ab866774bc73b2 --- /dev/null +++ b/model-00055-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04e5eefaad61bbed139bf9a2349bae99fbb294ac661115a36b70c67764dc7f64 +size 4781754608 diff --git a/model-00056-of-00117.safetensors b/model-00056-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..510d4c061545f70bc6d04e7f67e51a4eee787aaa --- /dev/null +++ b/model-00056-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:477290ec68ef3b11f9352760942fa3b51e07a7595aae1526bb2ec14581b49618 +size 4831839808 diff --git a/model-00057-of-00117.safetensors b/model-00057-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ca26151b233535015e50ca7ea63fbc9729f3502 --- /dev/null +++ b/model-00057-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c769feb76fee1e6f6b5467e8e324441ba404b9e8e7bec75ae282b8edec9e38 +size 4781754608 diff --git a/model-00058-of-00117.safetensors b/model-00058-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..218a4ffeb433eb7ddaff976d6c81b0557a1c0ead --- /dev/null +++ b/model-00058-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0e42e60acc4217a59a1f4489298391655535b5e10e740887c899c7232ba95a +size 4831839808 diff --git a/model-00059-of-00117.safetensors b/model-00059-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e234116ee5e8fdfde3db0a3b7e4c70322403850f --- /dev/null +++ b/model-00059-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4757c6df6300e3b14d14ec44310beea028d25cd324d3c7da5f90844079c857e +size 4781754608 diff --git a/model-00060-of-00117.safetensors b/model-00060-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..843d86993b1af1cc3a211799e8b3473da6f7f37a --- /dev/null +++ b/model-00060-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:492249372bc8555a510a56bb1c081139667d1cdb119764a6468dbdbf23f651b1 +size 4831839808 diff --git a/model-00061-of-00117.safetensors b/model-00061-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a16dabf9267c5f4099dd913f511b824ab4e0e24 --- /dev/null +++ b/model-00061-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc376574980f67ff9b9ec365d8dca7312a8b68411e26e02f80d0ce3e041b062 +size 4781754608 diff --git a/model-00062-of-00117.safetensors b/model-00062-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..698fc0b875bc8eecf9dc7faf8e3b914fe9603b51 --- /dev/null +++ b/model-00062-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e47627a76632102fd2ae0052884ecfaf41cbccb9e1a94088371ed1af7629aeb0 +size 4831839808 diff --git a/model-00063-of-00117.safetensors b/model-00063-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee9765b44891b91176f071f157cccb34d3ec94b5 --- /dev/null +++ b/model-00063-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d07484ca59f973c1a5e7415e2621ee04a5f7e6b6f6fb530c4a9b1450e167450 +size 4781754608 diff --git a/model-00064-of-00117.safetensors b/model-00064-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f97ba5237f89dc1866cfee8c1b473bdd1a2c1d5 --- /dev/null +++ b/model-00064-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fabb8b7873bf2dcec7fcfc2246adda07fe498a77eb488960ae3e05c2a47cceaf +size 4831839808 diff --git a/model-00065-of-00117.safetensors b/model-00065-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f589c46dc564d29d1929f7a78ca38aa5c3f294d --- /dev/null +++ b/model-00065-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f92559f00cbbbfdd71c4ba4b46af045a8bd63b25bdcd2a6a65d01cad287049d4 +size 4781754608 diff --git a/model-00066-of-00117.safetensors b/model-00066-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1acaa5981bd8025b0809378c0f77f64fc36475c5 --- /dev/null +++ b/model-00066-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c40935489585b08a63fd2ccdabde5eba47a09d359023875da516a8dc61d82e +size 4831839808 diff --git a/model-00067-of-00117.safetensors b/model-00067-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8bacda57f7c3f1d285e5a8703a1d2cef4eaeb47a --- /dev/null +++ b/model-00067-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da40c7bb296865d99794ee84e1de667d1b4169d90b5df056cb0c8dc65522074e +size 4781754608 diff --git a/model-00068-of-00117.safetensors b/model-00068-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4afa02483ae3ae639d1f85b70cc80b1eb670805 --- /dev/null +++ b/model-00068-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4627ea7a689a35992b12d61de26e88194134ae73c83c0163085ae33a18384e35 +size 4831839808 diff --git a/model-00069-of-00117.safetensors b/model-00069-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b73ad7b9ae6e8de2bd6dda8ec8ec93486dca820 --- /dev/null +++ b/model-00069-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca6b10a90d16868c4b9acc440c1af380b323341e1f346d237f044a697079f90e +size 4781754608 diff --git a/model-00070-of-00117.safetensors b/model-00070-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a190c997c73f14fcfd93f1a798f091f9f3f7feec --- /dev/null +++ b/model-00070-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0347015721002eba2b0cd46019640150dfe33ac26c1a1d8e04b55f59db546fdc +size 4831839808 diff --git a/model-00071-of-00117.safetensors b/model-00071-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9c86bc57c8f8931b1582dc7057a08f4c6809eb2 --- /dev/null +++ b/model-00071-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9da5ea26e5aa38daf6a785b83c866b77163e969cee0702be2478425ab729c649 +size 4982884256 diff --git a/model-00072-of-00117.safetensors b/model-00072-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eea4d5df5656d0bbba6a1ce45f1d9e36f6b56eab --- /dev/null +++ b/model-00072-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c144382c05b2191f97b5a97aa8e5738247d062bde34ee6a8b74c237244f1c3e0 +size 4630710168 diff --git a/model-00073-of-00117.safetensors b/model-00073-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0409f827e52fa33c71c239cec7f9bee09bc29a8b --- /dev/null +++ b/model-00073-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98a4ee933e384ddc7c5246e9056799b634b4a1718c1e3fedc2569e79a874405c +size 4831839808 diff --git a/model-00074-of-00117.safetensors b/model-00074-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ee4162ff05e6dc11d856499513486b32b56d76c5 --- /dev/null +++ b/model-00074-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b58033a8a345e65f75e79642b33f39901f10923dac8dedd049c0eb5b3834edc +size 4781754608 diff --git a/model-00075-of-00117.safetensors b/model-00075-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1df9d1d7b07d309e4f2b59862d3c79771099bfc7 --- /dev/null +++ b/model-00075-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd88eb5a8f247d078e99dc6dbe44759d31296f4e9c1713c2fb9a5da11b166550 +size 4831839808 diff --git a/model-00076-of-00117.safetensors b/model-00076-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9af96b207f918c1d44933e1125a99928dcb0389d --- /dev/null +++ b/model-00076-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a150332c0266b94bd363a69463803ffb3c7c39f758cd5a2e899c3089d6f836 +size 4781754608 diff --git a/model-00077-of-00117.safetensors b/model-00077-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c1e8dbf0206c5e91acae98c85682438a69bf151 --- /dev/null +++ b/model-00077-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7edbeaf3c5f86ff8ea4dae00ad1c8e8922d99b3f0983bdbdaa82bc05bacf2ddb +size 4831839808 diff --git a/model-00078-of-00117.safetensors b/model-00078-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4af47df9c5abf8e3807fb2587b3c96c0dc050e3c --- /dev/null +++ b/model-00078-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41dfe91acd2adf55ea53b4f28c807e5434d8d2b5ed855f79c30dcd1ef5ca9f25 +size 4781754608 diff --git a/model-00079-of-00117.safetensors b/model-00079-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a81b048df69e6f45b19d8777686ac54d51eb2d69 --- /dev/null +++ b/model-00079-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb06a2aa544efba2c7c4417d71765a832e77d06cedbd4b37c583e3d867241973 +size 4831839808 diff --git a/model-00080-of-00117.safetensors b/model-00080-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c2142b4d274c39a508952991755cc2dc4bbee34 --- /dev/null +++ b/model-00080-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15787a25671676d2b8e3e41bad1de8ef5d0caceacc4fdca60af2a12361d936d +size 4781754608 diff --git a/model-00081-of-00117.safetensors b/model-00081-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99ef309df37af8632880ea2ae8f5cb92049ca674 --- /dev/null +++ b/model-00081-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b852aae90c0c042ffce94686a5802b8a0ab74034ac49cd433a0ab267faa5299 +size 4831839808 diff --git a/model-00082-of-00117.safetensors b/model-00082-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61765206d5ec836b1da9a4d02204f7f82c99ebf5 --- /dev/null +++ b/model-00082-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5b0da72e82af142cfd127ea86ae675bd2a02e47c9ad0a3722b253f5768a5a0 +size 4781754608 diff --git a/model-00083-of-00117.safetensors b/model-00083-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7239bfc596164efea0ee8d53697a628ae0d7e1c9 --- /dev/null +++ b/model-00083-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90cc17486e23b6caa63f8ee89ebb2274165db85d438832b5b41cced823b1b3c6 +size 4831839808 diff --git a/model-00084-of-00117.safetensors b/model-00084-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db78e9baad971c0aa78c8fa6c54feaed00313235 --- /dev/null +++ b/model-00084-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53552c2a10a5b8ef79ed37cdcacb5a14172109a9042bb93006092f9575d9dcf5 +size 4781754608 diff --git a/model-00085-of-00117.safetensors b/model-00085-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..955c2afdd1770a051e0610c895ab45733a7912f3 --- /dev/null +++ b/model-00085-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81263a3808cb284cfbd81af5221c890f1c5f228afeecdc4597e27963c478e6e9 +size 4831839808 diff --git a/model-00086-of-00117.safetensors b/model-00086-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf1eac919c49285a1dc2ad7cf3e06d3f69ec1b82 --- /dev/null +++ b/model-00086-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa5ab5aec5dda5b219eff7e049fab36da4d33c4f5d122eb5a15a943c9700601 +size 4781754608 diff --git a/model-00087-of-00117.safetensors b/model-00087-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..062f75ca92e350aadf0f8b83180180ccb85b2c15 --- /dev/null +++ b/model-00087-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22cd0689dfbb500ea7974b77c562d1b8fea7e24f08e317b5c338f100dbb51ea8 +size 4831839808 diff --git a/model-00088-of-00117.safetensors b/model-00088-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07d76fe5146c5ed6a2ea5433875b08282844803f --- /dev/null +++ b/model-00088-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb48eeb6eef12c30f9181b5af9ac8c253f9ce5bbf6d2ab76a05b119ca15abc7 +size 4781754608 diff --git a/model-00089-of-00117.safetensors b/model-00089-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ca3afc7bde06ef185c5627babb2d55a97e34d7d --- /dev/null +++ b/model-00089-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5ab150455f9076a3eaa91bd7c1adf536b5ddf4ea082abee403c3fa80b249716 +size 4831839808 diff --git a/model-00090-of-00117.safetensors b/model-00090-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a905e899861801cf247152824f16e61f3d203be1 --- /dev/null +++ b/model-00090-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2865f9596eeb42834a1ae1a9e2a14b7e97de0d4bfd5f5353c8f35972b73bba14 +size 4781754608 diff --git a/model-00091-of-00117.safetensors b/model-00091-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ff5fe6dc178477f3782830a15a7b11d6aaca78b --- /dev/null +++ b/model-00091-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4c45f1641dbe4b450d07a5c5771f51810577fcc8680d9670c5e24de58c7d663 +size 4831839808 diff --git a/model-00092-of-00117.safetensors b/model-00092-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8891e7edc87b0ba42920cfd5b6b88a50827b870 --- /dev/null +++ b/model-00092-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c18f803aa0dff7797105e485ed0abd4fa44f04a4af73f40034755cb6e8afe9c +size 4781754608 diff --git a/model-00093-of-00117.safetensors b/model-00093-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9f13869bb978b28e0ba07f9da966a4996894bcf6 --- /dev/null +++ b/model-00093-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddeed3fa79fa70e2dbed17614f75eec8d848eff2aef5418261a2b3f578a8e55e +size 4831839808 diff --git a/model-00094-of-00117.safetensors b/model-00094-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f544a3eaf634655c99f3d7f557abc85c861da2d4 --- /dev/null +++ b/model-00094-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251406c32bdb0bb0c7e00b68350da99ae7d07fcd94c4cb1d7c6aa37dd1058f30 +size 4781754608 diff --git a/model-00095-of-00117.safetensors b/model-00095-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdddaa806aa4746ffa2ee4c49c5ae06f2430997b --- /dev/null +++ b/model-00095-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87cf302e2c139291646dcb1a33b310dfbd8429653cb78e1022131c12ac4b56f5 +size 4831839808 diff --git a/model-00096-of-00117.safetensors b/model-00096-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7485d11084788c515a39e04b0badac4d1384cefe --- /dev/null +++ b/model-00096-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a1b7ffe158fc4b368f1462f482463eec78bdd5df8b9788843cde5fd0782038 +size 4982884256 diff --git a/model-00097-of-00117.safetensors b/model-00097-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73927ea2ab08d32d603f7308fbddd7a407a4e59f --- /dev/null +++ b/model-00097-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c08bd5450c95a536a8bffe21ccf795ac0c692b62b9276c49184f2abb3e4ce6a +size 4630710168 diff --git a/model-00098-of-00117.safetensors b/model-00098-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..991effe03b4cdcb112c58db8b45709ac05dafdd3 --- /dev/null +++ b/model-00098-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e156e40bed9526c4a5408109c522705606fa610c4cad60b3f27e24bf2634da0 +size 4831839808 diff --git a/model-00099-of-00117.safetensors b/model-00099-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a645ad48e18e9d63061668f792efa6fa1da0b6db --- /dev/null +++ b/model-00099-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:462653fceb3b38ea516cce2cd73dc467cfa173a3c7157f177f6c8e31e7cb775d +size 4781754608 diff --git a/model-00100-of-00117.safetensors b/model-00100-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc8d638425fa99ed1f6ef988421c4a233914ac22 --- /dev/null +++ b/model-00100-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72f18cdd0d999ac1803c1fbf91a205cb2999f249d3ffa96d910ddd0b0e9db90 +size 4831839808 diff --git a/model-00101-of-00117.safetensors b/model-00101-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de45dc75eb9d2a66ceb2b80c4cebcde80f2d1e8f --- /dev/null +++ b/model-00101-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acf9d969d6f3f979884725c67960e22ed1fec44e6db0419dd4035d78707e9f28 +size 4781754608 diff --git a/model-00102-of-00117.safetensors b/model-00102-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15d275aad8fe377b4f7c551664cddc77c2d312cc --- /dev/null +++ b/model-00102-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfb56f52e9d7c2b9d665052ebfcff2ebef71a55f4ca2bb5fd1c59dfd0f3d005f +size 4831839808 diff --git a/model-00103-of-00117.safetensors b/model-00103-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef121609d37cdff596d2d092bdc8765189778ff7 --- /dev/null +++ b/model-00103-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03646e752466bd829f31aad5497699bbdeb9107742c7cda14d0fe29806cf02e2 +size 4781754608 diff --git a/model-00104-of-00117.safetensors b/model-00104-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb40f8a7791ae6639b41b66ad45fc0b46c2041c9 --- /dev/null +++ b/model-00104-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaacbab1af26f19aa2f68a35767ca20b0202279599dec5642161a30d6cd83393 +size 4831839808 diff --git a/model-00105-of-00117.safetensors b/model-00105-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49611b70b0e4ef1da112da7b8dd095aba6ac49df --- /dev/null +++ b/model-00105-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946d5a4d4d6baa966d0b02cf912250c74ca8cfc6ec5b8992af93cf717895aa4b +size 4781754608 diff --git a/model-00106-of-00117.safetensors b/model-00106-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b43d497e209fafba4ca8f661bbc25d2338f2868a --- /dev/null +++ b/model-00106-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:366a9a367e43af2bb7d760c7a5975ed68c0254777fd8a2875bc1d0a9ef740151 +size 4831839808 diff --git a/model-00107-of-00117.safetensors b/model-00107-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56e7fc3a38a33fb4a4e2a40fdfe84ece8a24ee8c --- /dev/null +++ b/model-00107-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b276dbaae6468c477606b905f410ae027395e3955cb7e9d79b538fc10a4cebe +size 4781754608 diff --git a/model-00108-of-00117.safetensors b/model-00108-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..016d361d0be196731ba940f70f75dd5563898736 --- /dev/null +++ b/model-00108-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d423435ae845db8ae19b2a2a6070f58098d1d4f6575cf4b2a69890ab1479631a +size 4831839808 diff --git a/model-00109-of-00117.safetensors b/model-00109-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4e0d1579b8f433a6c2b5fbf81537d292d8f3ac0 --- /dev/null +++ b/model-00109-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbfab051c02fef60267c5a7769fac5d14fee18550947c8ddb2a3d9bbf9ae160b +size 4781754608 diff --git a/model-00110-of-00117.safetensors b/model-00110-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..044eac88223e04ce947d9cc63712ab2574376dcb --- /dev/null +++ b/model-00110-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb4838ae3681fa2b4980faafa9fffc22b1afa79c5c8561655e4e8ab16787786 +size 4831839808 diff --git a/model-00111-of-00117.safetensors b/model-00111-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15894f25a452424b439d9767c52c32202db9d7e5 --- /dev/null +++ b/model-00111-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eea7c567d9250fb7bfdd0914cd0a469ec37191313034a2e904a74ca1c3b0194e +size 4781754608 diff --git a/model-00112-of-00117.safetensors b/model-00112-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..efd0f7f1ef94d0b356d2c48fd630155027fbfcf5 --- /dev/null +++ b/model-00112-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eda67fa5e3595438fd1155f666ddbbccda380568dd2e8036c65b0f9758fde0a +size 4831839808 diff --git a/model-00113-of-00117.safetensors b/model-00113-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34ffae2bc565aa10963b82b43ac16ef5464ed968 --- /dev/null +++ b/model-00113-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4da2dd1ad68689874093645ee158d2ff52922c5e1f89881f7e9dcbc0836777e4 +size 4781754608 diff --git a/model-00114-of-00117.safetensors b/model-00114-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73bc0564f3d09ace13a2d01a31e82dc818b637c4 --- /dev/null +++ b/model-00114-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d9b82b347983a52fe5381efb3fb3e2a70851be7db173affbb4b038b2607ac1 +size 4831839808 diff --git a/model-00115-of-00117.safetensors b/model-00115-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f5938e8b55df64386b02c407f4b591451617dde --- /dev/null +++ b/model-00115-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f26a990807928b39e68000754a0fc851fa7629240eb2f5f56ca10da7523458e +size 4781754608 diff --git a/model-00116-of-00117.safetensors b/model-00116-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7788be9ff7cd4680fa3ddf44934f722fa592ace0 --- /dev/null +++ b/model-00116-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6994acc120c9f60628449c99f8c283a918653df45b73bc75186bd4c9532563 +size 4831839808 diff --git a/model-00117-of-00117.safetensors b/model-00117-of-00117.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffeece59020cb36fe25859bd6362d17753edc830 --- /dev/null +++ b/model-00117-of-00117.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d9224ddda0b69332504bd417c57290b4f75b4730597e52302ddbe7252999e94 +size 4813088480 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..873380b7bd029dfe7f5b976438deaca1c7947cbe --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1746 @@ +{ + "metadata": { + "total_size": 562482634752 + }, + "weight_map": { + "lm_head.weight": "model-00117-of-00117.safetensors", + "model.embed_tokens.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00117.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00117.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00117.safetensors", + "model.layers.0.input_layernorm.weight": "model-00003-of-00117.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00003-of-00117.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00117.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00117.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00117.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00117.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00117.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00003-of-00117.safetensors", + "model.layers.1.input_layernorm.weight": "model-00005-of-00117.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00005-of-00117.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00003-of-00117.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00003-of-00117.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00003-of-00117.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00003-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00117.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00117.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00022-of-00117.safetensors", + "model.layers.10.input_layernorm.weight": "model-00024-of-00117.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00024-of-00117.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00022-of-00117.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00022-of-00117.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00021-of-00117.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00022-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00117.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00117.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00024-of-00117.safetensors", + "model.layers.11.input_layernorm.weight": "model-00026-of-00117.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00026-of-00117.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00024-of-00117.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00024-of-00117.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00024-of-00117.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00024-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00117.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00028-of-00117.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00026-of-00117.safetensors", + "model.layers.12.input_layernorm.weight": "model-00028-of-00117.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00028-of-00117.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00026-of-00117.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00026-of-00117.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00026-of-00117.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00026-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00117.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00117.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00028-of-00117.safetensors", + "model.layers.13.input_layernorm.weight": "model-00030-of-00117.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00030-of-00117.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00028-of-00117.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00028-of-00117.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00028-of-00117.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00028-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00032-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00032-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00032-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00032-of-00117.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00032-of-00117.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00030-of-00117.safetensors", + "model.layers.14.input_layernorm.weight": "model-00032-of-00117.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00032-of-00117.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00030-of-00117.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00030-of-00117.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00030-of-00117.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00030-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00117.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00032-of-00117.safetensors", + "model.layers.15.input_layernorm.weight": "model-00034-of-00117.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00034-of-00117.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00032-of-00117.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00032-of-00117.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00032-of-00117.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00032-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00034-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00035-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00117.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00034-of-00117.safetensors", + "model.layers.16.input_layernorm.weight": "model-00036-of-00117.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00036-of-00117.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00034-of-00117.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00034-of-00117.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00034-of-00117.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00034-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00038-of-00117.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00036-of-00117.safetensors", + "model.layers.17.input_layernorm.weight": "model-00038-of-00117.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00038-of-00117.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00036-of-00117.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00036-of-00117.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00036-of-00117.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00036-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00117.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00038-of-00117.safetensors", + "model.layers.18.input_layernorm.weight": "model-00040-of-00117.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00040-of-00117.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00038-of-00117.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00038-of-00117.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00038-of-00117.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00038-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00117.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00040-of-00117.safetensors", + "model.layers.19.input_layernorm.weight": "model-00042-of-00117.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00042-of-00117.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00040-of-00117.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00040-of-00117.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00040-of-00117.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00040-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00117.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00117.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00005-of-00117.safetensors", + "model.layers.2.input_layernorm.weight": "model-00007-of-00117.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00007-of-00117.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00005-of-00117.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00005-of-00117.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00005-of-00117.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00005-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00044-of-00117.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00042-of-00117.safetensors", + "model.layers.20.input_layernorm.weight": "model-00044-of-00117.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00044-of-00117.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00042-of-00117.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00042-of-00117.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00042-of-00117.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00042-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00117.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00044-of-00117.safetensors", + "model.layers.21.input_layernorm.weight": "model-00046-of-00117.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00046-of-00117.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00044-of-00117.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00044-of-00117.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00044-of-00117.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00044-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00117.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00117.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00047-of-00117.safetensors", + "model.layers.22.input_layernorm.weight": "model-00049-of-00117.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00049-of-00117.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00047-of-00117.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00047-of-00117.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00046-of-00117.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00047-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00049-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00050-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00117.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00117.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00049-of-00117.safetensors", + "model.layers.23.input_layernorm.weight": "model-00051-of-00117.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00051-of-00117.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00049-of-00117.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00049-of-00117.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00049-of-00117.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00049-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00051-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00053-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00053-of-00117.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00053-of-00117.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00051-of-00117.safetensors", + "model.layers.24.input_layernorm.weight": "model-00053-of-00117.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00053-of-00117.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00051-of-00117.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00051-of-00117.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00051-of-00117.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00051-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00054-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00055-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00055-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00055-of-00117.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00055-of-00117.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00053-of-00117.safetensors", + "model.layers.25.input_layernorm.weight": "model-00055-of-00117.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00055-of-00117.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00053-of-00117.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00053-of-00117.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00053-of-00117.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00053-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00055-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00056-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00057-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00057-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00057-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00057-of-00117.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00057-of-00117.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00055-of-00117.safetensors", + "model.layers.26.input_layernorm.weight": "model-00057-of-00117.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00057-of-00117.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00055-of-00117.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00055-of-00117.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00055-of-00117.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00055-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00057-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00059-of-00117.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00057-of-00117.safetensors", + "model.layers.27.input_layernorm.weight": "model-00059-of-00117.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00059-of-00117.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00057-of-00117.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00057-of-00117.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00057-of-00117.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00057-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00059-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00059-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00059-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00059-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00059-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00060-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00061-of-00117.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00059-of-00117.safetensors", + "model.layers.28.input_layernorm.weight": "model-00061-of-00117.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00061-of-00117.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00059-of-00117.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00059-of-00117.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00059-of-00117.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00059-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00061-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00061-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00061-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00061-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00062-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00063-of-00117.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00061-of-00117.safetensors", + "model.layers.29.input_layernorm.weight": "model-00063-of-00117.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00063-of-00117.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00061-of-00117.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00061-of-00117.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00061-of-00117.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00061-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00117.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00007-of-00117.safetensors", + "model.layers.3.input_layernorm.weight": "model-00009-of-00117.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00009-of-00117.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00007-of-00117.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00007-of-00117.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00007-of-00117.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00007-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00063-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00063-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00063-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00064-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00065-of-00117.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00063-of-00117.safetensors", + "model.layers.30.input_layernorm.weight": "model-00065-of-00117.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00065-of-00117.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00063-of-00117.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00063-of-00117.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00063-of-00117.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00063-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00065-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00065-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00066-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00067-of-00117.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00065-of-00117.safetensors", + "model.layers.31.input_layernorm.weight": "model-00067-of-00117.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00067-of-00117.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00065-of-00117.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00065-of-00117.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00065-of-00117.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00065-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00067-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00068-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00069-of-00117.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00067-of-00117.safetensors", + "model.layers.32.input_layernorm.weight": "model-00069-of-00117.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00069-of-00117.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00067-of-00117.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00067-of-00117.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00067-of-00117.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00067-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00070-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00071-of-00117.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00069-of-00117.safetensors", + "model.layers.33.input_layernorm.weight": "model-00071-of-00117.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00071-of-00117.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00069-of-00117.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00069-of-00117.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00069-of-00117.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00069-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00072-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00073-of-00117.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00074-of-00117.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00072-of-00117.safetensors", + "model.layers.34.input_layernorm.weight": "model-00074-of-00117.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00074-of-00117.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00072-of-00117.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00072-of-00117.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00071-of-00117.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00072-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00074-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00075-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00076-of-00117.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00076-of-00117.safetensors", + "model.layers.35.block_sparse_moe.gate.weight": "model-00074-of-00117.safetensors", + "model.layers.35.input_layernorm.weight": "model-00076-of-00117.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00076-of-00117.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00074-of-00117.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00074-of-00117.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00074-of-00117.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00074-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00076-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00077-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00078-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00078-of-00117.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00078-of-00117.safetensors", + "model.layers.36.block_sparse_moe.gate.weight": "model-00076-of-00117.safetensors", + "model.layers.36.input_layernorm.weight": "model-00078-of-00117.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00078-of-00117.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00076-of-00117.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00076-of-00117.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00076-of-00117.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00076-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00078-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00079-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00080-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00080-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00080-of-00117.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00080-of-00117.safetensors", + "model.layers.37.block_sparse_moe.gate.weight": "model-00078-of-00117.safetensors", + "model.layers.37.input_layernorm.weight": "model-00080-of-00117.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00080-of-00117.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00078-of-00117.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00078-of-00117.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00078-of-00117.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00078-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00080-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00081-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00082-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00082-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00082-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00082-of-00117.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00082-of-00117.safetensors", + "model.layers.38.block_sparse_moe.gate.weight": "model-00080-of-00117.safetensors", + "model.layers.38.input_layernorm.weight": "model-00082-of-00117.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00082-of-00117.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00080-of-00117.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00080-of-00117.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00080-of-00117.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00080-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00082-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00083-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00084-of-00117.safetensors", + "model.layers.39.block_sparse_moe.gate.weight": "model-00082-of-00117.safetensors", + "model.layers.39.input_layernorm.weight": "model-00084-of-00117.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00084-of-00117.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00082-of-00117.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00082-of-00117.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00082-of-00117.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00082-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00117.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00009-of-00117.safetensors", + "model.layers.4.input_layernorm.weight": "model-00011-of-00117.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00011-of-00117.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00009-of-00117.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00009-of-00117.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00009-of-00117.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00009-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00084-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00084-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00084-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00084-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00084-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w1.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w2.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w3.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w1.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w2.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w3.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w1.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w2.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w3.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w1.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w2.weight": "model-00085-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w3.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w1.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w2.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w3.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w1.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w2.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w3.weight": "model-00086-of-00117.safetensors", + "model.layers.40.block_sparse_moe.gate.weight": "model-00084-of-00117.safetensors", + "model.layers.40.input_layernorm.weight": "model-00086-of-00117.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00086-of-00117.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00084-of-00117.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00084-of-00117.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00084-of-00117.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00084-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00086-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00086-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00086-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00086-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w1.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w2.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w3.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w1.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w2.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w3.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w1.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w2.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w3.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w1.weight": "model-00087-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w2.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w3.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w1.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w2.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w3.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w1.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w2.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w3.weight": "model-00088-of-00117.safetensors", + "model.layers.41.block_sparse_moe.gate.weight": "model-00086-of-00117.safetensors", + "model.layers.41.input_layernorm.weight": "model-00088-of-00117.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00088-of-00117.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00086-of-00117.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00086-of-00117.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00086-of-00117.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00086-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00088-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00088-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00088-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w1.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w2.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w3.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w1.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w2.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w3.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w1.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w2.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w3.weight": "model-00089-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w1.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w2.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w3.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w1.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w2.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w3.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w1.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w2.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w3.weight": "model-00090-of-00117.safetensors", + "model.layers.42.block_sparse_moe.gate.weight": "model-00088-of-00117.safetensors", + "model.layers.42.input_layernorm.weight": "model-00090-of-00117.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00090-of-00117.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00088-of-00117.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00088-of-00117.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00088-of-00117.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00088-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00090-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00090-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w1.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w2.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w3.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w1.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w2.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w3.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w1.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w2.weight": "model-00091-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w3.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w1.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w2.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w3.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w1.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w2.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w3.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w1.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w2.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w3.weight": "model-00092-of-00117.safetensors", + "model.layers.43.block_sparse_moe.gate.weight": "model-00090-of-00117.safetensors", + "model.layers.43.input_layernorm.weight": "model-00092-of-00117.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00092-of-00117.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00090-of-00117.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00090-of-00117.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00090-of-00117.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00090-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00092-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w1.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w2.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w3.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w1.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w2.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w3.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w1.weight": "model-00093-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w2.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w3.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w1.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w2.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w3.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w1.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w2.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w3.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w1.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w2.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w3.weight": "model-00094-of-00117.safetensors", + "model.layers.44.block_sparse_moe.gate.weight": "model-00092-of-00117.safetensors", + "model.layers.44.input_layernorm.weight": "model-00094-of-00117.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00094-of-00117.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00092-of-00117.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00092-of-00117.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00092-of-00117.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00092-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w1.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w2.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w3.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w1.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w2.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w3.weight": "model-00095-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w1.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w2.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w3.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w1.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w2.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w3.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w1.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w2.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w3.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w1.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w2.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w3.weight": "model-00096-of-00117.safetensors", + "model.layers.45.block_sparse_moe.gate.weight": "model-00094-of-00117.safetensors", + "model.layers.45.input_layernorm.weight": "model-00096-of-00117.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00096-of-00117.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00094-of-00117.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00094-of-00117.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00094-of-00117.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00094-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w1.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w2.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w3.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w1.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w2.weight": "model-00097-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w3.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w1.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w2.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w3.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w1.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w2.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w3.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w1.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w2.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w3.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w1.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w2.weight": "model-00098-of-00117.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w3.weight": "model-00099-of-00117.safetensors", + "model.layers.46.block_sparse_moe.gate.weight": "model-00097-of-00117.safetensors", + "model.layers.46.input_layernorm.weight": "model-00099-of-00117.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00099-of-00117.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00097-of-00117.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00097-of-00117.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00096-of-00117.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00097-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w1.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w2.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w3.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w1.weight": "model-00099-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w2.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w3.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w1.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w2.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w3.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w1.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w2.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w3.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w1.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w2.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w3.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w1.weight": "model-00100-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w2.weight": "model-00101-of-00117.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w3.weight": "model-00101-of-00117.safetensors", + "model.layers.47.block_sparse_moe.gate.weight": "model-00099-of-00117.safetensors", + "model.layers.47.input_layernorm.weight": "model-00101-of-00117.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00101-of-00117.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00099-of-00117.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00099-of-00117.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00099-of-00117.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00099-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w1.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w2.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w3.weight": "model-00101-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w1.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w2.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w3.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w1.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w2.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w3.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w1.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w2.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w3.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w1.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w2.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w3.weight": "model-00102-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w1.weight": "model-00103-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w2.weight": "model-00103-of-00117.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w3.weight": "model-00103-of-00117.safetensors", + "model.layers.48.block_sparse_moe.gate.weight": "model-00101-of-00117.safetensors", + "model.layers.48.input_layernorm.weight": "model-00103-of-00117.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00103-of-00117.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00101-of-00117.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00101-of-00117.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00101-of-00117.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00101-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w1.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w2.weight": "model-00103-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w3.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w1.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w2.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w3.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w1.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w2.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w3.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w1.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w2.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w3.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w1.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w2.weight": "model-00104-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w3.weight": "model-00105-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w1.weight": "model-00105-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w2.weight": "model-00105-of-00117.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w3.weight": "model-00105-of-00117.safetensors", + "model.layers.49.block_sparse_moe.gate.weight": "model-00103-of-00117.safetensors", + "model.layers.49.input_layernorm.weight": "model-00105-of-00117.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00105-of-00117.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00103-of-00117.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00103-of-00117.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00103-of-00117.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00103-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00117.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00011-of-00117.safetensors", + "model.layers.5.input_layernorm.weight": "model-00013-of-00117.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00013-of-00117.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00011-of-00117.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00011-of-00117.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00011-of-00117.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00011-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w1.weight": "model-00105-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w2.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w3.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w1.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w2.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w3.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w1.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w2.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w3.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w1.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w2.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w3.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w1.weight": "model-00106-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w2.weight": "model-00107-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w3.weight": "model-00107-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w1.weight": "model-00107-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w2.weight": "model-00107-of-00117.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w3.weight": "model-00107-of-00117.safetensors", + "model.layers.50.block_sparse_moe.gate.weight": "model-00105-of-00117.safetensors", + "model.layers.50.input_layernorm.weight": "model-00107-of-00117.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00107-of-00117.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00105-of-00117.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00105-of-00117.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00105-of-00117.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00105-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00107-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w1.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w2.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w3.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w1.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w2.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w3.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w1.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w2.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w3.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w1.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w2.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w3.weight": "model-00108-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w1.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w2.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w3.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w1.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w2.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w3.weight": "model-00109-of-00117.safetensors", + "model.layers.51.block_sparse_moe.gate.weight": "model-00107-of-00117.safetensors", + "model.layers.51.input_layernorm.weight": "model-00109-of-00117.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00109-of-00117.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00107-of-00117.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00107-of-00117.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00107-of-00117.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00107-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w1.weight": "model-00109-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w2.weight": "model-00109-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w3.weight": "model-00109-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w1.weight": "model-00109-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w2.weight": "model-00109-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w3.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w1.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w2.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w3.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w1.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w2.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w3.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w1.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w2.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w3.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w1.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w2.weight": "model-00110-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w3.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w1.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w2.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w3.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w1.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w2.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w3.weight": "model-00111-of-00117.safetensors", + "model.layers.52.block_sparse_moe.gate.weight": "model-00109-of-00117.safetensors", + "model.layers.52.input_layernorm.weight": "model-00111-of-00117.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00111-of-00117.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00109-of-00117.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00109-of-00117.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00109-of-00117.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00109-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w1.weight": "model-00111-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w2.weight": "model-00111-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w3.weight": "model-00111-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w1.weight": "model-00111-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w2.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w3.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w1.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w2.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w3.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w1.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w2.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w3.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w1.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w2.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w3.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w1.weight": "model-00112-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w2.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w3.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w1.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w2.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w3.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w1.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w2.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w3.weight": "model-00113-of-00117.safetensors", + "model.layers.53.block_sparse_moe.gate.weight": "model-00111-of-00117.safetensors", + "model.layers.53.input_layernorm.weight": "model-00113-of-00117.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00113-of-00117.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00111-of-00117.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00111-of-00117.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00111-of-00117.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00111-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w1.weight": "model-00113-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w2.weight": "model-00113-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w3.weight": "model-00113-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w1.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w2.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w3.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w1.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w2.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w3.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w1.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w2.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w3.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w1.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w2.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w3.weight": "model-00114-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w1.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w2.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w3.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w1.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w2.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w3.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w1.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w2.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w3.weight": "model-00115-of-00117.safetensors", + "model.layers.54.block_sparse_moe.gate.weight": "model-00113-of-00117.safetensors", + "model.layers.54.input_layernorm.weight": "model-00115-of-00117.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00115-of-00117.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00113-of-00117.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00113-of-00117.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00113-of-00117.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00113-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w1.weight": "model-00115-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w2.weight": "model-00115-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w3.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w1.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w2.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w3.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w1.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w2.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w3.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w1.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w2.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w3.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w1.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w2.weight": "model-00116-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w3.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w1.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w2.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w3.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w1.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w2.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w3.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w1.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w2.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w3.weight": "model-00117-of-00117.safetensors", + "model.layers.55.block_sparse_moe.gate.weight": "model-00115-of-00117.safetensors", + "model.layers.55.input_layernorm.weight": "model-00117-of-00117.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00117-of-00117.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00115-of-00117.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00115-of-00117.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00115-of-00117.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00115-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00117.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00013-of-00117.safetensors", + "model.layers.6.input_layernorm.weight": "model-00015-of-00117.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00015-of-00117.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00013-of-00117.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00013-of-00117.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00013-of-00117.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00013-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00117.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00015-of-00117.safetensors", + "model.layers.7.input_layernorm.weight": "model-00017-of-00117.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00017-of-00117.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00015-of-00117.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00015-of-00117.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00015-of-00117.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00015-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00117.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00017-of-00117.safetensors", + "model.layers.8.input_layernorm.weight": "model-00019-of-00117.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00019-of-00117.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00017-of-00117.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00017-of-00117.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00017-of-00117.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00017-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00117.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00019-of-00117.safetensors", + "model.layers.9.input_layernorm.weight": "model-00021-of-00117.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00021-of-00117.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00019-of-00117.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00019-of-00117.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00019-of-00117.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00019-of-00117.safetensors", + "model.norm.weight": "model-00117-of-00117.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..40b1c6dadc2aed5b9e61dc7f9c7299e0aee16069 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1a00b7631fa5bf71e294fba19eafd227f29cdc89 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,61 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": true, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +}