jtatman committed
Commit 37d2374
1 Parent(s): bd8b024

Upload folder using huggingface_hub

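The commit message points at huggingface_hub's folder upload. A minimal sketch of how a commit like this is typically produced; the repo id and local path here are assumptions, not taken from this page:

from huggingface_hub import HfApi

api = HfApi()
# Uploads every file in the folder as a single commit on the Hub.
api.upload_folder(
    folder_path="./merged-model",                    # assumed local output dir
    repo_id="jtatman/SciPhi-Mistral-7B-32k-sliced",  # assumed repo id
    commit_message="Upload folder using huggingface_hub",
)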
README.md CHANGED
@@ -19,52 +19,31 @@ SciPhi-Mistral-7B-32k-sliced is a merge of the following models using [LazyMerge
 slices:
 - sources:
   - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [0, 0]
+    layer_range: [0, 3]
 - sources:
   - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [1, 1]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [3, 3]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [5, 5]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [6, 6]
+    layer_range: [5, 7]
 - sources:
   - model: SciPhi/SciPhi-Mistral-7B-32k
     layer_range: [10, 10]
 - sources:
   - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [17, 17]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [18, 18]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [19, 19]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [20, 20]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [23, 23]
+    layer_range: [17, 23]
 - sources:
   - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [32, 32]
+    layer_range: [31, 32]
 - sources:
   - model: Locutusque/TinyMistral-248M-v2.5
-    layer_range: [0, 0]
+    layer_range: [0, 1]
 - sources:
   - model: Locutusque/TinyMistral-248M-v2.5
-    layer_range: [11, 11]
+    layer_range: [11, 12]
 - sources:
   - model: Locutusque/TinyMistral-248M-v2.5-Instruct
-    layer_range: [0, 0]
+    layer_range: [0, 1]
 - sources:
   - model: Locutusque/TinyMistral-248M-v2.5-Instruct
-    layer_range: [11, 11]
+    layer_range: [11, 12]
 merge_method: slerp
 base_model: Locutusque/TinyMistral-248M-v2.5-Instruct
 parameters:
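The substance of this change: mergekit reads layer_range as a half-open interval [start, end), so a range like [0, 0] selects no layers at all, which is how the previous merge ended up with zero hidden layers (see the config.json change below). The widened ranges select real slices. A quick sanity check in plain Python, with the ranges copied from the updated config; note that [10, 10] is still empty and contributes nothing:

# layer_range entries from the updated slice config; mergekit treats them as [start, end)
ranges = [
    (0, 3), (5, 7), (10, 10), (17, 23), (31, 32),  # SciPhi/SciPhi-Mistral-7B-32k
    (0, 1), (11, 12),                              # Locutusque/TinyMistral-248M-v2.5
    (0, 1), (11, 12),                              # Locutusque/TinyMistral-248M-v2.5-Instruct
]
total_layers = sum(end - start for start, end in ranges)
print(total_layers)  # 16, matching num_hidden_layers in the updated config.json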
config.json CHANGED
@@ -13,7 +13,7 @@
   "max_position_embeddings": 32768,
   "model_type": "mistral",
   "num_attention_heads": 32,
-  "num_hidden_layers": 0,
+  "num_hidden_layers": 16,
   "num_key_value_heads": 8,
   "rms_norm_eps": 1e-06,
   "rope_theta": 10000.0,
mergekit_config.yml CHANGED
@@ -2,52 +2,31 @@
 slices:
 - sources:
   - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [0, 0]
+    layer_range: [0, 3]
 - sources:
   - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [1, 1]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [3, 3]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [5, 5]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [6, 6]
+    layer_range: [5, 7]
 - sources:
   - model: SciPhi/SciPhi-Mistral-7B-32k
     layer_range: [10, 10]
 - sources:
   - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [17, 17]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [18, 18]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [19, 19]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [20, 20]
-- sources:
-  - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [23, 23]
+    layer_range: [17, 23]
 - sources:
   - model: SciPhi/SciPhi-Mistral-7B-32k
-    layer_range: [32, 32]
+    layer_range: [31, 32]
 - sources:
   - model: Locutusque/TinyMistral-248M-v2.5
-    layer_range: [0, 0]
+    layer_range: [0, 1]
 - sources:
   - model: Locutusque/TinyMistral-248M-v2.5
-    layer_range: [11, 11]
+    layer_range: [11, 12]
 - sources:
   - model: Locutusque/TinyMistral-248M-v2.5-Instruct
-    layer_range: [0, 0]
+    layer_range: [0, 1]
 - sources:
   - model: Locutusque/TinyMistral-248M-v2.5-Instruct
-    layer_range: [11, 11]
+    layer_range: [11, 12]
 merge_method: slerp
 base_model: Locutusque/TinyMistral-248M-v2.5-Instruct
 parameters:
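This is the same slice config as in the README above; mergekit writes it out alongside the merged weights for reproducibility. For reference, the merge it describes is typically re-run with mergekit's command-line entry point (the output directory here is an assumption):

# Reads the slice/slerp config above and writes the merged model to ./merged
mergekit-yaml mergekit_config.yml ./merged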
model-00001-of-00001.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db00d61113ef1c6db26043cdc2f6be07fc9a0b9f9c3e3a3d77cd050d5b21acf2
-size 327692600
+oid sha256:b25afb4975084363df26d6b6cf49fa6ab17ea46996f7acad6965a8a45929d388
+size 1636367680
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39f4175ec1c3299d5ba3755fc0f37c36899400ca8a6db160d0a89880a9a60582
+size 1932103880
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e9114872344e623ef09e1fcbd0875739938205736893dc19d5771c4353478f1
+size 1889587008
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:104ea58074879641867c4c0875dca16c85f41153450b9aac7a65c4f5b66a3dca
+size 1862357376
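Each of these entries is a git-lfs pointer (version, oid, size); the tensor data itself lives in LFS storage, not in git history. A small sketch for verifying that a downloaded shard matches the sha256 oid recorded in its pointer; the local path is assumed:

import hashlib

def file_sha256(path: str) -> str:
    # Stream the file in 1 MiB chunks so large shards need not fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# oid from the model-00001-of-00003.safetensors pointer committed above
expected = "39f4175ec1c3299d5ba3755fc0f37c36899400ca8a6db160d0a89880a9a60582"
assert file_sha256("model-00001-of-00003.safetensors") == expected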
model.safetensors.index.json CHANGED
@@ -1 +1 @@
- {"metadata": {"mergekit_version": "0.0.4.1"}, "weight_map": {"lm_head.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors", "model.embed_tokens.weight": "model-00001-of-00001.safetensors"}}
+ {"metadata": {"mergekit_version": "0.0.4.1"}, "weight_map": {"lm_head.weight": "model-00001-of-00003.safetensors", "model.norm.weight": "model-00001-of-00003.safetensors", "model.layers.15.mlp.down_proj.weight": "model-00001-of-00003.safetensors", "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", "model.layers.15.mlp.up_proj.weight": "model-00001-of-00003.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", "model.layers.15.input_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors", "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", "model.layers.14.mlp.up_proj.weight": "model-00001-of-00003.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors", "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", "model.layers.13.mlp.up_proj.weight": "model-00001-of-00003.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors", "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", "model.layers.12.mlp.up_proj.weight": "model-00001-of-00003.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", 
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", "model.layers.0.mlp.down_proj.weight": "model-00002-of-00003.safetensors", "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", "model.layers.0.mlp.up_proj.weight": "model-00002-of-00003.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", "model.layers.0.input_layernorm.weight": "model-00002-of-00003.safetensors", "model.embed_tokens.weight": "model-00002-of-00003.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", "model.layers.9.mlp.down_proj.weight": "model-00002-of-00003.safetensors", "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", "model.layers.9.mlp.up_proj.weight": "model-00002-of-00003.safetensors", 
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", "model.layers.9.input_layernorm.weight": "model-00002-of-00003.safetensors", "model.layers.8.mlp.down_proj.weight": "model-00002-of-00003.safetensors", "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", "model.layers.8.mlp.up_proj.weight": "model-00002-of-00003.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", "model.layers.8.input_layernorm.weight": "model-00002-of-00003.safetensors", "model.layers.7.mlp.down_proj.weight": "model-00002-of-00003.safetensors", "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", "model.layers.7.mlp.up_proj.weight": "model-00003-of-00003.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", "model.layers.7.input_layernorm.weight": "model-00003-of-00003.safetensors", "model.layers.6.mlp.down_proj.weight": "model-00003-of-00003.safetensors", "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", "model.layers.6.mlp.up_proj.weight": "model-00003-of-00003.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", "model.layers.6.input_layernorm.weight": "model-00003-of-00003.safetensors", "model.layers.5.mlp.down_proj.weight": "model-00003-of-00003.safetensors", "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", "model.layers.5.mlp.up_proj.weight": "model-00003-of-00003.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", "model.layers.5.input_layernorm.weight": "model-00003-of-00003.safetensors", "model.layers.11.mlp.down_proj.weight": "model-00003-of-00003.safetensors", "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", "model.layers.11.mlp.up_proj.weight": "model-00003-of-00003.safetensors", "model.layers.11.post_attention_layernorm.weight": 
"model-00003-of-00003.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", "model.layers.11.input_layernorm.weight": "model-00003-of-00003.safetensors", "model.layers.10.mlp.down_proj.weight": "model-00003-of-00003.safetensors", "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", "model.layers.10.mlp.up_proj.weight": "model-00003-of-00003.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", "model.layers.10.input_layernorm.weight": "model-00003-of-00003.safetensors"}}