jtatman committed on
Commit
1bfeb7f
1 Parent(s): 3dfb7b9

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. README.md +20 -29
  2. config.json +7 -7
  3. mergekit_config.yml +20 -5
  4. special_tokens_map.json +9 -7
  5. tokenizer.json +71 -12
  6. tokenizer_config.json +54 -11
README.md CHANGED
@@ -4,43 +4,25 @@ tags:
4
  - mergekit
5
  - lazymergekit
6
  - SciPhi/SciPhi-Mistral-7B-32k
7
- - SciPhi/SciPhi-Mistral-7B-32k
8
- - SciPhi/SciPhi-Mistral-7B-32k
9
- - SciPhi/SciPhi-Mistral-7B-32k
10
- - SciPhi/SciPhi-Mistral-7B-32k
11
- - SciPhi/SciPhi-Mistral-7B-32k
12
- - SciPhi/SciPhi-Mistral-7B-32k
13
- - SciPhi/SciPhi-Mistral-7B-32k
14
- - SciPhi/SciPhi-Mistral-7B-32k
15
  base_model:
16
  - SciPhi/SciPhi-Mistral-7B-32k
17
- - SciPhi/SciPhi-Mistral-7B-32k
18
- - SciPhi/SciPhi-Mistral-7B-32k
19
- - SciPhi/SciPhi-Mistral-7B-32k
20
- - SciPhi/SciPhi-Mistral-7B-32k
21
- - SciPhi/SciPhi-Mistral-7B-32k
22
- - SciPhi/SciPhi-Mistral-7B-32k
23
- - SciPhi/SciPhi-Mistral-7B-32k
24
- - SciPhi/SciPhi-Mistral-7B-32k
25
  ---
26
 
27
  # SciPhi-Mistral-7B-32k-sliced
28
 
29
  SciPhi-Mistral-7B-32k-sliced is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
30
  * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
31
- * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
32
- * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
33
- * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
34
- * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
35
- * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
36
- * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
37
- * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
38
- * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
39
 
40
  ## 🧩 Configuration
41
 
42
  ```yaml
43
  slices:
 
 
 
 
 
 
44
  - sources:
45
  - model: SciPhi/SciPhi-Mistral-7B-32k
46
  layer_range: [3, 3]
@@ -68,12 +50,21 @@ slices:
68
  - sources:
69
  - model: SciPhi/SciPhi-Mistral-7B-32k
70
  layer_range: [23, 23]
71
-
72
-
73
- merge_method: passthrough
74
- tokenizer_source: union
75
-
 
 
 
 
 
 
 
76
  dtype: float16
 
 
77
  ```
78
 
79
  ## 💻 Usage
 
4
  - mergekit
5
  - lazymergekit
6
  - SciPhi/SciPhi-Mistral-7B-32k
 
 
 
 
 
 
 
 
7
  base_model:
8
  - SciPhi/SciPhi-Mistral-7B-32k
 
 
 
 
 
 
 
 
9
  ---
10
 
11
  # SciPhi-Mistral-7B-32k-sliced
12
 
13
  SciPhi-Mistral-7B-32k-sliced is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
14
  * [SciPhi/SciPhi-Mistral-7B-32k](https://huggingface.co/SciPhi/SciPhi-Mistral-7B-32k)
 
 
 
 
 
 
 
 
15
 
16
  ## 🧩 Configuration
17
 
18
  ```yaml
19
  slices:
20
+ - sources:
21
+ - model: SciPhi/SciPhi-Mistral-7B-32k
22
+ layer_range: [0, 0]
23
+ - sources:
24
+ - model: SciPhi/SciPhi-Mistral-7B-32k
25
+ layer_range: [1, 1]
26
  - sources:
27
  - model: SciPhi/SciPhi-Mistral-7B-32k
28
  layer_range: [3, 3]
 
50
  - sources:
51
  - model: SciPhi/SciPhi-Mistral-7B-32k
52
  layer_range: [23, 23]
53
+ - sources:
54
+ - model: SciPhi/SciPhi-Mistral-7B-32k
55
+ layer_range: [32, 32]
56
+ merge_method: slerp
57
+ base_model: Locutusque/TinyMistral-248M-v2.5-Instruct
58
+ parameters:
59
+ t:
60
+ - filter: self_attn
61
+ value: [0, 0.5, 0.3, 0.7, 1]
62
+ - filter: mlp
63
+ value: [1, 0.5, 0.7, 0.3, 0]
64
+ - value: 0.3
65
  dtype: float16
66
+ tokenizer_source: base
67
+
68
  ```
69
 
70
  ## 💻 Usage
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "SciPhi/SciPhi-Mistral-7B-32k",
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
@@ -7,20 +7,20 @@
7
  "bos_token_id": 1,
8
  "eos_token_id": 2,
9
  "hidden_act": "silu",
10
- "hidden_size": 4096,
11
  "initializer_range": 0.02,
12
- "intermediate_size": 14336,
13
  "max_position_embeddings": 32768,
14
  "model_type": "mistral",
15
  "num_attention_heads": 32,
16
  "num_hidden_layers": 0,
17
  "num_key_value_heads": 8,
18
- "rms_norm_eps": 1e-05,
19
  "rope_theta": 10000.0,
20
- "sliding_window": 4096,
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "float16",
23
  "transformers_version": "4.37.2",
24
- "use_cache": false,
25
- "vocab_size": 32000
26
  }
 
1
  {
2
+ "_name_or_path": "Locutusque/TinyMistral-248M-v2.5-Instruct",
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
 
7
  "bos_token_id": 1,
8
  "eos_token_id": 2,
9
  "hidden_act": "silu",
10
+ "hidden_size": 1024,
11
  "initializer_range": 0.02,
12
+ "intermediate_size": 4096,
13
  "max_position_embeddings": 32768,
14
  "model_type": "mistral",
15
  "num_attention_heads": 32,
16
  "num_hidden_layers": 0,
17
  "num_key_value_heads": 8,
18
+ "rms_norm_eps": 1e-06,
19
  "rope_theta": 10000.0,
20
+ "sliding_window": 32,
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "float16",
23
  "transformers_version": "4.37.2",
24
+ "use_cache": true,
25
+ "vocab_size": 32005
26
  }
mergekit_config.yml CHANGED
@@ -1,5 +1,11 @@
1
 
2
  slices:
 
 
 
 
 
 
3
  - sources:
4
  - model: SciPhi/SciPhi-Mistral-7B-32k
5
  layer_range: [3, 3]
@@ -27,9 +33,18 @@ slices:
27
  - sources:
28
  - model: SciPhi/SciPhi-Mistral-7B-32k
29
  layer_range: [23, 23]
30
-
31
-
32
- merge_method: passthrough
33
- tokenizer_source: union
34
-
 
 
 
 
 
 
 
35
  dtype: float16
 
 
 
1
 
2
  slices:
3
+ - sources:
4
+ - model: SciPhi/SciPhi-Mistral-7B-32k
5
+ layer_range: [0, 0]
6
+ - sources:
7
+ - model: SciPhi/SciPhi-Mistral-7B-32k
8
+ layer_range: [1, 1]
9
  - sources:
10
  - model: SciPhi/SciPhi-Mistral-7B-32k
11
  layer_range: [3, 3]
 
33
  - sources:
34
  - model: SciPhi/SciPhi-Mistral-7B-32k
35
  layer_range: [23, 23]
36
+ - sources:
37
+ - model: SciPhi/SciPhi-Mistral-7B-32k
38
+ layer_range: [32, 32]
39
+ merge_method: slerp
40
+ base_model: Locutusque/TinyMistral-248M-v2.5-Instruct
41
+ parameters:
42
+ t:
43
+ - filter: self_attn
44
+ value: [0, 0.5, 0.3, 0.7, 1]
45
+ - filter: mlp
46
+ value: [1, 0.5, 0.7, 0.3, 0]
47
+ - value: 0.3
48
  dtype: float16
49
+ tokenizer_source: base
50
+
special_tokens_map.json CHANGED
@@ -1,18 +1,20 @@
1
  {
2
- "additional_special_tokens": [
3
- "<unk>",
4
- "<s>",
5
- "</s>"
6
- ],
7
  "bos_token": {
8
- "content": "<s>",
9
  "lstrip": false,
10
  "normalized": false,
11
  "rstrip": false,
12
  "single_word": false
13
  },
14
  "eos_token": {
15
- "content": "</s>",
 
 
 
 
 
 
 
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
 
1
  {
 
 
 
 
 
2
  "bos_token": {
3
+ "content": "<|bos|>",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
  "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
@@ -10,7 +24,7 @@
10
  "lstrip": false,
11
  "rstrip": false,
12
  "normalized": false,
13
- "special": false
14
  },
15
  {
16
  "id": 1,
@@ -19,7 +33,7 @@
19
  "lstrip": false,
20
  "rstrip": false,
21
  "normalized": false,
22
- "special": false
23
  },
24
  {
25
  "id": 2,
@@ -28,7 +42,52 @@
28
  "lstrip": false,
29
  "rstrip": false,
30
  "normalized": false,
31
- "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
@@ -53,7 +112,7 @@
53
  "single": [
54
  {
55
  "SpecialToken": {
56
- "id": "<s>",
57
  "type_id": 0
58
  }
59
  },
@@ -67,7 +126,7 @@
67
  "pair": [
68
  {
69
  "SpecialToken": {
70
- "id": "<s>",
71
  "type_id": 0
72
  }
73
  },
@@ -79,7 +138,7 @@
79
  },
80
  {
81
  "SpecialToken": {
82
- "id": "<s>",
83
  "type_id": 1
84
  }
85
  },
@@ -91,13 +150,13 @@
91
  }
92
  ],
93
  "special_tokens": {
94
- "<s>": {
95
- "id": "<s>",
96
  "ids": [
97
- 1
98
  ],
99
  "tokens": [
100
- "<s>"
101
  ]
102
  }
103
  }
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 512
12
+ },
13
+ "direction": "Left",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 32001,
16
+ "pad_type_id": 0,
17
+ "pad_token": "<|endoftext|>"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
24
  "lstrip": false,
25
  "rstrip": false,
26
  "normalized": false,
27
+ "special": true
28
  },
29
  {
30
  "id": 1,
 
33
  "lstrip": false,
34
  "rstrip": false,
35
  "normalized": false,
36
+ "special": true
37
  },
38
  {
39
  "id": 2,
 
42
  "lstrip": false,
43
  "rstrip": false,
44
  "normalized": false,
45
+ "special": true
46
+ },
47
+ {
48
+ "id": 32000,
49
+ "content": "<|bos|>",
50
+ "single_word": false,
51
+ "lstrip": false,
52
+ "rstrip": false,
53
+ "normalized": false,
54
+ "special": true
55
+ },
56
+ {
57
+ "id": 32001,
58
+ "content": "<|endoftext|>",
59
+ "single_word": false,
60
+ "lstrip": false,
61
+ "rstrip": false,
62
+ "normalized": false,
63
+ "special": true
64
+ },
65
+ {
66
+ "id": 32002,
67
+ "content": "[PAD]",
68
+ "single_word": false,
69
+ "lstrip": false,
70
+ "rstrip": false,
71
+ "normalized": false,
72
+ "special": true
73
+ },
74
+ {
75
+ "id": 32003,
76
+ "content": "<|ASSISTANT|>",
77
+ "single_word": false,
78
+ "lstrip": false,
79
+ "rstrip": false,
80
+ "normalized": false,
81
+ "special": true
82
+ },
83
+ {
84
+ "id": 32004,
85
+ "content": "<|USER|>",
86
+ "single_word": false,
87
+ "lstrip": false,
88
+ "rstrip": false,
89
+ "normalized": false,
90
+ "special": true
91
  }
92
  ],
93
  "normalizer": {
 
112
  "single": [
113
  {
114
  "SpecialToken": {
115
+ "id": "<|bos|>",
116
  "type_id": 0
117
  }
118
  },
 
126
  "pair": [
127
  {
128
  "SpecialToken": {
129
+ "id": "<|bos|>",
130
  "type_id": 0
131
  }
132
  },
 
138
  },
139
  {
140
  "SpecialToken": {
141
+ "id": "<|bos|>",
142
  "type_id": 1
143
  }
144
  },
 
150
  }
151
  ],
152
  "special_tokens": {
153
+ "<|bos|>": {
154
+ "id": "<|bos|>",
155
  "ids": [
156
+ 32000
157
  ],
158
  "tokens": [
159
+ "<|bos|>"
160
  ]
161
  }
162
  }
tokenizer_config.json CHANGED
@@ -8,7 +8,7 @@
8
  "normalized": false,
9
  "rstrip": false,
10
  "single_word": false,
11
- "special": false
12
  },
13
  "1": {
14
  "content": "<s>",
@@ -16,7 +16,7 @@
16
  "normalized": false,
17
  "rstrip": false,
18
  "single_word": false,
19
- "special": false
20
  },
21
  "2": {
22
  "content": "</s>",
@@ -24,23 +24,66 @@
24
  "normalized": false,
25
  "rstrip": false,
26
  "single_word": false,
27
- "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  }
29
  },
30
- "additional_special_tokens": [
31
- "<unk>",
32
- "<s>",
33
- "</s>"
34
- ],
35
- "bos_token": "<s>",
36
  "clean_up_tokenization_spaces": false,
37
- "eos_token": "</s>",
38
  "legacy": true,
 
39
  "model_max_length": 1000000000000000019884624838656,
40
- "pad_token": null,
 
 
 
41
  "sp_model_kwargs": {},
42
  "spaces_between_special_tokens": false,
 
43
  "tokenizer_class": "LlamaTokenizer",
 
 
44
  "unk_token": "<unk>",
45
  "use_default_system_prompt": true
46
  }
 
8
  "normalized": false,
9
  "rstrip": false,
10
  "single_word": false,
11
+ "special": true
12
  },
13
  "1": {
14
  "content": "<s>",
 
16
  "normalized": false,
17
  "rstrip": false,
18
  "single_word": false,
19
+ "special": true
20
  },
21
  "2": {
22
  "content": "</s>",
 
24
  "normalized": false,
25
  "rstrip": false,
26
  "single_word": false,
27
+ "special": true
28
+ },
29
+ "32000": {
30
+ "content": "<|bos|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "32001": {
38
+ "content": "<|endoftext|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "32002": {
46
+ "content": "[PAD]",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "32003": {
54
+ "content": "<|ASSISTANT|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "32004": {
62
+ "content": "<|USER|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
  }
69
  },
70
+ "additional_special_tokens": [],
71
+ "bos_token": "<|bos|>",
 
 
 
 
72
  "clean_up_tokenization_spaces": false,
73
+ "eos_token": "<|endoftext|>",
74
  "legacy": true,
75
+ "max_length": 1536,
76
  "model_max_length": 1000000000000000019884624838656,
77
+ "pad_to_multiple_of": null,
78
+ "pad_token": "<|endoftext|>",
79
+ "pad_token_type_id": 0,
80
+ "padding_side": "left",
81
  "sp_model_kwargs": {},
82
  "spaces_between_special_tokens": false,
83
+ "stride": 0,
84
  "tokenizer_class": "LlamaTokenizer",
85
+ "truncation_side": "right",
86
+ "truncation_strategy": "longest_first",
87
  "unk_token": "<unk>",
88
  "use_default_system_prompt": true
89
  }