# slices:
#   - sources:
#       - model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 # 21 Layers
#         layer_range: [0, 21]
#       - model: microsoft/Phi-3-mini-4k-instruct # 31 Layers
#         layer_range: [10, 31]
# merge_method: slerp
# base_model: microsoft/Phi-3-mini-4k-instruct
# parameters:
#   t:
#     - filter: self_attn.o_proj
#       value: 0.5
#     - filter: self_attn.qkv_proj
#       value: 0.5
#     - filter: mlp.down_proj
#       value: 0.5
#     - filter: input_layernorm
#       value: 0.5
#     - filter: post_attention_layernorm
#       value: 0.5
#     - filter: embed_tokens
#       value: 0.5
#     - value: 0
# dtype: bfloat16
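
# Active config: SLERP-merge Mistral-7B-Instruct-v0.2 into
# Meta-Llama-3.1-8B-Instruct. Note that these two model families use different
# vocabularies (32000 vs. 128256 tokens), so the embed_tokens and lm_head
# shapes will not match; a cross-family slerp like this is an experiment and
# may require the vocabularies to be reconciled first.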
slices:
  - sources:
      - model: mistralai/Mistral-7B-Instruct-v0.2 # 32 layers
        layer_range: [0, 32]
      - model: meta-llama/Meta-Llama-3.1-8B-Instruct # 32 layers
        layer_range: [0, 32]
merge_method: slerp
base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
parameters:
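  # t is the SLERP interpolation factor: t=0 keeps the base model's weights,
  # t=1 takes the other model's. Filters set t for matching tensor names; the
  # bare value entry is the fallback for everything unmatched.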
  t:
    - filter: self_attn
      value: 0.5
    - filter: mlp
      value: 0.5
    - value: 0.5
dtype: bfloat16
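
# A minimal usage sketch, assuming this file is saved as config.yml and
# mergekit (https://github.com/arcee-ai/mergekit) is installed:
#   pip install mergekit
#   mergekit-yaml config.yml ./merged-model --cuda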