test_stage:
  obcq_modifiers:
    LogarithmicEqualizationModifier:
      mappings: [
        [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
        [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"],
      ]
    QuantizationModifier:
      ignore:
        # module types left unquantized
        - LlamaRotaryEmbedding
        - LlamaRMSNorm
        - SiLUActivation
        - MatMulOutput_QK
        - MatMulOutput_PV
        # individual layers excluded from quantization
        - model.layers.21.mlp.down_proj
        - model.layers.7.mlp.down_proj
        - model.layers.2.mlp.down_proj
        - model.layers.8.self_attn.q_proj
        - model.layers.8.self_attn.k_proj
      post_oneshot_calibration: true
      scheme_overrides:
        # per-module-type quantization settings
        Linear:
          weights:
            num_bits: 8
            symmetric: true
            strategy: channel
        MatMulLeftInput_QK:
          input_activations:
            num_bits: 8
            symmetric: true
        Embedding:
          input_activations: null
          weights:
            num_bits: 8
            symmetric: false
    SparseGPTModifier:
      sparsity: 0.5
      block_size: 128
      sequential_update: true
      quantize: true
      percdamp: 0.01
      mask_structure: "0:0"
      targets: ["re:model.layers.\\d*$"]