Mikael110 committed on
Commit
f8744cd
1 Parent(s): bbbedd1

Fix Incorrect Prompt Template defined in tokenizer_config.json

Browse files

The `chat_template` property in `tokenizer_config.json` currently contains the Llama-3 chat template, rather than ChatML which is what this model is actually trained with. This template is picked up by various tools and inference applications so it's beneficial that it reflects the real template the model uses.

Files changed (1) hide show
  1. tokenizer_config.json +1 -1
tokenizer_config.json CHANGED
@@ -2066,7 +2066,7 @@
2066
  }
2067
  },
2068
  "bos_token": "<|begin_of_text|>",
2069
- "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}",
2070
  "clean_up_tokenization_spaces": true,
2071
  "eos_token": "<|im_end|>",
2072
  "model_input_names": [
 
2066
  }
2067
  },
2068
  "bos_token": "<|begin_of_text|>",
2069
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
2070
  "clean_up_tokenization_spaces": true,
2071
  "eos_token": "<|im_end|>",
2072
  "model_input_names": [