"""Make ``<|im_end|>`` the EOS/PAD token of the Dolphin Mistral tokenizer.

Loads the tokenizer from ``MODEL_DIR``, builds a vocabulary mapping in which
id ``EOS_TOKEN_ID`` is named ``<|im_end|>`` instead of the old EOS token,
reloads the tokenizer with that mapping, sets ``<|im_end|>`` as both the EOS
and the PAD token, and saves the result to ``OUTPUT_DIR``.
"""
import warnings

from transformers import AutoTokenizer

MODEL_DIR = "/workspace/dolphin-2.6-mistral-7b-hf"
OUTPUT_DIR = "/workspace/dolphin-new-tokenizer/"

# NOTE(review): the original script deleted the empty-string key (''), and its
# comment read 'Remove the "" token'. That looks like "</s>" with the tag
# stripped by an HTML-style extraction, and id 2 is reused below -- confirm
# that "</s>" is really the token that should be dropped.
OLD_EOS_TOKEN = "</s>"
NEW_EOS_TOKEN = "<|im_end|>"
EOS_TOKEN_ID = 2

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)

# 1. Remove the old EOS token from the vocabulary and give its id to the new
#    one. pop() with a default is used so a vocabulary that does not contain
#    the old token does not abort the script with a KeyError.
vocab = tokenizer.get_vocab()
vocab.pop(OLD_EOS_TOKEN, None)
vocab[NEW_EOS_TOKEN] = EOS_TOKEN_ID

# 2. Reload the tokenizer, passing the edited vocabulary.
# NOTE(review): whether from_pretrained() honours a ``vocab`` override depends
# on the tokenizer class and the installed transformers version -- verify; the
# check in step 4 reports when the override did not take effect.
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, vocab=vocab)

# 3. Use the new token for both end-of-sequence and padding.
tokenizer.eos_token = NEW_EOS_TOKEN
tokenizer.pad_token = NEW_EOS_TOKEN

# 4. Surface a remap that silently failed instead of saving it unnoticed.
#    This only warns; the tokenizer is still saved, as in the original script.
if tokenizer.eos_token_id != EOS_TOKEN_ID:
    warnings.warn(
        f"{NEW_EOS_TOKEN!r} resolved to id {tokenizer.eos_token_id}, "
        f"expected {EOS_TOKEN_ID}; the vocabulary override was not applied.",
        stacklevel=2,
    )

# 5. Save the modified tokenizer.
tokenizer.save_pretrained(OUTPUT_DIR)