{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 5, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 6, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Replace", "pattern": { "String": "``" }, "content": "\"" }, { "type": "Replace", "pattern": { "String": "''" }, "content": "\"" }, { "type": "NFKD" }, { "type": "StripAccents" }, { "type": "Replace", "pattern": { "Regex": " {2,}" }, "content": " " } ] }, "pre_tokenizer": { "type": "Metaspace", "replacement": "▁", "add_prefix_space": true, "prepend_scheme": "always" }, "post_processor": null, "decoder": { "type": "Metaspace", "replacement": "▁", "add_prefix_space": true, "prepend_scheme": "always" }, "model": { "type": "Unigram", "unk_id": 0, "vocab": [ [ "", 0.0 ] ], "byte_fallback": false } }