{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 53, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 54, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 55, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true }, { "id": 56, "content": "", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "RobertaProcessing", "sep": [ "", 54 ], "cls": [ "", 53 ], "trim_offsets": true, "add_prefix_space": false }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": "", "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "vocab": { "": 0, "a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "q": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "x": 27, "v": 28, "d": 29, "r": 30, "s": 31, "w": 32, "c": 33, "k": 34, "n": 35, "y": 36, "p": 37, "j": 38, "g": 39, "f": 40, "t": 41, "z": 42, "a": 43, "e": 44, "o": 45, "q": 46, "i": 47, "u": 48, "m": 49, "h": 50, "l": 51, "b": 52 }, "merges": [] } }