{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "<unk>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "<cls>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "<sep>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 3,
      "content": "<pad>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 4,
      "content": "<mask>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 5,
      "content": "<s>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 6,
      "content": "</s>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      {
        "type": "Replace",
        "pattern": {
          "String": "``"
        },
        "content": "\""
      },
      {
        "type": "Replace",
        "pattern": {
          "String": "''"
        },
        "content": "\""
      },
      {
        "type": "NFKD"
      },
      {
        "type": "StripAccents"
      },
      {
        "type": "Replace",
        "pattern": {
          "Regex": " {2,}"
        },
        "content": " "
      }
    ]
  },
  "pre_tokenizer": {
    "type": "Metaspace",
    "replacement": "▁",
    "add_prefix_space": true,
    "prepend_scheme": "always"
  },
  "post_processor": null,
  "decoder": {
    "type": "Metaspace",
    "replacement": "▁",
    "add_prefix_space": true,
    "prepend_scheme": "always"
  },
  "model": {
    "type": "Unigram",
    "unk_id": 0,
    "vocab": [
      [
        "<unk>",
        0.0
      ]
    ],
    "byte_fallback": false
  }
}