{ | |
"added_tokens_decoder": { | |
"0": { | |
"content": "<pad>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"1": { | |
"content": "<unk>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"2": { | |
"content": "<s>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"3": { | |
"content": "</s>", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256001": { | |
"content": "__afr__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256002": { | |
"content": "__amh__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256003": { | |
"content": "__arb__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256004": { | |
"content": "__ary__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256005": { | |
"content": "__arz__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256006": { | |
"content": "__asm__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256007": { | |
"content": "__azj__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256008": { | |
"content": "__bel__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256009": { | |
"content": "__ben__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256010": { | |
"content": "__bos__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256011": { | |
"content": "__bul__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256012": { | |
"content": "__cat__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256013": { | |
"content": "__ceb__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256014": { | |
"content": "__ces__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256015": { | |
"content": "__ckb__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256016": { | |
"content": "__cmn__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256017": { | |
"content": "__cmn_Hant__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256018": { | |
"content": "__cym__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256019": { | |
"content": "__dan__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256020": { | |
"content": "__deu__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256021": { | |
"content": "__ell__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256022": { | |
"content": "__eng__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256023": { | |
"content": "__est__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256024": { | |
"content": "__eus__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256025": { | |
"content": "__fin__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256026": { | |
"content": "__fra__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256027": { | |
"content": "__fuv__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256028": { | |
"content": "__gaz__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256029": { | |
"content": "__gle__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256030": { | |
"content": "__glg__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256031": { | |
"content": "__guj__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256032": { | |
"content": "__heb__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256033": { | |
"content": "__hin__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256034": { | |
"content": "__hrv__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256035": { | |
"content": "__hun__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256036": { | |
"content": "__hye__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256037": { | |
"content": "__ibo__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256038": { | |
"content": "__ind__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256039": { | |
"content": "__isl__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256040": { | |
"content": "__ita__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256041": { | |
"content": "__jav__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256042": { | |
"content": "__jpn__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256043": { | |
"content": "__kan__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256044": { | |
"content": "__kat__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256045": { | |
"content": "__kaz__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256046": { | |
"content": "__khk__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256047": { | |
"content": "__khm__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256048": { | |
"content": "__kir__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256049": { | |
"content": "__kor__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256050": { | |
"content": "__lao__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256051": { | |
"content": "__lit__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256052": { | |
"content": "__lug__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256053": { | |
"content": "__luo__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256054": { | |
"content": "__lvs__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256055": { | |
"content": "__mai__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256056": { | |
"content": "__mal__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256057": { | |
"content": "__mar__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256058": { | |
"content": "__mkd__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256059": { | |
"content": "__mlt__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256060": { | |
"content": "__mni__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256061": { | |
"content": "__mya__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256062": { | |
"content": "__nld__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256063": { | |
"content": "__nno__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256064": { | |
"content": "__nob__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256065": { | |
"content": "__npi__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256066": { | |
"content": "__nya__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256067": { | |
"content": "__ory__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256068": { | |
"content": "__pan__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256069": { | |
"content": "__pbt__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256070": { | |
"content": "__pes__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256071": { | |
"content": "__pol__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256072": { | |
"content": "__por__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256073": { | |
"content": "__ron__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256074": { | |
"content": "__rus__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256075": { | |
"content": "__sat__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256076": { | |
"content": "__slk__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256077": { | |
"content": "__slv__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256078": { | |
"content": "__sna__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256079": { | |
"content": "__snd__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256080": { | |
"content": "__som__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256081": { | |
"content": "__spa__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256082": { | |
"content": "__srp__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256083": { | |
"content": "__swe__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256084": { | |
"content": "__swh__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256085": { | |
"content": "__tam__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256086": { | |
"content": "__tel__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256087": { | |
"content": "__tgk__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256088": { | |
"content": "__tgl__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256089": { | |
"content": "__tha__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256090": { | |
"content": "__tur__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256091": { | |
"content": "__ukr__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256092": { | |
"content": "__urd__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256093": { | |
"content": "__uzn__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256094": { | |
"content": "__vie__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256095": { | |
"content": "__yor__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256096": { | |
"content": "__yue__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256097": { | |
"content": "__zlm__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
}, | |
"256098": { | |
"content": "__zul__", | |
"lstrip": false, | |
"normalized": false, | |
"rstrip": false, | |
"single_word": false, | |
"special": true | |
} | |
}, | |
"additional_special_tokens": [ | |
"__afr__", | |
"__amh__", | |
"__arb__", | |
"__ary__", | |
"__arz__", | |
"__asm__", | |
"__azj__", | |
"__bel__", | |
"__ben__", | |
"__bos__", | |
"__bul__", | |
"__cat__", | |
"__ceb__", | |
"__ces__", | |
"__ckb__", | |
"__cmn__", | |
"__cmn_Hant__", | |
"__cym__", | |
"__dan__", | |
"__deu__", | |
"__ell__", | |
"__eng__", | |
"__est__", | |
"__eus__", | |
"__fin__", | |
"__fra__", | |
"__fuv__", | |
"__gaz__", | |
"__gle__", | |
"__glg__", | |
"__guj__", | |
"__heb__", | |
"__hin__", | |
"__hrv__", | |
"__hun__", | |
"__hye__", | |
"__ibo__", | |
"__ind__", | |
"__isl__", | |
"__ita__", | |
"__jav__", | |
"__jpn__", | |
"__kan__", | |
"__kat__", | |
"__kaz__", | |
"__khk__", | |
"__khm__", | |
"__kir__", | |
"__kor__", | |
"__lao__", | |
"__lit__", | |
"__lug__", | |
"__luo__", | |
"__lvs__", | |
"__mai__", | |
"__mal__", | |
"__mar__", | |
"__mkd__", | |
"__mlt__", | |
"__mni__", | |
"__mya__", | |
"__nld__", | |
"__nno__", | |
"__nob__", | |
"__npi__", | |
"__nya__", | |
"__ory__", | |
"__pan__", | |
"__pbt__", | |
"__pes__", | |
"__pol__", | |
"__por__", | |
"__ron__", | |
"__rus__", | |
"__sat__", | |
"__slk__", | |
"__slv__", | |
"__sna__", | |
"__snd__", | |
"__som__", | |
"__spa__", | |
"__srp__", | |
"__swe__", | |
"__swh__", | |
"__tam__", | |
"__tel__", | |
"__tgk__", | |
"__tgl__", | |
"__tha__", | |
"__tur__", | |
"__ukr__", | |
"__urd__", | |
"__uzn__", | |
"__vie__", | |
"__yor__", | |
"__yue__", | |
"__zlm__", | |
"__zul__" | |
], | |
"bos_token": "<s>", | |
"clean_up_tokenization_spaces": true, | |
"cls_token": "<s>", | |
"eos_token": "</s>", | |
"model_max_length": 1000000000000000019884624838656, | |
"pad_token": "<pad>", | |
"processor_class": "SeamlessM4TProcessor", | |
"sep_token": "</s>", | |
"sp_model_kwargs": {}, | |
"src_lang": "__eng__", | |
"tgt_lang": "__fra__", | |
"tokenizer_class": "SeamlessM4TTokenizer", | |
"unk_token": "<unk>" | |
} | |