maher13 committed on
Commit
5cd04db
1 Parent(s): d4385eb

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"ف": 0, "ذ": 1, "ر": 2, "ڨ": 3, "ط": 4, "": 5, "ھ": 6, "ض": 7, "ح": 8, "ص": 9, "ك": 10, "ه": 11, "خ": 12, "س": 13, "ا": 14, "ء": 15, "ى": 16, "ک": 17, "ز": 18, "م": 19, "ن": 20, "و": 21, "د": 22, "ع": 23, "غ": 24, "ئ": 25, "ی": 26, "ب": 27, "ق": 28, "ت": 30, "ظ": 31, "ث": 32, "ش": 33, "ج": 34, "ؤ": 35, "ة": 36, "ل": 37, "ي": 38, "": 39, "|": 29, "[UNK]": 40, "[PAD]": 41}
 
1
+ {"ش": 0, "ج": 2, "ت": 3, "ن": 4, "ز": 5, "ط": 6, "ض": 7, "": 8, "و": 9, "ر": 10, "ق": 11, "ء": 12, "خ": 13, "ئ": 14, "ڨ": 15, "ذ": 16, "ھ": 17, "م": 18, "ث": 19, "ة": 20, "ي": 21, "س": 22, "ا": 23, "ص": 24, "ی": 25, "ک": 26, "ع": 27, "ح": 28, "ف": 29, "ل": 30, "غ": 31, "ظ": 32, "": 33, "ى": 34, "ك": 35, "ؤ": 36, "ب": 37, "ه": 38, "د": 39, "|": 1, "[UNK]": 40, "[PAD]": 41}