deepseek-ai/deepseek-coder-6.7b-base

Text Generation

text-generation-inference

Inference Endpoints

Model card · Files and versions · Community

Upload 2 files

#1

by jonatanklosko - opened Nov 17, 2023

base: refs/heads/main ← from: refs/pr/1

Discussion Files changed

Files changed (2) hide show

special_tokens_map.json +23 -0
tokenizer.json +52 -4

special_tokens_map.json ADDED Viewed

@@ -0,0 +1,23 @@

+{
+  "bos_token": {
+    "content": "<｜begin▁of▁sentence｜>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<｜end▁of▁sentence｜>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<｜end▁of▁sentence｜>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json CHANGED Viewed

@@ -254,10 +254,58 @@
     ]
   },
   "post_processor": {
-    "type": "ByteLevel",
-    "add_prefix_space": true,
-    "trim_offsets": false,
-    "use_regex": true
   },
   "decoder": {
     "type": "ByteLevel",

     ]
   },
   "post_processor": {
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "SpecialToken": {
+          "id": "<｜begin▁of▁sentence｜>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "SpecialToken": {
+          "id": "<｜begin▁of▁sentence｜>",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "SpecialToken": {
+          "id": "<｜begin▁of▁sentence｜>",
+          "type_id": 1
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {
+      "<｜begin▁of▁sentence｜>": {
+        "id": "<｜begin▁of▁sentence｜>",
+        "ids": [
+          32013
+        ],
+        "tokens": [
+          "<｜begin▁of▁sentence｜>"
+        ]
+      }
+    }
   },
   "decoder": {
     "type": "ByteLevel",