Training in progress, epoch 1
- adapter_config.json +3 -3
- adapter_model.safetensors +2 -2
- added_tokens.json +2 -0
- special_tokens_map.json +5 -18
- tmpxros8ucc/_remote_module_non_scriptable.py +81 -0
- tokenizer.json +18 -0
- tokenizer_config.json +22 -6
- training_args.bin +1 -1
adapter_config.json
CHANGED
@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
     "o_proj",
     "k_proj",
+    "up_proj",
+    "q_proj",
     "gate_proj",
     "v_proj",
-    "down_proj",
-    "up_proj"
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
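The only substantive change here is the ordering of `target_modules`; the module set itself is identical, which is consistent with PEFT handling `target_modules` as a set internally. For reference, a minimal sketch of a PEFT config that yields this list — the rank and scaling values are placeholders, not values taken from this commit:

```python
from peft import LoraConfig

# Hypothetical hyperparameters, except target_modules / task_type / use_rslora,
# which mirror the adapter_config.json diff above.
config = LoraConfig(
    r=16,           # placeholder rank
    lora_alpha=32,  # placeholder scaling factor
    target_modules=[
        "o_proj", "k_proj", "up_proj", "q_proj",
        "gate_proj", "v_proj", "down_proj",
    ],
    task_type="CAUSAL_LM",
    use_rslora=False,
)
```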
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9f048bfa57bfd8d47cae6f7092daf12bc4719cad2627d816a511a3147140c1d1
+size 1803973520
added_tokens.json
CHANGED
@@ -1,4 +1,6 @@
 {
+  "<|im_end|>": 32020,
+  "<|im_start|>": 32021,
   "DIALECT": 32018,
   "[/DIALECT]": 32017,
   "[PAD]": 32019,
special_tokens_map.json
CHANGED
@@ -1,35 +1,22 @@
 {
   "additional_special_tokens": [
     {
-      "content": "
+      "content": "<|im_end|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     },
     {
-      "content": "
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false
-    },
-    {
-      "content": "DIALECT",
+      "content": "<|im_start|>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false
     }
   ],
-  "bos_token": "
-  "eos_token": "
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
+  "bos_token": "<|im_start|>",
+  "eos_token": "<|im_end|>",
+  "pad_token": "<|im_end|>",
   "unk_token": "<unk>"
 }
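Together with added_tokens.json and the tokenizer files in this commit, these changes register the ChatML markers and repoint bos/eos/pad at them. A minimal sketch of how such files are typically produced with `transformers` — the base model id and output path are assumptions, not taken from this commit:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "codellama/CodeLlama-7b-hf"  # assumption: a CodeLlama-family base
)

# New ids are appended after the existing vocabulary (32020, 32021 here).
tokenizer.add_special_tokens(
    {"additional_special_tokens": ["<|im_end|>", "<|im_start|>"]}
)
tokenizer.bos_token = "<|im_start|>"
tokenizer.eos_token = "<|im_end|>"
tokenizer.pad_token = "<|im_end|>"

# Rewrites added_tokens.json, special_tokens_map.json, tokenizer.json,
# and tokenizer_config.json, as reflected in this commit.
tokenizer.save_pretrained("./checkpoint")  # hypothetical output path
```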
tmpxros8ucc/_remote_module_non_scriptable.py
ADDED
@@ -0,0 +1,81 @@
+from typing import *
+
+import torch
+import torch.distributed.rpc as rpc
+from torch import Tensor
+from torch._jit_internal import Future
+from torch.distributed.rpc import RRef
+from typing import Tuple  # pyre-ignore: unused import
+
+
+module_interface_cls = None
+
+
+def forward_async(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    return rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+
+
+def forward(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    ret_fut = rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+    return ret_fut.wait()
+
+
+_generated_methods = [
+    forward_async,
+    forward,
+]
+
+
+
+
+def _remote_forward(
+    module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
+    module = module_rref.local_value()
+    device = torch.device(device)
+
+    if device.type != "cuda":
+        return module.forward(*args, **kwargs)
+
+    # If the module is on a cuda device,
+    # move any CPU tensor in args or kwargs to the same cuda device.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
+    args = (*args,)
+    out_args: Tuple[()] = ()
+    for arg in args:
+        arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
+        out_args = out_args + arg
+
+    kwargs = {**kwargs}
+    for k, v in kwargs.items():
+        if isinstance(v, Tensor):
+            kwargs[k] = kwargs[k].to(device)
+
+    if is_device_map_set:
+        return module.forward(*out_args, **kwargs)
+
+    # If the device map is empty, then only CPU tensors are allowed to send over wire,
+    # so have to move any GPU tensor to CPU in the output.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
+    ret: Tuple[()] = ()
+    for i in module.forward(*out_args, **kwargs):
+        i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
+        ret = ret + i
+    return ret
tokenizer.json
CHANGED
@@ -106,6 +106,24 @@
       "rstrip": false,
       "normalized": false,
       "special": true
+    },
+    {
+      "id": 32020,
+      "content": "<|im_end|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 32021,
+      "content": "<|im_start|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": {
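A quick sanity check that the fast tokenizer picked up the new entries — a sketch, with the checkpoint path as a hypothetical placeholder:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")  # hypothetical path
assert tok.convert_tokens_to_ids("<|im_end|>") == 32020
assert tok.convert_tokens_to_ids("<|im_start|>") == 32021

# Special tokens should stay atomic rather than be split into subword pieces.
ids = tok("<|im_start|>user\nhi<|im_end|>")["input_ids"]
assert 32021 in ids and 32020 in ids
```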
tokenizer_config.json
CHANGED
@@ -89,22 +89,38 @@
       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "32020": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32021": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
     }
   },
   "additional_special_tokens": [
-    "
-    "
-    "DIALECT"
+    "<|im_end|>",
+    "<|im_start|>"
   ],
-  "bos_token": "
+  "bos_token": "<|im_start|>",
+  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "
+  "eos_token": "<|im_end|>",
   "eot_token": "▁<EOT>",
   "fill_token": "<FILL_ME>",
   "legacy": null,
   "middle_token": "▁<MID>",
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "
+  "pad_token": "<|im_end|>",
   "prefix_token": "▁<PRE>",
   "sp_model_kwargs": {},
   "suffix_token": "▁<SUF>",
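The new `chat_template` is the standard ChatML layout. A short sketch of how it renders, with the checkpoint path again a hypothetical placeholder:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkpoint")  # hypothetical path
messages = [{"role": "user", "content": "Hello"}]

# add_generation_prompt=True appends the trailing "<|im_start|>assistant\n".
text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(text)
# <|im_start|>user
# Hello<|im_end|>
# <|im_start|>assistant
```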
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2f0547de3fa49e3814734f06ac490ef2652b071cef7904b638f1c173e5fb6e99
 size 4664