Yaofu3 committed
Commit aac38e8 (1 parent: faf48c3)

bug/fix_moe-infinity (#9)


- fix offloading dir (a75cfddd44b4ac45b4e042b3577a6781f4635339)

src/backend/hflm_with_measurement.py CHANGED
@@ -1,6 +1,7 @@
 import copy
 import os
 from datetime import timedelta
+import sys
 from time import time
 from pathlib import Path
 from typing import List, Literal, Optional, Tuple, Union
src/backend/moe_infinity.py CHANGED
@@ -1,5 +1,6 @@
 import torch
 import os
+import shutil
 from transformers import AutoTokenizer
 from transformers import AutoModelForCausalLM
 from moe_infinity import MoE
@@ -34,6 +35,11 @@ class MoEHFLM(HFLMWithMeasurement):
             *args, **kwargs, pretrained=pretrained, device_map="cuda:0"
         )  # Assuming HFLM accepts a 'pretrained' arg and handles it
         # self._create_model()
+        shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads"))
+
+    def __del__(self):
+        # Clean up offloaded models from self.offload_path
+        shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads"))
 
     def _create_model(self, *args, **kwargs):
         """
 
@@ -46,7 +52,18 @@ class MoEHFLM(HFLMWithMeasurement):
         }
         # Update default config with any user-provided config
         final_moe_config = {**default_moe_config, **self.moe_config}
+
+        # Dirty fix, to be removed once MoE-Infinity moves inputs to the correct device itself
+        def MoEGenDecorator(func):
+            def wrapper(*args, **kwargs):
+                # Ensure every tensor in the input is on the same device as the model
+                args = [arg.to("cuda:0") if isinstance(arg, torch.Tensor) else arg for arg in args]
+                kwargs = {k: v.to("cuda:0") if isinstance(v, torch.Tensor) else v for k, v in kwargs.items()}
+                return func(*args, **kwargs)
+            return wrapper
+
         self._model = MoE(self.checkpoint, final_moe_config)
+        self._model.generate = MoEGenDecorator(self._model.generate)
         # self._model = AutoModelForCausalLM.from_pretrained(
         #     self.checkpoint, torch_dtype=torch.float16, device_map="auto"
         # )
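The generate wrapper exists because MoE-Infinity places the model on cuda:0 while the harness may hand generate CPU tensors. A minimal sketch of the same device-moving decorator in isolation, assuming a CUDA device is available; move_inputs_to_device and dummy_generate are illustrative names, not part of the commit:

import torch

def move_inputs_to_device(func, device="cuda:0"):
    # Wrap a generate-style callable so every tensor argument is moved to `device`
    # before the underlying call, mirroring MoEGenDecorator above.
    def wrapper(*args, **kwargs):
        args = [a.to(device) if isinstance(a, torch.Tensor) else a for a in args]
        kwargs = {k: v.to(device) if isinstance(v, torch.Tensor) else v
                  for k, v in kwargs.items()}
        return func(*args, **kwargs)
    return wrapper

def dummy_generate(input_ids, attention_mask=None):
    # Stand-in for model.generate; reports where its inputs ended up.
    return input_ids.device

if torch.cuda.is_available():
    generate = move_inputs_to_device(dummy_generate)
    print(generate(torch.ones(1, 4, dtype=torch.long)))  # prints: cuda:0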