sync from github
Browse files
src/backend/moe_infinity.py
CHANGED
@@ -31,16 +31,20 @@ class MoEHFLM(HFLMWithMeasurement):
|
|
31 |
self.use_chat_template = use_chat_template
|
32 |
if "device" in kwargs:
|
33 |
kwargs.pop("device")
|
|
|
|
|
34 |
kwargs["device_map"] = "cuda:0"
|
35 |
super().__init__(
|
36 |
*args, **kwargs, pretrained=pretrained
|
37 |
) # Assuming HFLM accepts a 'pretrained' arg and handles it
|
38 |
# self._create_model()
|
39 |
-
shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads"))
|
40 |
|
41 |
def __del__(self):
|
42 |
-
#
|
43 |
-
|
|
|
|
|
|
|
44 |
|
45 |
def _create_model(self, *args, **kwargs):
|
46 |
"""
|
|
|
31 |
self.use_chat_template = use_chat_template
|
32 |
if "device" in kwargs:
|
33 |
kwargs.pop("device")
|
34 |
+
if os.path.exists(os.path.join(self.offload_path, "moe-infinity-offloads")):
|
35 |
+
shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads"))
|
36 |
kwargs["device_map"] = "cuda:0"
|
37 |
super().__init__(
|
38 |
*args, **kwargs, pretrained=pretrained
|
39 |
) # Assuming HFLM accepts a 'pretrained' arg and handles it
|
40 |
# self._create_model()
|
|
|
41 |
|
42 |
def __del__(self):
|
43 |
+
self._model.engine.clean_up() # clean up hooks
|
44 |
+
self._model.engine.archer_engine.clean_up_resources() # clean up resources
|
45 |
+
if os.path.exists(os.path.join(self.offload_path, "moe-infinity-offloads")):
|
46 |
+
shutil.rmtree(os.path.join(self.offload_path, "moe-infinity-offloads")) # clean up offload model
|
47 |
+
|
48 |
|
49 |
def _create_model(self, *args, **kwargs):
|
50 |
"""
|