When I run it on multiple GPUs with accelerate, it raises an AttributeError
#188
by
waleyWang
- opened
I have 4 A100 GPUs with 80GB each. I tried to run inference with this model, but it always raises an error. My code is below:
def main(args):
    """Shard a Llama model across all visible GPUs and prepare it for inference.

    Builds the model on the meta device (no weights materialized), infers a
    per-GPU device map with an 80GiB budget per device, loads the checkpoint
    into the sharded model, and loads the matching tokenizer.

    Args:
        args: parsed CLI namespace; reads ``args.flash_attn``,
            ``args.base_model`` (checkpoint path/name) and ``args.cache_dir``.
    """
    if args.flash_attn:
        replace_llama_attn(inference=True)

    # Multi-GPU: budget 80GiB on every visible device. Respect
    # CUDA_VISIBLE_DEVICES when set; otherwise use all local GPUs.
    memory = '80GiB'
    cuda_list = os.environ.get('CUDA_VISIBLE_DEVICES', '')
    if cuda_list:
        cuda_list = cuda_list.split(',')
    else:
        cuda_list = [str(i) for i in range(torch.cuda.device_count())]
    max_memory = {int(cuda): memory for cuda in cuda_list}

    # Instantiate the architecture with empty (meta) weights so nothing is
    # allocated until the checkpoint is dispatched across devices.
    config = LlamaConfig.from_pretrained(args.base_model)
    with init_empty_weights():
        model = LlamaForCausalLM._from_config(config, torch_dtype=torch.float16)

    # Keep each decoder layer whole on a single device when sharding.
    no_split_module_classes = LlamaForCausalLM._no_split_modules
    device_map = infer_auto_device_map(
        model,
        max_memory=max_memory,
        no_split_module_classes=no_split_module_classes,
    )
    print('cuda_list', cuda_list)
    print('max_memory', max_memory)
    print('device_map', device_map)

    # NOTE(review): the reported AttributeError ('LlamaDecoderLayer' object has
    # no attribute 'block_sparse_moe') means the checkpoint at args.base_model
    # contains MoE (Mixtral-style) weights that do not exist in the
    # LlamaForCausalLM architecture built above — verify the checkpoint
    # actually matches a Llama (non-MoE) config before loading.
    load_checkpoint_in_model(model, args.base_model, device_map=device_map)
    model = dispatch_model(model, device_map=device_map)

    tokenizer = transformers.AutoTokenizer.from_pretrained(
        args.base_model,
        cache_dir=args.cache_dir,
    )
    torch.set_grad_enabled(False)
    model.eval()
But it raises the following error:
Traceback (most recent call last):
File "model/LongLoRA/inference_all_accelerate.py", line 196, in <module>
main(args)
File "model/LongLoRA/inference_all_accelerate.py", line 126, in main
load_checkpoint_in_model(model, args.base_model, device_map=device_map)
File "llm_env/lib/python3.10/site-packages/accelerate/utils/modeling.py", line 1625, in load_checkpoint_in_model
set_module_tensor_to_device(
File "lib/python3.10/site-packages/accelerate/utils/modeling.py", line 311, in set_module_tensor_to_device
new_module = getattr(module, split)
File "llm_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1688, in __getattr__
raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'LlamaDecoderLayer' object has no attribute 'block_sparse_moe'