czczup committed
Commit 59ef538
1 Parent(s): bb36861

Upload folder using huggingface_hub

Files changed (2)
  1. README.md +4 -0
  2. modeling_internvl_chat.py +3 -3
README.md CHANGED
@@ -100,6 +100,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 ```
 
@@ -114,6 +115,7 @@ model = AutoModel.from_pretrained(
     torch_dtype=torch.bfloat16,
     load_in_8bit=True,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval()
 ```
 
@@ -160,6 +162,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True,
     device_map=device_map).eval()
 ```
@@ -256,6 +259,7 @@ model = AutoModel.from_pretrained(
     path,
     torch_dtype=torch.bfloat16,
     low_cpu_mem_usage=True,
+    use_flash_attn=True,
     trust_remote_code=True).eval().cuda()
 tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
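This README change threads `use_flash_attn=True` through each `from_pretrained` example. A minimal sketch of the updated 16-bit loading snippet, assuming a placeholder repo id (`path` below is illustrative, not taken from this commit):

```python
import torch
from transformers import AutoModel, AutoTokenizer

path = 'OpenGVLab/InternVL-Chat-V1-5'  # placeholder; substitute the actual checkpoint path
model = AutoModel.from_pretrained(
    path,
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    use_flash_attn=True,  # new flag added by this commit; forwarded to the remote-code model
    trust_remote_code=True).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True, use_fast=False)
```

Because the model is loaded with `trust_remote_code=True`, the extra keyword is passed through to `InternVLChatModel.__init__`, which now degrades gracefully when flash-attn is missing (see the second file below).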
modeling_internvl_chat.py CHANGED
@@ -10,15 +10,14 @@ import torch.utils.checkpoint
 import transformers
 from torch import nn
 from torch.nn import CrossEntropyLoss
-from transformers import (AutoModel, GenerationConfig, LlamaForCausalLM,
-                          LlamaTokenizer)
+from transformers import AutoModel, GenerationConfig, LlamaForCausalLM
 from transformers.modeling_outputs import CausalLMOutputWithPast
 from transformers.modeling_utils import PreTrainedModel
 from transformers.utils import ModelOutput, logging
 
 from .configuration_internvl_chat import InternVLChatConfig
 from .conversation import get_conv_template
-from .modeling_intern_vit import InternVisionModel
+from .modeling_intern_vit import InternVisionModel, has_flash_attn
 from .modeling_internlm2 import InternLM2ForCausalLM
 
 logger = logging.get_logger(__name__)
@@ -50,6 +49,7 @@ class InternVLChatModel(PreTrainedModel):
         self.num_image_token = int((image_size // patch_size) ** 2 * (config.downsample_ratio ** 2))
         self.downsample_ratio = config.downsample_ratio
         self.ps_version = config.ps_version
+        use_flash_attn = use_flash_attn if has_flash_attn else False
         config.vision_config.use_flash_attn = True if use_flash_attn else False
         config.llm_config.attn_implementation = 'flash_attention_2' if use_flash_attn else 'eager'
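The new guard makes Flash Attention opt-in only when the `flash-attn` package is actually importable: if `has_flash_attn` is false, `use_flash_attn` is forced off and the LLM falls back to the `eager` attention implementation instead of failing at load time. A sketch of how such a `has_flash_attn` flag is commonly defined in `modeling_intern_vit.py` (an assumption; that file is not part of this diff):

```python
# Probe once at import time for the optional flash-attn package.
try:
    from flash_attn.bert_padding import pad_input, unpad_input  # noqa: F401
    from flash_attn.flash_attn_interface import \
        flash_attn_varlen_qkvpacked_func  # noqa: F401
    has_flash_attn = True
except ImportError:
    # flash-attn is absent; callers should fall back to eager attention.
    has_flash_attn = False
```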