yangheng committed on
Commit
f2a9e29
1 Parent(s): 3162e55

Delete configuration_omnigenome.py

Files changed (1)
  1. configuration_omnigenome.py +0 -307
configuration_omnigenome.py DELETED
@@ -1,307 +0,0 @@
- # coding=utf-8
- # Copyright 2022 Meta and The HuggingFace Inc. team. All rights reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- """ OmniGenome model configuration"""
-
- from dataclasses import asdict, dataclass
- from typing import Optional
-
- from transformers import PretrainedConfig
-
- from transformers.utils import logging
-
- logger = logging.get_logger(__name__)
-
- # TODO Update this
- OmniGenome_PRETRAINED_CONFIG_ARCHIVE_MAP = {
-     "yangheng/OmniGenome-52M": "https://huggingface.co/yangheng/OmniGenome-52M/resolve/main/config.json",
-     "yangheng/OmniGenome-186M": "https://huggingface.co/yangheng/OmniGenome-186M/resolve/main/config.json",
-     # See all OmniGenome models at https://huggingface.co/models?filter=OmniGenome
- }
-
-
- class OmniGenomeConfig(PretrainedConfig):
-     r"""
-     This is the configuration class to store the configuration of an [`OmniGenomeModel`]. It is used to instantiate an OmniGenome model
-     according to the specified arguments, defining the model architecture. Instantiating a configuration with the
-     defaults will yield a similar configuration to that of the OmniGenome
-     [yangheng/OmniGenome-52M](https://huggingface.co/yangheng/OmniGenome-52M) architecture.
-
-     Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
-     documentation from [`PretrainedConfig`] for more information.
-
-
-     Args:
-         vocab_size (`int`, *optional*):
-             Vocabulary size of the OmniGenome model. Defines the number of different tokens that can be represented by the
-             `input_ids` passed when calling [`OmniGenomeModel`].
-         mask_token_id (`int`, *optional*):
-             The index of the mask token in the vocabulary. This must be included in the config because of the
-             "mask-dropout" scaling trick, which will scale the inputs depending on the number of masked tokens.
-         pad_token_id (`int`, *optional*):
-             The index of the padding token in the vocabulary. This must be included in the config because certain parts
-             of the OmniGenome code use this instead of the attention mask.
-         hidden_size (`int`, *optional*, defaults to 768):
-             Dimensionality of the encoder layers and the pooler layer.
-         num_hidden_layers (`int`, *optional*, defaults to 12):
-             Number of hidden layers in the Transformer encoder.
-         num_attention_heads (`int`, *optional*, defaults to 12):
-             Number of attention heads for each attention layer in the Transformer encoder.
-         intermediate_size (`int`, *optional*, defaults to 3072):
-             Dimensionality of the "intermediate" (often named feed-forward) layer in the Transformer encoder.
-         hidden_dropout_prob (`float`, *optional*, defaults to 0.1):
-             The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.
-         attention_probs_dropout_prob (`float`, *optional*, defaults to 0.1):
-             The dropout ratio for the attention probabilities.
-         max_position_embeddings (`int`, *optional*, defaults to 1026):
-             The maximum sequence length that this model might ever be used with. Typically set this to something large
-             just in case (e.g., 512 or 1024 or 2048).
-         initializer_range (`float`, *optional*, defaults to 0.02):
-             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
-         layer_norm_eps (`float`, *optional*, defaults to 1e-12):
-             The epsilon used by the layer normalization layers.
-         position_embedding_type (`str`, *optional*, defaults to `"absolute"`):
-             Type of position embedding. Choose one of `"absolute"`, `"relative_key"`, `"relative_key_query"`, `"rotary"`.
-             For positional embeddings use `"absolute"`. For more information on `"relative_key"`, please refer to
-             [Self-Attention with Relative Position Representations (Shaw et al.)](https://arxiv.org/abs/1803.02155).
-             For more information on `"relative_key_query"`, please refer to *Method 4* in [Improve Transformer Models
-             with Better Relative Position Embeddings (Huang et al.)](https://arxiv.org/abs/2009.13658).
-         is_decoder (`bool`, *optional*, defaults to `False`):
-             Whether the model is used as a decoder or not. If `False`, the model is used as an encoder.
-         use_cache (`bool`, *optional*, defaults to `True`):
-             Whether or not the model should return the last key/values attentions (not used by all models). Only
-             relevant if `config.is_decoder=True`.
-         emb_layer_norm_before (`bool`, *optional*):
-             Whether to apply layer normalization after embeddings but before the main stem of the network.
-         token_dropout (`bool`, defaults to `False`):
-             When this is enabled, masked tokens are treated as if they had been dropped out by input dropout.
-
-     Examples:
-
-     ```python
-     >>> from transformers import OmniGenomeModel, OmniGenomeConfig
-
-     >>> # Initializing an OmniGenome yangheng/OmniGenome-52M style configuration
-     >>> configuration = OmniGenomeConfig()
-     >>> # Initializing a model from the configuration
-     >>> model = OmniGenomeModel(configuration)
-     >>> configuration = model.config  # Accessing the model configuration
-     ```"""
-
-     model_type = "mprna"
-
-     def __init__(
-         self,
-         vocab_size=None,
-         mask_token_id=None,
-         pad_token_id=None,
-         hidden_size=768,
-         num_hidden_layers=12,
-         num_attention_heads=12,
-         intermediate_size=3072,
-         hidden_dropout_prob=0.1,
-         attention_probs_dropout_prob=0.1,
-         max_position_embeddings=1026,
-         initializer_range=0.02,
-         layer_norm_eps=1e-12,
-         position_embedding_type="absolute",
-         use_cache=True,
-         emb_layer_norm_before=None,
-         token_dropout=False,
-         is_folding_model=False,
-         OmniGenomefold_config=None,
-         vocab_list=None,
-         **kwargs,
-     ):
-         super().__init__(
-             pad_token_id=pad_token_id, mask_token_id=mask_token_id, **kwargs
-         )
-
-         self.vocab_size = vocab_size
-         self.hidden_size = hidden_size
-         self.num_hidden_layers = num_hidden_layers
-         self.num_attention_heads = num_attention_heads
-         self.intermediate_size = intermediate_size
-         self.hidden_dropout_prob = hidden_dropout_prob
-         self.attention_probs_dropout_prob = attention_probs_dropout_prob
-         self.max_position_embeddings = max_position_embeddings
-         self.initializer_range = initializer_range
-         self.layer_norm_eps = layer_norm_eps
-         self.position_embedding_type = position_embedding_type
-         self.use_cache = use_cache
-         self.emb_layer_norm_before = emb_layer_norm_before
-         self.token_dropout = token_dropout
-         self.is_folding_model = is_folding_model
-         self.OmniGenomefold_config = OmniGenomefold_config
-         self.vocab_list = vocab_list
-         if self.OmniGenomefold_config is not None and getattr(
-             self.OmniGenomefold_config, "use_OmniGenome_attn_map", False
-         ):
-             raise ValueError(
-                 "The HuggingFace port of OmniGenomeFold does not support use_OmniGenome_attn_map at this time!"
-             )
-
-     def to_dict(self):
-         """
-         Serializes this instance to a Python dictionary. Override the default [`~PretrainedConfig.to_dict`].
-
-         Returns:
-             `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance.
-         """
-         output = super().to_dict()
-         return output
-
-
- @dataclass
- class TrunkConfig:
-     num_blocks: int = 48
-     sequence_state_dim: int = 1024
-     pairwise_state_dim: int = 128
-     sequence_head_width: int = 32
-     pairwise_head_width: int = 32
-     position_bins: int = 32
-     dropout: float = 0
-     layer_drop: float = 0
-     cpu_grad_checkpoint: bool = False
-     max_recycles: int = 4
-     chunk_size: Optional[int] = 128
-     structure_module: "StructureModuleConfig" = None
-
-     def __post_init__(self):
-         if self.structure_module is None:
-             self.structure_module = StructureModuleConfig()
-         elif isinstance(self.structure_module, dict):
-             self.structure_module = StructureModuleConfig(**self.structure_module)
-
-         if self.max_recycles <= 0:
-             raise ValueError(
-                 f"`max_recycles` should be positive, got {self.max_recycles}."
-             )
-         if self.sequence_state_dim % self.sequence_head_width != 0:
-             raise ValueError(
-                 "`sequence_state_dim` should be a round multiple of `sequence_head_width`, got"
-                 f" {self.sequence_state_dim} and {self.sequence_head_width}."
-             )
-         if self.pairwise_state_dim % self.pairwise_head_width != 0:
-             raise ValueError(
-                 "`pairwise_state_dim` should be a round multiple of `pairwise_head_width`, got"
-                 f" {self.pairwise_state_dim} and {self.pairwise_head_width}."
-             )
-
-         sequence_num_heads = self.sequence_state_dim // self.sequence_head_width
-         pairwise_num_heads = self.pairwise_state_dim // self.pairwise_head_width
-
-         if self.sequence_state_dim != sequence_num_heads * self.sequence_head_width:
-             raise ValueError(
-                 "`sequence_state_dim` should be equal to `sequence_num_heads * sequence_head_width`, got"
-                 f" {self.sequence_state_dim} != {sequence_num_heads} * {self.sequence_head_width}."
-             )
-         if self.pairwise_state_dim != pairwise_num_heads * self.pairwise_head_width:
-             raise ValueError(
-                 "`pairwise_state_dim` should be equal to `pairwise_num_heads * pairwise_head_width`, got"
-                 f" {self.pairwise_state_dim} != {pairwise_num_heads} * {self.pairwise_head_width}."
-             )
-         if self.pairwise_state_dim % 2 != 0:
-             raise ValueError(
-                 f"`pairwise_state_dim` should be even, got {self.pairwise_state_dim}."
-             )
-
-         if self.dropout >= 0.4:
-             raise ValueError(
-                 f"`dropout` should not be greater than 0.4, got {self.dropout}."
-             )
-
-     def to_dict(self):
-         """
-         Serializes this instance to a Python dictionary.
-
-         Returns:
-             `Dict[str, any]`: Dictionary of all the attributes that make up this configuration instance.
-         """
-         output = asdict(self)
-         output["structure_module"] = self.structure_module.to_dict()
-         return output
-
-
- @dataclass
- class StructureModuleConfig:
-     """
-     Args:
-         sequence_dim:
-             Single representation channel dimension
-         pairwise_dim:
-             Pair representation channel dimension
-         ipa_dim:
-             IPA hidden channel dimension
-         resnet_dim:
-             Angle resnet (Alg. 23 lines 11-14) hidden channel dimension
-         num_heads_ipa:
-             Number of IPA heads
-         num_qk_points:
-             Number of query/key points to generate during IPA
-         num_v_points:
-             Number of value points to generate during IPA
-         dropout_rate:
-             Dropout rate used throughout the layer
-         num_blocks:
-             Number of structure module blocks
-         num_transition_layers:
-             Number of layers in the single representation transition (Alg. 23 lines 8-9)
-         num_resnet_blocks:
-             Number of blocks in the angle resnet
-         num_angles:
-             Number of angles to generate in the angle resnet
-         trans_scale_factor:
-             Scale of single representation transition hidden dimension
-         epsilon:
-             Small number used in angle resnet normalization
-         inf:
-             Large number used for attention masking
-     """
-
-     sequence_dim: int = 384
-     pairwise_dim: int = 128
-     ipa_dim: int = 16
-     resnet_dim: int = 128
-     num_heads_ipa: int = 12
-     num_qk_points: int = 4
-     num_v_points: int = 8
-     dropout_rate: float = 0.1
-     num_blocks: int = 8
-     num_transition_layers: int = 1
-     num_resnet_blocks: int = 2
-     num_angles: int = 7
-     trans_scale_factor: int = 10
-     epsilon: float = 1e-8
-     inf: float = 1e5
-
-     def to_dict(self):
-         return asdict(self)
-
-
- def get_default_vocab_list():
-     return (
-         "<cls>",
-         "<pad>",
-         "<eos>",
-         "<unk>",
-         "A",
-         "C",
-         "G",
-         "T",
-         "U",
-         "N",
-         " ",
-         "<mask>",
-     )
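
Since this commit removes the module, the snippet below is only a minimal sketch of how the classes it defined were meant to be used, assuming a local copy of `configuration_omnigenome.py` is still importable (the import path is illustrative, not part of the repository after this commit):

```python
# Minimal usage sketch of the classes defined in the deleted module.
# Assumption: a local copy of configuration_omnigenome.py is on the Python path.
from configuration_omnigenome import (
    OmniGenomeConfig,
    StructureModuleConfig,
    TrunkConfig,
    get_default_vocab_list,
)

vocab = get_default_vocab_list()

# Build a config with the documented defaults (hidden_size=768, 12 layers, 12 heads).
config = OmniGenomeConfig(
    vocab_size=len(vocab),
    pad_token_id=vocab.index("<pad>"),
    mask_token_id=vocab.index("<mask>"),
)
print(config.hidden_size, config.num_hidden_layers, config.max_position_embeddings)

# The folding-trunk dataclasses validate their dimensions in __post_init__:
# here 1024 / 32 = 32 sequence heads and 128 / 32 = 4 pairwise heads.
trunk = TrunkConfig(structure_module=StructureModuleConfig(sequence_dim=384))
print(trunk.to_dict()["structure_module"]["sequence_dim"])  # 384
```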
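For the published checkpoints whose `config.json` follows this schema, loading usually goes through the Auto classes. A sketch, assuming the `yangheng/OmniGenome-52M` repository ships its own configuration and modeling code (hence `trust_remote_code=True`):

```python
# Sketch: loading an OmniGenome checkpoint described by the configuration above.
# Assumption: the Hub repository exposes its config/model classes as remote code.
from transformers import AutoConfig, AutoModel, AutoTokenizer

repo_id = "yangheng/OmniGenome-52M"
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)

print(config.hidden_size, config.num_hidden_layers, config.token_dropout)
```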