minDALLE / configs /transfer-imagenet-clscond-gen.yaml
valhalla's picture
init
b442155
raw
history blame
821 Bytes
# Dataset selection and the image resolution used for it.
# NOTE(review): nesting reconstructed from a copy whose indentation was lost;
# left flat, the two `dataset` keys would be duplicates at the same level.
dataset:
  dataset: imagenet
  image_resolution: 256
# Stage 1 model settings (type: vqgan) and its hyper-parameters.
# NOTE(review): nesting reconstructed from a copy whose indentation was lost;
# left flat, `type`, `embed_dim` and `hparams` would collide with the keys of
# the same name in the stage2 section. Confirm the layout against the
# consumer's config schema.
stage1:
  type: vqgan
  embed_dim: 256
  n_embed: 16384
  hparams:
    # Canonical lowercase boolean (was `False`).
    double_z: false
    z_channels: 256
    resolution: 256
    in_channels: 3
    out_ch: 3
    ch: 128
    ch_mult: [1, 1, 2, 2, 4]
    num_res_blocks: 2
    attn_resolutions: [16]
    pdrop: 0.0
# Stage 2 model settings (type: igpt, class-conditional) and its
# hyper-parameters.
# NOTE(review): nesting reconstructed from a copy whose indentation was lost;
# left flat, `type`, `embed_dim` and `hparams` would collide with the keys of
# the same name in the stage1 section. Confirm the layout against the
# consumer's config schema.
stage2:
  type: igpt
  # Canonical lowercase booleans throughout (were `True` / `False`).
  use_cls_cond: true
  vocab_size_img: 16384
  hparams:
    embed_dim: 1536
    n_layers: 42
    n_heads: 24
    n_dense_layers: 42
    ctx_len_img: 256
    embd_pdrop: 0.0
    resid_pdrop: 0.0
    attn_pdrop: 0.0
    mlp_bias: true
    attn_bias: true
    gelu_use_approx: false
    # NOTE(review): kept under `hparams` because it directly follows the other
    # hparams keys; verify it is not meant to sit beside `use_cls_cond`.
    n_classes: 1000
# Optimizer settings.
# NOTE(review): nesting reconstructed from a copy whose indentation was lost;
# confirm against the consumer's config schema.
optimizer:
  opt_type: adamW
  # Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `1e-4` as the string "1e-4" rather than a float.
  base_lr: 1.0e-4
  weight_decay: 0.0
  betas: [0.9, 0.95]
  grad_clip_norm: 4.0
# Experiment settings: batch sizes and number of epochs.
# NOTE(review): nesting reconstructed from a copy whose indentation was lost;
# left flat, `experiment` would be null and these keys would sit at top level.
experiment:
  local_batch_size: 2
  total_batch_size: 512
  epochs: 8