---
# Experiment tag: CLIP-conditioned shape diffusion, perceiver latents=256,
# 4096 surface points, 8-layer UDT denoiser (run 0428).
name: "0428_clip_subsp+pk_sal_perceiver=256_01_4096_8_udt=03"

# Trainer settings (consumed by the Lightning training loop — gradient_clip_*,
# val_check_interval and limit_val_batches match Trainer argument names).
training:
  steps: 500000
  use_amp: true
  ckpt_path: ""                    # resume checkpoint path; empty = start from scratch
  base_lr: 1.e-4
  gradient_clip_val: 5.0
  gradient_clip_algorithm: "norm"
  every_n_train_steps: 5000        # checkpoint save frequency
  val_check_interval: 1024
  limit_val_batches: 16

# Data module: multi-source aligned (shape, image, text) webdataset-style loader.
dataset:
  target: michelangelo.data.asl_torch_dataset.MultiAlignedShapeImageTextModule
  params:
    batch_size: 38
    num_workers: 4
    val_num_workers: 4
    buffer_size: 256
    return_normal: true
    random_crop: false
    surface_sampling: true
    pc_size: &pc_size 4096         # number of surface points sampled per shape
    image_size: 384
    mean: &mean [0.5, 0.5, 0.5]    # image normalization mean/std
    std: &std [0.5, 0.5, 0.5]

    cond_stage_key: "text"

    # Per-dataset locations: rendered views + packed .tar shards.
    meta_info:
      3D-FUTURE:
        render_folder: "/root/workspace/cq_workspace/datasets/3D-FUTURE/renders"
        tar_folder: "/root/workspace/datasets/make_tars/3D-FUTURE"

      ABO:
        render_folder: "/root/workspace/cq_workspace/datasets/ABO/renders"
        tar_folder: "/root/workspace/datasets/make_tars/ABO"

      GSO:
        render_folder: "/root/workspace/cq_workspace/datasets/GSO/renders"
        tar_folder: "/root/workspace/datasets/make_tars/GSO"

      TOYS4K:
        render_folder: "/root/workspace/cq_workspace/datasets/TOYS4K/TOYS4K/renders"
        tar_folder: "/root/workspace/datasets/make_tars/TOYS4K"

      3DCaricShop:
        render_folder: "/root/workspace/cq_workspace/datasets/3DCaricShop/renders"
        tar_folder: "/root/workspace/datasets/make_tars/3DCaricShop"

      Thingi10K:
        render_folder: "/root/workspace/cq_workspace/datasets/Thingi10K/renders"
        tar_folder: "/root/workspace/datasets/make_tars/Thingi10K"

      shapenet:
        render_folder: "/root/workspace/cq_workspace/datasets/shapenet/renders"
        tar_folder: "/root/workspace/datasets/make_tars/shapenet"

      pokemon:
        render_folder: "/root/workspace/cq_workspace/datasets/pokemon/renders"
        tar_folder: "/root/workspace/datasets/make_tars/pokemon"

      objaverse:
        render_folder: "/root/workspace/cq_workspace/datasets/objaverse/renders"
        tar_folder: "/root/workspace/datasets/make_tars/objaverse"

# Diffusion model: CLIP text conditioning over the aligned-shape latent space.
model:
  target: michelangelo.models.asl_diffusion.clip_asl_diffuser_pl_module.ClipASLDiffuser
  params:
    # Frozen first stage: shape auto-encoder producing the latents to diffuse.
    first_stage_config:
      target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
      params:

        shape_module_cfg:
          target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
          params:
            num_latents: &num_latents 256   # aliased below as denoiser n_ctx
            embed_dim: &embed_dim 64        # aliased below as denoiser channels
            point_feats: 3
            num_freqs: 8
            include_pi: false
            heads: 12
            width: 768
            num_encoder_layers: 8
            num_decoder_layers: 16
            use_ln_post: true
            init_scale: 0.25
            qkv_bias: false
            use_checkpoint: true
        aligned_module_cfg:
          target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
          params:
            clip_model_version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"

        # First stage is not trained here, so its loss is a no-op.
        loss_cfg:
          target: torch.nn.Identity

    # Conditioning: frozen CLIP text encoder.
    cond_stage_config:
      target: michelangelo.models.conditional_encoders.encoder_factory.FrozenAlignedCLIPTextEmbedder
      params:
        version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
        zero_embedding_radio: 0.1   # probability of dropping the condition (classifier-free guidance)
        max_length: 77

    first_stage_key: "surface"
    cond_stage_key: "text"
    scale_by_std: false

    # Denoising transformer operating on the first-stage latents.
    denoiser_cfg:
      target: michelangelo.models.asl_diffusion.asl_udt.ConditionalASLUDTDenoiser
      params:
        input_channels: *embed_dim
        output_channels: *embed_dim
        n_ctx: *num_latents
        width: 768
        layers: 8
        heads: 12
        context_dim: 768
        init_scale: 1.0
        skip_ln: true
        use_checkpoint: true

    # Sampling configuration (train-time noising vs. inference-time denoising).
    scheduler_cfg:
      guidance_scale: 7.5
      num_inference_steps: 50
      eta: 0.0

      noise:
        target: diffusers.schedulers.DDPMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          variance_type: "fixed_small"
          clip_sample: false
      denoise:
        target: diffusers.schedulers.DDIMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          clip_sample: false
          set_alpha_to_one: false
          steps_offset: 1

    optimizer_cfg:
      optimizer:
        target: torch.optim.AdamW
        params:
          betas: [0.9, 0.99]
          eps: 1.e-6
          weight_decay: 1.e-2

      scheduler:
        target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
        params:
          warm_up_steps: 5000
          f_start: 1.e-6   # lr multipliers relative to training.base_lr
          f_min: 1.e-3
          f_max: 1.0

    loss_cfg:
      loss_type: "mse"

# Periodic sampling callback: generates meshes from text prompts during training.
logger:
  target: michelangelo.utils.trainings.mesh_log_callback.TextConditionalASLDiffuserLogger
  params:
    step_frequency: 1000
    num_samples: 4
    sample_times: 4
    bounds: [-1.1, -1.1, -1.1, 1.1, 1.1, 1.1]   # [xmin, ymin, zmin, xmax, ymax, zmax]
    octree_depth: 7
    num_chunks: 10000