Maikou committed on
Commit
ea60be8
1 Parent(s): 926c0cf

delete useless file

Browse files
configs/deploy/clip_aslp_3df+3dc+abo+gso+toy+t10k+obj+sp+pk=256_01_4096_8_ckpt_250000_udt=110M_finetune_500000_deploy.yaml DELETED
@@ -1,181 +0,0 @@
1
- name: "0630_clip_aslp_3df+3dc+abo+gso+toy+t10k+obj+sp+pk=256_01_4096_8_ckpt_250000_udt=110M_finetune_500000"
2
- #wandb:
3
- # project: "image_diffuser"
4
- # offline: false
5
-
6
-
7
- training:
8
- steps: 500000
9
- use_amp: true
10
- ckpt_path: ""
11
- base_lr: 1.e-4
12
- gradient_clip_val: 5.0
13
- gradient_clip_algorithm: "norm"
14
- every_n_train_steps: 5000
15
- val_check_interval: 1024
16
- limit_val_batches: 16
17
-
18
- dataset:
19
- target: michelangelo.data.asl_webdataset.MultiAlignedShapeLatentModule
20
- params:
21
- batch_size: 38
22
- num_workers: 4
23
- val_num_workers: 4
24
- buffer_size: 256
25
- return_normal: true
26
- random_crop: false
27
- surface_sampling: true
28
- pc_size: &pc_size 4096
29
- image_size: 384
30
- mean: &mean [0.5, 0.5, 0.5]
31
- std: &std [0.5, 0.5, 0.5]
32
- cond_stage_key: "image"
33
-
34
- meta_info:
35
- 3D-FUTURE:
36
- render_folder: "/root/workspace/cq_workspace/datasets/3D-FUTURE/renders"
37
- tar_folder: "/root/workspace/datasets/make_tars/3D-FUTURE"
38
-
39
- ABO:
40
- render_folder: "/root/workspace/cq_workspace/datasets/ABO/renders"
41
- tar_folder: "/root/workspace/datasets/make_tars/ABO"
42
-
43
- GSO:
44
- render_folder: "/root/workspace/cq_workspace/datasets/GSO/renders"
45
- tar_folder: "/root/workspace/datasets/make_tars/GSO"
46
-
47
- TOYS4K:
48
- render_folder: "/root/workspace/cq_workspace/datasets/TOYS4K/TOYS4K/renders"
49
- tar_folder: "/root/workspace/datasets/make_tars/TOYS4K"
50
-
51
- 3DCaricShop:
52
- render_folder: "/root/workspace/cq_workspace/datasets/3DCaricShop/renders"
53
- tar_folder: "/root/workspace/datasets/make_tars/3DCaricShop"
54
-
55
- Thingi10K:
56
- render_folder: "/root/workspace/cq_workspace/datasets/Thingi10K/renders"
57
- tar_folder: "/root/workspace/datasets/make_tars/Thingi10K"
58
-
59
- shapenet:
60
- render_folder: "/root/workspace/cq_workspace/datasets/shapenet/renders"
61
- tar_folder: "/root/workspace/datasets/make_tars/shapenet"
62
-
63
- pokemon:
64
- render_folder: "/root/workspace/cq_workspace/datasets/pokemon/renders"
65
- tar_folder: "/root/workspace/datasets/make_tars/pokemon"
66
-
67
- objaverse:
68
- render_folder: "/root/workspace/cq_workspace/datasets/objaverse/renders"
69
- tar_folder: "/root/workspace/datasets/make_tars/objaverse"
70
-
71
- model:
72
- target: michelangelo.models.asl_diffusion.clip_asl_diffuser_pl_module.ClipASLDiffuser
73
- params:
74
- first_stage_config:
75
- target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
76
- params:
77
- shape_module_cfg:
78
- target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
79
- params:
80
- num_latents: &num_latents 256
81
- embed_dim: &embed_dim 64
82
- point_feats: 3 # normal
83
- num_freqs: 8
84
- include_pi: false
85
- heads: 12
86
- width: 768
87
- num_encoder_layers: 8
88
- num_decoder_layers: 16
89
- use_ln_post: true
90
- init_scale: 0.25
91
- qkv_bias: false
92
- use_checkpoint: false
93
- aligned_module_cfg:
94
- target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
95
- params:
96
- clip_model_version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
97
- # clip_model_version: "/root/workspace/checkpoints/clip/clip-vit-large-patch14"
98
-
99
- loss_cfg:
100
- target: torch.nn.Identity
101
-
102
- cond_stage_config:
103
- target: michelangelo.models.conditional_encoders.encoder_factory.FrozenCLIPImageGridEmbedder
104
- params:
105
- version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
106
- # version: "/root/workspace/checkpoints/clip/clip-vit-large-patch14"
107
- zero_embedding_radio: 0.1
108
-
109
- first_stage_key: "surface"
110
- cond_stage_key: "image"
111
- scale_by_std: false
112
-
113
- denoiser_cfg:
114
- target: michelangelo.models.asl_diffusion.asl_udt.ConditionalASLUDTDenoiser
115
- params:
116
- input_channels: *embed_dim
117
- output_channels: *embed_dim
118
- n_ctx: *num_latents
119
- width: 768
120
- layers: 6 # 2 * 6 + 1 = 13
121
- heads: 12
122
- context_dim: 1024
123
- init_scale: 1.0
124
- skip_ln: true
125
- use_checkpoint: true
126
-
127
- scheduler_cfg:
128
- guidance_scale: 7.5
129
- num_inference_steps: 50
130
- eta: 0.0
131
-
132
- noise:
133
- target: diffusers.schedulers.DDPMScheduler
134
- params:
135
- num_train_timesteps: 1000
136
- beta_start: 0.00085
137
- beta_end: 0.012
138
- beta_schedule: "scaled_linear"
139
- variance_type: "fixed_small"
140
- clip_sample: false
141
- denoise:
142
- target: diffusers.schedulers.DDIMScheduler
143
- params:
144
- num_train_timesteps: 1000
145
- beta_start: 0.00085
146
- beta_end: 0.012
147
- beta_schedule: "scaled_linear"
148
- clip_sample: false # clip sample to -1~1
149
- set_alpha_to_one: false
150
- steps_offset: 1
151
-
152
- optimizer_cfg:
153
- optimizer:
154
- target: torch.optim.AdamW
155
- params:
156
- betas: [0.9, 0.99]
157
- eps: 1.e-6
158
- weight_decay: 1.e-2
159
-
160
- scheduler:
161
- target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
162
- params:
163
- warm_up_steps: 5000
164
- f_start: 1.e-6
165
- f_min: 1.e-3
166
- f_max: 1.0
167
-
168
- loss_cfg:
169
- loss_type: "mse"
170
-
171
- logger:
172
- target: michelangelo.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger
173
- params:
174
- step_frequency: 2000
175
- num_samples: 4
176
- sample_times: 4
177
- mean: *mean
178
- std: *std
179
- bounds: [-1.1, -1.1, -1.1, 1.1, 1.1, 1.1]
180
- octree_depth: 7
181
- num_chunks: 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
configs/deploy/clip_sp+pk_aslperceiver=256_01_4096_8_udt=03.yaml DELETED
@@ -1,180 +0,0 @@
1
- name: "0428_clip_subsp+pk_sal_perceiver=256_01_4096_8_udt=03"
2
- #wandb:
3
- # project: "image_diffuser"
4
- # offline: false
5
-
6
- training:
7
- steps: 500000
8
- use_amp: true
9
- ckpt_path: ""
10
- base_lr: 1.e-4
11
- gradient_clip_val: 5.0
12
- gradient_clip_algorithm: "norm"
13
- every_n_train_steps: 5000
14
- val_check_interval: 1024
15
- limit_val_batches: 16
16
-
17
- # dataset
18
- dataset:
19
- target: michelangelo.data.asl_torch_dataset.MultiAlignedShapeImageTextModule
20
- params:
21
- batch_size: 38
22
- num_workers: 4
23
- val_num_workers: 4
24
- buffer_size: 256
25
- return_normal: true
26
- random_crop: false
27
- surface_sampling: true
28
- pc_size: &pc_size 4096
29
- image_size: 384
30
- mean: &mean [0.5, 0.5, 0.5]
31
- std: &std [0.5, 0.5, 0.5]
32
-
33
- cond_stage_key: "text"
34
-
35
- meta_info:
36
- 3D-FUTURE:
37
- render_folder: "/root/workspace/cq_workspace/datasets/3D-FUTURE/renders"
38
- tar_folder: "/root/workspace/datasets/make_tars/3D-FUTURE"
39
-
40
- ABO:
41
- render_folder: "/root/workspace/cq_workspace/datasets/ABO/renders"
42
- tar_folder: "/root/workspace/datasets/make_tars/ABO"
43
-
44
- GSO:
45
- render_folder: "/root/workspace/cq_workspace/datasets/GSO/renders"
46
- tar_folder: "/root/workspace/datasets/make_tars/GSO"
47
-
48
- TOYS4K:
49
- render_folder: "/root/workspace/cq_workspace/datasets/TOYS4K/TOYS4K/renders"
50
- tar_folder: "/root/workspace/datasets/make_tars/TOYS4K"
51
-
52
- 3DCaricShop:
53
- render_folder: "/root/workspace/cq_workspace/datasets/3DCaricShop/renders"
54
- tar_folder: "/root/workspace/datasets/make_tars/3DCaricShop"
55
-
56
- Thingi10K:
57
- render_folder: "/root/workspace/cq_workspace/datasets/Thingi10K/renders"
58
- tar_folder: "/root/workspace/datasets/make_tars/Thingi10K"
59
-
60
- shapenet:
61
- render_folder: "/root/workspace/cq_workspace/datasets/shapenet/renders"
62
- tar_folder: "/root/workspace/datasets/make_tars/shapenet"
63
-
64
- pokemon:
65
- render_folder: "/root/workspace/cq_workspace/datasets/pokemon/renders"
66
- tar_folder: "/root/workspace/datasets/make_tars/pokemon"
67
-
68
- objaverse:
69
- render_folder: "/root/workspace/cq_workspace/datasets/objaverse/renders"
70
- tar_folder: "/root/workspace/datasets/make_tars/objaverse"
71
-
72
- model:
73
- target: michelangelo.models.asl_diffusion.clip_asl_diffuser_pl_module.ClipASLDiffuser
74
- params:
75
- first_stage_config:
76
- target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
77
- params:
78
- # ckpt_path: "/root/workspace/cq_workspace/michelangelo/experiments/aligned_shape_latents/clip_aslperceiver_sp+pk_01_01/ckpt/ckpt-step=00230000.ckpt"
79
- shape_module_cfg:
80
- target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
81
- params:
82
- num_latents: &num_latents 256
83
- embed_dim: &embed_dim 64
84
- point_feats: 3 # normal
85
- num_freqs: 8
86
- include_pi: false
87
- heads: 12
88
- width: 768
89
- num_encoder_layers: 8
90
- num_decoder_layers: 16
91
- use_ln_post: true
92
- init_scale: 0.25
93
- qkv_bias: false
94
- use_checkpoint: true
95
- aligned_module_cfg:
96
- target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
97
- params:
98
- clip_model_version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
99
-
100
- loss_cfg:
101
- target: torch.nn.Identity
102
-
103
- cond_stage_config:
104
- target: michelangelo.models.conditional_encoders.encoder_factory.FrozenAlignedCLIPTextEmbedder
105
- params:
106
- version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
107
- zero_embedding_radio: 0.1
108
- max_length: 77
109
-
110
- first_stage_key: "surface"
111
- cond_stage_key: "text"
112
- scale_by_std: false
113
-
114
- denoiser_cfg:
115
- target: michelangelo.models.asl_diffusion.asl_udt.ConditionalASLUDTDenoiser
116
- params:
117
- input_channels: *embed_dim
118
- output_channels: *embed_dim
119
- n_ctx: *num_latents
120
- width: 768
121
- layers: 8 # 2 * 6 + 1 = 13
122
- heads: 12
123
- context_dim: 768
124
- init_scale: 1.0
125
- skip_ln: true
126
- use_checkpoint: true
127
-
128
- scheduler_cfg:
129
- guidance_scale: 7.5
130
- num_inference_steps: 50
131
- eta: 0.0
132
-
133
- noise:
134
- target: diffusers.schedulers.DDPMScheduler
135
- params:
136
- num_train_timesteps: 1000
137
- beta_start: 0.00085
138
- beta_end: 0.012
139
- beta_schedule: "scaled_linear"
140
- variance_type: "fixed_small"
141
- clip_sample: false
142
- denoise:
143
- target: diffusers.schedulers.DDIMScheduler
144
- params:
145
- num_train_timesteps: 1000
146
- beta_start: 0.00085
147
- beta_end: 0.012
148
- beta_schedule: "scaled_linear"
149
- clip_sample: false # clip sample to -1~1
150
- set_alpha_to_one: false
151
- steps_offset: 1
152
-
153
- optimizer_cfg:
154
- optimizer:
155
- target: torch.optim.AdamW
156
- params:
157
- betas: [0.9, 0.99]
158
- eps: 1.e-6
159
- weight_decay: 1.e-2
160
-
161
- scheduler:
162
- target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
163
- params:
164
- warm_up_steps: 5000
165
- f_start: 1.e-6
166
- f_min: 1.e-3
167
- f_max: 1.0
168
-
169
- loss_cfg:
170
- loss_type: "mse"
171
-
172
- logger:
173
- target: michelangelo.utils.trainings.mesh_log_callback.TextConditionalASLDiffuserLogger
174
- params:
175
- step_frequency: 1000
176
- num_samples: 4
177
- sample_times: 4
178
- bounds: [-1.1, -1.1, -1.1, 1.1, 1.1, 1.1]
179
- octree_depth: 7
180
- num_chunks: 10000