Image-to-3D
English
wala
depth-map-to-3d
WaLa-DM1-1B / args.json
kamalrahimi's picture
Update args.json
6bfb14e verified
{
"strategy": null,
"gpu_workers": 8,
"restore_path": null,
"prefetch_factor": null,
"matual_precision": "medium",
"max_concurrency": 1500,
"multipart_size": 8388608,
"print_every": 100,
"save_every": 100,
"validation_every": 15000,
"validation_every_log": 500,
"visualization_every": 15000,
"log_level": "info",
"experiment_type": "max",
"experiment_every": 5,
"fast_dev_run": false,
"limit_val_batches": 0.05,
"filter_path": "filter_list",
"finetune": false,
"finetune_dp_cond": null,
"finetune_dp_cond_type": null,
"ft_train_number": null,
"ft_train_datasets": null,
"val_cnt": null,
"use_even_val": false,
"use_ema": true,
"ema_decay": 0.9999,
"use_ema_weights": true,
"use_compile": true,
"batch_size": 8,
"test_batch_size": 8,
"num_workers": 16,
"test_threshold": 0.05,
"num_gpus": 8,
"seed": 1,
"epochs": 300,
"optimizer": "Adam",
"lr": 0.0001,
"train_mode": "train",
"use_local_storage": false,
"auto_exp_name": null,
"input_type": "Wavelet",
"output_type": "Wavelet",
"encoder_type": "General_Encoder_Down_2",
"decoder_type": "General_Decoder_Up_2",
"encoder_num_tran": 0,
"decoder_num_tran": 0,
"last_feature_transform": null,
"reconstruct_loss_type": "mean",
"quantizer_type": "original",
"normalize_latent": null,
"e_dim": 4,
"n_e": 1024,
"beta": 0.25,
"sample_mode": "bilinear",
"padding": 0.1,
"gamma": 1,
"grid_size": 12,
"t_loss": 1.0,
"num_latent_tokens": 256,
"dataset_name": "all",
"voxel_transform": null,
"num_points": 2048,
"num_sdf_points": 5000,
"categories": null,
"resolution": 256,
"max_depth": 3,
"max_training_level": 2,
"point_num": 16384,
"keep_level": 2,
"data_keep_level": 2,
"wavelet": "bior6.8",
"padding_mode": "constant",
"use_normalization": false,
"use_shift_mean": false,
"start_stage": 0,
"use_adaptive_stage_update": false,
"no_rebalance_loss": true,
"use_compact_indices": true,
"sample_threshold_ratio": 0.03125,
"use_batched_threshold": true,
"use_sample_training": true,
"use_sample_threshold": true,
"div_hyp": 1.0,
"checkpoint": null,
"use_timestamp": false,
"num_iterations": 300000,
"gpu": "0",
"threshold": 0.45,
"sampling_type": null,
"auto_precision": "bf16",
"gradient_clip_val": 1.0,
"dropout": 0.0,
"sdf_points": 20000,
"sdf_sample_type": "mixture",
"sdf_res": 256,
"greater_or_no": true,
"s3_bucket": "build3d-wavelets",
"s3_prefix": "dataset",
"use_s3": true,
"wavelet_transform": "all",
"test_exp_name": "default",
"test_file_name": "model_performance.csv",
"network_type": "latent_uvit",
"diffusion_beta_schedule": "cosine",
"diffusion_step": 1000,
"diffusion_rescale_timestep": 100,
"diffusion_scale_ratio": 1.0,
"diffusion_model_var_type": "FIXED_SMALL",
"diffusion_model_mean_type": "START_X",
"diffusion_loss_type": "MSE",
"diffusion_sampler": "second-order",
"dit_block_type": "cross_dit",
"att_patch_size": 1,
"att_hidden_size": 1152,
"transformer_num_blocks": 32,
"transformer_num_heads": 16,
"transformer_add_num_register": 0,
"unet_model_channels": 128,
"unet_num_res_blocks": [
3
],
"learnable_skip_r": null,
"add_condition_res_ch": 128,
"with_fix_pos": true,
"cond_num_mapping_layers": 0,
"add_condition_time_ch": true,
"add_condition_input_ch": null,
"use_pointcloud_conditions": false,
"pc_encoder_type": "PointNet_Simple",
"use_pointvoxel_encoder": false,
"num_pc_points": 2500,
"use_pc_samples": false,
"sample_num": 2500,
"pc_dims": 1024,
"num_inds": 1024,
"pc_output_dim": 1024,
"use_voxel_conditions": false,
"voxel_context_dim": 1024,
"voxel_dim": 8,
"voxel_resolution": 16,
"use_image_conditions": false,
"use_camera_index": false,
"render_resolution": 384,
"max_images_num": 55,
"image_transform": "dino",
"clip_model_type": "dino-l-14_reg",
"input_view_cnt": 4,
"use_multiple_views_grids": false,
"training_views": null,
"testing_views": null,
"use_depth_conditions": true,
"use_wavelet_conditions": false,
"dp_cond": 0.05,
"scale": 3,
"guidance_type": null,
"dp_cond_type": null,
"use_autoencoder": false,
"checkpoint_type": "last_ft_10000",
"pre_quant": true,
"latent_normalization": false,
"use_autoencoder_ema": false,
"test_log_num": null,
"precision": "bf16",
"gradient_clip_val_2": 1.0,
"wavelet_transform_2": null,
"weight_decay": 0.0,
"opt_eps": 1e-08,
"latent_lr": 0.0001,
"pin_memory": false,
"n_px": 224,
"cond_emb_dim": 1024,
"cond_grid_size": 256,
"cond_grid_emb_size": 1024,
"condition_dim": 1024,
"num_cond_vectors": 256
}