{"domains_in": ["rgb@224", "caption", "det", "tok_rgb@224", "tok_depth@224", "tok_normal@224", "tok_semseg@224", "tok_clip@224"], "domains_out": ["caption", "det", "tok_rgb@224", "tok_depth@224", "tok_normal@224", "tok_semseg@224", "tok_clip@224"], "encoder_depth": 24, "decoder_depth": 24, "dim": 2048, "num_heads": 32, "mlp_ratio": 4, "qkv_bias": false, "proj_bias": false, "mlp_bias": false, "norm_bias": false, "act_layer": "SiLU", "gated_mlp": true, "image_size": 224, "patch_size": 16}