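# Training and evaluation configuration for CLIPSeg (models.clipseg.CLIPDensePredT,
# ViT-B/16 CLIP backbone) on the PhraseCut referring-expression segmentation dataset.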
configuration:
  batch_size: 64
  optimizer: torch.optim.AdamW
  lr: 0.001

  trainer: experiment_setup.train_loop
  scorer: experiment_setup.score
  model: models.clipseg.CLIPDensePredT

  lr_scheduler: cosine
  T_max: 20000
  eta_min: 0.0001
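  # Cosine annealing from lr (0.001) down to eta_min (0.0001); T_max matches
  # max_iterations, so the schedule presumably spans the full training run.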
  max_iterations: 20000
  val_interval: null
  # dataset
  dataset: datasets.phrasecut.PhraseCut
  split_mode: pascal_test
  split: train
  mask: text_and_crop_blur_highlight352
  image_size: 352
  negative_prob: 0.2
  mix_text_max: 0.5
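  # negative_prob: fraction of training samples that presumably receive a
  # non-matching phrase (negative sample with an empty target mask).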
  # general
  mix: True
  prompt: shuffle+
  norm_cond: True
  mix_text_min: 0.0
  with_visual: True
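  # Hybrid prompting: with_visual enables visual (image-based) prompts and mix
  # presumably interpolates between text and visual conditionals, with the text
  # weight bounded by mix_text_min / mix_text_max.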
  # model
  version: 'ViT-B/16'
  extract_layers: [3, 7, 9]
  reduce_dim: 64
  depth: 3
  fix_shift: False

  loss: torch.nn.functional.binary_cross_entropy_with_logits
  amp: True
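# Settings shared by all evaluation runs defined in test_configuration below.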
test_configuration_common:
  normalize: True
  image_size: 352
  batch_size: 32
  sigmoid: True
  split: test
  label_support: True
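# Two PhraseCut evaluation settings: 'pc' scores text prompts only, while
# 'pc-vis' scores visual prompts only (visual_only with the crop/blur/highlight mask).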
test_configuration:
  -
    name: pc
    metric: metrics.FixedIntervalMetrics
    test_dataset: phrasecut
    mask: text

  -
    name: pc-vis
    metric: metrics.FixedIntervalMetrics
    test_dataset: phrasecut
    mask: crop_blur_highlight352
    with_visual: True
    visual_only: True
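# Columns of the results table: best foreground IoU, best mean IoU, and fgIoU at
# a fixed 0.5 value (presumably the binarization threshold) for each test setting,
# plus run duration.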
columns: [name,
          pc_fgiou_best, pc_miou_best, pc_fgiou_0.5,
          pc-vis_fgiou_best, pc-vis_miou_best, pc-vis_fgiou_0.5,
          duration]
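# Each entry overrides the base configuration above: rd64-uni is the unmodified
# model (reduce_dim 64); the others ablate pretraining, negative sampling, visual
# prompts, reduce_dim, the extracted layers, and the highlight mask.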
individual_configurations:
- {name: rd64-uni}
- {name: rd64-no-pretrain, not_pretrained: True, lr: 0.0003}
- {name: rd64-no-negatives, negative_prob: 0.0}
- {name: rd64-neg0.5, negative_prob: 0.5}
- {name: rd64-no-visual, with_visual: False, mix: False}
- {name: rd16-uni, reduce_dim: 16}
- {name: rd64-layer3, extract_layers: [3], depth: 1}
- {name: rd64-blur-highlight, mask: text_and_blur_highlight, test_configuration: {mask: blur_highlight}}