Traly committed on
Commit
193c713
1 Parent(s): 191a4b9
Files changed (50)
  1. app.py +113 -0
  2. images/0801x4.png +0 -0
  3. images/0804x4.png +0 -0
  4. images/0809x4.png +0 -0
  5. images/lion.jpg +0 -0
  6. images/logo.png +0 -0
  7. requirements.txt +27 -0
  8. sam_diffsr/configs/base/config_base.yaml +41 -0
  9. sam_diffsr/configs/base/diffsr_base.yaml +41 -0
  10. sam_diffsr/configs/base/sr_base.yaml +11 -0
  11. sam_diffsr/configs/data/df2k4x.yaml +11 -0
  12. sam_diffsr/configs/data/df2k4x_sam.yaml +11 -0
  13. sam_diffsr/configs/diffsr_df2k4x.yaml +18 -0
  14. sam_diffsr/configs/rrdb/df2k4x_pretrain.yaml +14 -0
  15. sam_diffsr/configs/sam/sam_diffsr_df2k4x.yaml +26 -0
  16. sam_diffsr/models_sr/__init__.py +0 -0
  17. sam_diffsr/models_sr/commons.py +317 -0
  18. sam_diffsr/models_sr/diffsr_modules.py +177 -0
  19. sam_diffsr/models_sr/diffusion.py +291 -0
  20. sam_diffsr/models_sr/diffusion_sam.py +90 -0
  21. sam_diffsr/models_sr/module_util.py +58 -0
  22. sam_diffsr/tasks/__init__.py +0 -0
  23. sam_diffsr/tasks/infer.py +81 -0
  24. sam_diffsr/tasks/rrdb.py +68 -0
  25. sam_diffsr/tasks/rrdb_sam.py +49 -0
  26. sam_diffsr/tasks/srdiff.py +76 -0
  27. sam_diffsr/tasks/srdiff_df2k.py +119 -0
  28. sam_diffsr/tasks/srdiff_df2k_sam.py +211 -0
  29. sam_diffsr/tasks/trainer.py +346 -0
  30. sam_diffsr/tb_logs/events.out.tfevents.1709283169.wangchengchengdeMacBook-Pro.local.99018.0 +3 -0
  31. sam_diffsr/tb_logs/events.out.tfevents.1709284054.wangchengchengdeMacBook-Pro.local.99188.0 +3 -0
  32. sam_diffsr/tb_logs/events.out.tfevents.1709284076.wangchengchengdeMacBook-Pro.local.99198.0 +3 -0
  33. sam_diffsr/tb_logs/events.out.tfevents.1709284101.wangchengchengdeMacBook-Pro.local.99211.0 +3 -0
  34. sam_diffsr/tb_logs/events.out.tfevents.1709284193.wangchengchengdeMacBook-Pro.local.99233.0 +3 -0
  35. sam_diffsr/tb_logs/events.out.tfevents.1709284415.wangchengchengdeMacBook-Pro.local.99289.0 +3 -0
  36. sam_diffsr/tb_logs/events.out.tfevents.1709284460.wangchengchengdeMacBook-Pro.local.99308.0 +3 -0
  37. sam_diffsr/tb_logs/events.out.tfevents.1709284491.wangchengchengdeMacBook-Pro.local.99315.0 +3 -0
  38. sam_diffsr/tb_logs/events.out.tfevents.1709285127.wangchengchengdeMacBook-Pro.local.785.0 +3 -0
  39. sam_diffsr/tb_logs/events.out.tfevents.1709285146.wangchengchengdeMacBook-Pro.local.901.0 +3 -0
  40. sam_diffsr/tools/caculate_iqa.py +136 -0
  41. sam_diffsr/tools/visualize_sam_mask.py +20 -0
  42. sam_diffsr/utils_sr/__init__.py +0 -0
  43. sam_diffsr/utils_sr/dataset.py +50 -0
  44. sam_diffsr/utils_sr/hparams.py +157 -0
  45. sam_diffsr/utils_sr/indexed_datasets.py +72 -0
  46. sam_diffsr/utils_sr/matlab_resize.py +181 -0
  47. sam_diffsr/utils_sr/plt_img.py +109 -0
  48. sam_diffsr/utils_sr/sr_utils.py +171 -0
  49. sam_diffsr/utils_sr/utils.py +269 -0
  50. sam_diffsr/weight/model_ckpt_steps_400000.ckpt +3 -0
app.py ADDED
@@ -0,0 +1,113 @@
+ import importlib
+ from collections import OrderedDict
+ from pathlib import Path
+
+ import gradio as gr
+ import os
+
+ import numpy as np
+ import torch
+ from PIL import Image
+ from torchvision import transforms
+
+ from sam_diffsr.utils_sr.hparams import set_hparams, hparams
+ from sam_diffsr.utils_sr.matlab_resize import imresize
+
+
+ def get_img_data(img_PIL, hparams, sr_scale=4):
+     img_lr = img_PIL.convert('RGB')
+     img_lr = np.uint8(np.asarray(img_lr))
+
+     h, w, c = img_lr.shape
+     h, w = h * sr_scale, w * sr_scale
+     h = h - h % (sr_scale * 2)
+     w = w - w % (sr_scale * 2)
+     h_l = h // sr_scale
+     w_l = w // sr_scale
+
+     img_lr = img_lr[:h_l, :w_l]
+
+     to_tensor_norm = transforms.Compose([
+         transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+     ])
+
+     img_lr_up = imresize(img_lr / 256, hparams['sr_scale'])  # np.float [H, W, C]
+     img_lr, img_lr_up = [to_tensor_norm(x).float() for x in [img_lr, img_lr_up]]
+
+     img_lr = torch.unsqueeze(img_lr, dim=0)
+     img_lr_up = torch.unsqueeze(img_lr_up, dim=0)
+
+     return img_lr, img_lr_up
+
+
+ def load_checkpoint(model, ckpt_path):
+     checkpoint = torch.load(ckpt_path, map_location='cpu')
+     print(f'loading checkpoint from: {ckpt_path}')
+     stat_dict = checkpoint['state_dict']['model']
+
+     new_state_dict = OrderedDict()
+     for k, v in stat_dict.items():
+         if k[:7] == 'module.':
+             k = k[7:]  # strip the `module.` prefix
+         new_state_dict[k] = v
+
+     model.load_state_dict(new_state_dict)
+     model.cuda()
+     del checkpoint
+     torch.cuda.empty_cache()
+
+
+ def model_init(ckpt_path):
+     set_hparams()
+
+     from sam_diffsr.tasks.srdiff_df2k_sam import SRDiffDf2k_sam as trainer
+
+     trainer = trainer()
+
+     trainer.build_model()
+     load_checkpoint(trainer.model, ckpt_path)
+
+     torch.backends.cudnn.benchmark = False
+
+     return trainer
+
+
+ def image_infer(img_PIL):
+     with torch.no_grad():
+         trainer.model.eval()
+         img_lr, img_lr_up = get_img_data(img_PIL, hparams, sr_scale=4)
+
+         img_lr = img_lr.to('cuda')
+         img_lr_up = img_lr_up.to('cuda')
+
+         img_sr, _ = trainer.model.sample(img_lr, img_lr_up, img_lr_up.shape)
+
+         img_sr = img_sr.clamp(-1, 1)
+         img_sr = trainer.tensor2img(img_sr)[0]
+         img_sr = Image.fromarray(img_sr)
+
+     return img_sr
+
+
+ # cheetah = os.path.join(os.path.dirname(__file__), "images/cheetah1.jpg")
+
+ root_path = os.path.dirname(__file__)
+
+ cheetah = os.path.join(root_path, "images/lion.jpg")
+ print(cheetah)
+
+ demo = gr.Interface(image_infer, gr.Image(type="pil", value=cheetah), "image",
+                     # flagging_options=["blurry", "incorrect", "other"],
+                     examples=[
+                         os.path.join(root_path, "images/0801x4.png"),
+                         os.path.join(root_path, "images/0809x4.png"),
+                         os.path.join(root_path, "images/0809x4.png"),
+                     ]
+                     )
+
+ if __name__ == "__main__":
+     parent_path = Path(__file__).absolute().parent
+     fill_root = os.path.abspath(parent_path)
+     ckpt_path = os.path.join(fill_root, 'sam_diffsr/weight/model_ckpt_steps_400000.ckpt')
+     trainer = model_init(ckpt_path)
+     demo.launch()
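Note on the crop arithmetic in `get_img_data` above: it trims the low-resolution input so that the eventual high-resolution size is divisible by `sr_scale * 2`, presumably to keep the diffusion U-Net's downsampling shapes consistent. A small worked example of the same arithmetic (a sketch, not part of the commit):

sr_scale = 4
h, w = 123, 250                     # low-resolution input size
H, W = h * sr_scale, w * sr_scale   # 492, 1000 before rounding
H -= H % (sr_scale * 2)             # 488  -> now divisible by 8
W -= W % (sr_scale * 2)             # 1000 -> already divisible by 8
h_l, w_l = H // sr_scale, W // sr_scale   # 122, 250: the LR crop actually fed to the model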
images/0801x4.png ADDED
images/0804x4.png ADDED
images/0809x4.png ADDED
images/lion.jpg ADDED
images/logo.png ADDED
requirements.txt ADDED
@@ -0,0 +1,27 @@
+ torch
+ torchvision
+ Cython
+ matplotlib
+ tqdm
+ numpy
+ scipy
+ PyYAML
+ tensorboardX
+ tensorboard
+ scikit-learn
+ scikit-image
+ seaborn
+ pillow
+ opencv-contrib-python
+ einops
+ lpips
+ natsort
+ timm
+ openpyxl
+ kornia
+ xlwt==1.3.0
+ xlrd==1.2.0
+ pyiqa
+ rotary_embedding_torch
+ opencv-python>=4.8.0.76
+ opencv-python-headless>=4.5.5.64
sam_diffsr/configs/base/config_base.yaml ADDED
@@ -0,0 +1,41 @@
+ # task
+ binary_data_dir: ''
+ work_dir: '' # experiment directory.
+ infer: false # infer
+ seed: 1234
+ debug: false
+ save_codes:
+   - configs
+   - models_sr
+   - tasks
+   - utils_sr
+
+ #############
+ # dataset
+ #############
+ ds_workers: 1
+ endless: false
+
+ #########
+ # train and eval
+ #########
+ print_nan_grads: false
+ load_ckpt: ''
+ save_best: true
+ num_ckpt_keep: 100
+ clip_grad_norm: 0
+ accumulate_grad_batches: 1
+ tb_log_interval: 100
+ num_sanity_val_steps: 5 # steps of validation at the beginning
+ check_val_every_n_epoch: 10
+ val_check_interval: 4000
+ valid_monitor_key: 'val_loss'
+ valid_monitor_mode: 'min'
+ max_epochs: 1000
+ max_updates: 600000
+ amp: false
+ batch_size: 32
+ eval_batch_size: 32
+ num_workers: 8
+ test_input_dir: ''
+ resume_from_checkpoint: 0
sam_diffsr/configs/base/diffsr_base.yaml ADDED
@@ -0,0 +1,41 @@
+ base_config:
+   - ./config_base.yaml
+   - ./sr_base.yaml
+ # model
+ beta_schedule: cosine
+ beta_s: 0.008
+ beta_end: 0.02
+ hidden_size: 64
+ timesteps: 100
+ res: true
+ res_rescale: 2.0
+ up_input: false
+ use_wn: false
+ gn_groups: 0
+ use_rrdb: true
+ #rrdb_num_block: 8
+ #rrdb_num_feat: 32
+ rrdb_num_block: 17
+ rrdb_num_feat: 64
+ rrdb_ckpt: ''
+ unet_dim_mults: 1|2|2|4
+ clip_input: true
+ denoise_fn: unet
+ use_attn: false
+ aux_l1_loss: true
+ aux_ssim_loss: false
+ aux_percep_loss: false
+ loss_type: l1
+ pred_noise: true
+ clip_grad_norm: 10
+ weight_init: false
+ fix_rrdb: true
+
+ # train and eval
+ lr: 0.0002
+ decay_steps: 100000
+ accumulate_grad_batches: 1
+ style_interp: false
+ save_intermediate: false
+ show_training_process: false
+ print_arch: false
sam_diffsr/configs/base/sr_base.yaml ADDED
@@ -0,0 +1,11 @@
+ base_config: ./config_base.yaml
+ data_interp: bicubic # bilinear | bicubic
+ data_augmentation: false
+ max_updates: 300000
+ batch_size: 16
+ eval_batch_size: 1
+ test_batch_size: 1
+ valid_steps: 3
+ num_sanity_val_steps: 3
+ test_save_png: false
+ gen_dir_name: ''
sam_diffsr/configs/data/df2k4x.yaml ADDED
@@ -0,0 +1,11 @@
+ binary_data_dir: data/train/df2k4x
+ patch_size: 160
+ crop_size: 320
+ thresh_size: 160
+ test_crop_size: [ 2040, 2040 ]
+ test_thresh_size: 0
+ valid_steps: 4
+ num_sanity_val_steps: 4
+ eval_batch_size: 1
+ test_batch_size: 1
+ sr_scale: 4
sam_diffsr/configs/data/df2k4x_sam.yaml ADDED
@@ -0,0 +1,11 @@
+ binary_data_dir: data/train/df2k4x_sam
+ patch_size: 160
+ crop_size: 320
+ thresh_size: 160
+ test_crop_size: [ 2040, 2040 ]
+ test_thresh_size: 0
+ valid_steps: 4
+ num_sanity_val_steps: 4
+ eval_batch_size: 1
+ test_batch_size: 1
+ sr_scale: 4
sam_diffsr/configs/diffsr_df2k4x.yaml ADDED
@@ -0,0 +1,18 @@
+ base_config:
+   - ./base/diffsr_base.yaml
+   - ./data/df2k4x.yaml
+ trainer_cls: tasks.srdiff_df2k.SRDiffDf2k
+
+ # model
+ unet_dim_mults: 1|2|3|4
+ decay_steps: 200000
+
+ # train and test
+ batch_size: 64
+ max_updates: 400000
+
+ sam_config:
+   cond_sam: False
+   p_losses_sam: False
+   p_sample_sam: False
+   q_sample_sam: False
sam_diffsr/configs/rrdb/df2k4x_pretrain.yaml ADDED
@@ -0,0 +1,14 @@
+ base_config:
+   - ../sr_base.yaml
+   - ../df2k4x.yaml
+ trainer_cls: tasks.rrdb.RRDBDf2kTask
+ # model
+ hidden_size: 64
+ lr: 0.0002
+ num_block: 17
+
+ # train and eval
+ max_updates: 100000
+ batch_size: 64
+ eval_batch_size: 1
+ valid_steps: 3
sam_diffsr/configs/sam/sam_diffsr_df2k4x.yaml ADDED
@@ -0,0 +1,26 @@
+ base_config:
+   - ../base/diffsr_base.yaml
+   - ../data/df2k4x_sam.yaml
+ trainer_cls: tasks.srdiff_df2k_sam.SRDiffDf2k_sam
+
+ # model
+ unet_dim_mults: 1|2|3|4
+ decay_steps: 200000
+
+ # train and test
+ batch_size: 64
+ max_updates: 400000
+
+ rrdb_num_feat: 64
+
+ sam_config:
+   cond_sam: False
+   p_losses_sam: True
+   mask_coefficient: True
+
+ sam_data_config:
+   all_same_mask_to_zero: False
+   normalize_01: False
+   normalize_11: False
+
+ num_sanity_val_steps: 2
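Each config names its trainer through `trainer_cls`, and the runner resolves that string into a class at run time (the same pattern appears in `tasks/infer.py` further down). A minimal sketch of the resolution, assuming `hparams` has already been populated by `set_hparams()`:

import importlib
from sam_diffsr.utils_sr.hparams import hparams  # assumed to be filled in by set_hparams()

trainer_cls = hparams["trainer_cls"]              # e.g. 'tasks.srdiff_df2k_sam.SRDiffDf2k_sam'
pkg, cls_name = trainer_cls.rsplit(".", 1)
trainer = getattr(importlib.import_module(pkg), cls_name)()  # import module, instantiate class
trainer.build_model()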
sam_diffsr/models_sr/__init__.py ADDED
File without changes
sam_diffsr/models_sr/commons.py ADDED
@@ -0,0 +1,317 @@
1
+ import math
2
+ import torch
3
+ import torch.nn.functional as F
4
+ from einops import rearrange
5
+ from torch import nn
6
+ from torch.nn import Parameter
7
+
8
+
9
+ class Residual(nn.Module):
10
+ def __init__(self, fn):
11
+ super().__init__()
12
+ self.fn = fn
13
+
14
+ def forward(self, x, *args, **kwargs):
15
+ return self.fn(x, *args, **kwargs) + x
16
+
17
+
18
+ class SinusoidalPosEmb(nn.Module):
19
+ def __init__(self, dim):
20
+ super().__init__()
21
+ self.dim = dim
22
+
23
+ def forward(self, x):
24
+ device = x.device
25
+ half_dim = self.dim // 2
26
+ emb = math.log(10000) / (half_dim - 1)
27
+ emb = torch.exp(torch.arange(half_dim, device=device) * -emb)
28
+ emb = x[:, None] * emb[None, :]
29
+ emb = torch.cat((emb.sin(), emb.cos()), dim=-1)
30
+ return emb
31
+
32
+
33
+ class Mish(nn.Module):
34
+ def forward(self, x):
35
+ return x * torch.tanh(F.softplus(x))
36
+
37
+
38
+ class Rezero(nn.Module):
39
+ def __init__(self, fn):
40
+ super().__init__()
41
+ self.fn = fn
42
+ self.g = nn.Parameter(torch.zeros(1))
43
+
44
+ def forward(self, x):
45
+ return self.fn(x) * self.g
46
+
47
+
48
+ # building block modules
49
+
50
+ class Block(nn.Module):
51
+ def __init__(self, dim, dim_out, groups=8):
52
+ super().__init__()
53
+ if groups == 0:
54
+ self.block = nn.Sequential(
55
+ nn.ReflectionPad2d(1),
56
+ nn.Conv2d(dim, dim_out, 3),
57
+ Mish()
58
+ )
59
+ else:
60
+ self.block = nn.Sequential(
61
+ nn.ReflectionPad2d(1),
62
+ nn.Conv2d(dim, dim_out, 3),
63
+ nn.GroupNorm(groups, dim_out),
64
+ Mish()
65
+ )
66
+
67
+ def forward(self, x):
68
+ return self.block(x)
69
+
70
+
71
+ class ResnetBlock(nn.Module):
72
+ def __init__(self, dim, dim_out, *, time_emb_dim=0, groups=8):
73
+ super().__init__()
74
+ if time_emb_dim > 0:
75
+ self.mlp = nn.Sequential(
76
+ Mish(),
77
+ nn.Linear(time_emb_dim, dim_out)
78
+ )
79
+
80
+ self.block1 = Block(dim, dim_out, groups=groups)
81
+ self.block2 = Block(dim_out, dim_out, groups=groups)
82
+ self.res_conv = nn.Conv2d(dim, dim_out, 1) if dim != dim_out else nn.Identity()
83
+
84
+ def forward(self, x, time_emb=None, cond=None):
85
+ h = self.block1(x)
86
+ if time_emb is not None:
87
+ h += self.mlp(time_emb)[:, :, None, None]
88
+ if cond is not None:
89
+ h += cond
90
+ h = self.block2(h)
91
+ return h + self.res_conv(x)
92
+
93
+
94
+ class Upsample(nn.Module):
95
+ def __init__(self, dim):
96
+ super().__init__()
97
+ self.conv = nn.Sequential(
98
+ nn.ConvTranspose2d(dim, dim, 4, 2, 1),
99
+ )
100
+
101
+ def forward(self, x):
102
+ return self.conv(x)
103
+
104
+
105
+ class Downsample(nn.Module):
106
+ def __init__(self, dim):
107
+ super().__init__()
108
+ self.conv = nn.Sequential(
109
+ nn.ReflectionPad2d(1),
110
+ nn.Conv2d(dim, dim, 3, 2),
111
+ )
112
+
113
+ def forward(self, x):
114
+ return self.conv(x)
115
+
116
+
117
+ class LinearAttention(nn.Module):
118
+ def __init__(self, dim, heads=4, dim_head=32):
119
+ super().__init__()
120
+ self.heads = heads
121
+ hidden_dim = dim_head * heads
122
+ self.to_qkv = nn.Conv2d(dim, hidden_dim * 3, 1, bias=False)
123
+ self.to_out = nn.Conv2d(hidden_dim, dim, 1)
124
+
125
+ def forward(self, x):
126
+ b, c, h, w = x.shape
127
+ qkv = self.to_qkv(x)
128
+ q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads=self.heads, qkv=3)
129
+ k = k.softmax(dim=-1)
130
+ context = torch.einsum('bhdn,bhen->bhde', k, v)
131
+ out = torch.einsum('bhde,bhdn->bhen', context, q)
132
+ out = rearrange(out, 'b heads c (h w) -> b (heads c) h w', heads=self.heads, h=h, w=w)
133
+ return self.to_out(out)
134
+
135
+
136
+ class MultiheadAttention(nn.Module):
137
+ def __init__(self, embed_dim, num_heads, kdim=None, vdim=None, dropout=0., bias=True,
138
+ add_bias_kv=False, add_zero_attn=False):
139
+ super().__init__()
140
+ self.embed_dim = embed_dim
141
+ self.kdim = kdim if kdim is not None else embed_dim
142
+ self.vdim = vdim if vdim is not None else embed_dim
143
+ self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim
144
+
145
+ self.num_heads = num_heads
146
+ self.dropout = dropout
147
+ self.head_dim = embed_dim // num_heads
148
+ assert self.head_dim * num_heads == self.embed_dim, "embed_dim must be divisible by num_heads"
149
+ self.scaling = self.head_dim ** -0.5
150
+ if self.qkv_same_dim:
151
+ self.in_proj_weight = Parameter(torch.Tensor(3 * embed_dim, embed_dim))
152
+ else:
153
+ self.k_proj_weight = Parameter(torch.Tensor(embed_dim, self.kdim))
154
+ self.v_proj_weight = Parameter(torch.Tensor(embed_dim, self.vdim))
155
+ self.q_proj_weight = Parameter(torch.Tensor(embed_dim, embed_dim))
156
+
157
+ if bias:
158
+ self.in_proj_bias = Parameter(torch.Tensor(3 * embed_dim))
159
+ else:
160
+ self.register_parameter('in_proj_bias', None)
161
+
162
+ self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias)
163
+
164
+ if add_bias_kv:
165
+ self.bias_k = Parameter(torch.Tensor(1, 1, embed_dim))
166
+ self.bias_v = Parameter(torch.Tensor(1, 1, embed_dim))
167
+ else:
168
+ self.bias_k = self.bias_v = None
169
+
170
+ self.add_zero_attn = add_zero_attn
171
+
172
+ self.reset_parameters()
173
+
174
+ self.enable_torch_version = False
175
+ if hasattr(F, "multi_head_attention_forward"):
176
+ self.enable_torch_version = True
177
+ else:
178
+ self.enable_torch_version = False
179
+ self.last_attn_probs = None
180
+
181
+ def reset_parameters(self):
182
+ if self.qkv_same_dim:
183
+ nn.init.xavier_uniform_(self.in_proj_weight)
184
+ else:
185
+ nn.init.xavier_uniform_(self.k_proj_weight)
186
+ nn.init.xavier_uniform_(self.v_proj_weight)
187
+ nn.init.xavier_uniform_(self.q_proj_weight)
188
+
189
+ nn.init.xavier_uniform_(self.out_proj.weight)
190
+ if self.in_proj_bias is not None:
191
+ nn.init.constant_(self.in_proj_bias, 0.)
192
+ nn.init.constant_(self.out_proj.bias, 0.)
193
+ if self.bias_k is not None:
194
+ nn.init.xavier_normal_(self.bias_k)
195
+ if self.bias_v is not None:
196
+ nn.init.xavier_normal_(self.bias_v)
197
+
198
+ def forward(
199
+ self,
200
+ query, key, value,
201
+ key_padding_mask=None,
202
+ need_weights=True,
203
+ attn_mask=None,
204
+ before_softmax=False,
205
+ need_head_weights=False,
206
+ ):
207
+ """Input shape: [B, T, C]
208
+
209
+ Args:
210
+ key_padding_mask (ByteTensor, optional): mask to exclude
211
+ keys that are pads, of shape `(batch, src_len)`, where
212
+ padding elements are indicated by 1s.
213
+ need_weights (bool, optional): return the attention weights,
214
+ averaged over heads (default: False).
215
+ attn_mask (ByteTensor, optional): typically used to
216
+ implement causal attention, where the mask prevents the
217
+ attention from looking forward in time (default: None).
218
+ before_softmax (bool, optional): return the raw attention
219
+ weights and values before the attention softmax.
220
+ need_head_weights (bool, optional): return the attention
221
+ weights for each head. Implies *need_weights*. Default:
222
+ return the average attention weights over all heads.
223
+ """
224
+ if need_head_weights:
225
+ need_weights = True
226
+ query = query.transpose(0, 1)
227
+ key = key.transpose(0, 1)
228
+ value = value.transpose(0, 1)
229
+ tgt_len, bsz, embed_dim = query.size()
230
+ assert embed_dim == self.embed_dim
231
+ assert list(query.size()) == [tgt_len, bsz, embed_dim]
232
+ attn_output, attn_output_weights = F.multi_head_attention_forward(
233
+ query, key, value, self.embed_dim, self.num_heads,
234
+ self.in_proj_weight, self.in_proj_bias, self.bias_k, self.bias_v,
235
+ self.add_zero_attn, self.dropout, self.out_proj.weight, self.out_proj.bias,
236
+ self.training, key_padding_mask, need_weights, attn_mask)
237
+ attn_output = attn_output.transpose(0, 1)
238
+ return attn_output, attn_output_weights
239
+
240
+ def in_proj_qkv(self, query):
241
+ return self._in_proj(query).chunk(3, dim=-1)
242
+
243
+ def in_proj_q(self, query):
244
+ if self.qkv_same_dim:
245
+ return self._in_proj(query, end=self.embed_dim)
246
+ else:
247
+ bias = self.in_proj_bias
248
+ if bias is not None:
249
+ bias = bias[:self.embed_dim]
250
+ return F.linear(query, self.q_proj_weight, bias)
251
+
252
+ def in_proj_k(self, key):
253
+ if self.qkv_same_dim:
254
+ return self._in_proj(key, start=self.embed_dim, end=2 * self.embed_dim)
255
+ else:
256
+ weight = self.k_proj_weight
257
+ bias = self.in_proj_bias
258
+ if bias is not None:
259
+ bias = bias[self.embed_dim:2 * self.embed_dim]
260
+ return F.linear(key, weight, bias)
261
+
262
+ def in_proj_v(self, value):
263
+ if self.qkv_same_dim:
264
+ return self._in_proj(value, start=2 * self.embed_dim)
265
+ else:
266
+ weight = self.v_proj_weight
267
+ bias = self.in_proj_bias
268
+ if bias is not None:
269
+ bias = bias[2 * self.embed_dim:]
270
+ return F.linear(value, weight, bias)
271
+
272
+ def _in_proj(self, input, start=0, end=None):
273
+ weight = self.in_proj_weight
274
+ bias = self.in_proj_bias
275
+ weight = weight[start:end, :]
276
+ if bias is not None:
277
+ bias = bias[start:end]
278
+ return F.linear(input, weight, bias)
279
+
280
+
281
+ class ResidualDenseBlock_5C(nn.Module):
282
+ def __init__(self, nf=64, gc=32, bias=True):
283
+ super(ResidualDenseBlock_5C, self).__init__()
284
+ # gc: growth channel, i.e. intermediate channels
285
+ self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias)
286
+ self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias)
287
+ self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias)
288
+ self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias)
289
+ self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias)
290
+ self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
291
+
292
+ # initialization
293
+ # mutil.initialize_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)
294
+
295
+ def forward(self, x):
296
+ x1 = self.lrelu(self.conv1(x))
297
+ x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1)))
298
+ x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1)))
299
+ x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1)))
300
+ x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
301
+ return x5 * 0.2 + x
302
+
303
+
304
+ class RRDB(nn.Module):
305
+ '''Residual in Residual Dense Block'''
306
+
307
+ def __init__(self, nf, gc=32):
308
+ super(RRDB, self).__init__()
309
+ self.RDB1 = ResidualDenseBlock_5C(nf, gc)
310
+ self.RDB2 = ResidualDenseBlock_5C(nf, gc)
311
+ self.RDB3 = ResidualDenseBlock_5C(nf, gc)
312
+
313
+ def forward(self, x):
314
+ out = self.RDB1(x)
315
+ out = self.RDB2(out)
316
+ out = self.RDB3(out)
317
+ return out * 0.2 + x
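`ResidualDenseBlock_5C` and `RRDB` keep the channel count fixed and scale every residual branch by 0.2 before adding it back, so an RRDB stack can be chained without changing tensor shape. A quick shape check (a sketch, not part of the commit):

import torch
from sam_diffsr.models_sr.commons import RRDB

block = RRDB(nf=64, gc=32)
x = torch.randn(1, 64, 40, 40)
y = block(x)                  # three chained dense blocks, each merged back as 0.2 * residual
assert y.shape == x.shape     # (1, 64, 40, 40): spatial size and channels preserved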
sam_diffsr/models_sr/diffsr_modules.py ADDED
@@ -0,0 +1,177 @@
1
+ import functools
2
+
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from torch import nn
6
+
7
+ from sam_diffsr.utils_sr.hparams import hparams
8
+ from .commons import Mish, SinusoidalPosEmb, RRDB, Residual, Rezero, LinearAttention
9
+ from .commons import ResnetBlock, Upsample, Block, Downsample
10
+ from .module_util import make_layer, initialize_weights
11
+
12
+
13
+ class RRDBNet(nn.Module):
14
+ def __init__(self, in_nc, out_nc, nf, nb, gc=32):
15
+ super(RRDBNet, self).__init__()
16
+ RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc)
17
+
18
+ self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True)
19
+ self.RRDB_trunk = make_layer(RRDB_block_f, nb)
20
+ self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
21
+ #### upsampling
22
+ self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
23
+ self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
24
+ if hparams['sr_scale'] == 8:
25
+ self.upconv3 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
26
+ self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
27
+ self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True)
28
+
29
+ self.lrelu = nn.LeakyReLU(negative_slope=0.2)
30
+
31
+ def forward(self, x, get_fea=False):
32
+ feas = []
33
+ x = (x + 1) / 2
34
+ fea_first = fea = self.conv_first(x)
35
+ for l in self.RRDB_trunk:
36
+ fea = l(fea)
37
+ feas.append(fea)
38
+ trunk = self.trunk_conv(fea)
39
+ fea = fea_first + trunk
40
+ feas.append(fea)
41
+
42
+ fea = self.lrelu(self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest')))
43
+ fea = self.lrelu(self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest')))
44
+ if hparams['sr_scale'] == 8:
45
+ fea = self.lrelu(self.upconv3(F.interpolate(fea, scale_factor=2, mode='nearest')))
46
+ fea_hr = self.HRconv(fea)
47
+ out = self.conv_last(self.lrelu(fea_hr))
48
+ out = out.clamp(0, 1)
49
+ out = out * 2 - 1
50
+ if get_fea:
51
+ return out, feas
52
+ else:
53
+ return out
54
+
55
+
56
+ class Unet(nn.Module):
57
+ def __init__(self, dim, out_dim=None, dim_mults=(1, 2, 4, 8), cond_dim=32):
58
+ super().__init__()
59
+ dims = [3, *map(lambda m: dim * m, dim_mults)]
60
+ in_out = list(zip(dims[:-1], dims[1:]))
61
+ groups = 0
62
+
63
+ self.sam_config = hparams['sam_config']
64
+
65
+ cond_proj_in = cond_dim * ((hparams['rrdb_num_block'] + 1) // 3)
66
+ if self.sam_config['cond_sam']:
67
+ # cond_proj_in += 1
68
+ self.sam_conv = nn.Sequential(
69
+ nn.Conv2d(dim + 1, dim, 1, 1, 0, bias=True),
70
+ nn.Conv2d(dim, dim, 1, 1, 0, bias=True),
71
+ nn.Conv2d(dim, dim, 1, 1, 0, bias=True)
72
+ )
73
+ else:
74
+ self.sam_conv = None
75
+
76
+ self.cond_proj = nn.ConvTranspose2d(cond_proj_in, dim, hparams['sr_scale'] * 2, hparams['sr_scale'],
77
+ hparams['sr_scale'] // 2)
78
+
79
+ self.time_pos_emb = SinusoidalPosEmb(dim)
80
+ self.mlp = nn.Sequential(
81
+ nn.Linear(dim, dim * 4),
82
+ Mish(),
83
+ nn.Linear(dim * 4, dim)
84
+ )
85
+
86
+ self.downs = nn.ModuleList([])
87
+ self.ups = nn.ModuleList([])
88
+ num_resolutions = len(in_out)
89
+
90
+ for ind, (dim_in, dim_out) in enumerate(in_out):
91
+ is_last = ind >= (num_resolutions - 1)
92
+
93
+ self.downs.append(nn.ModuleList([
94
+ ResnetBlock(dim_in, dim_out, time_emb_dim=dim, groups=groups),
95
+ ResnetBlock(dim_out, dim_out, time_emb_dim=dim, groups=groups),
96
+ Downsample(dim_out) if not is_last else nn.Identity()
97
+ ]))
98
+
99
+ mid_dim = dims[-1]
100
+ self.mid_block1 = ResnetBlock(mid_dim, mid_dim, time_emb_dim=dim, groups=groups)
101
+ if hparams['use_attn']:
102
+ self.mid_attn = Residual(Rezero(LinearAttention(mid_dim)))
103
+ self.mid_block2 = ResnetBlock(mid_dim, mid_dim, time_emb_dim=dim, groups=groups)
104
+
105
+ for ind, (dim_in, dim_out) in enumerate(reversed(in_out[1:])):
106
+ is_last = ind >= (num_resolutions - 1)
107
+
108
+ self.ups.append(nn.ModuleList([
109
+ ResnetBlock(dim_out * 2, dim_in, time_emb_dim=dim, groups=groups),
110
+ ResnetBlock(dim_in, dim_in, time_emb_dim=dim, groups=groups),
111
+ Upsample(dim_in) if not is_last else nn.Identity()
112
+ ]))
113
+
114
+ self.final_conv = nn.Sequential(
115
+ Block(dim, dim, groups=groups),
116
+ nn.Conv2d(dim, out_dim, 1)
117
+ )
118
+
119
+ if hparams['res'] and hparams['up_input']:
120
+ self.up_proj = nn.Sequential(
121
+ nn.ReflectionPad2d(1), nn.Conv2d(3, dim, 3),
122
+ )
123
+ if hparams['use_wn']:
124
+ self.apply_weight_norm()
125
+ if hparams['weight_init']:
126
+ self.apply(initialize_weights)
127
+
128
+ def apply_weight_norm(self):
129
+ def _apply_weight_norm(m):
130
+ if isinstance(m, torch.nn.Conv1d) or isinstance(m, torch.nn.Conv2d):
131
+ torch.nn.utils.weight_norm(m)
132
+ # print(f"| Weight norm is applied to {m}.")
133
+
134
+ self.apply(_apply_weight_norm)
135
+
136
+ def forward(self, x, time, cond, img_lr_up, sam_mask=None):
137
+ t = self.time_pos_emb(time)
138
+ t = self.mlp(t)
139
+ h = []
140
+
141
+ cond = self.cond_proj(torch.cat(cond[2::3], 1))
142
+
143
+ if self.sam_config['cond_sam']:
144
+ cond = torch.cat([cond, sam_mask], 1)
145
+ cond = self.sam_conv(cond)
146
+
147
+ for i, (resnet, resnet2, downsample) in enumerate(self.downs):
148
+ x = resnet(x, t)
149
+ x = resnet2(x, t)
150
+ if i == 0:
151
+ x = x + cond
152
+ if hparams['res'] and hparams['up_input']:
153
+ x = x + self.up_proj(img_lr_up)
154
+ h.append(x)
155
+ x = downsample(x)
156
+
157
+ x = self.mid_block1(x, t)
158
+ if hparams['use_attn']:
159
+ x = self.mid_attn(x)
160
+ x = self.mid_block2(x, t)
161
+
162
+ for resnet, resnet2, upsample in self.ups:
163
+ x = torch.cat((x, h.pop()), dim=1)
164
+ x = resnet(x, t)
165
+ x = resnet2(x, t)
166
+ x = upsample(x)
167
+
168
+ return self.final_conv(x)
169
+
170
+ def make_generation_fast_(self):
171
+ def remove_weight_norm(m):
172
+ try:
173
+ nn.utils.remove_weight_norm(m)
174
+ except ValueError: # this module didn't have weight norm
175
+ return
176
+
177
+ self.apply(remove_weight_norm)
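The `Unet` above conditions on every third RRDB feature map (`cond[2::3]`), so `cond_proj` must accept `cond_dim * ((rrdb_num_block + 1) // 3)` input channels. A quick sanity check of that arithmetic with the shipped config values (sketch only):

rrdb_num_block = 17             # from diffsr_base.yaml
cond_dim = 64                   # rrdb_num_feat
n_feats = rrdb_num_block + 1    # RRDBNet returns one feature per block plus the fused trunk
picked = list(range(n_feats))[2::3]        # indices 2, 5, 8, 11, 14, 17 -> 6 feature maps
assert len(picked) == (rrdb_num_block + 1) // 3 == 6
cond_proj_in = cond_dim * len(picked)      # 384 input channels for the ConvTranspose2d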
sam_diffsr/models_sr/diffusion.py ADDED
@@ -0,0 +1,291 @@
1
+ from functools import partial
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from torch import nn
6
+ from tqdm import tqdm
7
+
8
+ from sam_diffsr.utils_sr.plt_img import plt_tensor_img
9
+ from .module_util import default
10
+ from sam_diffsr.utils_sr.sr_utils import SSIM, PerceptualLoss
11
+ from sam_diffsr.utils_sr.hparams import hparams
12
+
13
+
14
+ # gaussian diffusion trainer class
15
+ def extract(a, t, x_shape):
16
+ b, *_ = t.shape
17
+ out = a.gather(-1, t)
18
+ return out.reshape(b, *((1,) * (len(x_shape) - 1)))
19
+
20
+
21
+ def noise_like(shape, device, repeat=False):
22
+ repeat_noise = lambda: torch.randn((1, *shape[1:]), device=device).repeat(shape[0], *((1,) * (len(shape) - 1)))
23
+ noise = lambda: torch.randn(shape, device=device)
24
+ return repeat_noise() if repeat else noise()
25
+
26
+
27
+ def _warmup_beta(beta_start, beta_end, num_diffusion_timesteps, warmup_frac):
28
+ betas = beta_end * np.ones(num_diffusion_timesteps, dtype=np.float64)
29
+ warmup_time = int(num_diffusion_timesteps * warmup_frac)
30
+ betas[:warmup_time] = np.linspace(beta_start, beta_end, warmup_time, dtype=np.float64)
31
+ return betas
32
+
33
+
34
+ def get_beta_schedule(num_diffusion_timesteps, beta_schedule='linear', beta_start=0.0001, beta_end=0.02):
35
+ if beta_schedule == 'quad':
36
+ betas = np.linspace(beta_start ** 0.5, beta_end ** 0.5, num_diffusion_timesteps, dtype=np.float64) ** 2
37
+ elif beta_schedule == 'linear':
38
+ betas = np.linspace(beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64)
39
+ elif beta_schedule == 'warmup10':
40
+ betas = _warmup_beta(beta_start, beta_end, num_diffusion_timesteps, 0.1)
41
+ elif beta_schedule == 'warmup50':
42
+ betas = _warmup_beta(beta_start, beta_end, num_diffusion_timesteps, 0.5)
43
+ elif beta_schedule == 'const':
44
+ betas = beta_end * np.ones(num_diffusion_timesteps, dtype=np.float64)
45
+ elif beta_schedule == 'jsd': # 1/T, 1/(T-1), 1/(T-2), ..., 1
46
+ betas = 1. / np.linspace(num_diffusion_timesteps, 1, num_diffusion_timesteps, dtype=np.float64)
47
+ else:
48
+ raise NotImplementedError(beta_schedule)
49
+ assert betas.shape == (num_diffusion_timesteps,)
50
+ return betas
51
+
52
+
53
+ def cosine_beta_schedule(timesteps, s=0.008):
54
+ """
55
+ cosine schedule
56
+ as proposed in https://openreview.net/forum?id=-NEXDKk8gZ
57
+ """
58
+ steps = timesteps + 1
59
+ x = np.linspace(0, steps, steps)
60
+ alphas_cumprod = np.cos(((x / steps) + s) / (1 + s) * np.pi * 0.5) ** 2
61
+ alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
62
+ betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1])
63
+ return np.clip(betas, a_min=0, a_max=0.999)
64
+
65
+
66
+ class GaussianDiffusion(nn.Module):
67
+ def __init__(self, denoise_fn, rrdb_net, timesteps=1000, loss_type='l1'):
68
+ super().__init__()
69
+ self.denoise_fn = denoise_fn
70
+ # condition net
71
+ self.rrdb = rrdb_net
72
+ self.ssim_loss = SSIM(window_size=11)
73
+
74
+
75
+ if hparams['beta_schedule'] == 'cosine':
76
+ betas = cosine_beta_schedule(timesteps, s=hparams['beta_s'])
77
+ if hparams['beta_schedule'] == 'linear':
78
+ betas = get_beta_schedule(timesteps, beta_end=hparams['beta_end'])
79
+ if hparams['res']:
80
+ betas[-1] = 0.999
81
+
82
+ alphas = 1. - betas
83
+ alphas_cumprod = np.cumprod(alphas, axis=0)
84
+ alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])
85
+
86
+ timesteps, = betas.shape
87
+ self.num_timesteps = int(timesteps)
88
+ self.loss_type = loss_type
89
+
90
+ to_torch = partial(torch.tensor, dtype=torch.float32)
91
+
92
+ self.register_buffer('betas', to_torch(betas))
93
+ self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod))
94
+ self.register_buffer('alphas_cumprod_prev', to_torch(alphas_cumprod_prev))
95
+
96
+ # calculations for diffusion q(x_t | x_{t-1}) and others
97
+ self.register_buffer('sqrt_alphas_cumprod', to_torch(np.sqrt(alphas_cumprod)))
98
+ self.register_buffer('sqrt_one_minus_alphas_cumprod', to_torch(np.sqrt(1. - alphas_cumprod)))
99
+ self.register_buffer('log_one_minus_alphas_cumprod', to_torch(np.log(1. - alphas_cumprod)))
100
+ self.register_buffer('sqrt_recip_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod)))
101
+ self.register_buffer('sqrt_recipm1_alphas_cumprod', to_torch(np.sqrt(1. / alphas_cumprod - 1)))
102
+
103
+ # calculations for posterior q(x_{t-1} | x_t, x_0)
104
+ posterior_variance = betas * (1. - alphas_cumprod_prev) / (1. - alphas_cumprod)
105
+
106
+ # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t)
107
+ self.register_buffer('posterior_variance', to_torch(posterior_variance))
108
+ # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain
109
+ self.register_buffer('posterior_log_variance_clipped', to_torch(np.log(np.maximum(posterior_variance, 1e-20))))
110
+ self.register_buffer('posterior_mean_coef1', to_torch(
111
+ betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod)))
112
+ self.register_buffer('posterior_mean_coef2', to_torch(
113
+ (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. - alphas_cumprod)))
114
+ self.sample_tqdm = True
115
+
116
+ self.mask_coefficient = to_torch(np.sqrt(1. - alphas_cumprod) * betas)
117
+
118
+ def q_mean_variance(self, x_start, t):
119
+ mean = extract(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start
120
+ variance = extract(1. - self.alphas_cumprod, t, x_start.shape)
121
+ log_variance = extract(self.log_one_minus_alphas_cumprod, t, x_start.shape)
122
+ return mean, variance, log_variance
123
+
124
+ def predict_start_from_noise(self, x_t, t, noise):
125
+ return (
126
+ extract(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t -
127
+ extract(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise
128
+ )
129
+
130
+ def q_posterior(self, x_start, x_t, t):
131
+ posterior_mean = (
132
+ extract(self.posterior_mean_coef1, t, x_t.shape) * x_start +
133
+ extract(self.posterior_mean_coef2, t, x_t.shape) * x_t
134
+ )
135
+ posterior_variance = extract(self.posterior_variance, t, x_t.shape)
136
+ posterior_log_variance_clipped = extract(self.posterior_log_variance_clipped, t, x_t.shape)
137
+ return posterior_mean, posterior_variance, posterior_log_variance_clipped
138
+
139
+ def p_mean_variance(self, x, t, noise_pred, clip_denoised: bool):
140
+ x_recon = self.predict_start_from_noise(x, t=t, noise=noise_pred)
141
+
142
+ if clip_denoised:
143
+ x_recon.clamp_(-1.0, 1.0)
144
+
145
+ model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start=x_recon, x_t=x, t=t)
146
+ return model_mean, posterior_variance, posterior_log_variance, x_recon
147
+
148
+ def forward(self, img_hr, img_lr, img_lr_up, t=None, *args, **kwargs):
149
+ x = img_hr
150
+ b, *_, device = *x.shape, x.device
151
+ t = torch.randint(0, self.num_timesteps, (b,), device=device).long() \
152
+ if t is None else torch.LongTensor([t]).repeat(b).to(device)
153
+ if hparams['use_rrdb']:
154
+ if hparams['fix_rrdb']:
155
+ self.rrdb.eval()
156
+ with torch.no_grad():
157
+ rrdb_out, cond = self.rrdb(img_lr, True)
158
+ else:
159
+ rrdb_out, cond = self.rrdb(img_lr, True)
160
+ else:
161
+ rrdb_out = img_lr_up
162
+ cond = img_lr
163
+ x = self.img2res(x, img_lr_up)
164
+ p_losses, x_tp1, noise_pred, x_t, x_t_gt, x_0 = self.p_losses(x, t, cond, img_lr_up, *args, **kwargs)
165
+ ret = {'q': p_losses}
166
+ if not hparams['fix_rrdb']:
167
+ if hparams['aux_l1_loss']:
168
+ ret['aux_l1'] = F.l1_loss(rrdb_out, img_hr)
169
+ if hparams['aux_ssim_loss']:
170
+ ret['aux_ssim'] = 1 - self.ssim_loss(rrdb_out, img_hr)
171
+ if hparams['aux_percep_loss']:
172
+ ret['aux_percep'] = self.percep_loss_fn[0](img_hr, rrdb_out)
173
+
174
+
175
+ x_tp1 = self.res2img(x_tp1, img_lr_up)
176
+ x_t = self.res2img(x_t, img_lr_up)
177
+ x_t_gt = self.res2img(x_t_gt, img_lr_up)
178
+ return ret, (x_tp1, x_t_gt, x_t), t
179
+
180
+ def p_losses(self, x_start, t, cond, img_lr_up, noise=None):
181
+ noise = default(noise, lambda: torch.randn_like(x_start))
182
+ x_tp1_gt = self.q_sample(x_start=x_start, t=t, noise=noise)
183
+ x_t_gt = self.q_sample(x_start=x_start, t=t - 1, noise=noise)
184
+ noise_pred = self.denoise_fn(x_tp1_gt, t, cond, img_lr_up)
185
+ x_t_pred, x0_pred = self.p_sample(x_tp1_gt, t, cond, img_lr_up, noise_pred=noise_pred)
186
+
187
+ if self.loss_type == 'l1':
188
+ loss = (noise - noise_pred).abs().mean()
189
+ elif self.loss_type == 'l2':
190
+ loss = F.mse_loss(noise, noise_pred)
191
+ elif self.loss_type == 'ssim':
192
+ loss = (noise - noise_pred).abs().mean()
193
+ loss = loss + (1 - self.ssim_loss(noise, noise_pred))
194
+ else:
195
+ raise NotImplementedError()
196
+ return loss, x_tp1_gt, noise_pred, x_t_pred, x_t_gt, x0_pred
197
+
198
+ def q_sample(self, x_start, t, noise=None):
199
+ noise = default(noise, lambda: torch.randn_like(x_start))
200
+ t_cond = (t[:, None, None, None] >= 0).float()
201
+ t = t.clamp_min(0)
202
+ return (
203
+ extract(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start +
204
+ extract(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise
205
+ ) * t_cond + x_start * (1 - t_cond)
206
+
207
+ @torch.no_grad()
208
+ def p_sample(self, x, t, cond, img_lr_up, noise_pred=None, clip_denoised=True, repeat_noise=False):
209
+ if noise_pred is None:
210
+ noise_pred = self.denoise_fn(x, t, cond=cond, img_lr_up=img_lr_up)
211
+ b, *_, device = *x.shape, x.device
212
+ model_mean, _, model_log_variance, x0_pred = self.p_mean_variance(
213
+ x=x, t=t, noise_pred=noise_pred, clip_denoised=clip_denoised)
214
+ noise = noise_like(x.shape, device, repeat_noise)
215
+ # no noise when t == 0
216
+ nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1)))
217
+ return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, x0_pred
218
+
219
+ @torch.no_grad()
220
+ def sample(self, img_lr, img_lr_up, shape, save_intermediate=False):
221
+ device = self.betas.device
222
+ b = shape[0]
223
+ if not hparams['res']:
224
+ t = torch.full((b,), self.num_timesteps - 1, device=device, dtype=torch.long)
225
+ img = self.q_sample(img_lr_up, t)
226
+ else:
227
+ img = torch.randn(shape, device=device)
228
+ if hparams['use_rrdb']:
229
+ rrdb_out, cond = self.rrdb(img_lr, True)
230
+ else:
231
+ rrdb_out = img_lr_up
232
+ cond = img_lr
233
+ it = reversed(range(0, self.num_timesteps))
234
+ if self.sample_tqdm:
235
+ it = tqdm(it, desc='sampling loop time step', total=self.num_timesteps)
236
+ images = []
237
+ for i in it:
238
+ img, x_recon = self.p_sample(
239
+ img, torch.full((b,), i, device=device, dtype=torch.long), cond, img_lr_up)
240
+ if save_intermediate:
241
+ img_ = self.res2img(img, img_lr_up)
242
+ x_recon_ = self.res2img(x_recon, img_lr_up)
243
+ images.append((img_.cpu(), x_recon_.cpu()))
244
+ img = self.res2img(img, img_lr_up)
245
+ if save_intermediate:
246
+ return img, rrdb_out, images
247
+ else:
248
+ return img, rrdb_out
249
+
250
+ @torch.no_grad()
251
+ def interpolate(self, x1, x2, img_lr, img_lr_up, t=None, lam=0.5):
252
+ b, *_, device = *x1.shape, x1.device
253
+ t = default(t, self.num_timesteps - 1)
254
+ if hparams['use_rrdb']:
255
+ rrdb_out, cond = self.rrdb(img_lr, True)
256
+ else:
257
+ cond = img_lr
258
+
259
+ assert x1.shape == x2.shape
260
+
261
+ x1 = self.img2res(x1, img_lr_up)
262
+ x2 = self.img2res(x2, img_lr_up)
263
+
264
+ t_batched = torch.stack([torch.tensor(t, device=device)] * b)
265
+ xt1, xt2 = map(lambda x: self.q_sample(x, t=t_batched), (x1, x2))
266
+
267
+ img = (1 - lam) * xt1 + lam * xt2
268
+ for i in tqdm(reversed(range(0, t)), desc='interpolation sample time step', total=t):
269
+ img, x_recon = self.p_sample(
270
+ img, torch.full((b,), i, device=device, dtype=torch.long), cond, img_lr_up)
271
+
272
+ img = self.res2img(img, img_lr_up)
273
+ return img
274
+
275
+ def res2img(self, img_, img_lr_up, clip_input=None):
276
+ if clip_input is None:
277
+ clip_input = hparams['clip_input']
278
+ if hparams['res']:
279
+ if clip_input:
280
+ img_ = img_.clamp(-1, 1)
281
+ img_ = img_ / hparams['res_rescale'] + img_lr_up
282
+ return img_
283
+
284
+ def img2res(self, x, img_lr_up, clip_input=None):
285
+ if clip_input is None:
286
+ clip_input = hparams['clip_input']
287
+ if hparams['res']:
288
+ x = (x - img_lr_up) * hparams['res_rescale']
289
+ if clip_input:
290
+ x = x.clamp(-1, 1)
291
+ return x
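`img2res` and `res2img` implement the residual parameterization used throughout: the diffusion model operates on `(HR - upsampled LR) * res_rescale`, and the sampled residual is mapped back by the inverse transform. A small sketch of the round trip, assuming the default config (`res: true`, `clip_input: true`, `res_rescale: 2.0`):

import torch

res_rescale = 2.0
img_hr = torch.rand(1, 3, 8, 8) * 2 - 1       # pretend HR image in [-1, 1]
img_lr_up = torch.rand(1, 3, 8, 8) * 2 - 1    # pretend bicubic-upsampled LR image

res = ((img_hr - img_lr_up) * res_rescale).clamp(-1, 1)   # img2res
recon = res / res_rescale + img_lr_up                     # res2img
# recon equals img_hr wherever the scaled residual was not clipped to [-1, 1]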
sam_diffsr/models_sr/diffusion_sam.py ADDED
@@ -0,0 +1,90 @@
+ import torch
+ import torch.nn.functional as F
+ from tqdm import tqdm
+
+ from sam_diffsr.utils_sr.hparams import hparams
+ from .diffusion import GaussianDiffusion, noise_like, extract
+ from .module_util import default
+
+
+ class GaussianDiffusion_sam(GaussianDiffusion):
+     def __init__(self, denoise_fn, rrdb_net, timesteps=1000, loss_type='l1', sam_config=None):
+         super().__init__(denoise_fn, rrdb_net, timesteps, loss_type)
+         self.sam_config = sam_config
+
+     def p_losses(self, x_start, t, cond, img_lr_up, noise=None, sam_mask=None):
+         noise = default(noise, lambda: torch.randn_like(x_start))
+
+         if self.sam_config['p_losses_sam']:
+             _sam_mask = F.interpolate(sam_mask, noise.shape[2:], mode='bilinear')
+             if self.sam_config.get('mask_coefficient', False):
+                 _sam_mask *= extract(self.mask_coefficient.to(_sam_mask.device), t, x_start.shape)
+             noise += _sam_mask
+
+         x_tp1_gt = self.q_sample(x_start=x_start, t=t, noise=noise)
+         x_t_gt = self.q_sample(x_start=x_start, t=t - 1, noise=noise)
+         noise_pred = self.denoise_fn(x_tp1_gt, t, cond, img_lr_up, sam_mask=sam_mask)
+         x_t_pred, x0_pred = self.p_sample(x_tp1_gt, t, cond, img_lr_up, noise_pred=noise_pred, sam_mask=sam_mask)
+
+         if self.loss_type == 'l1':
+             loss = (noise - noise_pred).abs().mean()
+         elif self.loss_type == 'l2':
+             loss = F.mse_loss(noise, noise_pred)
+         elif self.loss_type == 'ssim':
+             loss = (noise - noise_pred).abs().mean()
+             loss = loss + (1 - self.ssim_loss(noise, noise_pred))
+         else:
+             raise NotImplementedError()
+         return loss, x_tp1_gt, noise_pred, x_t_pred, x_t_gt, x0_pred
+
+     @torch.no_grad()
+     def p_sample(self, x, t, cond, img_lr_up, noise_pred=None, clip_denoised=True, repeat_noise=False, sam_mask=None):
+         if noise_pred is None:
+             noise_pred = self.denoise_fn(x, t, cond=cond, img_lr_up=img_lr_up, sam_mask=sam_mask)
+         b, *_, device = *x.shape, x.device
+         model_mean, _, model_log_variance, x0_pred = self.p_mean_variance(
+             x=x, t=t, noise_pred=noise_pred, clip_denoised=clip_denoised)
+
+         noise = noise_like(x.shape, device, repeat_noise)
+
+         # no noise when t == 0
+         nonzero_mask = (1 - (t == 0).float()).reshape(b, *((1,) * (len(x.shape) - 1)))
+         return model_mean + nonzero_mask * (0.5 * model_log_variance).exp() * noise, x0_pred
+
+     @torch.no_grad()
+     def sample(self, img_lr, img_lr_up, shape, sam_mask=None, save_intermediate=False):
+         device = self.betas.device
+         b = shape[0]
+
+         if not hparams['res']:
+             t = torch.full((b,), self.num_timesteps - 1, device=device, dtype=torch.long)
+             noise = None
+             img = self.q_sample(img_lr_up, t, noise=noise)
+         else:
+             img = torch.randn(shape, device=device)
+
+         if hparams['use_rrdb']:
+             rrdb_out, cond = self.rrdb(img_lr, True)
+         else:
+             rrdb_out = img_lr_up
+             cond = img_lr
+
+         it = reversed(range(0, self.num_timesteps))
+
+         if self.sample_tqdm:
+             it = tqdm(it, desc='sampling loop time step', total=self.num_timesteps)
+
+         images = []
+         for i in it:
+             img, x_recon = self.p_sample(
+                 img, torch.full((b,), i, device=device, dtype=torch.long), cond, img_lr_up, sam_mask=sam_mask)
+             if save_intermediate:
+                 img_ = self.res2img(img, img_lr_up)
+                 x_recon_ = self.res2img(x_recon, img_lr_up)
+                 images.append((img_.cpu(), x_recon_.cpu()))
+         img = self.res2img(img, img_lr_up)
+
+         if save_intermediate:
+             return img, rrdb_out, images
+         else:
+             return img, rrdb_out
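The only change relative to the parent class is in `p_losses`: when `p_losses_sam` is enabled, the SAM mask is resized to the noise resolution, optionally scaled by the per-step coefficient `sqrt(1 - alpha_bar_t) * beta_t` registered in `GaussianDiffusion.__init__`, and added to the Gaussian noise before `q_sample`. A stand-alone sketch of that modulation with hypothetical tensors (same operations as above):

import torch
import torch.nn.functional as F

betas = torch.linspace(1e-4, 2e-2, 100)
alphas_cumprod = torch.cumprod(1.0 - betas, dim=0)
mask_coefficient = torch.sqrt(1.0 - alphas_cumprod) * betas   # as in GaussianDiffusion.__init__

x_start = torch.randn(4, 3, 160, 160)          # batch of HR residuals
sam_mask = torch.rand(4, 1, 40, 40)            # SAM mask stored at LR resolution
t = torch.randint(0, 100, (4,))

noise = torch.randn_like(x_start)
m = F.interpolate(sam_mask, x_start.shape[2:], mode='bilinear')   # upsample mask to HR size
m = m * mask_coefficient[t].reshape(-1, 1, 1, 1)                  # scale by the schedule at step t
noise = noise + m                                                  # structure-aware noise fed to q_sample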
sam_diffsr/models_sr/module_util.py ADDED
@@ -0,0 +1,58 @@
+ from inspect import isfunction
+ from torch import nn
+ from torch.nn import init
+
+
+ def exists(x):
+     return x is not None
+
+
+ def default(val, d):
+     if exists(val):
+         return val
+     return d() if isfunction(d) else d
+
+
+ def cycle(dl):
+     while True:
+         for data in dl:
+             yield data
+
+
+ def num_to_groups(num, divisor):
+     groups = num // divisor
+     remainder = num % divisor
+     arr = [divisor] * groups
+     if remainder > 0:
+         arr.append(remainder)
+     return arr
+
+
+ def initialize_weights(net_l, scale=0.1):
+     if not isinstance(net_l, list):
+         net_l = [net_l]
+     for net in net_l:
+         for m in net.modules():
+             if isinstance(m, nn.Conv2d):
+                 init.kaiming_normal_(m.weight, a=0, mode='fan_in')
+                 m.weight.data *= scale  # for residual block
+                 if m.bias is not None:
+                     m.bias.data.zero_()
+             elif isinstance(m, nn.Linear):
+                 init.kaiming_normal_(m.weight, a=0, mode='fan_in')
+                 m.weight.data *= scale
+                 if m.bias is not None:
+                     m.bias.data.zero_()
+             elif isinstance(m, nn.BatchNorm2d):
+                 init.constant_(m.weight, 1)
+                 init.constant_(m.bias.data, 0.0)
+
+
+ def make_layer(block, n_layers, seq=False):
+     layers = []
+     for _ in range(n_layers):
+         layers.append(block())
+     if seq:
+         return nn.Sequential(*layers)
+     else:
+         return nn.ModuleList(layers)
sam_diffsr/tasks/__init__.py ADDED
File without changes
sam_diffsr/tasks/infer.py ADDED
@@ -0,0 +1,81 @@
1
+ import importlib
2
+ import os
3
+ import sys
4
+ from collections import OrderedDict
5
+ from pathlib import Path
6
+
7
+ from tasks.srdiff_df2k import InferDataSet
8
+
9
+ parent_path = Path(__file__).absolute().parent.parent
10
+ sys.path.append(os.path.abspath(parent_path))
11
+ os.chdir(parent_path)
12
+ print(f'>-------------> parent path {parent_path}')
13
+ print(f'>-------------> current work dir {os.getcwd()}')
14
+
15
+ cache_path = os.path.join(parent_path, 'cache')
16
+ os.environ["HF_DATASETS_CACHE"] = cache_path
17
+ os.environ["TRANSFORMERS_CACHE"] = cache_path
18
+ os.environ["torch_HOME"] = cache_path
19
+
20
+ import torch
21
+ from PIL import Image
22
+ from tqdm import tqdm
23
+ from torch.utils.tensorboard import SummaryWriter
24
+ from utils_sr.hparams import hparams, set_hparams
25
+
26
+
27
+ def load_ckpt(ckpt_path, model):
28
+ checkpoint = torch.load(ckpt_path, map_location='cpu')
29
+ stat_dict = checkpoint['state_dict']['model']
30
+
31
+ new_state_dict = OrderedDict()
32
+ for k, v in stat_dict.items():
33
+ if k[:7] == 'module.':
34
+ k = k[7:] # strip the `module.` prefix
35
+ new_state_dict[k] = v
36
+
37
+ model.load_state_dict(new_state_dict)
38
+ model.cuda()
39
+
40
+
41
+ def infer(trainer, ckpt_path, img_dir, save_dir):
42
+ trainer.build_model()
43
+ load_ckpt(ckpt_path, trainer.model)
44
+
45
+ dataset = InferDataSet(img_dir)
46
+ test_dataloader = torch.utils.data.DataLoader(
47
+ dataset, batch_size=hparams['eval_batch_size'], shuffle=False, pin_memory=False)
48
+
49
+ torch.backends.cudnn.benchmark = False
50
+
51
+ with torch.no_grad():
52
+ trainer.model.eval()
53
+ pbar = tqdm(enumerate(test_dataloader), total=len(test_dataloader))
54
+ for batch_idx, batch in pbar:
55
+ img_lr, img_lr_up, img_name = batch
56
+
57
+ img_lr = img_lr.to('cuda')
58
+ img_lr_up = img_lr_up.to('cuda')
59
+
60
+ img_sr, _ = trainer.model.sample(img_lr, img_lr_up, img_lr_up.shape)
61
+
62
+ img_sr = img_sr.clamp(-1, 1)
63
+ img_sr = trainer.tensor2img(img_sr)[0]
64
+ img_sr = Image.fromarray(img_sr)
65
+ img_sr.save(os.path.join(save_dir, img_name[0]))
66
+
67
+
68
+ if __name__ == '__main__':
69
+ set_hparams()
70
+
71
+ img_dir = hparams['img_dir']
72
+ save_dir = hparams['save_dir']
73
+ ckpt_path = hparams['ckpt_path']
74
+
75
+ pkg = ".".join(hparams["trainer_cls"].split(".")[:-1])
76
+ cls_name = hparams["trainer_cls"].split(".")[-1]
77
+ trainer = getattr(importlib.import_module(pkg), cls_name)()
78
+
79
+ os.makedirs(save_dir, exist_ok=True)
80
+
81
+ infer(trainer, ckpt_path, img_dir, save_dir)
sam_diffsr/tasks/rrdb.py ADDED
@@ -0,0 +1,68 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+ from models_sr.diffsr_modules import RRDBNet
5
+ from tasks.srdiff_df2k import Df2kDataSet
6
+ from tasks.trainer import Trainer
7
+ from utils_sr.hparams import hparams
8
+ from utils_sr.sr_utils import PerceptualLoss
9
+
10
+
11
+ class RRDBTask(Trainer):
12
+ def __init__(self):
13
+ super().__init__()
14
+ if 'rrdb_loss' in hparams and hparams['rrdb_loss']['percep_loss']:
15
+ self.percep_loss_fn = PerceptualLoss()
16
+ self.percep_loss_weight = hparams['rrdb_loss']['percep_loss_weight']
17
+ else:
18
+ self.percep_loss_fn = None
19
+ self.percep_loss_weight = 0
20
+
21
+ def build_model(self):
22
+ hidden_size = hparams['hidden_size']
23
+ self.model = RRDBNet(3, 3, hidden_size, hparams['num_block'], hidden_size // 2)
24
+ return self.model
25
+
26
+ def build_optimizer(self, model):
27
+ return torch.optim.Adam(model.parameters(), lr=hparams['lr'])
28
+
29
+ def build_scheduler(self, optimizer):
30
+ return torch.optim.lr_scheduler.StepLR(optimizer, 200000, 0.5)
31
+
32
+ def training_step(self, sample):
33
+ img_hr = sample['img_hr']
34
+ img_lr = sample['img_lr']
35
+ p = self.model(img_lr)
36
+ total_loss = 0
37
+ loss = F.l1_loss(p, img_hr, reduction='mean')
38
+ total_loss += loss
39
+
40
+ if self.percep_loss_fn:
41
+ loss_percep = self.percep_loss_fn(img_hr, p) * self.percep_loss_weight
42
+ total_loss += loss_percep
43
+ return {'l': loss, 'loss_percep': loss_percep, 'total_loss': total_loss,
44
+ 'lr': self.scheduler.get_last_lr()[0]}, total_loss
45
+ else:
46
+ return {'l': loss, 'lr': self.scheduler.get_last_lr()[0]}, total_loss
47
+
48
+ def sample_and_test(self, sample):
49
+ ret = {k: 0 for k in self.metric_keys}
50
+ ret['n_samples'] = 0
51
+ img_hr = sample['img_hr']
52
+ img_lr = sample['img_lr']
53
+ img_sr = self.model(img_lr)
54
+ img_sr = img_sr.clamp(-1, 1)
55
+ for b in range(img_sr.shape[0]):
56
+ s = self.measure.measure(img_sr[b], img_hr[b], img_lr[b], hparams['sr_scale'])
57
+ ret['psnr'] += s['psnr']
58
+ ret['ssim'] += s['ssim']
59
+ ret['lpips'] += s['lpips']
60
+ ret['lr_psnr'] += s['lr_psnr']
61
+ ret['n_samples'] += 1
62
+ return img_sr, img_sr, ret
63
+
64
+
65
+ class RRDBDf2kTask(RRDBTask):
66
+ def __init__(self):
67
+ super().__init__()
68
+ self.dataset_cls = Df2kDataSet
sam_diffsr/tasks/rrdb_sam.py ADDED
@@ -0,0 +1,49 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+ from models_sr.diffsr_modules import RRDBNet
5
+ from tasks.srdiff_df2k_sam import Df2kDataSet_sam
6
+ from tasks.trainer import Trainer
7
+ from utils_sr.hparams import hparams
8
+
9
+
10
+ class RRDBTask_sam(Trainer):
11
+ def build_model(self):
12
+ hidden_size = hparams['hidden_size']
13
+ self.model = RRDBNet(3, 3, hidden_size, hparams['num_block'], hidden_size // 2)
14
+ return self.model
15
+
16
+ def build_optimizer(self, model):
17
+ return torch.optim.Adam(model.parameters(), lr=hparams['lr'])
18
+
19
+ def build_scheduler(self, optimizer):
20
+ return torch.optim.lr_scheduler.StepLR(optimizer, 200000, 0.5)
21
+
22
+ def training_step(self, sample):
23
+ img_hr = sample['img_hr']
24
+ img_lr = sample['img_lr']
25
+ p = self.model(img_lr)
26
+ loss = F.l1_loss(p, img_hr, reduction='mean')
27
+ return {'l': loss, 'lr': self.scheduler.get_last_lr()[0]}, loss
28
+
29
+ def sample_and_test(self, sample):
30
+ ret = {k: 0 for k in self.metric_keys}
31
+ ret['n_samples'] = 0
32
+ img_hr = sample['img_hr']
33
+ img_lr = sample['img_lr']
34
+ img_sr = self.model(img_lr)
35
+ img_sr = img_sr.clamp(-1, 1)
36
+ for b in range(img_sr.shape[0]):
37
+ s = self.measure.measure(img_sr[b], img_hr[b], img_lr[b], hparams['sr_scale'])
38
+ ret['psnr'] += s['psnr']
39
+ ret['ssim'] += s['ssim']
40
+ ret['lpips'] += s['lpips']
41
+ ret['lr_psnr'] += s['lr_psnr']
42
+ ret['n_samples'] += 1
43
+ return img_sr, img_sr, ret
44
+
45
+
46
+ class RRDBDf2kTask_sam(RRDBTask_sam):
47
+ def __init__(self):
48
+ super().__init__()
49
+ self.dataset_cls = Df2kDataSet_sam
sam_diffsr/tasks/srdiff.py ADDED
@@ -0,0 +1,76 @@
1
+ import os.path
2
+
3
+ import torch
4
+
5
+ from sam_diffsr.models_sr.diffsr_modules import Unet, RRDBNet
6
+ from sam_diffsr.models_sr.diffusion import GaussianDiffusion
7
+ from sam_diffsr.tasks.trainer import Trainer
8
+ from sam_diffsr.utils_sr.hparams import hparams
9
+ from sam_diffsr.utils_sr.utils import load_ckpt
10
+
11
+
12
+ class SRDiffTrainer(Trainer):
13
+ def build_model(self):
14
+ hidden_size = hparams['hidden_size']
15
+ dim_mults = hparams['unet_dim_mults']
16
+ dim_mults = [int(x) for x in dim_mults.split('|')]
17
+ denoise_fn = Unet(
18
+ hidden_size, out_dim=3, cond_dim=hparams['rrdb_num_feat'], dim_mults=dim_mults)
19
+ if hparams['use_rrdb']:
20
+ rrdb = RRDBNet(3, 3, hparams['rrdb_num_feat'], hparams['rrdb_num_block'],
21
+ hparams['rrdb_num_feat'] // 2)
22
+ if hparams['rrdb_ckpt'] != '' and os.path.exists(hparams['rrdb_ckpt']):
23
+ load_ckpt(rrdb, hparams['rrdb_ckpt'])
24
+ else:
25
+ rrdb = None
26
+ self.model = GaussianDiffusion(
27
+ denoise_fn=denoise_fn,
28
+ rrdb_net=rrdb,
29
+ timesteps=hparams['timesteps'],
30
+ loss_type=hparams['loss_type']
31
+ )
32
+ self.global_step = 0
33
+ return self.model
34
+
35
+ def sample_and_test(self, sample):
36
+ ret = {k: 0 for k in self.metric_keys}
37
+ ret['n_samples'] = 0
38
+ img_hr = sample['img_hr']
39
+ img_lr = sample['img_lr']
40
+ img_lr_up = sample['img_lr_up']
41
+ img_sr, rrdb_out = self.model.sample(img_lr, img_lr_up, img_hr.shape)
42
+ for b in range(img_sr.shape[0]):
43
+ s = self.measure.measure(img_sr[b], img_hr[b], img_lr[b], hparams['sr_scale'])
44
+ ret['psnr'] += s['psnr']
45
+ ret['ssim'] += s['ssim']
46
+ ret['lpips'] += s['lpips']
47
+ ret['lr_psnr'] += s['lr_psnr']
48
+ ret['n_samples'] += 1
49
+ return img_sr, rrdb_out, ret
50
+
51
+ def build_optimizer(self, model):
52
+ params = list(model.named_parameters())
53
+ if not hparams['fix_rrdb']:
54
+ params = [p for p in params if 'rrdb' not in p[0]]
55
+ params = [p[1] for p in params]
56
+ return torch.optim.Adam(params, lr=hparams['lr'])
57
+
58
+ def build_scheduler(self, optimizer):
59
+ if 'scheduler' in hparams:
60
+ scheduler_config = hparams['scheduler']
61
+ if scheduler_config['type'] == 'cosine':
62
+ lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, hparams['max_updates'],
63
+ eta_min=scheduler_config['eta_min'])
64
+
65
+ else:
66
+ lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, hparams['decay_steps'], gamma=0.5)
67
+
68
+ return lr_scheduler
69
+
70
+ def training_step(self, batch):
71
+ img_hr = batch['img_hr']
72
+ img_lr = batch['img_lr']
73
+ img_lr_up = batch['img_lr_up']
74
+ losses, _, _ = self.model(img_hr, img_lr, img_lr_up)
75
+ total_loss = sum(losses.values())
76
+ return losses, total_loss
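
build_scheduler above picks cosine annealing when a `scheduler` block is present in the config and falls back to step decay otherwise. A minimal sketch of the two branches on throwaway optimizers (the step counts and eta_min here are assumptions, not the repository defaults):

import torch

def toy_optimizer():
    return torch.optim.Adam([torch.nn.Parameter(torch.zeros(1))], lr=2e-4)

# Cosine branch: anneal the learning rate down to eta_min over max_updates steps.
cosine = torch.optim.lr_scheduler.CosineAnnealingLR(toy_optimizer(), T_max=400000, eta_min=1e-6)

# Default branch: halve the learning rate every decay_steps updates.
step = torch.optim.lr_scheduler.StepLR(toy_optimizer(), step_size=100000, gamma=0.5)
print(cosine.get_last_lr(), step.get_last_lr())
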
sam_diffsr/tasks/srdiff_df2k.py ADDED
@@ -0,0 +1,119 @@
1
+ import os
2
+ import random
3
+
4
+ import numpy as np
5
+ from PIL import Image
6
+ from torch.utils.data import Dataset
7
+ from torchvision import transforms
8
+
9
+ from sam_diffsr.tasks.srdiff import SRDiffTrainer
10
+ from sam_diffsr.utils_sr.dataset import SRDataSet
11
+ from sam_diffsr.utils_sr.hparams import hparams
12
+ from sam_diffsr.utils_sr.matlab_resize import imresize
13
+
14
+
15
+ class InferDataSet(Dataset):
16
+ def __init__(self, img_dir):
17
+ super().__init__()
18
+
19
+ self.img_path_list = [os.path.join(img_dir, img_name) for img_name in os.listdir(img_dir)]
20
+ self.to_tensor_norm = transforms.Compose([
21
+ transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
22
+ ])
23
+
24
+ def __getitem__(self, index):
25
+ sr_scale = hparams['sr_scale']
26
+
27
+ img_path = self.img_path_list[index]
28
+ img_name = os.path.basename(img_path)
29
+
30
+ img_lr = Image.open(img_path).convert('RGB')
31
+ img_lr = np.uint8(np.asarray(img_lr))
32
+
33
+ h, w, c = img_lr.shape
34
+ h, w = h * sr_scale, w * sr_scale
35
+ h = h - h % (sr_scale * 2)
36
+ w = w - w % (sr_scale * 2)
37
+ h_l = h // sr_scale
38
+ w_l = w // sr_scale
39
+
40
+ img_lr = img_lr[:h_l, :w_l]
41
+
42
+ img_lr_up = imresize(img_lr / 256, hparams['sr_scale']) # np.float [H, W, C]
43
+ img_lr, img_lr_up = [self.to_tensor_norm(x).float() for x in [img_lr, img_lr_up]]
44
+
45
+ return img_lr, img_lr_up, img_name
46
+
47
+ def __len__(self):
48
+ return len(self.img_path_list)
49
+
50
+
51
+ class Df2kDataSet(SRDataSet):
52
+ def __init__(self, prefix='train'):
53
+ if prefix == 'valid':
54
+ _prefix = 'test'
55
+ else:
56
+ _prefix = prefix
57
+
58
+ super().__init__(_prefix)
59
+ self.patch_size = hparams['patch_size']
60
+ self.patch_size_lr = hparams['patch_size'] // hparams['sr_scale']
61
+ if prefix == 'valid':
62
+ self.len = hparams['eval_batch_size'] * hparams['valid_steps']
63
+
64
+ self.data_aug_transforms = transforms.Compose([
65
+ transforms.RandomHorizontalFlip(),
66
+ transforms.RandomRotation(20, resample=Image.BICUBIC),
67
+ transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
68
+ ])
69
+
70
+ def __getitem__(self, index):
71
+ item = self._get_item(index)
72
+ hparams = self.hparams
73
+ sr_scale = hparams['sr_scale']
74
+
75
+ img_hr = np.uint8(item['img'])
76
+ img_lr = np.uint8(item['img_lr'])
77
+
78
+ # TODO: clip for SRFlow
79
+ h, w, c = img_hr.shape
80
+ h = h - h % (sr_scale * 2)
81
+ w = w - w % (sr_scale * 2)
82
+ h_l = h // sr_scale
83
+ w_l = w // sr_scale
84
+ img_hr = img_hr[:h, :w]
85
+ img_lr = img_lr[:h_l, :w_l]
86
+ # random crop
87
+ if self.prefix == 'train':
88
+ if self.data_augmentation and random.random() < 0.5:
89
+ img_hr, img_lr = self.data_augment(img_hr, img_lr)
90
+ i = random.randint(0, h - self.patch_size) // sr_scale * sr_scale
91
+ i_lr = i // sr_scale
92
+ j = random.randint(0, w - self.patch_size) // sr_scale * sr_scale
93
+ j_lr = j // sr_scale
94
+ img_hr = img_hr[i:i + self.patch_size, j:j + self.patch_size]
95
+ img_lr = img_lr[i_lr:i_lr + self.patch_size_lr, j_lr:j_lr + self.patch_size_lr]
96
+ img_lr_up = imresize(img_lr / 256, hparams['sr_scale']) # np.float [H, W, C]
97
+ img_hr, img_lr, img_lr_up = [self.to_tensor_norm(x).float() for x in [img_hr, img_lr, img_lr_up]]
98
+ return {
99
+ 'img_hr': img_hr, 'img_lr': img_lr,
100
+ 'img_lr_up': img_lr_up, 'item_name': item['item_name'],
101
+ 'loc': np.array(item['loc']), 'loc_bdr': np.array(item['loc_bdr'])
102
+ }
103
+
104
+ def __len__(self):
105
+ return self.len
106
+
107
+ def data_augment(self, img_hr, img_lr):
108
+ sr_scale = self.hparams['sr_scale']
109
+ img_hr = Image.fromarray(img_hr)
110
+ img_hr = self.data_aug_transforms(img_hr)
111
+ img_hr = np.asarray(img_hr) # np.uint8 [H, W, C]
112
+ img_lr = imresize(img_hr, 1 / sr_scale)
113
+ return img_hr, img_lr
114
+
115
+
116
+ class SRDiffDf2k(SRDiffTrainer):
117
+ def __init__(self):
118
+ super().__init__()
119
+ self.dataset_cls = Df2kDataSet
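
The random crop in Df2kDataSet.__getitem__ snaps the HR offset to a multiple of sr_scale so that the LR patch covers exactly the same region. A small standalone sketch of that alignment (the 4x scale and 160-pixel patch size are assumptions):

import random

sr_scale, patch_size = 4, 160
h = w = 480  # already trimmed to a multiple of sr_scale * 2

i = random.randint(0, h - patch_size) // sr_scale * sr_scale  # HR row offset, multiple of sr_scale
j = random.randint(0, w - patch_size) // sr_scale * sr_scale  # HR col offset, multiple of sr_scale
i_lr, j_lr = i // sr_scale, j // sr_scale                     # matching LR offsets

# HR[i:i+160, j:j+160] and LR[i_lr:i_lr+40, j_lr:j_lr+40] describe the same image region.
assert i == i_lr * sr_scale and j == j_lr * sr_scale
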
sam_diffsr/tasks/srdiff_df2k_sam.py ADDED
@@ -0,0 +1,211 @@
1
+ import os
2
+ import random
3
+
4
+ import cv2
5
+ import numpy as np
6
+ import torch
7
+ from PIL import Image
8
+ from rotary_embedding_torch import RotaryEmbedding
9
+ from torchvision import transforms
10
+
11
+ from sam_diffsr.models_sr.diffsr_modules import RRDBNet, Unet
12
+ from sam_diffsr.models_sr.diffusion_sam import GaussianDiffusion_sam
13
+ from sam_diffsr.tasks.srdiff import SRDiffTrainer
14
+ from sam_diffsr.utils_sr.dataset import SRDataSet
15
+ from sam_diffsr.utils_sr.hparams import hparams
16
+ from sam_diffsr.utils_sr.indexed_datasets import IndexedDataset
17
+ from sam_diffsr.utils_sr.matlab_resize import imresize
18
+ from sam_diffsr.utils_sr.utils import load_ckpt
19
+
20
+
21
+ def normalize_01(data):
22
+ mu = np.mean(data)
23
+ sigma = np.std(data)
24
+
25
+ if sigma == 0.:
26
+ return data - mu
27
+ else:
28
+ return (data - mu) / sigma
29
+
30
+
31
+ def normalize_11(data):
32
+ mu = np.mean(data)
33
+ sigma = np.std(data)
34
+
35
+ if sigma == 0.:
36
+ return data - mu
37
+ else:
38
+ return (data - mu) / sigma - 1
39
+
40
+
41
+ class Df2kDataSet_sam(SRDataSet):
42
+ def __init__(self, prefix='train'):
43
+
44
+ if prefix == 'valid':
45
+ _prefix = 'test'
46
+ else:
47
+ _prefix = prefix
48
+
49
+ super().__init__(_prefix)
50
+
51
+ self.patch_size = hparams['patch_size']
52
+ self.patch_size_lr = hparams['patch_size'] // hparams['sr_scale']
53
+ if prefix == 'valid':
54
+ self.len = hparams['eval_batch_size'] * hparams['valid_steps']
55
+
56
+ self.data_position_aug_transforms = transforms.Compose([
57
+ transforms.RandomHorizontalFlip(),
58
+ transforms.RandomRotation(20, interpolation=Image.BICUBIC),
59
+ ])
60
+
61
+ self.data_color_aug_transforms = transforms.Compose([
62
+ transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
63
+ ])
64
+
65
+ self.sam_config = hparams.get('sam_config', False)
66
+
67
+ if self.sam_config.get('mask_RoPE', False):
68
+ h, w = map(int, self.sam_config['mask_RoPE_shape'].split('-'))
69
+ rotary_emb = RotaryEmbedding(dim=h)
70
+ sam_mask = rotary_emb.rotate_queries_or_keys(torch.ones(1, 1, w, h))
71
+ self.RoPE_mask = sam_mask.cpu().numpy()[0, 0, ...]
72
+
73
+ def _get_item(self, index):
74
+ if self.indexed_ds is None:
75
+ self.indexed_ds = IndexedDataset(f'{self.data_dir}/{self.prefix}')
76
+ return self.indexed_ds[index]
77
+
78
+ def __getitem__(self, index):
79
+ item = self._get_item(index)
80
+ hparams = self.hparams
81
+ sr_scale = hparams['sr_scale']
82
+
83
+ img_hr = np.uint8(item['img'])
84
+ img_lr = np.uint8(item['img_lr'])
85
+
86
+ if self.sam_config.get('mask_RoPE', False):
87
+ sam_mask = self.RoPE_mask
88
+ else:
89
+ if 'sam_mask' in item:
90
+ sam_mask = item['sam_mask']
91
+ if sam_mask.shape != img_hr.shape[:2]:
92
+ sam_mask = cv2.resize(sam_mask, dsize=img_hr.shape[:2][::-1])
93
+ else:
94
+ sam_mask = np.zeros_like(img_lr)
95
+
96
+ # TODO: clip for SRFlow
97
+ h, w, c = img_hr.shape
98
+ h = h - h % (sr_scale * 2)
99
+ w = w - w % (sr_scale * 2)
100
+ h_l = h // sr_scale
101
+ w_l = w // sr_scale
102
+ img_hr = img_hr[:h, :w]
103
+ sam_mask = sam_mask[:h, :w]
104
+ img_lr = img_lr[:h_l, :w_l]
105
+
106
+ # random crop
107
+ if self.prefix == 'train':
108
+ if self.data_augmentation and random.random() < 0.5:
109
+ img_hr, img_lr, sam_mask = self.data_augment(img_hr, img_lr, sam_mask)
110
+ i = random.randint(0, h - self.patch_size) // sr_scale * sr_scale
111
+ i_lr = i // sr_scale
112
+ j = random.randint(0, w - self.patch_size) // sr_scale * sr_scale
113
+ j_lr = j // sr_scale
114
+ img_hr = img_hr[i:i + self.patch_size, j:j + self.patch_size]
115
+ sam_mask = sam_mask[i:i + self.patch_size, j:j + self.patch_size]
116
+ img_lr = img_lr[i_lr:i_lr + self.patch_size_lr, j_lr:j_lr + self.patch_size_lr]
117
+
118
+ img_lr_up = imresize(img_lr / 256, hparams['sr_scale']) # np.float [H, W, C]
119
+ img_hr, img_lr, img_lr_up = [self.to_tensor_norm(x).float() for x in [img_hr, img_lr, img_lr_up]]
120
+
121
+ if hparams['sam_data_config']['all_same_mask_to_zero']:
122
+ if len(np.unique(sam_mask)) == 1:
123
+ sam_mask = np.zeros_like(sam_mask)
124
+
125
+ if hparams['sam_data_config']['normalize_01']:
126
+ if len(np.unique(sam_mask)) != 1:
127
+ sam_mask = normalize_01(sam_mask)
128
+
129
+ if hparams['sam_data_config']['normalize_11']:
130
+ if len(np.unique(sam_mask)) != 1:
131
+ sam_mask = normalize_11(sam_mask)
132
+
133
+ sam_mask = torch.FloatTensor(sam_mask).unsqueeze(dim=0)
134
+
135
+ return {
136
+ 'img_hr': img_hr, 'img_lr': img_lr,
137
+ 'img_lr_up': img_lr_up, 'item_name': item['item_name'],
138
+ 'loc': np.array(item['loc']), 'loc_bdr': np.array(item['loc_bdr']),
139
+ 'sam_mask': sam_mask
140
+ }
141
+
142
+ def __len__(self):
143
+ return self.len
144
+
145
+ def data_augment(self, img_hr, img_lr, sam_mask):
146
+ sr_scale = self.hparams['sr_scale']
147
+ img_hr = Image.fromarray(img_hr)
148
+ img_hr, sam_mask = self.data_position_aug_transforms([img_hr, sam_mask])
149
+ img_hr = self.data_color_aug_transforms(img_hr)
150
+ img_hr = np.asarray(img_hr) # np.uint8 [H, W, C]
151
+ img_lr = imresize(img_hr, 1 / sr_scale)
152
+ return img_hr, img_lr, sam_mask
153
+
154
+
155
+ class SRDiffDf2k_sam(SRDiffTrainer):
156
+ def __init__(self):
157
+ super().__init__()
158
+ self.dataset_cls = Df2kDataSet_sam
159
+ self.sam_config = hparams['sam_config']
160
+
161
+ def build_model(self):
162
+ hidden_size = hparams['hidden_size']
163
+ dim_mults = hparams['unet_dim_mults']
164
+ dim_mults = [int(x) for x in dim_mults.split('|')]
165
+
166
+ denoise_fn = Unet(
167
+ hidden_size, out_dim=3, cond_dim=hparams['rrdb_num_feat'], dim_mults=dim_mults)
168
+ if hparams['use_rrdb']:
169
+ rrdb = RRDBNet(3, 3, hparams['rrdb_num_feat'], hparams['rrdb_num_block'],
170
+ hparams['rrdb_num_feat'] // 2)
171
+ if hparams['rrdb_ckpt'] != '' and os.path.exists(hparams['rrdb_ckpt']):
172
+ load_ckpt(rrdb, hparams['rrdb_ckpt'])
173
+ else:
174
+ rrdb = None
175
+ self.model = GaussianDiffusion_sam(
176
+ denoise_fn=denoise_fn,
177
+ rrdb_net=rrdb,
178
+ timesteps=hparams['timesteps'],
179
+ loss_type=hparams['loss_type'],
180
+ sam_config=hparams['sam_config']
181
+ )
182
+ self.global_step = 0
183
+ return self.model
184
+
185
+ # def sample_and_test(self, sample):
186
+ # ret = {k: 0 for k in self.metric_keys}
187
+ # ret['n_samples'] = 0
188
+ # img_hr = sample['img_hr']
189
+ # img_lr = sample['img_lr']
190
+ # img_lr_up = sample['img_lr_up']
191
+ # sam_mask = sample['sam_mask']
192
+ #
193
+ # img_sr, rrdb_out = self.model.sample(img_lr, img_lr_up, img_hr.shape, sam_mask=sam_mask)
194
+ #
195
+ # for b in range(img_sr.shape[0]):
196
+ # s = self.measure.measure(img_sr[b], img_hr[b], img_lr[b], hparams['sr_scale'])
197
+ # ret['psnr'] += s['psnr']
198
+ # ret['ssim'] += s['ssim']
199
+ # ret['lpips'] += s['lpips']
200
+ # ret['lr_psnr'] += s['lr_psnr']
201
+ # ret['n_samples'] += 1
202
+ # return img_sr, rrdb_out, ret
203
+
204
+ def training_step(self, batch):
205
+ img_hr = batch['img_hr']
206
+ img_lr = batch['img_lr']
207
+ img_lr_up = batch['img_lr_up']
208
+ sam_mask = batch['sam_mask']
209
+ losses, _, _ = self.model(img_hr, img_lr, img_lr_up, sam_mask=sam_mask)
210
+ total_loss = sum(losses.values())
211
+ return losses, total_loss
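
The SAM mask handed to the diffusion model is standardized by normalize_01 (plain z-scoring) unless it is constant. A quick numerical check of that behaviour:

import numpy as np

mask = np.array([[0., 1., 2.], [3., 4., 5.]])
z = (mask - mask.mean()) / mask.std()  # what normalize_01 computes for a non-constant mask
print(z.mean(), z.std())               # approximately 0.0 and exactly 1.0

flat = np.full((2, 3), 7.0)
print(flat - flat.mean())              # constant masks are only mean-shifted (all zeros)
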
sam_diffsr/tasks/trainer.py ADDED
@@ -0,0 +1,346 @@
1
+ import importlib
2
+ import json
3
+ import os
4
+ import subprocess
5
+ import sys
6
+ from collections import OrderedDict
7
+ from pathlib import Path
8
+
9
+ parent_path = Path(__file__).absolute().parent.parent
10
+ sys.path.append(os.path.abspath(parent_path))
11
+ os.chdir(parent_path)
12
+ print(f'>-------------> parent path {parent_path}')
13
+ print(f'>-------------> current work dir {os.getcwd()}')
14
+
15
+ cache_path = os.path.join(parent_path, 'cache')
16
+ os.environ["HF_DATASETS_CACHE"] = cache_path
17
+ os.environ["TRANSFORMERS_CACHE"] = cache_path
18
+ os.environ["torch_HOME"] = cache_path
19
+
20
+ import torch
21
+ from PIL import Image
22
+ from tqdm import tqdm
23
+ import numpy as np
24
+ from torch.utils.tensorboard import SummaryWriter
25
+ from sam_diffsr.utils_sr.hparams import hparams, set_hparams
26
+ from sam_diffsr.utils_sr.utils import plot_img, move_to_cuda, load_checkpoint, save_checkpoint, tensors_to_scalars, Measure, \
27
+ get_all_ckpts
28
+
29
+
30
+
31
+ class Trainer:
32
+ def __init__(self):
33
+ self.logger = self.build_tensorboard(save_dir=hparams['work_dir'], name='tb_logs')
34
+ self.measure = Measure()
35
+ self.dataset_cls = None
36
+ self.metric_keys = ['psnr', 'ssim', 'lpips', 'lr_psnr']
37
+ self.metric_2_keys = ['psnr-Y', 'ssim', 'fid']
38
+ self.work_dir = hparams['work_dir']
39
+ self.first_val = True
40
+
41
+ self.val_steps = hparams['val_steps']
42
+
43
+ def build_tensorboard(self, save_dir, name, **kwargs):
44
+ log_dir = os.path.join(save_dir, name)
45
+ os.makedirs(log_dir, exist_ok=True)
46
+ return SummaryWriter(log_dir=log_dir, **kwargs)
47
+
48
+ def build_train_dataloader(self):
49
+ dataset = self.dataset_cls('train')
50
+ return torch.utils.data.DataLoader(
51
+ dataset, batch_size=hparams['batch_size'], shuffle=True,
52
+ pin_memory=False, num_workers=hparams['num_workers'])
53
+
54
+ def build_val_dataloader(self):
55
+ return torch.utils.data.DataLoader(
56
+ self.dataset_cls('valid'), batch_size=hparams['eval_batch_size'], shuffle=False, pin_memory=False)
57
+
58
+ def build_test_dataloader(self):
59
+ return torch.utils.data.DataLoader(
60
+ self.dataset_cls('test'), batch_size=hparams['eval_batch_size'], shuffle=False, pin_memory=False)
61
+
62
+ def build_model(self):
63
+ raise NotImplementedError
64
+
65
+ def sample_and_test(self, sample):
66
+ raise NotImplementedError
67
+
68
+ def build_optimizer(self, model):
69
+ raise NotImplementedError
70
+
71
+ def build_scheduler(self, optimizer):
72
+ raise NotImplementedError
73
+
74
+ def training_step(self, batch):
75
+ raise NotImplementedError
76
+
77
+ def train(self):
78
+ model = self.build_model()
79
+ optimizer = self.build_optimizer(model)
80
+ self.global_step = training_step = load_checkpoint(model, optimizer, hparams['work_dir'], steps=self.val_steps)
81
+ self.scheduler = scheduler = self.build_scheduler(optimizer)
82
+ scheduler.step(training_step)
83
+ dataloader = self.build_train_dataloader()
84
+
85
+ train_pbar = tqdm(dataloader, initial=training_step, total=float('inf'),
86
+ dynamic_ncols=True, unit='step')
87
+ while self.global_step < hparams['max_updates']:
88
+ for batch in train_pbar:
89
+ if training_step % hparams['val_check_interval'] == 0:
90
+ with torch.no_grad():
91
+ model.eval()
92
+ self.validate(training_step)
93
+ save_checkpoint(model, optimizer, self.work_dir, training_step, hparams['num_ckpt_keep'])
94
+ model.train()
95
+ batch = move_to_cuda(batch)
96
+ losses, total_loss = self.training_step(batch)
97
+ optimizer.zero_grad()
98
+
99
+ total_loss.backward()
100
+ optimizer.step()
101
+ training_step += 1
102
+ scheduler.step(training_step)
103
+ self.global_step = training_step
104
+ if training_step % 100 == 0:
105
+ self.log_metrics({f'tr/{k}': v for k, v in losses.items()}, training_step)
106
+ train_pbar.set_postfix(**tensors_to_scalars(losses))
107
+
108
+ def validate(self, training_step):
109
+ val_dataloader = self.build_val_dataloader()
110
+ pbar = tqdm(enumerate(val_dataloader), total=len(val_dataloader))
111
+ metrics = {}
112
+ for batch_idx, batch in pbar:
113
+ # 每次运行的第一次validation只跑一小部分数据,来验证代码能否跑通
114
+ if self.first_val and batch_idx > hparams['num_sanity_val_steps'] - 1:
115
+ break
116
+ batch = move_to_cuda(batch)
117
+ img, rrdb_out, ret = self.sample_and_test(batch)
118
+ img_hr = batch['img_hr']
119
+ img_lr = batch['img_lr']
120
+ img_lr_up = batch['img_lr_up']
121
+ if img is not None:
122
+ self.logger.add_image(f'Pred_{batch_idx}', plot_img(img[0]), self.global_step)
123
+ if hparams.get('aux_l1_loss'):
124
+ self.logger.add_image(f'rrdb_out_{batch_idx}', plot_img(rrdb_out[0]), self.global_step)
125
+ if self.global_step <= hparams['val_check_interval']:
126
+ self.logger.add_image(f'HR_{batch_idx}', plot_img(img_hr[0]), self.global_step)
127
+ self.logger.add_image(f'LR_{batch_idx}', plot_img(img_lr[0]), self.global_step)
128
+ self.logger.add_image(f'BL_{batch_idx}', plot_img(img_lr_up[0]), self.global_step)
129
+ metrics = {}
130
+ metrics.update({k: np.mean(ret[k]) for k in self.metric_keys})
131
+ pbar.set_postfix(**tensors_to_scalars(metrics))
132
+ if hparams['infer']:
133
+ print('Val results:', metrics)
134
+ else:
135
+ if not self.first_val:
136
+ self.log_metrics({f'val/{k}': v for k, v in metrics.items()}, training_step)
137
+ print('Val results:', metrics)
138
+ else:
139
+ print('Sanity val results:', metrics)
140
+ self.first_val = False
141
+
142
+ def build_test_my_dataloader(self, data_name):
143
+ return torch.utils.data.DataLoader(
144
+ self.dataset_cls(data_name), batch_size=hparams['eval_batch_size'], shuffle=False, pin_memory=False)
145
+
146
+ def benchmark(self, benchmark_name_list, metric_list):
147
+ from sam_diffsr.tools.caculate_iqa import eval_img_IQA
148
+
149
+ model = self.build_model()
150
+ optimizer = self.build_optimizer(model)
151
+ training_step = load_checkpoint(model, optimizer, hparams['work_dir'], hparams['val_steps'])
152
+ self.global_step = training_step
153
+
154
+ optimizer = None
155
+
156
+ for data_name in benchmark_name_list:
157
+ test_dataloader = self.build_test_my_dataloader(data_name)
158
+
159
+ self.results = {k: 0 for k in self.metric_keys}
160
+ self.n_samples = 0
161
+ self.gen_dir = f"{hparams['work_dir']}/results_{self.global_step}_{hparams['gen_dir_name']}/benchmark/{data_name}"
162
+ if hparams['test_save_png']:
163
+ subprocess.check_call(f'rm -rf {self.gen_dir}', shell=True)
164
+ os.makedirs(f'{self.gen_dir}/outputs', exist_ok=True)
165
+ os.makedirs(f'{self.gen_dir}/SR', exist_ok=True)
166
+
167
+ self.model.sample_tqdm = False
168
+ torch.backends.cudnn.benchmark = False
169
+ if hparams['test_save_png']:
170
+ if hasattr(self.model.denoise_fn, 'make_generation_fast_'):
171
+ self.model.denoise_fn.make_generation_fast_()
172
+ os.makedirs(f'{self.gen_dir}/HR', exist_ok=True)
173
+
174
+ result_dict = {}
175
+
176
+ with torch.no_grad():
177
+ model.eval()
178
+ pbar = tqdm(enumerate(test_dataloader), total=len(test_dataloader))
179
+ for batch_idx, batch in pbar:
180
+ move_to_cuda(batch)
181
+ gen_dir = self.gen_dir
182
+ item_names = batch['item_name']
183
+ img_hr = batch['img_hr']
184
+ img_lr = batch['img_lr']
185
+ img_lr_up = batch['img_lr_up']
186
+
187
+ res = self.sample_and_test(batch)
188
+ if len(res) == 3:
189
+ img_sr, rrdb_out, ret = res
190
+ else:
191
+ img_sr, ret = res
192
+ rrdb_out = img_sr
193
+
194
+ img_lr_up = batch.get('img_lr_up', img_lr_up)
195
+ if img_sr is not None:
196
+ metrics = list(self.metric_keys)
197
+ result_dict[batch['item_name'][0]] = {}
198
+ for k in metrics:
199
+ self.results[k] += ret[k]
200
+ result_dict[batch['item_name'][0]][k] = ret[k]
201
+ self.n_samples += ret['n_samples']
202
+
203
+ print({k: round(self.results[k] / self.n_samples, 3) for k in self.results}, 'total:',
204
+ self.n_samples)
205
+
206
+ if hparams['test_save_png'] and img_sr is not None:
207
+ img_sr = self.tensor2img(img_sr)
208
+ img_hr = self.tensor2img(img_hr)
209
+ img_lr = self.tensor2img(img_lr)
210
+ img_lr_up = self.tensor2img(img_lr_up)
211
+ rrdb_out = self.tensor2img(rrdb_out)
212
+ for item_name, hr_p, hr_g, lr, lr_up, rrdb_o in zip(
213
+ item_names, img_sr, img_hr, img_lr, img_lr_up, rrdb_out):
214
+ item_name = os.path.splitext(item_name)[0]
215
+ hr_p = Image.fromarray(hr_p)
216
+ hr_g = Image.fromarray(hr_g)
217
+ hr_p.save(f"{gen_dir}/SR/{item_name}.png")
218
+ hr_g.save(f"{gen_dir}/HR/{item_name}.png")
219
+
220
+ exp_name = hparams['work_dir'].split('/')[-1]
221
+ sr_img_dir = f"{gen_dir}/SR/"
222
+ gt_img_dir = f"{gen_dir}/HR/"
223
+ excel_path = f"{hparams['work_dir']}/IQA-val-benchmark-{exp_name}.xlsx"
224
+ epoch = training_step
225
+ eval_img_IQA(gt_img_dir, sr_img_dir, excel_path, metric_list, epoch, data_name)
226
+
227
+ os.makedirs(f'{self.gen_dir}', exist_ok=True)
228
+ eval_json_path = os.path.join(self.gen_dir, 'eval.json')
229
+ avg_result = {k: round(self.results[k] / self.n_samples, 4) for k in self.results}
230
+ with open(eval_json_path, 'w+') as file:
231
+ json.dump(avg_result, file, sort_keys=True, indent=4, separators=(',', ': '), ensure_ascii=False)
232
+ json.dump(result_dict, file, sort_keys=True, indent=4, separators=(',', ': '), ensure_ascii=False)
233
+
234
+ def benchmark_loop(self, benchmark_name_list, metric_list, gt_path):
235
+ # infer and evaluation all save checkpoint
236
+ from sam_diffsr.tools.caculate_iqa import eval_img_IQA
237
+
238
+ model = self.build_model()
239
+
240
+ def get_checkpoint(model, checkpoint):
241
+ stat_dict = checkpoint['state_dict']['model']
242
+
243
+ new_state_dict = OrderedDict()
244
+ for k, v in stat_dict.items():
245
+ if k[:7] == 'module.':
246
+ k = k[7:] # strip the `module.` prefix
247
+ new_state_dict[k] = v
248
+
249
+ model.load_state_dict(new_state_dict)
250
+ model.cuda()
251
+ training_step = checkpoint['global_step']
252
+ del checkpoint
253
+ torch.cuda.empty_cache()
254
+
255
+ return training_step
256
+
257
+ ckpt_paths = get_all_ckpts(hparams['work_dir'])
258
+ for ckpt_path in ckpt_paths:
259
+ checkpoint = torch.load(ckpt_path, map_location='cpu')
260
+ training_step = get_checkpoint(model, checkpoint)
261
+
262
+ self.global_step = training_step
263
+
264
+ for data_name in benchmark_name_list:
265
+ test_dataloader = self.build_test_my_dataloader(data_name)
266
+
267
+ self.results = {k: 0 for k in self.metric_keys + self.metric_2_keys}
268
+ self.n_samples = 0
269
+ self.gen_dir = f"{hparams['work_dir']}/results_{training_step}_{hparams['gen_dir_name']}/benchmark/{data_name}"
270
+
271
+ os.makedirs(f'{self.gen_dir}/outputs', exist_ok=True)
272
+ os.makedirs(f'{self.gen_dir}/SR', exist_ok=True)
273
+
274
+ self.model.sample_tqdm = False
275
+ torch.backends.cudnn.benchmark = False
276
+
277
+ with torch.no_grad():
278
+ model.eval()
279
+ pbar = tqdm(enumerate(test_dataloader), total=len(test_dataloader))
280
+ for batch_idx, batch in pbar:
281
+ move_to_cuda(batch)
282
+ gen_dir = self.gen_dir
283
+ item_names = batch['item_name']
284
+
285
+ res = self.sample_and_test(batch)
286
+ if len(res) == 3:
287
+ img_sr, rrdb_out, ret = res
288
+ else:
289
+ img_sr, ret = res
290
+ rrdb_out = img_sr
291
+
292
+ img_sr = self.tensor2img(img_sr)
293
+
294
+ for item_name, hr_p in zip(item_names, img_sr):
295
+ item_name = os.path.splitext(item_name)[0]
296
+ hr_p = Image.fromarray(hr_p)
297
+ hr_p.save(f"{gen_dir}/SR/{item_name}.png")
298
+
299
+ exp_name = hparams['work_dir'].split('/')[-1]
300
+ sr_img_dir = f"{gen_dir}/SR/"
301
+ gt_img_dir = f"{gt_path}/{data_name}/HR"
302
+ excel_path = f"{hparams['work_dir']}/IQA-val-benchmark_loop-{exp_name}.xlsx"
303
+ epoch = training_step
304
+ eval_img_IQA(gt_img_dir, sr_img_dir, excel_path, metric_list, epoch, data_name)
305
+
306
+ # utils_sr
307
+ def log_metrics(self, metrics, step):
308
+ metrics = self.metrics_to_scalars(metrics)
309
+ logger = self.logger
310
+ for k, v in metrics.items():
311
+ if isinstance(v, torch.Tensor):
312
+ v = v.item()
313
+ logger.add_scalar(k, v, step)
314
+
315
+ def metrics_to_scalars(self, metrics):
316
+ new_metrics = {}
317
+ for k, v in metrics.items():
318
+ if isinstance(v, torch.Tensor):
319
+ v = v.item()
320
+
321
+ if type(v) is dict:
322
+ v = self.metrics_to_scalars(v)
323
+
324
+ new_metrics[k] = v
325
+
326
+ return new_metrics
327
+
328
+ @staticmethod
329
+ def tensor2img(img):
330
+ img = np.round((img.permute(0, 2, 3, 1).cpu().numpy() + 1) * 127.5)
331
+ img = img.clip(min=0, max=255).astype(np.uint8)
332
+ return img
333
+
334
+
335
+ if __name__ == '__main__':
336
+ set_hparams()
337
+
338
+ pkg = ".".join(hparams["trainer_cls"].split(".")[:-1])
339
+ cls_name = hparams["trainer_cls"].split(".")[-1]
340
+ trainer = getattr(importlib.import_module(pkg), cls_name)()
341
+ if hparams['benchmark_loop']:
342
+ trainer.benchmark_loop(hparams['benchmark_name_list'], hparams['metric_list'], hparams['gt_img_path'])
343
+ elif hparams['benchmark']:
344
+ trainer.benchmark(hparams['benchmark_name_list'], hparams['metric_list'])
345
+ else:
346
+ trainer.train()
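
tensor2img above maps model outputs from the [-1, 1] training range back to uint8 HWC images. A tiny sketch of that conversion on a dummy batch:

import numpy as np
import torch

x = torch.full((1, 3, 2, 2), 0.5)  # dummy NCHW output in [-1, 1]
img = np.round((x.permute(0, 2, 3, 1).cpu().numpy() + 1) * 127.5)
img = img.clip(min=0, max=255).astype(np.uint8)
print(img.shape, int(img.max()))   # (1, 2, 2, 3) 191
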
sam_diffsr/tb_logs/events.out.tfevents.1709283169.wangchengchengdeMacBook-Pro.local.99018.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aba4ab7fc71e002fcd70117a9bb9ad042341fc80f7d51f5bd4bd9610c508a655
3
+ size 88
sam_diffsr/tb_logs/events.out.tfevents.1709284054.wangchengchengdeMacBook-Pro.local.99188.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13225cd295f9d05736be890cd9b70fdf332846531c35760d37b7aae5ca02e584
3
+ size 88
sam_diffsr/tb_logs/events.out.tfevents.1709284076.wangchengchengdeMacBook-Pro.local.99198.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf4eadf9f990294906e556c51d92636c7af10c6aec626003341eb0d7b3f5bc38
3
+ size 88
sam_diffsr/tb_logs/events.out.tfevents.1709284101.wangchengchengdeMacBook-Pro.local.99211.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:687ac32fefdcbdb917bfa7c9197f8e64b050a506f9676d365f23484183da5629
3
+ size 88
sam_diffsr/tb_logs/events.out.tfevents.1709284193.wangchengchengdeMacBook-Pro.local.99233.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fd3acbfec3acc9d5d582e81cd6819b2e5512f45bbf85918b89e6ea371ffbdf2
3
+ size 88
sam_diffsr/tb_logs/events.out.tfevents.1709284415.wangchengchengdeMacBook-Pro.local.99289.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5867fd2539cfbb896867c770c5c7c0f5d9687e29ea0322646fe820dae1cae08c
3
+ size 88
sam_diffsr/tb_logs/events.out.tfevents.1709284460.wangchengchengdeMacBook-Pro.local.99308.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7999747d297acf0c198f9b8739134ee8ba7d4bc4fae209e47fc4bbd09a7206c
3
+ size 88
sam_diffsr/tb_logs/events.out.tfevents.1709284491.wangchengchengdeMacBook-Pro.local.99315.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:172e36eb89b0238ad5cdac335de91bc2721a1944c5e9807027a6c3eeea64f918
3
+ size 88
sam_diffsr/tb_logs/events.out.tfevents.1709285127.wangchengchengdeMacBook-Pro.local.785.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f51161b5901b64dd694dd06243d6143281b4df0f839ec89601407aad680cfc34
3
+ size 88
sam_diffsr/tb_logs/events.out.tfevents.1709285146.wangchengchengdeMacBook-Pro.local.901.0 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:776fa889c3c898a1b157f45aed579d9976791c718b507508421100c03baeb401
3
+ size 88
sam_diffsr/tools/caculate_iqa.py ADDED
@@ -0,0 +1,136 @@
1
+ import os
2
+ import ssl
3
+ from os.path import join
4
+ from pathlib import Path
5
+ from statistics import mean
6
+
7
+ parent_path = Path(__file__).absolute().parent.parent
8
+ parent_path = os.path.abspath(parent_path)
9
+
10
+ os.environ["CURL_CA_BUNDLE"] = ""
11
+ ssl._create_default_https_context = ssl._create_unverified_context
12
+
13
+ cache_path = os.path.join(parent_path, 'cache')
14
+ os.environ["HF_DATASETS_CACHE"] = cache_path
15
+ os.environ["TRANSFORMERS_CACHE"] = cache_path
16
+ os.environ["torch_HOME"] = cache_path
17
+
18
+ import PIL
19
+ import numpy as np
20
+ import pandas as pd
21
+ import pyiqa
22
+ import torch
23
+ from PIL import Image
24
+ from tqdm import tqdm
25
+
26
+ device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
27
+
28
+ metric_dict = {
29
+ 'psnr-Y': pyiqa.create_metric('psnr', test_y_channel=True, color_space='ycbcr'),
30
+ 'ssim': pyiqa.create_metric('ssim', color_space='ycbcr'),
31
+ 'fid': pyiqa.create_metric('fid'),
32
+ }
33
+
34
+
35
+ def load_img(path, target_size=None):
36
+ image = Image.open(path).convert("RGB")
37
+ if target_size:
38
+ h, w = target_size
39
+ image = image.resize((w, h), resample=PIL.Image.LANCZOS)
40
+ image = np.array(image).astype(np.float32) / 255.0
41
+ image = image[None].transpose(0, 3, 1, 2)
42
+ image = torch.from_numpy(image)
43
+ return image
44
+
45
+
46
+ def eval_img_IQA(gt_dir, sr_dir, excel_path, metric_list, exp_name, data_name):
47
+ gt_img_list = os.listdir(gt_dir)
48
+
49
+ iqa_result = {}
50
+
51
+ for metric in metric_list:
52
+ iqa_metric = metric_dict[metric].to(device)
53
+ score_fr_list = []
54
+
55
+ if metric == 'fid':
56
+ score_fr = iqa_metric(sr_dir, gt_dir)
57
+ iqa_result[metric] = float(score_fr)
58
+ print(f'{metric}: {float(score_fr)}')
59
+ else:
60
+ for img_name in tqdm(gt_img_list):
61
+ base_name = img_name.split('.')[0]
62
+ sr_img_name = f'{base_name}.png'
63
+ gt_img_path = join(gt_dir, img_name)
64
+ sr_img_path = join(sr_dir, sr_img_name)
65
+
66
+ if not os.path.exists(sr_img_path):
67
+ print(f'File not exist: {sr_img_path}')
68
+ continue
69
+
70
+ gt_img = load_img(gt_img_path, target_size=None)
71
+ target_size = gt_img.shape[2:]
72
+ sr_img = load_img(sr_img_path, target_size=target_size)
73
+
74
+ score_fr = iqa_metric(sr_img, gt_img)
75
+
76
+ if score_fr.shape == (1,):
77
+ score_fr = score_fr[0]
78
+ if isinstance(score_fr, torch.Tensor):
79
+ score_fr = float(score_fr.cpu().numpy())
80
+ else:
81
+ score_fr = float(score_fr)
82
+ score_fr_list.append(score_fr)
83
+
84
+ mean_score = mean(score_fr_list)
85
+ iqa_result[metric] = float(mean_score)
86
+ print(f'{metric}: {mean_score}')
87
+
88
+ if os.path.exists(excel_path):
89
+ df = pd.read_excel(excel_path)
90
+ else:
91
+ df = pd.DataFrame(columns=['exp'])
92
+
93
+ new_index = len(df.index)
94
+
95
+ exp_name = int(exp_name)
96
+ if exp_name in df['exp'].to_list():
97
+ new_index = df[df['exp'] == exp_name].index.tolist()[0]
98
+ else:
99
+ df.loc[new_index, 'exp'] = exp_name
100
+
101
+ for index, metric in enumerate(metric_list):
102
+ df_metric = f'{data_name}-{metric}'
103
+ if df_metric not in df.columns.tolist():
104
+ df[df_metric] = ''
105
+
106
+ df.loc[new_index, df_metric] = iqa_result[metric]
107
+
108
+ df.sort_values(by='exp', inplace=True)
109
+
110
+ df.to_excel(excel_path, startcol=0, index=False)
111
+
112
+
113
+ def main():
114
+ epoch = 400000
115
+ add_name = ''
116
+ exp_root = '/home/ma-user/work/code/SRDiff-main/checkpoints'
117
+
118
+ model_type_list = ['diffsr_df2k4x_sam-pl_qs-zero']
119
+
120
+ metric_list = ['psnr-Y', 'ssim', 'fid']
121
+ benchmark_name_list = ['test_Set5', 'test_Set14', 'test_Urban100', 'test_Manga109', 'test_BSDS100']
122
+
123
+ # if benchmark:
124
+ for model_type in model_type_list:
125
+ excel_path = join(exp_root, model_type, f'IQA-val-{model_type}.xls')
126
+ for benchmark_name in benchmark_name_list:
127
+ exp_dir = join(exp_root, f'{model_type}/results_{epoch}_{add_name}/benchmark/{benchmark_name}')
128
+ gt_img_dir = join(exp_dir, 'HR')
129
+ sr_img_dir = join(exp_dir, 'SR')
130
+
131
+ data_name = benchmark_name[5:]
132
+ eval_img_IQA(gt_img_dir, sr_img_dir, excel_path, metric_list, epoch, data_name)
133
+
134
+
135
+ if __name__ == '__main__':
136
+ main()
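
eval_img_IQA scores SR/GT pairs with pyiqa metrics applied to NCHW tensors in [0, 1]. A hedged sketch of scoring a single pair the same way, assuming pyiqa is installed; the random tensors stand in for load_img outputs:

import pyiqa
import torch

psnr_y = pyiqa.create_metric('psnr', test_y_channel=True, color_space='ycbcr')
sr = torch.rand(1, 3, 128, 128)  # stand-in for load_img(sr_img_path, ...)
gt = torch.rand(1, 3, 128, 128)  # stand-in for load_img(gt_img_path)
print(float(psnr_y(sr, gt)))
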
sam_diffsr/tools/visualize_sam_mask.py ADDED
@@ -0,0 +1,20 @@
1
+ import glob
2
+ import os
3
+
4
+ import numpy as np
5
+ from matplotlib import pyplot as plt
6
+ from tqdm import tqdm
7
+
8
+ num = '0824'
9
+
10
+ sam_npy = '/home/ma-user/work/data/sr_sam/merge_RoPE/DF2K/DF2K_train_HR'
11
+ save_dir = '/home/ma-user/work/data/sr_sam/merge_RoPE/vis/DF2K/DF2K_train_HR'
12
+
13
+ os.makedirs(save_dir, exist_ok=True)
14
+
15
+ for file in tqdm(glob.glob(f'{sam_npy}/*.npy')):
16
+ name = os.path.basename(file).split('.')[0]
17
+ save_path = os.path.join(save_dir, f'{name}.png')
18
+ img = np.load(file)
19
+ plt.imshow(img)
20
+ plt.savefig(save_path)
sam_diffsr/utils_sr/__init__.py ADDED
File without changes
sam_diffsr/utils_sr/dataset.py ADDED
@@ -0,0 +1,50 @@
1
+ import numpy as np
2
+ from PIL import Image
3
+ from torch.utils.data import Dataset
4
+ from torchvision import transforms
5
+
6
+ from .hparams import hparams
7
+ from .indexed_datasets import IndexedDataset
8
+ from .matlab_resize import imresize
9
+
10
+
11
+ class SRDataSet(Dataset):
12
+ def __init__(self, prefix='train'):
13
+ self.hparams = hparams
14
+ self.data_dir = hparams['binary_data_dir']
15
+ self.prefix = prefix
16
+ self.len = len(IndexedDataset(f'{self.data_dir}/{self.prefix}'))
17
+ self.to_tensor_norm = transforms.Compose([
18
+ transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
19
+ ])
20
+ assert hparams['data_interp'] in ['bilinear', 'bicubic']
21
+ self.data_augmentation = hparams['data_augmentation']
22
+ self.indexed_ds = None
23
+ if self.prefix == 'valid':
24
+ self.len = hparams['eval_batch_size'] * hparams['valid_steps']
25
+
26
+ def _get_item(self, index):
27
+ if self.indexed_ds is None:
28
+ self.indexed_ds = IndexedDataset(f'{self.data_dir}/{self.prefix}')
29
+ return self.indexed_ds[index]
30
+
31
+ def __getitem__(self, index):
32
+ item = self._get_item(index)
33
+ hparams = self.hparams
34
+ img_hr = item['img']
35
+ img_hr = Image.fromarray(np.uint8(img_hr))
36
+ img_hr = self.pre_process(img_hr) # PIL
37
+ img_hr = np.asarray(img_hr) # np.uint8 [H, W, C]
38
+ img_lr = imresize(img_hr, 1 / hparams['sr_scale'], method=hparams['data_interp']) # np.uint8 [H, W, C]
39
+ img_lr_up = imresize(img_lr / 256, hparams['sr_scale']) # np.float [H, W, C]
40
+ img_hr, img_lr, img_lr_up = [self.to_tensor_norm(x).float() for x in [img_hr, img_lr, img_lr_up]]
41
+ return {
42
+ 'img_hr': img_hr, 'img_lr': img_lr, 'img_lr_up': img_lr_up,
43
+ 'item_name': item['item_name']
44
+ }
45
+
46
+ def pre_process(self, img_hr):
47
+ return img_hr
48
+
49
+ def __len__(self):
50
+ return self.len
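
to_tensor_norm above turns uint8 HWC images in [0, 255] into float CHW tensors in [-1, 1]. A minimal check on a random patch:

import numpy as np
from torchvision import transforms

to_tensor_norm = transforms.Compose([
    transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
img = np.random.randint(0, 256, size=(32, 32, 3), dtype=np.uint8)  # dummy HR patch
t = to_tensor_norm(img).float()
print(t.shape, float(t.min()), float(t.max()))  # torch.Size([3, 32, 32]), values within [-1, 1]
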
sam_diffsr/utils_sr/hparams.py ADDED
@@ -0,0 +1,157 @@
1
+ import argparse
2
+ import os
3
+ from pathlib import Path
4
+
5
+ import yaml
6
+
7
+ global_print_hparams = True
8
+ hparams = {}
9
+
10
+
11
+ class Args:
12
+ def __init__(self, **kwargs):
13
+ for k, v in kwargs.items():
14
+ self.__setattr__(k, v)
15
+
16
+
17
+ def override_config(old_config: dict, new_config: dict):
18
+ for k, v in new_config.items():
19
+ if isinstance(v, dict) and k in old_config:
20
+ override_config(old_config[k], new_config[k])
21
+ else:
22
+ old_config[k] = v
23
+
24
+
25
+ def set_hparams(config='', exp_name='', hparams_str='', print_hparams=True, global_hparams=True):
26
+ parent_path = Path(__file__).absolute().parent.parent
27
+ fill_root = os.path.abspath(parent_path)
28
+
29
+ if config == '' and exp_name == '':
30
+ parser = argparse.ArgumentParser(description='')
31
+ parser.add_argument('--config', type=str, default=os.path.join(fill_root, 'configs/sam/sam_diffsr_df2k4x.yaml'),
32
+ help='location of the data corpus')
33
+ parser.add_argument('--exp_name', type=str, default='', help='exp_name')
34
+ parser.add_argument('--work_dir', type=str, default='', help='work dir')
35
+ parser.add_argument('--gt_img_path', type=str, default='data/sr_diff/benchmark', help='gt_img_path')
36
+ parser.add_argument('-hp', '--hparams', type=str, default='',
37
+ help='override hparams from the command line, e.g. "a=1,b.c=2"')
38
+ parser.add_argument('--infer', action='store_true', help='infer')
39
+ parser.add_argument('--benchmark', action='store_true', help='test benchmark')
40
+ parser.add_argument('--benchmark_loop', action='store_true', help='loop test benchmark for all checkpoint')
41
+ parser.add_argument('--benchmark_name_list', nargs='+',
42
+ default=['test_Set5', 'test_Set14', 'test_Urban100', 'test_Manga109', 'test_BSDS100'])
43
+ parser.add_argument('--metric_list', nargs='+', default=['psnr-Y', 'ssim', 'fid'])
44
+ parser.add_argument('--validate', action='store_true', help='validate')
45
+ parser.add_argument('--val_steps', type=int, default=None, help='validate steps')
46
+ parser.add_argument('--reset', action='store_true', help='reset hparams')
47
+ parser.add_argument('--debug', action='store_true', help='debug')
48
+
49
+ parser.add_argument('--img_dir', type=str, default='', help='infer input image dir')
50
+ parser.add_argument('--save_dir', type=str, default='', help='infer output image dir')
51
+ parser.add_argument('--ckpt_path', type=str, default='', help='infer ckpt path')
52
+
53
+
54
+ args, unknown = parser.parse_known_args()
55
+ print("| Unknow hparams: ", unknown)
56
+ else:
57
+ args = Args(config=config, exp_name=exp_name, hparams=hparams_str,
58
+ infer=False, validate=False, reset=False, debug=False)
59
+ global hparams
60
+ assert args.config != '' or args.exp_name != ''
61
+ if args.config != '':
62
+ assert os.path.exists(args.config)
63
+
64
+ config_chains = []
65
+ loaded_config = set()
66
+
67
+ def load_config(config_fn):
68
+ # deep first inheritance and avoid the second visit of one node
69
+ if not os.path.exists(config_fn):
70
+ return {}
71
+ with open(config_fn) as f:
72
+ hparams_ = yaml.safe_load(f)
73
+ loaded_config.add(config_fn)
74
+ if 'base_config' in hparams_:
75
+ ret_hparams = {}
76
+ if not isinstance(hparams_['base_config'], list):
77
+ hparams_['base_config'] = [hparams_['base_config']]
78
+ for c in hparams_['base_config']:
79
+ if c.startswith('.'):
80
+ c = f'{os.path.dirname(config_fn)}/{c}'
81
+ c = os.path.normpath(c)
82
+ if c not in loaded_config:
83
+ override_config(ret_hparams, load_config(c))
84
+ override_config(ret_hparams, hparams_)
85
+ else:
86
+ ret_hparams = hparams_
87
+ config_chains.append(config_fn)
88
+ return ret_hparams
89
+
90
+ saved_hparams = {}
91
+ args_work_dir = ''
92
+ if args.exp_name != '':
93
+ args_work_dir = os.path.join(args.work_dir, 'checkpoints', args.exp_name)
94
+ ckpt_config_path = f'{args_work_dir}/config.yaml'
95
+ if os.path.exists(ckpt_config_path):
96
+ with open(ckpt_config_path) as f:
97
+ saved_hparams_ = yaml.safe_load(f)
98
+ if saved_hparams_ is not None:
99
+ saved_hparams.update(saved_hparams_)
100
+ hparams_ = {}
101
+ if args.config != '':
102
+ hparams_.update(load_config(args.config))
103
+ if not args.reset:
104
+ hparams_.update(saved_hparams)
105
+ hparams_['work_dir'] = args_work_dir
106
+
107
+ # Support config overriding in command line. Support list type config overriding.
108
+ # Examples: --hparams="a=1,b.c=2,d=[1 1 1]"
109
+ if args.hparams != "":
110
+ for new_hparam in args.hparams.split(","):
111
+ k, v = new_hparam.split("=")
112
+ v = v.strip("\'\" ")
113
+ config_node = hparams_
114
+ for k_ in k.split(".")[:-1]:
115
+ config_node = config_node[k_]
116
+ k = k.split(".")[-1]
117
+ if k not in config_node:
118
+ config_node[k] = v
119
+
120
+ elif v in ['True', 'False'] or type(config_node[k]) in [bool, list, dict]:
121
+ if type(config_node[k]) == list:
122
+ v = v.replace(" ", ",")
123
+ config_node[k] = eval(v)
124
+ else:
125
+ config_node[k] = type(config_node[k])(v)
126
+ if args_work_dir != '' and (not os.path.exists(ckpt_config_path) or args.reset) and not args.infer:
127
+ os.makedirs(hparams_['work_dir'], exist_ok=True)
128
+ with open(ckpt_config_path, 'w') as f:
129
+ yaml.safe_dump(hparams_, f)
130
+
131
+ hparams_['infer'] = args.infer
132
+ hparams_['debug'] = args.debug
133
+ hparams_['validate'] = args.validate
134
+ hparams_['exp_name'] = args.exp_name
135
+ hparams_['val_steps'] = args.val_steps
136
+ hparams_['benchmark'] = args.benchmark
137
+ hparams_['benchmark_loop'] = args.benchmark_loop
138
+ hparams_['benchmark_name_list'] = args.benchmark_name_list
139
+ hparams_['gt_img_path'] = args.gt_img_path
140
+ hparams_['metric_list'] = args.metric_list
141
+
142
+ hparams_['img_dir'] = args.img_dir
143
+ hparams_['save_dir'] = args.save_dir
144
+ hparams_['ckpt_path'] = args.ckpt_path
145
+
146
+ global global_print_hparams
147
+ if global_hparams:
148
+ hparams.clear()
149
+ hparams.update(hparams_)
150
+ if print_hparams and global_print_hparams and global_hparams:
151
+ print('| Hparams chains: ', config_chains)
152
+ print('| Hparams: ')
153
+ for i, (k, v) in enumerate(sorted(hparams_.items())):
154
+ print(f"\033[;33;m{k}\033[0m: {v}, ", end="\n" if i % 5 == 4 else "")
155
+ print("")
156
+ global_print_hparams = False
157
+ return hparams_
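
The --hparams="a=1,b.c=2" override walks dotted keys into the nested config and casts each value to the type already stored there. A simplified sketch of that parsing (it omits the list handling and the new-key branch of the real code; the config values are made up):

config = {'lr': 2e-4, 'sam_config': {'mask_RoPE': False}}
override = "sam_config.mask_RoPE=True,lr=1e-4"

for new_hparam in override.split(","):
    k, v = new_hparam.split("=")
    node = config
    for k_ in k.split(".")[:-1]:
        node = node[k_]
    k = k.split(".")[-1]
    node[k] = eval(v) if v in ['True', 'False'] else type(node[k])(v)

print(config)  # {'lr': 0.0001, 'sam_config': {'mask_RoPE': True}}
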
sam_diffsr/utils_sr/indexed_datasets.py ADDED
@@ -0,0 +1,72 @@
1
+ import pickle
2
+
3
+ import numpy as np
4
+
5
+
6
+ class IndexedDataset:
7
+ def __init__(self, path):
8
+ super().__init__()
9
+ self.path = path
10
+ self.data_file = None
11
+ index_data = np.load(f"{path}.idx", allow_pickle=True).item()
12
+ self.byte_offsets = index_data['offsets']
13
+ self.id2pos = index_data.get('id2pos', {})
14
+ self.data_file = open(f"{path}.data", 'rb', buffering=-1)
15
+
16
+ def check_index(self, i):
17
+ if i < 0 or i >= len(self.byte_offsets) - 1:
18
+ raise IndexError('index out of range')
19
+
20
+ def __del__(self):
21
+ if self.data_file:
22
+ self.data_file.close()
23
+
24
+ def __getitem__(self, i):
25
+ if self.id2pos is not None and len(self.id2pos) > 0:
26
+ i = self.id2pos[i]
27
+ self.check_index(i)
28
+ self.data_file.seek(self.byte_offsets[i])
29
+ b = self.data_file.read(self.byte_offsets[i + 1] - self.byte_offsets[i])
30
+ item = pickle.loads(b)
31
+ return item
32
+
33
+ def __len__(self):
34
+ return len(self.byte_offsets) - 1
35
+
36
+ def __iter__(self):
37
+ self.iter_i = 0
38
+ return self
39
+
40
+ def __next__(self):
41
+ if self.iter_i == len(self):
42
+ raise StopIteration
43
+ else:
44
+ item = self[self.iter_i]
45
+ self.iter_i += 1
46
+ return item
47
+
48
+
49
+ class IndexedDatasetBuilder:
50
+ def __init__(self, path, append=False):
51
+ self.path = path
52
+ if append:
53
+ self.data_file = open(f"{path}.data", 'ab')
54
+ index_data = np.load(f"{path}.idx", allow_pickle=True).item()
55
+ self.byte_offsets = index_data['offsets']
56
+ self.id2pos = index_data.get('id2pos', {})
57
+ else:
58
+ self.data_file = open(f"{path}.data", 'wb')
59
+ self.byte_offsets = [0]
60
+ self.id2pos = {}
61
+
62
+ def add_item(self, item, id=None):
63
+ s = pickle.dumps(item)
64
+ bytes = self.data_file.write(s)
65
+ if id is not None:
66
+ self.id2pos[id] = len(self.byte_offsets) - 1
67
+ self.byte_offsets.append(self.byte_offsets[-1] + bytes)
68
+
69
+ def finalize(self):
70
+ self.data_file.close()
71
+ np.save(open(f"{self.path}.idx", 'wb'),
72
+ {'offsets': self.byte_offsets, 'id2pos': self.id2pos})
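
A write-then-read round trip for the builder/reader pair above, assuming both classes are importable in the current scope and using a throwaway temporary path:

import os
import tempfile

path = os.path.join(tempfile.mkdtemp(), 'train')

builder = IndexedDatasetBuilder(path)
builder.add_item({'item_name': '0001', 'img': [1, 2, 3]})
builder.add_item({'item_name': '0002', 'img': [4, 5, 6]})
builder.finalize()

ds = IndexedDataset(path)
print(len(ds), ds[0]['item_name'])  # 2 '0001'
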
sam_diffsr/utils_sr/matlab_resize.py ADDED
@@ -0,0 +1,181 @@
1
+ # https://github.com/fatheral/matlab_imresize
2
+ #
3
+ # MIT License
4
+ #
5
+ # Copyright (c) 2020 Alex
6
+ #
7
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
8
+ # of this software and associated documentation files (the "Software"), to deal
9
+ # in the Software without restriction, including without limitation the rights
10
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ # copies of the Software, and to permit persons to whom the Software is
12
+ # furnished to do so, subject to the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be included in all
15
+ # copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ # SOFTWARE.
24
+
25
+
26
+ from __future__ import print_function
27
+
28
+ import numpy as np
29
+ from math import ceil
30
+
31
+
32
+ def deriveSizeFromScale(img_shape, scale):
33
+ output_shape = []
34
+ for k in range(2):
35
+ output_shape.append(int(ceil(scale[k] * img_shape[k])))
36
+ return output_shape
37
+
38
+
39
+ def deriveScaleFromSize(img_shape_in, img_shape_out):
40
+ scale = []
41
+ for k in range(2):
42
+ scale.append(1.0 * img_shape_out[k] / img_shape_in[k])
43
+ return scale
44
+
45
+
46
+ def triangle(x):
47
+ x = np.array(x).astype(np.float64)
48
+ lessthanzero = np.logical_and((x >= -1), x < 0)
49
+ greaterthanzero = np.logical_and((x <= 1), x >= 0)
50
+ f = np.multiply((x + 1), lessthanzero) + np.multiply((1 - x), greaterthanzero)
51
+ return f
52
+
53
+
54
+ def cubic(x):
55
+ x = np.array(x).astype(np.float64)
56
+ absx = np.absolute(x)
57
+ absx2 = np.multiply(absx, absx)
58
+ absx3 = np.multiply(absx2, absx)
59
+ f = np.multiply(1.5 * absx3 - 2.5 * absx2 + 1, absx <= 1) + np.multiply(-0.5 * absx3 + 2.5 * absx2 - 4 * absx + 2,
60
+ (1 < absx) & (absx <= 2))
61
+ return f
62
+
63
+
64
+ def contributions(in_length, out_length, scale, kernel, k_width):
65
+ if scale < 1:
66
+ h = lambda x: scale * kernel(scale * x)
67
+ kernel_width = 1.0 * k_width / scale
68
+ else:
69
+ h = kernel
70
+ kernel_width = k_width
71
+ x = np.arange(1, out_length + 1).astype(np.float64)
72
+ u = x / scale + 0.5 * (1 - 1 / scale)
73
+ left = np.floor(u - kernel_width / 2)
74
+ P = int(ceil(kernel_width)) + 2
75
+ ind = np.expand_dims(left, axis=1) + np.arange(P) - 1 # -1 because indexing from 0
76
+ indices = ind.astype(np.int32)
77
+ weights = h(np.expand_dims(u, axis=1) - indices - 1) # -1 because indexing from 0
78
+ weights = np.divide(weights, np.expand_dims(np.sum(weights, axis=1), axis=1))
79
+ aux = np.concatenate((np.arange(in_length), np.arange(in_length - 1, -1, step=-1))).astype(np.int32)
80
+ indices = aux[np.mod(indices, aux.size)]
81
+ ind2store = np.nonzero(np.any(weights, axis=0))
82
+ weights = weights[:, ind2store]
83
+ indices = indices[:, ind2store]
84
+ return weights, indices
85
+
86
+
87
+ def imresizemex(inimg, weights, indices, dim):
88
+ in_shape = inimg.shape
89
+ w_shape = weights.shape
90
+ out_shape = list(in_shape)
91
+ out_shape[dim] = w_shape[0]
92
+ outimg = np.zeros(out_shape)
93
+ if dim == 0:
94
+ for i_img in range(in_shape[1]):
95
+ for i_w in range(w_shape[0]):
96
+ w = weights[i_w, :]
97
+ ind = indices[i_w, :]
98
+ im_slice = inimg[ind, i_img].astype(np.float64)
99
+ outimg[i_w, i_img] = np.sum(np.multiply(np.squeeze(im_slice, axis=0), w.T), axis=0)
100
+ elif dim == 1:
101
+ for i_img in range(in_shape[0]):
102
+ for i_w in range(w_shape[0]):
103
+ w = weights[i_w, :]
104
+ ind = indices[i_w, :]
105
+ im_slice = inimg[i_img, ind].astype(np.float64)
106
+ outimg[i_img, i_w] = np.sum(np.multiply(np.squeeze(im_slice, axis=0), w.T), axis=0)
107
+ if inimg.dtype == np.uint8:
108
+ outimg = np.clip(outimg, 0, 255)
109
+ return np.around(outimg).astype(np.uint8)
110
+ else:
111
+ return outimg
112
+
113
+
114
+ def imresizevec(inimg, weights, indices, dim):
115
+ wshape = weights.shape
116
+ if dim == 0:
117
+ weights = weights.reshape((wshape[0], wshape[2], 1, 1))
118
+ outimg = np.sum(weights * ((inimg[indices].squeeze(axis=1)).astype(np.float64)), axis=1)
119
+ elif dim == 1:
120
+ weights = weights.reshape((1, wshape[0], wshape[2], 1))
121
+ outimg = np.sum(weights * ((inimg[:, indices].squeeze(axis=2)).astype(np.float64)), axis=2)
122
+ if inimg.dtype == np.uint8:
123
+ outimg = np.clip(outimg, 0, 255)
124
+ return np.around(outimg).astype(np.uint8)
125
+ else:
126
+ return outimg
127
+
128
+
129
+ def resizeAlongDim(A, dim, weights, indices, mode="vec"):
130
+ if mode == "org":
131
+ out = imresizemex(A, weights, indices, dim)
132
+ else:
133
+ out = imresizevec(A, weights, indices, dim)
134
+ return out
135
+
136
+
137
+ def imresize(I, scale=None, method='bicubic', sizes=None, mode="vec"):
138
+ if method == 'bicubic':
139
+ kernel = cubic
140
+ elif method == 'bilinear':
141
+ kernel = triangle
142
+ else:
143
+ print('Error: Unidentified method supplied')
144
+
145
+ kernel_width = 4.0
146
+ # Fill scale and output_size
147
+ if scale is not None:
148
+ scale = float(scale)
149
+ scale = [scale, scale]
150
+ output_size = deriveSizeFromScale(I.shape, scale)
151
+ elif sizes is not None:
152
+ scale = deriveScaleFromSize(I.shape, sizes)
153
+ output_size = list(sizes)
154
+ else:
155
+ print('Error: scalar_scale OR output_shape should be defined!')
156
+ return
157
+ scale_np = np.array(scale)
158
+ order = np.argsort(scale_np)
159
+ weights = []
160
+ indices = []
161
+ for k in range(2):
162
+ w, ind = contributions(I.shape[k], output_size[k], scale[k], kernel, kernel_width)
163
+ weights.append(w)
164
+ indices.append(ind)
165
+ B = np.copy(I)
166
+ flag2D = False
167
+ if B.ndim == 2:
168
+ B = np.expand_dims(B, axis=2)
169
+ flag2D = True
170
+ for k in range(2):
171
+ dim = order[k]
172
+ B = resizeAlongDim(B, dim, weights[dim], indices[dim], mode)
173
+ if flag2D:
174
+ B = np.squeeze(B, axis=2)
175
+ return B
176
+
177
+
178
+ def convertDouble2Byte(I):
179
+ B = np.clip(I, 0.0, 1.0)
180
+ B = 255 * B
181
+ return np.around(B).astype(np.uint8)
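
A quick shape check of imresize above, mirroring how the datasets use it (uint8 input for downscaling, roughly [0, 1) float input for the bicubic upsample):

import numpy as np

img = np.random.randint(0, 256, size=(64, 48, 3), dtype=np.uint8)  # dummy HR image
lr = imresize(img, 1 / 4)   # uint8 output, shape (16, 12, 3)
up = imresize(lr / 256, 4)  # float output, shape (64, 48, 3)
print(lr.shape, lr.dtype, up.shape, up.dtype)
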
sam_diffsr/utils_sr/plt_img.py ADDED
@@ -0,0 +1,109 @@
1
+ import math
2
+ import numpy as np
3
+ import torch
4
+ from matplotlib import pyplot as plt
5
+ from torchvision.utils import make_grid
6
+
7
+
8
+ def tensor2img(tensor, out_type=np.uint8, min_max=(0, 1)):
9
+ """Convert torch Tensors into image numpy arrays.
10
+
11
+ After clamping to (min, max), image values will be normalized to [0, 1].
12
+
13
+ For different tensor shapes, this function will have different behaviors:
14
+
15
+ 1. 4D mini-batch Tensor of shape (N x 3/1 x H x W):
16
+ Use `make_grid` to stitch images in the batch dimension, and then
17
+ convert it to numpy array.
18
+ 2. 3D Tensor of shape (3/1 x H x W) and 2D Tensor of shape (H x W):
19
+ Directly change to numpy array.
20
+
21
+ Note that the image channel in input tensors should be RGB order. This
22
+ function will convert it to cv2 convention, i.e., (H x W x C) with BGR
23
+ order.
24
+
25
+ Args:
26
+ tensor (Tensor | list[Tensor]): Input tensors.
27
+ out_type (numpy type): Output types. If ``np.uint8``, transform outputs
28
+ to uint8 type with range [0, 255]; otherwise, float type with
29
+ range [0, 1]. Default: ``np.uint8``.
30
+ min_max (tuple): min and max values for clamp.
31
+
32
+ Returns:
33
+ (Tensor | list[Tensor]): 3D ndarray of shape (H x W x C) or 2D ndarray
34
+ of shape (H x W).
35
+ """
36
+ if not (torch.is_tensor(tensor) or (isinstance(tensor, list) and all(torch.is_tensor(t) for t in tensor))):
37
+ raise TypeError(
38
+ f'tensor or list of tensors expected, got {type(tensor)}')
39
+
40
+ if torch.is_tensor(tensor):
41
+ tensor = [tensor]
42
+ result = []
43
+ for _tensor in tensor:
44
+ # Squeeze two times so that:
45
+ # 1. (1, 1, h, w) -> (h, w) or
46
+ # 3. (1, 3, h, w) -> (3, h, w) or
47
+ # 2. (n>1, 3/1, h, w) -> (n>1, 3/1, h, w)
48
+ _tensor = _tensor.squeeze(0).squeeze(0)
49
+ _tensor = _tensor.float().detach().cpu().clamp_(*min_max)
50
+ _tensor = (_tensor - min_max[0]) / (min_max[1] - min_max[0])
51
+ n_dim = _tensor.dim()
52
+ if n_dim == 4:
53
+ img_np = make_grid(
54
+ _tensor, nrow=int(math.sqrt(_tensor.size(0))),
55
+ normalize=False).numpy()
56
+ img_np = np.transpose(img_np[[2, 1, 0], :, :], (1, 2, 0))
57
+ elif n_dim == 3:
58
+ img_np = _tensor.numpy()
59
+ img_np = np.transpose(img_np[[2, 1, 0], :, :], (1, 2, 0))
60
+ elif n_dim == 2:
61
+ img_np = _tensor.numpy()
62
+ else:
63
+ raise ValueError('Only support 4D, 3D or 2D tensor. '
64
+ f'But received with dimension: {n_dim}')
65
+ if out_type == np.uint8:
66
+ # Unlike MATLAB, numpy.unit8() WILL NOT round by default.
67
+ img_np = (img_np * 255.0).round()
68
+ img_np = img_np.astype(out_type)
69
+ result.append(img_np)
70
+ result = result[0] if len(result) == 1 else result
71
+ return result
72
+
73
+
74
+ def plt_tensor_img(tensor, save_path=None):
75
+ plt.imshow(tensor2img(tensor))
76
+ plt.show()
77
+
78
+ if save_path:
79
+ plt.savefig(save_path)
80
+
81
+
82
+ def plt_tensor_img_one(tensor, t_dim=1):
83
+ if isinstance(tensor, list):
84
+ tensor = torch.cat(tensor, dim=t_dim)
85
+ nums = tensor.shape[t_dim]
86
+
87
+ mash = math.ceil(math.sqrt(nums))
88
+
89
+ plt.figure(dpi=300)
90
+ plt_range = min(nums, mash ** 2)
91
+ for i in range(plt_range):
92
+ plt.subplot(mash, mash, i + 1)
93
+ if t_dim == 1:
94
+ img = tensor2img(tensor[:, i, ...])
95
+ elif t_dim == 0:
96
+ img = tensor2img(tensor[i, ...])
97
+ plt.imshow(img)
98
+ plt.xticks([])
99
+ plt.yticks([])
100
+ plt.subplots_adjust(wspace=0, hspace=0)
101
+ plt.tight_layout()
102
+ plt.show()
103
+
104
+
105
+ def plt_img(img, save_path=None):
106
+ plt.imshow(img)
107
+ plt.show()
108
+ if save_path:
109
+ plt.savefig(save_path)
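
tensor2img in this module clamps to [0, 1], rescales to [0, 255], and swaps the channel order from RGB to BGR. A short sketch of calling it on a single CHW tensor, assuming the function above is in scope:

import torch

t = torch.rand(3, 8, 8)      # dummy CHW image in [0, 1]
img = tensor2img(t)
print(img.shape, img.dtype)  # (8, 8, 3) uint8, channels in BGR order
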
sam_diffsr/utils_sr/sr_utils.py ADDED
@@ -0,0 +1,171 @@
+ import torch
+ import torch.nn.functional as F
+ import torchvision
+ from torch.autograd import Variable
+ import numpy as np
+ from math import exp
+ import torch.nn as nn
+
+
+ class ImgMerger:
+     def __init__(self, eval_fn):
+         self.eval_fn = eval_fn
+         self.loc2imgs = {}
+         self.max_x = 0
+         self.max_y = 0
+         self.clear()
+
+     def clear(self):
+         self.loc2imgs = {}
+         self.max_x = 0
+         self.max_y = 0
+
+     def push(self, imgs, loc, loc_bdr):
+         """
+
+         Args:
+             imgs: each img is a [C, H, W] np.array, range: [0, 255]
+             loc: (x, y) tuple giving the tile location, e.g., (0, 0), (0, 1) ...
+             loc_bdr: (max_x, max_y) tuple giving the tile grid size
+         """
+         self.max_x, self.max_y = loc_bdr
+         x, y = loc
+         self.loc2imgs[f'{x},{y}'] = imgs
+         if len(self.loc2imgs) == self.max_x * self.max_y:
+             return self.compute()
+
+     def compute(self):
+         img_inputs = []
+         for i in range(len(self.loc2imgs['0,0'])):
+             img_full = []
+             for x in range(self.max_x):
+                 imgx = []
+                 for y in range(self.max_y):
+                     imgx.append(self.loc2imgs[f'{x},{y}'][i])
+                 img_full.append(np.concatenate(imgx, 2))
+             img_inputs.append(np.concatenate(img_full, 1))
+         self.clear()
+         return self.eval_fn(*img_inputs)
+
+
+ ##########
+ # SSIM
+ ##########
+ def gaussian(window_size, sigma):
+     gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)])
+     return gauss / gauss.sum()
+
+
+ def create_window(window_size, channel):
+     _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
+     _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
+     window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous())
+     return window
+
+
+ def _ssim(img1, img2, window, window_size, channel, size_average=True):
+     mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
+     mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
+
+     mu1_sq = mu1.pow(2)
+     mu2_sq = mu2.pow(2)
+     mu1_mu2 = mu1 * mu2
+
+     sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
+     sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
+     sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2
+
+     C1 = 0.01 ** 2
+     C2 = 0.03 ** 2
+
+     ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
+
+     if size_average:
+         return ssim_map.mean()
+     else:
+         return ssim_map.mean(1).mean(1).mean(1)
+
+
+ class SSIM(torch.nn.Module):
+     def __init__(self, window_size=11, size_average=True):
+         super(SSIM, self).__init__()
+         self.window_size = window_size
+         self.size_average = size_average
+         self.channel = 1
+         self.window = create_window(window_size, self.channel)
+
+     def forward(self, img1, img2):
+         img1 = img1 * 0.5 + 0.5
+         img2 = img2 * 0.5 + 0.5
+         (_, channel, _, _) = img1.size()
+
+         if channel == self.channel and self.window.data.type() == img1.data.type():
+             window = self.window
+         else:
+             window = create_window(self.window_size, channel)
+
+             if img1.is_cuda:
+                 window = window.cuda(img1.get_device())
+             window = window.type_as(img1)
+
+             self.window = window
+             self.channel = channel
+
+         return _ssim(img1, img2, window, self.window_size, channel, self.size_average)
+
+
+ def ssim(img1, img2, window_size=11, size_average=True):
+     (_, channel, _, _) = img1.size()
+     window = create_window(window_size, channel)
+
+     if img1.is_cuda:
+         window = window.cuda(img1.get_device())
+     window = window.type_as(img1)
+
+     return _ssim(img1, img2, window, window_size, channel, size_average)
+
+
+ class VGGFeatureExtractor(nn.Module):
+     def __init__(self, feature_layer=34, use_bn=False, use_input_norm=True):
+         super(VGGFeatureExtractor, self).__init__()
+         self.use_input_norm = use_input_norm
+         if use_bn:
+             model = torchvision.models.vgg19_bn(pretrained=True)
+         else:
+             model = torchvision.models.vgg19(pretrained=True)
+         if self.use_input_norm:
+             mean = torch.Tensor([0.485 - 1, 0.456 - 1, 0.406 - 1]).view(1, 3, 1, 1)
+             # mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
+             # [0.485 - 1, 0.456 - 1, 0.406 - 1] if input is in range [-1, 1]
+             std = torch.Tensor([0.229 * 2, 0.224 * 2, 0.225 * 2]).view(1, 3, 1, 1)
+             # std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
+             # [0.229 * 2, 0.224 * 2, 0.225 * 2] if input is in range [-1, 1]
+             self.register_buffer('mean', mean)
+             self.register_buffer('std', std)
+         self.features = nn.Sequential(*list(model.features.children())[:(feature_layer + 1)])
+         # No need to backpropagate through the feature extractor
+         for k, v in self.features.named_parameters():
+             v.requires_grad = False
+
+     def forward(self, x):
+         if self.use_input_norm:
+             x = (x - self.mean) / self.std
+         output = self.features(x)
+         return output
+
+
+ class PerceptualLoss(nn.Module):
+     def __init__(self):
+         super(PerceptualLoss, self).__init__()
+         loss_network = VGGFeatureExtractor()
+         for param in loss_network.parameters():
+             param.requires_grad = False
+         self.loss_network = loss_network
+         self.l1_loss = nn.L1Loss()
+
+     def forward(self, high_resolution, fake_high_resolution):
+         if next(self.loss_network.parameters()).device != high_resolution.device:
+             self.loss_network.to(high_resolution.device)
+             self.loss_network.eval()
+         perception_loss = self.l1_loss(self.loss_network(high_resolution), self.loss_network(fake_high_resolution))
+         return perception_loss
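
For reference, a minimal usage sketch for the SSIM and perceptual-loss helpers (not part of the commit; the random tensors stand in for SR output and ground truth, and both modules' normalization is configured for inputs in [-1, 1]):

    import torch

    from sam_diffsr.utils_sr.sr_utils import SSIM, PerceptualLoss

    hr = torch.rand(1, 3, 128, 128) * 2 - 1   # fake ground truth in [-1, 1]
    sr = torch.rand(1, 3, 128, 128) * 2 - 1   # fake SR output in [-1, 1]

    ssim_score = SSIM()(hr, sr)          # forward() rescales [-1, 1] -> [0, 1] before computing SSIM
    perc_loss = PerceptualLoss()(hr, sr)  # VGG19 feature L1; downloads torchvision weights on first use
    print(float(ssim_score), float(perc_loss))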
sam_diffsr/utils_sr/utils.py ADDED
@@ -0,0 +1,269 @@
+ import glob
+ import os
+ import re
+ import subprocess
+ from collections import OrderedDict
+
+ import lpips
+ import numpy as np
+ import torch
+ import torch.distributed as dist
+ from skimage.metrics import peak_signal_noise_ratio as psnr
+ from skimage.metrics import structural_similarity as ssim
+
+ from .matlab_resize import imresize
+
+
+ def reduce_tensors(metrics):
+     new_metrics = {}
+     for k, v in metrics.items():
+         if isinstance(v, torch.Tensor):
+             dist.all_reduce(v)
+             v = v / dist.get_world_size()
+         if type(v) is dict:
+             v = reduce_tensors(v)
+         new_metrics[k] = v
+     return new_metrics
+
+
+ def tensors_to_scalars(tensors):
+     if isinstance(tensors, torch.Tensor):
+         tensors = tensors.item()
+         return tensors
+     elif isinstance(tensors, dict):
+         new_tensors = {}
+         for k, v in tensors.items():
+             v = tensors_to_scalars(v)
+             new_tensors[k] = v
+         return new_tensors
+     elif isinstance(tensors, list):
+         return [tensors_to_scalars(v) for v in tensors]
+     else:
+         return tensors
+
+
+ def tensors_to_np(tensors):
+     if isinstance(tensors, dict):
+         new_np = {}
+         for k, v in tensors.items():
+             if isinstance(v, torch.Tensor):
+                 v = v.cpu().numpy()
+             if type(v) is dict:
+                 v = tensors_to_np(v)
+             new_np[k] = v
+     elif isinstance(tensors, list):
+         new_np = []
+         for v in tensors:
+             if isinstance(v, torch.Tensor):
+                 v = v.cpu().numpy()
+             if type(v) is dict:
+                 v = tensors_to_np(v)
+             new_np.append(v)
+     elif isinstance(tensors, torch.Tensor):
+         v = tensors
+         if isinstance(v, torch.Tensor):
+             v = v.cpu().numpy()
+         if type(v) is dict:
+             v = tensors_to_np(v)
+         new_np = v
+     else:
+         raise Exception(f'tensors_to_np does not support type {type(tensors)}.')
+     return new_np
+
+
+ def move_to_cpu(tensors):
+     ret = {}
+     for k, v in tensors.items():
+         if isinstance(v, torch.Tensor):
+             v = v.cpu()
+         if type(v) is dict:
+             v = move_to_cpu(v)
+         ret[k] = v
+     return ret
+
+
+ def move_to_cuda(batch, gpu_id=0):
+     # base case: object can be directly moved using `cuda` or `to`
+     if callable(getattr(batch, 'cuda', None)):
+         return batch.cuda(gpu_id, non_blocking=True)
+     elif callable(getattr(batch, 'to', None)):
+         return batch.to(torch.device('cuda', gpu_id), non_blocking=True)
+     elif isinstance(batch, list):
+         for i, x in enumerate(batch):
+             batch[i] = move_to_cuda(x, gpu_id)
+         return batch
+     elif isinstance(batch, tuple):
+         batch = list(batch)
+         for i, x in enumerate(batch):
+             batch[i] = move_to_cuda(x, gpu_id)
+         return tuple(batch)
+     elif isinstance(batch, dict):
+         for k, v in batch.items():
+             batch[k] = move_to_cuda(v, gpu_id)
+         return batch
+     return batch
+
+
+ def get_last_checkpoint(work_dir, steps=None):
+     checkpoint = None
+     last_ckpt_path = None
+     ckpt_paths = get_all_ckpts(work_dir, steps)
+     if len(ckpt_paths) > 0:
+         last_ckpt_path = ckpt_paths[0]
+         checkpoint = torch.load(last_ckpt_path, map_location='cpu')
+     return checkpoint, last_ckpt_path
+
+
+ def get_all_ckpts(work_dir, steps=None):
+     if steps is None:
+         ckpt_path_pattern = f'{work_dir}/model_ckpt_steps_*.ckpt'
+     else:
+         ckpt_path_pattern = f'{work_dir}/model_ckpt_steps_{steps}.ckpt'
+     return sorted(glob.glob(ckpt_path_pattern),
+                   key=lambda x: -int(re.findall(r'.*steps_(\d+)\.ckpt', x)[0]))
+
+
+ def load_checkpoint(model, optimizer, work_dir, steps=None):
+     checkpoint, last_ckpt_path = get_last_checkpoint(work_dir, steps)
+     print(f'loading checkpoint from: {last_ckpt_path}')
+     if checkpoint is not None:
+         stat_dict = checkpoint['state_dict']['model']
+
+         new_state_dict = OrderedDict()
+         for k, v in stat_dict.items():
+             if k[:7] == 'module.':
+                 k = k[7:]  # strip the `module.` prefix added by DataParallel
+             new_state_dict[k] = v
+
+         model.load_state_dict(new_state_dict)
+         model.cuda()
+         optimizer.load_state_dict(checkpoint['optimizer_states'][0])
+         training_step = checkpoint['global_step']
+         del checkpoint
+         torch.cuda.empty_cache()
+     else:
+         training_step = 0
+         model.cuda()
+     return training_step
+
+
+ def save_checkpoint(model, optimizer, work_dir, global_step, num_ckpt_keep):
+     ckpt_path = f'{work_dir}/model_ckpt_steps_{global_step}.ckpt'
+     print(f'Step@{global_step}: saving model to {ckpt_path}')
+     checkpoint = {'global_step': global_step}
+     optimizer_states = []
+     optimizer_states.append(optimizer.state_dict())
+     checkpoint['optimizer_states'] = optimizer_states
+     checkpoint['state_dict'] = {'model': model.state_dict()}
+     torch.save(checkpoint, ckpt_path, _use_new_zipfile_serialization=False)
+     for old_ckpt in get_all_ckpts(work_dir)[num_ckpt_keep:]:
+         remove_file(old_ckpt)
+         print(f'Delete ckpt: {os.path.basename(old_ckpt)}')
+
+
+ def remove_file(*fns):
+     for f in fns:
+         subprocess.check_call(f'rm -rf "{f}"', shell=True)
+
+
+ def plot_img(img):
+     img = img.data.cpu().numpy()
+     return np.clip(img, 0, 1)
+
+
+ def load_ckpt(cur_model, ckpt_base_dir, model_name='model', force=True, strict=True):
+     if os.path.isfile(ckpt_base_dir):
+         base_dir = os.path.dirname(ckpt_base_dir)
+         ckpt_path = ckpt_base_dir
+         checkpoint = torch.load(ckpt_base_dir, map_location='cpu')
+     else:
+         base_dir = ckpt_base_dir
+         checkpoint, ckpt_path = get_last_checkpoint(ckpt_base_dir)
+     if checkpoint is not None:
+         state_dict = checkpoint["state_dict"]
+         if len([k for k in state_dict.keys() if '.' in k]) > 0:
+             state_dict = {k[len(model_name) + 1:]: v for k, v in state_dict.items()
+                           if k.startswith(f'{model_name}.')}
+         else:
+             state_dict = state_dict[model_name]
+         if not strict:
+             cur_model_state_dict = cur_model.state_dict()
+             unmatched_keys = []
+             for key, param in state_dict.items():
+                 if key in cur_model_state_dict:
+                     new_param = cur_model_state_dict[key]
+                     if new_param.shape != param.shape:
+                         unmatched_keys.append(key)
+                         print("| Unmatched keys: ", key, new_param.shape, param.shape)
+             for key in unmatched_keys:
+                 del state_dict[key]
+         cur_model.load_state_dict(state_dict, strict=strict)
+         print(f"| load '{model_name}' from '{ckpt_path}'.")
+     else:
+         e_msg = f"| ckpt not found in {base_dir}."
+         if force:
+             assert False, e_msg
+         else:
+             print(e_msg)
+
+
+ class Measure:
+     def __init__(self, net='alex'):
+         self.model = lpips.LPIPS(net=net)
+
+     def measure(self, imgA, imgB, img_lr, sr_scale):
+         """
+
+         Args:
+             imgA: [C, H, W] uint8 or torch.FloatTensor in [-1, 1]
+             imgB: [C, H, W] uint8 or torch.FloatTensor in [-1, 1]
+             img_lr: [C, H, W] uint8 or torch.FloatTensor in [-1, 1]
+             sr_scale: super-resolution scale factor
+
+         Returns: dict of metrics
+         """
+         if isinstance(imgA, torch.Tensor):
+             imgA = np.round((imgA.cpu().numpy() + 1) * 127.5).clip(min=0, max=255).astype(np.uint8)
+             imgB = np.round((imgB.cpu().numpy() + 1) * 127.5).clip(min=0, max=255).astype(np.uint8)
+             img_lr = np.round((img_lr.cpu().numpy() + 1) * 127.5).clip(min=0, max=255).astype(np.uint8)
+         imgA = imgA.transpose(1, 2, 0)
+         imgA_lr = imresize(imgA, 1 / sr_scale)
+         imgB = imgB.transpose(1, 2, 0)
+         img_lr = img_lr.transpose(1, 2, 0)
+         psnr = self.psnr(imgA, imgB)
+         ssim = self.ssim(imgA, imgB)
+         lpips = self.lpips(imgA, imgB)
+         lr_psnr = self.psnr(imgA_lr, img_lr)
+         res = {'psnr': psnr, 'ssim': ssim, 'lpips': lpips, 'lr_psnr': lr_psnr}
+         return {k: float(v) for k, v in res.items()}
+
+     def lpips(self, imgA, imgB, model=None):
+         device = next(self.model.parameters()).device
+         tA = t(imgA).to(device)
+         tB = t(imgB).to(device)
+         dist01 = self.model.forward(tA, tB).item()
+         return dist01
+
+     def ssim(self, imgA, imgB):
+         score, diff = ssim(imgA, imgB, full=True, channel_axis=2, data_range=255)
+         return score
+
+     def psnr(self, imgA, imgB):
+         return psnr(imgA, imgB, data_range=255)
+
+
+ def t(img):
+     def to_4d(img):
+         assert len(img.shape) == 3
+         img_new = np.expand_dims(img, axis=0)
+         assert len(img_new.shape) == 4
+         return img_new
+
+     def to_CHW(img):
+         return np.transpose(img, [2, 0, 1])
+
+     def to_tensor(img):
+         return torch.Tensor(img)
+
+     return to_tensor(to_4d(to_CHW(img))) / 127.5 - 1
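
For reference, a minimal usage sketch for the Measure helper (not part of the commit; the random uint8 arrays are stand-ins for SR output, ground truth, and the low-resolution input, and LPIPS weights are downloaded on first use):

    import numpy as np

    from sam_diffsr.utils_sr.utils import Measure

    measure = Measure(net='alex')

    hr = np.random.randint(0, 256, (3, 256, 256), dtype=np.uint8)  # fake ground truth, [C, H, W]
    sr = np.random.randint(0, 256, (3, 256, 256), dtype=np.uint8)  # fake SR output, [C, H, W]
    lr = np.random.randint(0, 256, (3, 64, 64), dtype=np.uint8)    # fake x4 low-resolution input, [C, H, W]

    metrics = measure.measure(sr, hr, lr, sr_scale=4)
    print(metrics)  # {'psnr': ..., 'ssim': ..., 'lpips': ..., 'lr_psnr': ...}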
sam_diffsr/weight/model_ckpt_steps_400000.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ab89ee4160be868422459918eb69880042dc12544b1bf7807aa479c7eb329e55
+ size 204945145