mcding committed on
Commit 23c4bb7
1 Parent(s): 32afe8e

fix lfs issue

app.py CHANGED
@@ -1,11 +1,8 @@
 import os
 import gradio as gr
-
 import numpy as np
 import json
 import redis
-from PIL import Image
-import time
 import plotly.graph_objects as go
 from datetime import datetime
 from kit import compute_performance, compute_quality
@@ -13,6 +10,35 @@ import dotenv
 
 dotenv.load_dotenv()
 
+CSS = """
+.tabs button{
+    font-size: 24px;
+}
+#download_btn {
+    height: 91.6px;
+}
+#submit_btn {
+    height: 91.6px;
+}
+#original_image {
+    display: block;
+    margin-left: auto;
+    margin-right: auto;
+}
+#uploaded_image {
+    display: block;
+    margin-left: auto;
+    margin-right: auto;
+}
+#leaderboard_plot {
+    display: block;
+    margin-left: auto;
+    margin-right: auto;
+    width: 512px; /* Adjust width as needed */
+    height: 512px; /* Adjust height as needed */
+}
+"""
+
 
 # Connect to Redis
 redis_client = redis.Redis(
@@ -39,7 +65,10 @@ def get_submissions_from_redis():
     return [json.loads(submission) for submission in submissions]
 
 
-def update_leaderboard(submissions):
+def update_plot(
+    submissions,
+    current_name=None,
+):
     names = [sub["name"] for sub in submissions]
     performances = [float(sub["performance"]) for sub in submissions]
     qualities = [float(sub["quality"]) for sub in submissions]
@@ -47,16 +76,26 @@ def update_leaderboard(submissions):
     # Create scatter plot
     fig = go.Figure()
 
-    fig.add_trace(
-        go.Scatter(
-            x=qualities,
-            y=performances,
-            mode="markers+text",
-            text=names,
-            textposition="top center",
-            name="Submissions",
+    for name, quality, performance in zip(names, qualities, performances):
+        if name == current_name:
+            marker = dict(symbol="star", size=15, color="blue")
+        elif name.startswith("Baseline: "):
+            marker = dict(symbol="square", size=10, color="grey")
+        else:
+            marker = dict(symbol="circle", size=10, color="green")
+
+        fig.add_trace(
+            go.Scatter(
+                x=[quality],
+                y=[performance],
+                mode="markers+text",
+                text=[name],
+                textposition="top center",
+                name=name,
+                marker=marker,
+                hovertemplate=f"{'Name: ' + name if not name.startswith('Baseline: ') else name}<br>(Performance, Quality) = ({performance:.3f}, {quality:.3f})",
+            )
         )
-    )
 
     # Add circles
     circle_radii = np.linspace(0, 1, 5)
@@ -76,13 +115,17 @@ def update_leaderboard(submissions):
 
     # Update layout
     fig.update_layout(
-        title="Submissions Leaderboard",
-        xaxis_title="Quality",
-        yaxis_title="Performance",
-        xaxis=dict(range=[0, 1]),
-        yaxis=dict(range=[0, 1]),
-        width=600,
-        height=600,
+        xaxis_title="Image Quality Degredation",
+        yaxis_title="Watermark Detection Performance",
+        xaxis=dict(
+            range=[0, 1.1], titlefont=dict(size=16)  # Adjust this value as needed
+        ),
+        yaxis=dict(
+            range=[0, 1.1], titlefont=dict(size=16)  # Adjust this value as needed
+        ),
+        width=512,
+        height=512,
+        showlegend=False,  # Remove legend
     )
 
     return fig
@@ -91,17 +134,15 @@ def update_leaderboard(submissions):
 def process_submission(name, image):
     original_image = Image.open("./image.png")
     progress = gr.Progress()
-    progress(0, desc="Processing")
-    time.sleep(0.5)
-    progress(0.1, desc="Decoding")
+    progress(0, desc="Detecting Watermark")
     performance = compute_performance(image)
-    progress(0.6, desc="Computing metric")
+    progress(0.4, desc="Evaluating Image Quality")
     quality = compute_quality(image, original_image)
-    progress(0.9, desc="Saving results")
+    progress(1.0, desc="Uploading Results")
     save_to_redis(name, performance, quality)
 
     submissions = get_submissions_from_redis()
-    leaderboard_plot = update_leaderboard(submissions)
+    leaderboard_plot = update_plot(submissions, current_name=name)
 
     # Calculate rank
     distances = [
@@ -111,13 +152,15 @@ def process_submission(name, image):
     rank = (
         sorted(distances, reverse=True).index(np.sqrt(quality**2 + performance**2)) + 1
     )
-
-    progress(1.0, desc="Complete")
-    return leaderboard_plot, rank, name, performance, quality
-
-
-def download_image():
-    return "./image.png"
+    gr.Info(f"You ranked {rank} out of {len(submissions)}!")
+    return (
+        leaderboard_plot,
+        f"{rank} out of {len(submissions)}",
+        name,
+        f"{performance:.3f}",
+        f"{quality:.3f}",
+        f"{np.sqrt(quality**2 + performance**2):.3f}",
+    )
 
 
 def upload_and_evaluate(name, image):
@@ -129,61 +172,105 @@ def upload_and_evaluate(name, image):
 
 
 def create_interface():
-    with gr.Blocks() as demo:
+    with gr.Blocks(css=CSS) as demo:
         gr.Markdown(
             """
-            # Erasing the Invisible -- NeurIPS24 Watermark Removal Challenge Demo
+            # Erasing the Invisible Demo
+            TODO: Improve title and add description, add icon.jpg, also improve configs in README.md
             """
         )
 
-        with gr.Tabs() as tabs:
+        with gr.Tabs(elem_classes=["tabs"]) as tabs:
             with gr.Tab("Original Watermarked Image", id="download"):
+                gr.Markdown(
+                    """
+                    TODO: Add descriptions
+                    """
+                )
                 with gr.Column():
                     original_image = gr.Image(
                         value="./image.png",
+                        format="png",
                         label="Original Watermarked Image",
                         show_label=True,
                         height=512,
+                        width=512,
+                        type="filepath",
                         show_download_button=False,
                         show_share_button=False,
                         show_fullscreen_button=False,
+                        container=True,
+                        elem_id="original_image",
                     )
                     with gr.Row():
-                        gr.DownloadButton(
-                            "Download Watermarked Image", value="./image.png", scale=3
+                        download_btn = gr.DownloadButton(
+                            "Download Watermarked Image",
+                            value="./image.png",
+                            elem_id="download_btn",
+                        )
+                        submit_btn = gr.Button(
+                            "Submit Your Removal", elem_id="submit_btn"
                         )
-                        submit_btn = gr.Button("Submit Your Removal", scale=3)
 
-            with gr.Tab("Submit Watermark Removed Image", id="submit"):
+            with gr.Tab(
+                "Submit Watermark Removed Image",
+                id="submit",
+                elem_classes="gr-tab-header",
+            ):
+                gr.Markdown(
+                    """
+                    TODO: Add descriptions
+                    """
+                )
                 with gr.Column():
                     uploaded_image = gr.Image(
                         label="Your Watermark Removed Image",
+                        format="png",
                         show_label=True,
                         height=512,
+                        width=512,
+                        sources=["upload"],
                         type="pil",
                         show_download_button=False,
                         show_share_button=False,
                         show_fullscreen_button=False,
+                        container=True,
+                        placeholder="Upload your watermark removed image",
+                        elem_id="uploaded_image",
                     )
                     with gr.Row():
                         name_input = gr.Textbox(
-                            label="Your Name",
+                            label="Your Name", placeholder="Anonymous"
                         )
                         upload_btn = gr.Button("Upload and Evaluate")
 
-            with gr.Tab("Evaluation Results and Your Ranking", id="leaderboard"):
+            with gr.Tab(
+                "Evaluation Results and Your Ranking",
+                id="leaderboard",
+                elem_classes="gr-tab-header",
+            ):
+                gr.Markdown(
+                    """
+                    TODO: Add descriptions
+                    """
+                )
                 with gr.Column():
                     leaderboard_plot = gr.Plot(
-                        label="Evalution Results", show_label=True
+                        value=update_plot(get_submissions_from_redis()),
+                        show_label=False,
+                        elem_id="leaderboard_plot",
                     )
                     with gr.Row():
-                        rank_output = gr.Number(label="Your Ranking")
+                        rank_output = gr.Textbox(label="Your Ranking")
                         name_output = gr.Textbox(label="Your Name")
-                        performance_output = gr.Number(
-                            label="Watermark Performance Score (lower is better)"
+                        performance_output = gr.Textbox(
+                            label="Watermark Performance (lower is better)"
+                        )
+                        quality_output = gr.Textbox(
+                            label="Quality Degredation (lower is better)"
                         )
-                        quality_output = gr.Number(
-                            label="Quality Degredation Score (lower is better)"
+                        overall_output = gr.Textbox(
+                            label="Overall Score (lower is better)"
                         )
 
         submit_btn.click(lambda: gr.Tabs(selected="submit"), None, tabs)
@@ -197,13 +284,14 @@ def create_interface():
                 name_output,
                 performance_output,
                 quality_output,
+                overall_output,
             ],
         )
 
         demo.load(
             lambda: [
                 gr.Image(value="./image.png", height=512, width=512),
-                gr.Plot(update_leaderboard(get_submissions_from_redis())),
+                gr.Plot(update_plot(get_submissions_from_redis())),
            ],
            outputs=[original_image, leaderboard_plot],
        )
attacked_image.png DELETED
Binary file (155 kB)
 
kit/__init__.py CHANGED
@@ -81,7 +81,7 @@ def compute_quality(attacked_image, clean_image, quiet=True):
 
     # Compress the image
     buffer = io.BytesIO()
-    attacked_image.save(buffer, format="JPEG", quality=90)
+    attacked_image.save(buffer, format="JPEG", quality=95)
     buffer.seek(0)
 
     # Update attacked_image with the compressed version
kit/metrics/__init__.py CHANGED
@@ -13,9 +13,7 @@ from .image import (
 from .perceptual import (
     load_perceptual_models,
     compute_lpips,
-    compute_watson,
     compute_lpips_repeated,
-    compute_watson_repeated,
     compute_perceptual_metric_repeated,
 )
 from .aesthetics import (
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_bigg_14.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39a5d014670226d52c408e0dfec840b7626d80a73d003a6a144caafd5e02d031
+size 19423219
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_h_14.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc48a8a2315cfdbc7bb8278be55f645e8a995e1a2fa234baec5eb41c4d33e070
+size 17850319
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_artifacts_openclip_vit_l_14.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4a9481fdbce5ff02b252bcb25109b9f3b29841289fadf7e79e884d59f9357d5
+size 16801743
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_bigg_14.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19b016304f54ae866e27f1eb498c0861f704958e7c37693adc5ce094e63904a8
+size 19423099
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_h_14.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03603eee1864c2e5e97ef7079229609653db5b10594ca8b1de9e541d838cae9c
+size 17850199
kit/metrics/aesthetics_scorer/weights/aesthetics_scorer_rating_openclip_vit_l_14.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb7fe561369ab6c7dad34b9316a56d2c6070582f0323656148e1107a242cd666
+size 16801623
kit/metrics/lpips/weights/v0.0/alex.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18720f55913d0af89042f13faa7e536a6ce1444a0914e6db9461355ece1e8cd5
+size 5455
kit/metrics/lpips/weights/v0.0/squeeze.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c27abd3a0145541baa50990817df58d3759c3f8154949f42af3b59b4e042d0bf
+size 10057
kit/metrics/lpips/weights/v0.0/vgg.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9e4236260c3dd988fc79d2a48d645d885afcbb21f9fd595e6744cf7419b582c
+size 6735
kit/metrics/lpips/weights/v0.1/alex.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df73285e35b22355a2df87cdb6b70b343713b667eddbda73e1977e0c860835c0
+size 6009
kit/metrics/lpips/weights/v0.1/squeeze.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a5350f23600cb79923ce65bb07cbf57dca461329894153e05a1346bd531cf76
+size 10811
kit/metrics/lpips/weights/v0.1/vgg.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a78928a0af1e5f0fcb1f3b9e8f8c3a2a5a3de244d830ad5c1feddc79b8432868
+size 7289
kit/metrics/perceptual.py CHANGED
@@ -2,7 +2,6 @@ import torch
 from PIL import Image
 from torchvision import transforms
 from .lpips import LPIPS
-from .watson import LossProvider
 
 
 # Normalize image tensors
@@ -33,19 +32,10 @@ def to_tensor(images, norm_type="naive"):
 
 
 def load_perceptual_models(metric_name, mode, device=torch.device("cuda")):
-    assert metric_name in ["lpips", "watson"]
+    assert metric_name in ["lpips"]
     if metric_name == "lpips":
         assert mode in ["vgg", "alex"]
         perceptual_model = LPIPS(net=mode).to(device)
-    elif metric_name == "watson":
-        assert mode in ["vgg", "dft", "fft"]
-        perceptual_model = (
-            LossProvider()
-            .get_loss_function(
-                "Watson-" + mode, colorspace="RGB", pretrained=True, reduction="none"
-            )
-            .to(device)
-        )
     else:
         assert False
     return perceptual_model
@@ -65,12 +55,6 @@ def compute_lpips(image1, image2, mode="alex", device=torch.device("cuda")):
     return compute_metric(image1, image2, perceptual_model, device)
 
 
-# Compute Watson distance between two images
-def compute_watson(image1, image2, mode="dft", device=torch.device("cuda")):
-    perceptual_model = load_perceptual_models("watson", mode, device)
-    return compute_metric(image1, image2, perceptual_model, device)
-
-
 # Compute metrics between pairs of images
 def compute_perceptual_metric_repeated(
     images1,
@@ -107,16 +91,3 @@ def compute_lpips_repeated(
     return compute_perceptual_metric_repeated(
         images1, images2, "lpips", mode, model, device
     )
-
-
-# Compute Watson distance between pairs of images
-def compute_watson_repeated(
-    images1,
-    images2,
-    mode="dft",
-    model=None,
-    device=torch.device("cuda"),
-):
-    return compute_perceptual_metric_repeated(
-        images1, images2, "watson", mode, model, device
-    )
kit/metrics/watson/__init__.py DELETED
@@ -1,4 +0,0 @@
-"""
-From https://github.com/facebookresearch/stable_signature
-"""
-from .loss_provider import LossProvider
kit/metrics/watson/color_wrapper.py DELETED
@@ -1,103 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.nn.functional as F
4
-
5
-
6
- class RGB2YCbCr(nn.Module):
7
- def __init__(self):
8
- super().__init__()
9
- transf = torch.tensor(
10
- [[0.299, 0.587, 0.114], [-0.1687, -0.3313, 0.5], [0.5, -0.4187, -0.0813]]
11
- ).transpose(0, 1)
12
- self.transform = nn.Parameter(transf, requires_grad=False)
13
- bias = torch.tensor([0, 0.5, 0.5])
14
- self.bias = nn.Parameter(bias, requires_grad=False)
15
-
16
- def forward(self, rgb):
17
- N, C, H, W = rgb.shape
18
- assert C == 3
19
- rgb = rgb.transpose(1, 3)
20
- cbcr = torch.matmul(rgb, self.transform)
21
- cbcr += self.bias
22
- return cbcr.transpose(1, 3)
23
-
24
-
25
- class ColorWrapper(nn.Module):
26
- """
27
- Extension for single-channel loss to work on color images
28
- """
29
-
30
- def __init__(self, lossclass, args, kwargs, trainable=False):
31
- """
32
- Parameters:
33
- lossclass: class of the individual loss functions
34
- trainable: bool, if True parameters of the loss are trained.
35
- args: tuple, arguments for instantiation of loss fun
36
- kwargs: dict, key word arguments for instantiation of loss fun
37
- """
38
- super().__init__()
39
-
40
- # submodules
41
- self.add_module("to_YCbCr", RGB2YCbCr())
42
- self.add_module("ly", lossclass(*args, **kwargs))
43
- self.add_module("lcb", lossclass(*args, **kwargs))
44
- self.add_module("lcr", lossclass(*args, **kwargs))
45
-
46
- # weights
47
- self.w_tild = nn.Parameter(torch.zeros(3), requires_grad=trainable)
48
-
49
- @property
50
- def w(self):
51
- return F.softmax(self.w_tild, dim=0)
52
-
53
- def forward(self, input, target):
54
- # convert color space
55
- input = self.to_YCbCr(input)
56
- target = self.to_YCbCr(target)
57
-
58
- ly = self.ly(input[:, [0], :, :], target[:, [0], :, :])
59
- lcb = self.lcb(input[:, [1], :, :], target[:, [1], :, :])
60
- lcr = self.lcr(input[:, [2], :, :], target[:, [2], :, :])
61
-
62
- w = self.w
63
-
64
- return ly * w[0] + lcb * w[1] + lcr * w[2]
65
-
66
-
67
- class GreyscaleWrapper(nn.Module):
68
- """
69
- Maps 3 channel RGB or 1 channel greyscale input to 3 greyscale channels
70
- """
71
-
72
- def __init__(self, lossclass, args, kwargs):
73
- """
74
- Parameters:
75
- lossclass: class of the individual loss function
76
- args: tuple, arguments for instantiation of loss fun
77
- kwargs: dict, key word arguments for instantiation of loss fun
78
- """
79
- super().__init__()
80
-
81
- # submodules
82
- self.add_module("loss", lossclass(*args, **kwargs))
83
-
84
- def to_greyscale(self, tensor):
85
- return (
86
- tensor[:, [0], :, :] * 0.3
87
- + tensor[:, [1], :, :] * 0.59
88
- + tensor[:, [2], :, :] * 0.11
89
- )
90
-
91
- def forward(self, input, target):
92
- (N, C, X, Y) = input.size()
93
-
94
- if N == 3:
95
- # convert input to greyscale
96
- input = self.to_greyscale(input)
97
- target = self.to_greyscale(target)
98
-
99
- # input in now greyscale, expand to 3 channels
100
- input = input.expand(N, 3, X, Y)
101
- target = target.expand(N, 3, X, Y)
102
-
103
- return self.loss.forward(input, target)
kit/metrics/watson/dct2d.py DELETED
@@ -1,105 +0,0 @@
1
- import numpy as np
2
- import torch
3
- import torch.nn as nn
4
- import torch.nn.functional as F
5
-
6
-
7
- class Dct2d(nn.Module):
8
- """
9
- Blockwhise 2D DCT
10
- """
11
-
12
- def __init__(self, blocksize=8, interleaving=False):
13
- """
14
- Parameters:
15
- blocksize: int, size of the Blocks for discrete cosine transform
16
- interleaving: bool, should the blocks interleave?
17
- """
18
- super().__init__() # call super constructor
19
-
20
- self.blocksize = blocksize
21
- self.interleaving = interleaving
22
-
23
- if interleaving:
24
- self.stride = self.blocksize // 2
25
- else:
26
- self.stride = self.blocksize
27
-
28
- # precompute DCT weight matrix
29
- A = np.zeros((blocksize, blocksize))
30
- for i in range(blocksize):
31
- c_i = 1 / np.sqrt(2) if i == 0 else 1.0
32
- for n in range(blocksize):
33
- A[i, n] = (
34
- np.sqrt(2 / blocksize)
35
- * c_i
36
- * np.cos((2 * n + 1) / (blocksize * 2) * i * np.pi)
37
- )
38
-
39
- # set up conv layer
40
- self.A = nn.Parameter(torch.tensor(A, dtype=torch.float32), requires_grad=False)
41
- self.unfold = torch.nn.Unfold(
42
- kernel_size=blocksize, padding=0, stride=self.stride
43
- )
44
- return
45
-
46
- def forward(self, x):
47
- """
48
- performs 2D blockwhise DCT
49
-
50
- Parameters:
51
- x: tensor of dimension (N, 1, h, w)
52
-
53
- Return:
54
- tensor of dimension (N, k, blocksize, blocksize)
55
- where the 2nd dimension indexes the block. Dimensions 3 and 4 are the block DCT coefficients
56
- """
57
-
58
- (N, C, H, W) = x.shape
59
- assert C == 1, "DCT is only implemented for a single channel"
60
- assert H >= self.blocksize, "Input too small for blocksize"
61
- assert W >= self.blocksize, "Input too small for blocksize"
62
- assert (H % self.stride == 0) and (
63
- W % self.stride == 0
64
- ), "FFT is only for dimensions divisible by the blocksize"
65
-
66
- # unfold to blocks
67
- x = self.unfold(x)
68
- # now shape (N, blocksize**2, k)
69
- (N, _, k) = x.shape
70
- x = x.view(-1, self.blocksize, self.blocksize, k).permute(0, 3, 1, 2)
71
- # now shape (N, #k, blocksize, blocksize)
72
- # perform DCT
73
- coeff = self.A.matmul(x).matmul(self.A.transpose(0, 1))
74
-
75
- return coeff
76
-
77
- def inverse(self, coeff, output_shape):
78
- """
79
- performs 2D blockwhise iDCT
80
-
81
- Parameters:
82
- coeff: tensor of dimension (N, k, blocksize, blocksize)
83
- where the 2nd dimension indexes the block. Dimensions 3 and 4 are the block DCT coefficients
84
- output_shape: (h, w) dimensions of the reconstructed image
85
-
86
- Return:
87
- tensor of dimension (N, 1, h, w)
88
- """
89
- if self.interleaving:
90
- raise Exception(
91
- "Inverse block DCT is not implemented for interleaving blocks!"
92
- )
93
-
94
- # perform iDCT
95
- x = self.A.transpose(0, 1).matmul(coeff).matmul(self.A)
96
- (N, k, _, _) = x.shape
97
- x = x.permute(0, 2, 3, 1).view(-1, self.blocksize**2, k)
98
- x = F.fold(
99
- x,
100
- output_size=(output_shape[-2], output_shape[-1]),
101
- kernel_size=self.blocksize,
102
- padding=0,
103
- stride=self.blocksize,
104
- )
105
- return x
kit/metrics/watson/deep_loss.py DELETED
@@ -1,307 +0,0 @@
1
- # Deeploss function from Zhang et al. (2018)
2
- import torch
3
- import torch.nn as nn
4
- from torchvision import models
5
- from collections import namedtuple
6
-
7
-
8
- class NetLinLayer(nn.Module):
9
- """A single linear layer which does a 1x1 conv"""
10
-
11
- def __init__(self, chn_in, chn_out=1, use_dropout=False):
12
- super(NetLinLayer, self).__init__()
13
-
14
- layers = (
15
- [
16
- nn.Dropout(),
17
- ]
18
- if (use_dropout)
19
- else [
20
- nn.Dropout(p=0.0),
21
- ]
22
- )
23
- layers += [
24
- nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False),
25
- ]
26
- self.model = nn.Sequential(*layers)
27
-
28
-
29
- def normalize_tensor(in_feat, eps=1e-10):
30
- # norm_factor = torch.sqrt(torch.sum(in_feat**2,dim=1)).view(in_feat.size()[0],1,in_feat.size()[2],in_feat.size()[3]).repeat(1,in_feat.size()[1],1,1)
31
- norm_factor = torch.sqrt(torch.sum(in_feat**2, dim=1)).view(
32
- in_feat.size()[0], 1, in_feat.size()[2], in_feat.size()[3]
33
- )
34
- return in_feat / (norm_factor.expand_as(in_feat) + eps)
35
-
36
-
37
- class vgg16(torch.nn.Module):
38
- def __init__(self, requires_grad=False, pretrained=True):
39
- super(vgg16, self).__init__()
40
- vgg_pretrained_features = models.vgg16(pretrained=pretrained).features
41
- self.slice1 = torch.nn.Sequential()
42
- self.slice2 = torch.nn.Sequential()
43
- self.slice3 = torch.nn.Sequential()
44
- self.slice4 = torch.nn.Sequential()
45
- self.slice5 = torch.nn.Sequential()
46
- self.N_slices = 5
47
- for x in range(4):
48
- self.slice1.add_module(str(x), vgg_pretrained_features[x])
49
- for x in range(4, 9):
50
- self.slice2.add_module(str(x), vgg_pretrained_features[x])
51
- for x in range(9, 16):
52
- self.slice3.add_module(str(x), vgg_pretrained_features[x])
53
- for x in range(16, 23):
54
- self.slice4.add_module(str(x), vgg_pretrained_features[x])
55
- for x in range(23, 30):
56
- self.slice5.add_module(str(x), vgg_pretrained_features[x])
57
- if not requires_grad:
58
- for param in self.parameters():
59
- param.requires_grad = False
60
-
61
- def forward(self, X):
62
- h = self.slice1(X)
63
- h_relu1_2 = h
64
- h = self.slice2(h)
65
- h_relu2_2 = h
66
- h = self.slice3(h)
67
- h_relu3_3 = h
68
- h = self.slice4(h)
69
- h_relu4_3 = h
70
- h = self.slice5(h)
71
- h_relu5_3 = h
72
- vgg_outputs = namedtuple(
73
- "VggOutputs", ["relu1_2", "relu2_2", "relu3_3", "relu4_3", "relu5_3"]
74
- )
75
- out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3)
76
- return out
77
-
78
-
79
- class squeezenet(torch.nn.Module):
80
- def __init__(self, requires_grad=False, pretrained=True):
81
- super(squeezenet, self).__init__()
82
- pretrained_features = models.squeezenet1_1(pretrained=pretrained).features
83
- self.slice1 = torch.nn.Sequential()
84
- self.slice2 = torch.nn.Sequential()
85
- self.slice3 = torch.nn.Sequential()
86
- self.slice4 = torch.nn.Sequential()
87
- self.slice5 = torch.nn.Sequential()
88
- self.slice6 = torch.nn.Sequential()
89
- self.slice7 = torch.nn.Sequential()
90
- self.N_slices = 7
91
- for x in range(2):
92
- self.slice1.add_module(str(x), pretrained_features[x])
93
- for x in range(2, 5):
94
- self.slice2.add_module(str(x), pretrained_features[x])
95
- for x in range(5, 8):
96
- self.slice3.add_module(str(x), pretrained_features[x])
97
- for x in range(8, 10):
98
- self.slice4.add_module(str(x), pretrained_features[x])
99
- for x in range(10, 11):
100
- self.slice5.add_module(str(x), pretrained_features[x])
101
- for x in range(11, 12):
102
- self.slice6.add_module(str(x), pretrained_features[x])
103
- for x in range(12, 13):
104
- self.slice7.add_module(str(x), pretrained_features[x])
105
- if not requires_grad:
106
- for param in self.parameters():
107
- param.requires_grad = False
108
-
109
- def forward(self, X):
110
- h = self.slice1(X)
111
- h_relu1 = h
112
- h = self.slice2(h)
113
- h_relu2 = h
114
- h = self.slice3(h)
115
- h_relu3 = h
116
- h = self.slice4(h)
117
- h_relu4 = h
118
- h = self.slice5(h)
119
- h_relu5 = h
120
- h = self.slice6(h)
121
- h_relu6 = h
122
- h = self.slice7(h)
123
- h_relu7 = h
124
- vgg_outputs = namedtuple(
125
- "SqueezeOutputs",
126
- ["relu1", "relu2", "relu3", "relu4", "relu5", "relu6", "relu7"],
127
- )
128
- out = vgg_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5, h_relu6, h_relu7)
129
-
130
- return out
131
-
132
-
133
- class alexnet(torch.nn.Module):
134
- def __init__(self, requires_grad=False, pretrained=True):
135
- super(alexnet, self).__init__()
136
- alexnet_pretrained_features = models.alexnet(pretrained=pretrained).features
137
- self.slice1 = torch.nn.Sequential()
138
- self.slice2 = torch.nn.Sequential()
139
- self.slice3 = torch.nn.Sequential()
140
- self.slice4 = torch.nn.Sequential()
141
- self.slice5 = torch.nn.Sequential()
142
- self.N_slices = 5
143
- for x in range(2):
144
- self.slice1.add_module(str(x), alexnet_pretrained_features[x])
145
- for x in range(2, 5):
146
- self.slice2.add_module(str(x), alexnet_pretrained_features[x])
147
- for x in range(5, 8):
148
- self.slice3.add_module(str(x), alexnet_pretrained_features[x])
149
- for x in range(8, 10):
150
- self.slice4.add_module(str(x), alexnet_pretrained_features[x])
151
- for x in range(10, 12):
152
- self.slice5.add_module(str(x), alexnet_pretrained_features[x])
153
- if not requires_grad:
154
- for param in self.parameters():
155
- param.requires_grad = False
156
-
157
- def forward(self, X):
158
- h = self.slice1(X)
159
- h_relu1 = h
160
- h = self.slice2(h)
161
- h_relu2 = h
162
- h = self.slice3(h)
163
- h_relu3 = h
164
- h = self.slice4(h)
165
- h_relu4 = h
166
- h = self.slice5(h)
167
- h_relu5 = h
168
- alexnet_outputs = namedtuple(
169
- "AlexnetOutputs", ["relu1", "relu2", "relu3", "relu4", "relu5"]
170
- )
171
- out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5)
172
-
173
- return out
174
-
175
-
176
- class PNetLin(nn.Module):
177
- def __init__(
178
- self,
179
- pnet_type="vgg",
180
- pnet_rand=False,
181
- pnet_tune=False,
182
- use_dropout=True,
183
- use_gpu=True,
184
- spatial=False,
185
- version="0.1",
186
- colorspace="RGB",
187
- reduction="none",
188
- ):
189
- super(PNetLin, self).__init__()
190
-
191
- self.use_gpu = use_gpu
192
- self.pnet_type = pnet_type
193
- self.pnet_tune = pnet_tune
194
- self.pnet_rand = pnet_rand
195
- self.spatial = spatial
196
- self.version = version
197
- self.colorspace = colorspace
198
- self.reduction = reduction
199
-
200
- if self.pnet_type in ["vgg", "vgg16"]:
201
- net_type = vgg16
202
- self.chns = [64, 128, 256, 512, 512]
203
- elif self.pnet_type == "alex":
204
- net_type = alexnet
205
- self.chns = [64, 192, 384, 256, 256]
206
- elif self.pnet_type == "squeeze":
207
- net_type = squeezenet
208
- self.chns = [64, 128, 256, 384, 384, 512, 512]
209
-
210
- if self.pnet_tune:
211
- self.net = net_type(pretrained=not self.pnet_rand, requires_grad=True)
212
- else:
213
- self.net = [
214
- net_type(pretrained=not self.pnet_rand, requires_grad=False),
215
- ]
216
-
217
- self.lin0 = NetLinLayer(self.chns[0], use_dropout=use_dropout)
218
- self.lin1 = NetLinLayer(self.chns[1], use_dropout=use_dropout)
219
- self.lin2 = NetLinLayer(self.chns[2], use_dropout=use_dropout)
220
- self.lin3 = NetLinLayer(self.chns[3], use_dropout=use_dropout)
221
- self.lin4 = NetLinLayer(self.chns[4], use_dropout=use_dropout)
222
- self.lins = [self.lin0, self.lin1, self.lin2, self.lin3, self.lin4]
223
- if self.pnet_type == "squeeze": # 7 layers for squeezenet
224
- self.lin5 = NetLinLayer(self.chns[5], use_dropout=use_dropout)
225
- self.lin6 = NetLinLayer(self.chns[6], use_dropout=use_dropout)
226
- self.lins += [self.lin5, self.lin6]
227
-
228
- self.shift = torch.autograd.Variable(
229
- torch.Tensor([-0.030, -0.088, -0.188]).view(1, 3, 1, 1)
230
- )
231
- self.scale = torch.autograd.Variable(
232
- torch.Tensor([0.458, 0.448, 0.450]).view(1, 3, 1, 1)
233
- )
234
-
235
- if use_gpu:
236
- if self.pnet_tune:
237
- self.net.cuda()
238
- else:
239
- self.net[0].cuda()
240
- self.shift = self.shift.cuda()
241
- self.scale = self.scale.cuda()
242
- self.lin0.cuda()
243
- self.lin1.cuda()
244
- self.lin2.cuda()
245
- self.lin3.cuda()
246
- self.lin4.cuda()
247
- if self.pnet_type == "squeeze":
248
- self.lin5.cuda()
249
- self.lin6.cuda()
250
-
251
- def forward(self, in0, in1):
252
- in0_sc = (in0 - self.shift.expand_as(in0)) / self.scale.expand_as(in0)
253
- in1_sc = (in1 - self.shift.expand_as(in0)) / self.scale.expand_as(in0)
254
-
255
- if self.colorspace == "Gray":
256
- in0_sc = util.tensor2tensorGrayscaleLazy(in0_sc)
257
- in1_sc = util.tensor2tensorGrayscaleLazy(in1_sc)
258
-
259
- if self.version == "0.0":
260
- # v0.0 - original release had a bug, where input was not scaled
261
- in0_input = in0
262
- in1_input = in1
263
- else:
264
- # v0.1
265
- in0_input = in0_sc
266
- in1_input = in1_sc
267
-
268
- if self.pnet_tune:
269
- outs0 = self.net.forward(in0_input)
270
- outs1 = self.net.forward(in1_input)
271
- else:
272
- outs0 = self.net[0].forward(in0_input)
273
- outs1 = self.net[0].forward(in1_input)
274
-
275
- feats0 = {}
276
- feats1 = {}
277
- diffs = [0] * len(outs0)
278
-
279
- for kk, out0 in enumerate(outs0):
280
- feats0[kk] = normalize_tensor(outs0[kk]) # norm NN outputs
281
- feats1[kk] = normalize_tensor(outs1[kk])
282
- diffs[kk] = (feats0[kk] - feats1[kk]) ** 2 # squared diff
283
-
284
- if self.spatial:
285
- lin_models = [self.lin0, self.lin1, self.lin2, self.lin3, self.lin4]
286
- if self.pnet_type == "squeeze":
287
- lin_models.extend([self.lin5, self.lin6])
288
- res = [lin_models[kk].model(diffs[kk]) for kk in range(len(diffs))]
289
- return res
290
-
291
- val = torch.mean(
292
- torch.mean(self.lin0.model(diffs[0]), dim=3), dim=2
293
- ) # sum means over H, W
294
- val = val + torch.mean(torch.mean(self.lin1.model(diffs[1]), dim=3), dim=2)
295
- val = val + torch.mean(torch.mean(self.lin2.model(diffs[2]), dim=3), dim=2)
296
- val = val + torch.mean(torch.mean(self.lin3.model(diffs[3]), dim=3), dim=2)
297
- val = val + torch.mean(torch.mean(self.lin4.model(diffs[4]), dim=3), dim=2)
298
- if self.pnet_type == "squeeze":
299
- val = val + torch.mean(torch.mean(self.lin5.model(diffs[5]), dim=3), dim=2)
300
- val = val + torch.mean(torch.mean(self.lin6.model(diffs[6]), dim=3), dim=2)
301
-
302
- val = val.view(val.size()[0], val.size()[1], 1, 1)
303
-
304
- if self.reduction == "sum":
305
- val = torch.sum(val)
306
-
307
- return val
kit/metrics/watson/loss_provider.py DELETED
@@ -1,180 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import os
4
- import warnings
5
- from .color_wrapper import ColorWrapper, GreyscaleWrapper
6
- from .shift_wrapper import ShiftWrapper
7
- from .watson import WatsonDistance
8
- from .watson_fft import WatsonDistanceFft
9
- from .watson_vgg import WatsonDistanceVgg
10
- from .deep_loss import PNetLin
11
- from .ssim import SSIM
12
-
13
-
14
- class LossProvider:
15
- def __init__(self):
16
- self.loss_functions = [
17
- "L1",
18
- "L2",
19
- "SSIM",
20
- "Watson-dct",
21
- "Watson-fft",
22
- "Watson-vgg",
23
- "Deeploss-vgg",
24
- "Deeploss-squeeze",
25
- "Adaptive",
26
- ]
27
- self.color_models = ["LA", "RGB"]
28
-
29
- def load_state_dict(self, filename):
30
- current_dir = os.path.dirname(__file__)
31
- path = os.path.join(current_dir, "weights", filename)
32
- return torch.load(path, map_location="cpu")
33
-
34
- def get_loss_function(
35
- self,
36
- model,
37
- colorspace="RGB",
38
- reduction="sum",
39
- deterministic=False,
40
- pretrained=True,
41
- image_size=None,
42
- ):
43
- """
44
- returns a trained loss class.
45
- model: one of the values returned by self.loss_functions
46
- colorspace: 'LA' or 'RGB'
47
- deterministic: bool, if false (default) uses shifting of image blocks for watson-fft
48
- image_size: tuple, size of input images. Only required for adaptive loss. Eg: [3, 64, 64]
49
- """
50
- warnings.filterwarnings("ignore")
51
- is_greyscale = colorspace in ["grey", "Grey", "LA", "greyscale", "grey-scale"]
52
-
53
- if model.lower() in ["l2"]:
54
- loss = nn.MSELoss(reduction=reduction)
55
- elif model.lower() in ["l1"]:
56
- loss = nn.L1Loss(reduction=reduction)
57
- elif model.lower() in ["ssim"]:
58
- loss = SSIM(size_average=(reduction in ["sum", "mean"]))
59
- elif model.lower() in ["watson", "watson-dct"]:
60
- if is_greyscale:
61
- if deterministic:
62
- loss = WatsonDistance(reduction=reduction)
63
- if pretrained:
64
- loss.load_state_dict(
65
- self.load_state_dict("gray_watson_dct_trial0.pth")
66
- )
67
- else:
68
- loss = ShiftWrapper(WatsonDistance, (), {"reduction": reduction})
69
- if pretrained:
70
- loss.loss.load_state_dict(
71
- self.load_state_dict("gray_watson_dct_trial0.pth")
72
- )
73
- else:
74
- if deterministic:
75
- loss = ColorWrapper(WatsonDistance, (), {"reduction": reduction})
76
- if pretrained:
77
- loss.load_state_dict(
78
- self.load_state_dict("rgb_watson_dct_trial0.pth")
79
- )
80
- else:
81
- loss = ShiftWrapper(
82
- ColorWrapper, (WatsonDistance, (), {"reduction": reduction}), {}
83
- )
84
- if pretrained:
85
- loss.loss.load_state_dict(
86
- self.load_state_dict("rgb_watson_dct_trial0.pth")
87
- )
88
- elif model.lower() in ["watson-fft", "watson-dft"]:
89
- if is_greyscale:
90
- if deterministic:
91
- loss = WatsonDistanceFft(reduction=reduction)
92
- if pretrained:
93
- loss.load_state_dict(
94
- self.load_state_dict("gray_watson_fft_trial0.pth")
95
- )
96
- else:
97
- loss = ShiftWrapper(WatsonDistanceFft, (), {"reduction": reduction})
98
- if pretrained:
99
- loss.loss.load_state_dict(
100
- self.load_state_dict("gray_watson_fft_trial0.pth")
101
- )
102
- else:
103
- if deterministic:
104
- loss = ColorWrapper(WatsonDistanceFft, (), {"reduction": reduction})
105
- if pretrained:
106
- loss.load_state_dict(
107
- self.load_state_dict("rgb_watson_fft_trial0.pth")
108
- )
109
- else:
110
- loss = ShiftWrapper(
111
- ColorWrapper,
112
- (WatsonDistanceFft, (), {"reduction": reduction}),
113
- {},
114
- )
115
- if pretrained:
116
- loss.loss.load_state_dict(
117
- self.load_state_dict("rgb_watson_fft_trial0.pth")
118
- )
119
- elif model.lower() in ["watson-vgg", "watson-deep"]:
120
- if is_greyscale:
121
- loss = GreyscaleWrapper(WatsonDistanceVgg, (), {"reduction": reduction})
122
- if pretrained:
123
- loss.loss.load_state_dict(
124
- self.load_state_dict("gray_watson_vgg_trial0.pth")
125
- )
126
- else:
127
- loss = WatsonDistanceVgg(reduction=reduction)
128
- if pretrained:
129
- loss.load_state_dict(
130
- self.load_state_dict("rgb_watson_vgg_trial0.pth")
131
- )
132
- elif model.lower() in ["deeploss-vgg"]:
133
- if is_greyscale:
134
- loss = GreyscaleWrapper(
135
- PNetLin,
136
- (),
137
- {"pnet_type": "vgg", "reduction": reduction, "use_dropout": False},
138
- )
139
- if pretrained:
140
- loss.loss.load_state_dict(
141
- self.load_state_dict("gray_pnet_lin_vgg_trial0.pth")
142
- )
143
- else:
144
- loss = PNetLin(pnet_type="vgg", reduction=reduction, use_dropout=False)
145
- if pretrained:
146
- loss.load_state_dict(
147
- self.load_state_dict("rgb_pnet_lin_vgg_trial0.pth")
148
- )
149
- elif model.lower() in ["deeploss-squeeze"]:
150
- if is_greyscale:
151
- loss = GreyscaleWrapper(
152
- PNetLin,
153
- (),
154
- {
155
- "pnet_type": "squeeze",
156
- "reduction": reduction,
157
- "use_dropout": False,
158
- },
159
- )
160
- if pretrained:
161
- loss.loss.load_state_dict(
162
- self.load_state_dict("gray_pnet_lin_squeeze_trial0.pth")
163
- )
164
- else:
165
- loss = PNetLin(
166
- pnet_type="squeeze", reduction=reduction, use_dropout=False
167
- )
168
- if pretrained:
169
- loss.load_state_dict(
170
- self.load_state_dict("rgb_pnet_lin_squeeze_trial0.pth")
171
- )
172
- else:
173
- raise Exception('Metric "{}" not implemented'.format(model))
174
-
175
- # freeze all training of the loss functions
176
- if pretrained:
177
- for param in loss.parameters():
178
- param.requires_grad = False
179
-
180
- return loss
kit/metrics/watson/rfft2d.py DELETED
@@ -1,87 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.fft as fft
4
- import torch.nn.functional as F
5
-
6
-
7
- class Rfft2d(nn.Module):
8
- """
9
- Blockwhise 2D FFT
10
- for fixed blocksize of 8x8
11
- """
12
-
13
- def __init__(self, blocksize=8, interleaving=False):
14
- """
15
- Parameters:
16
- """
17
- super().__init__() # call super constructor
18
-
19
- self.blocksize = blocksize
20
- self.interleaving = interleaving
21
- if interleaving:
22
- self.stride = self.blocksize // 2
23
- else:
24
- self.stride = self.blocksize
25
-
26
- self.unfold = torch.nn.Unfold(
27
- kernel_size=self.blocksize, padding=0, stride=self.stride
28
- )
29
- return
30
-
31
- def forward(self, x):
32
- """
33
- performs 2D blockwhise DCT
34
-
35
- Parameters:
36
- x: tensor of dimension (N, 1, h, w)
37
-
38
- Return:
39
- tensor of dimension (N, k, b, b/2, 2)
40
- where the 2nd dimension indexes the block. Dimensions 3 and 4 are the block real FFT coefficients.
41
- The last dimension is pytorches representation of complex values
42
- """
43
-
44
- (N, C, H, W) = x.shape
45
- assert C == 1, "FFT is only implemented for a single channel"
46
- assert H >= self.blocksize, "Input too small for blocksize"
47
- assert W >= self.blocksize, "Input too small for blocksize"
48
- assert (H % self.stride == 0) and (
49
- W % self.stride == 0
50
- ), "FFT is only for dimensions divisible by the blocksize"
51
-
52
- # unfold to blocks
53
- x = self.unfold(x)
54
- # now shape (N, 64, k)
55
- (N, _, k) = x.shape
56
- x = x.view(-1, self.blocksize, self.blocksize, k).permute(0, 3, 1, 2)
57
- # now shape (N, #k, b, b)
58
- # perform DCT
59
- coeff = fft.rfft(x)
60
- coeff = torch.view_as_real(coeff)
61
-
62
- return coeff / self.blocksize**2
63
-
64
- def inverse(self, coeff, output_shape):
65
- """
66
- performs 2D blockwhise inverse rFFT
67
-
68
- Parameters:
69
- output_shape: Tuple, dimensions of the outpus sample
70
- """
71
- if self.interleaving:
72
- raise Exception(
73
- "Inverse block FFT is not implemented for interleaving blocks!"
74
- )
75
-
76
- # perform iRFFT
77
- x = fft.irfft(coeff, dim=2, signal_sizes=(self.blocksize, self.blocksize))
78
- (N, k, _, _) = x.shape
79
- x = x.permute(0, 2, 3, 1).view(-1, self.blocksize**2, k)
80
- x = F.fold(
81
- x,
82
- output_size=(output_shape[-2], output_shape[-1]),
83
- kernel_size=self.blocksize,
84
- padding=0,
85
- stride=self.blocksize,
86
- )
87
- return x * (self.blocksize**2)
kit/metrics/watson/shift_wrapper.py DELETED
@@ -1,51 +0,0 @@
-import torch.nn as nn
-import numpy as np
-
-
-class ShiftWrapper(nn.Module):
-    """
-    Extension for 2-dimensional inout loss functions.
-    Shifts the inputs by up to 4 pixels. Uses replication padding.
-    """
-
-    def __init__(self, lossclass, args, kwargs):
-        """
-        Parameters:
-        lossclass: class of the individual loss functions
-        trainable: bool, if True parameters of the loss are trained.
-        args: tuple, arguments for instantiation of loss fun
-        kwargs: dict, key word arguments for instantiation of loss fun
-        """
-        super().__init__()
-
-        # submodules
-        self.add_module("loss", lossclass(*args, **kwargs))
-
-        # shift amount
-        self.max_shift = 8
-
-        # padding
-        self.pad = nn.ReplicationPad2d(self.max_shift // 2)
-
-    def forward(self, input, target):
-        # convert color space
-        input = self.pad(input)
-        target = self.pad(target)
-
-        shift_x = np.random.randint(self.max_shift)
-        shift_y = np.random.randint(self.max_shift)
-
-        input = input[
-            :,
-            :,
-            shift_x : -(self.max_shift - shift_x),
-            shift_y : -(self.max_shift - shift_y),
-        ]
-        target = target[
-            :,
-            :,
-            shift_x : -(self.max_shift - shift_x),
-            shift_y : -(self.max_shift - shift_y),
-        ]
-
-        return self.loss(input, target)
kit/metrics/watson/ssim.py DELETED
@@ -1,95 +0,0 @@
1
- # SSIM implementation from https://github.com/Po-Hsun-Su/pytorch-ssim/blob/master/pytorch_ssim/__init__.py
2
- import torch
3
- import torch.nn.functional as F
4
- from torch.autograd import Variable
5
- from math import exp
6
-
7
-
8
- def gaussian(window_size, sigma):
9
- gauss = torch.Tensor(
10
- [
11
- exp(-((x - window_size // 2) ** 2) / float(2 * sigma**2))
12
- for x in range(window_size)
13
- ]
14
- )
15
- return gauss / gauss.sum()
16
-
17
-
18
- def create_window(window_size, channel):
19
- _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
20
- _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
21
- window = Variable(
22
- _2D_window.expand(channel, 1, window_size, window_size).contiguous()
23
- )
24
- return window
25
-
26
-
27
- def _ssim(img1, img2, window, window_size, channel, size_average=True):
28
- mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
29
- mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
30
-
31
- mu1_sq = mu1.pow(2)
32
- mu2_sq = mu2.pow(2)
33
- mu1_mu2 = mu1 * mu2
34
-
35
- sigma1_sq = (
36
- F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
37
- )
38
- sigma2_sq = (
39
- F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
40
- )
41
- sigma12 = (
42
- F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel)
43
- - mu1_mu2
44
- )
45
-
46
- C1 = 0.01**2
47
- C2 = 0.03**2
48
-
49
- ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / (
50
- (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)
51
- )
52
-
53
- if size_average:
54
- return ssim_map.mean()
55
- else:
56
- return ssim_map.mean(1).mean(1).mean(1)
57
-
58
-
59
- class SSIM(torch.nn.Module):
60
- def __init__(self, window_size=11, size_average=True):
61
- super(SSIM, self).__init__()
62
- self.window_size = window_size
63
- self.size_average = size_average
64
- self.channel = 1
65
- self.window = create_window(window_size, self.channel)
66
-
67
- def forward(self, img1, img2):
68
- (_, channel, _, _) = img1.size()
69
-
70
- if channel == self.channel and self.window.data.type() == img1.data.type():
71
- window = self.window
72
- else:
73
- window = create_window(self.window_size, channel)
74
-
75
- if img1.is_cuda:
76
- window = window.cuda(img1.get_device())
77
- window = window.type_as(img1)
78
-
79
- self.window = window
80
- self.channel = channel
81
-
82
- return 1 - _ssim(
83
- img1, img2, window, self.window_size, channel, self.size_average
84
- )
85
-
86
-
87
- def ssim(img1, img2, window_size=11, size_average=True):
88
- (_, channel, _, _) = img1.size()
89
- window = create_window(window_size, channel)
90
-
91
- if img1.is_cuda:
92
- window = window.cuda(img1.get_device())
93
- window = window.type_as(img1)
94
-
95
- return _ssim(img1, img2, window, window_size, channel, size_average)
kit/metrics/watson/watson.py DELETED
@@ -1,123 +0,0 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.nn.functional as F
4
- from .dct2d import Dct2d
5
-
6
- EPS = 1e-10
7
-
8
-
9
- def softmax(a, b, factor=1):
10
- concat = torch.cat([a.unsqueeze(-1), b.unsqueeze(-1)], dim=-1)
11
- softmax_factors = F.softmax(concat * factor, dim=-1)
12
- return a * softmax_factors[:, :, :, :, 0] + b * softmax_factors[:, :, :, :, 1]
13
-
14
-
15
- class WatsonDistance(nn.Module):
16
- """
17
- Loss function based on Watsons perceptual distance.
18
- Based on DCT quantization
19
- """
20
-
21
- def __init__(self, blocksize=8, trainable=False, reduction="sum"):
22
- """
23
- Parameters:
24
- blocksize: int, size of the Blocks for discrete cosine transform
25
- trainable: bool, if True parameters of the loss are trained and dropout is enabled.
26
- reduction: 'sum' or 'none', determines return format
27
- """
28
- super().__init__()
29
-
30
- # input mapping
31
- blocksize = torch.as_tensor(blocksize)
32
-
33
- # module to perform 2D blockwise DCT
34
- self.add_module("dct", Dct2d(blocksize=blocksize.item(), interleaving=False))
35
-
36
- # parameters, initialized with values from watson paper
37
- self.blocksize = nn.Parameter(blocksize, requires_grad=False)
38
- if self.blocksize == 8:
39
- # init with Jpeg QM
40
- self.t_tild = nn.Parameter(
41
- torch.log(
42
- torch.tensor( # log-scaled weights
43
- [
44
- [1.40, 1.01, 1.16, 1.66, 2.40, 3.43, 4.79, 6.56],
45
- [1.01, 1.45, 1.32, 1.52, 2.00, 2.71, 3.67, 4.93],
46
- [1.16, 1.32, 2.24, 2.59, 2.98, 3.64, 4.60, 5.88],
47
- [1.66, 1.52, 2.59, 3.77, 4.55, 5.30, 6.28, 7.60],
48
- [2.40, 2.00, 2.98, 4.55, 6.15, 7.46, 8.71, 10.17],
49
- [3.43, 2.71, 3.64, 5.30, 7.46, 9.62, 11.58, 13.51],
50
- [4.79, 3.67, 4.60, 6.28, 8.71, 11.58, 14.50, 17.29],
51
- [6.56, 4.93, 5.88, 7.60, 10.17, 13.51, 17.29, 21.15],
52
- ]
53
- )
54
- ),
55
- requires_grad=trainable,
56
- )
57
- else:
58
- # init with uniform QM
59
- self.t_tild = nn.Parameter(
60
- torch.zeros((self.blocksize, self.blocksize)), requires_grad=trainable
61
- )
62
-
63
- # other default parameters
64
- self.alpha = nn.Parameter(
65
- torch.tensor(0.649), requires_grad=trainable
66
- ) # luminance masking
67
- w = torch.tensor(0.7) # contrast masking
68
- self.w_tild = nn.Parameter(
69
- torch.log(w / (1 - w)), requires_grad=trainable
70
- ) # inverse of sigmoid
71
- self.beta = nn.Parameter(torch.tensor(4.0), requires_grad=trainable) # pooling
72
-
73
- # dropout for training
74
- self.dropout = nn.Dropout(0.5 if trainable else 0)
75
-
76
- # reduction
77
- self.reduction = reduction
78
- if reduction not in ["sum", "none"]:
79
- raise Exception(
80
- 'Reduction "{}" not supported. Valid values are: "sum", "none".'.format(
81
- reduction
82
- )
83
- )
84
-
85
- @property
86
- def t(self):
87
- # returns QM
88
- qm = torch.exp(self.t_tild)
89
- return qm
90
-
91
- @property
92
- def w(self):
93
- # return luminance masking parameter
94
- return torch.sigmoid(self.w_tild)
95
-
96
- def forward(self, input, target):
97
- # dct
98
- c0 = self.dct(target)
99
- c1 = self.dct(input)
100
-
101
- N, K, B, B = c0.shape
102
-
103
- # luminance masking
104
- avg_lum = torch.mean(c0[:, :, 0, 0])
105
- t_l = self.t.view(1, 1, B, B).expand(N, K, B, B)
106
- t_l = t_l * (((c0[:, :, 0, 0] + EPS) / (avg_lum + EPS)) ** self.alpha).view(
107
- N, K, 1, 1
108
- )
109
-
110
- # contrast masking
111
- s = softmax(t_l, (c0.abs() + EPS) ** self.w * t_l ** (1 - self.w))
112
-
113
- # pooling
114
- watson_dist = (((c0 - c1) / s).abs() + EPS) ** self.beta
115
- watson_dist = self.dropout(watson_dist) + EPS
116
- watson_dist = torch.sum(watson_dist, dim=(1, 2, 3))
117
- watson_dist = watson_dist ** (1 / self.beta)
118
-
119
- # reduction
120
- if self.reduction == "sum":
121
- watson_dist = torch.sum(watson_dist)
122
-
123
- return watson_dist
kit/metrics/watson/watson_fft.py DELETED
@@ -1,139 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from .rfft2d import Rfft2d
-
-EPS = 1e-10
-
-
-def softmax(a, b, factor=1):
-    concat = torch.cat([a.unsqueeze(-1), b.unsqueeze(-1)], dim=-1)
-    softmax_factors = F.softmax(concat * factor, dim=-1)
-    return a * softmax_factors[:, :, :, :, 0] + b * softmax_factors[:, :, :, :, 1]
-
-
-class WatsonDistanceFft(nn.Module):
-    """
-    Loss function based on Watsons perceptual distance.
-    Based on FFT quantization
-    """
-
-    def __init__(self, blocksize=8, trainable=False, reduction="sum"):
-        """
-        Parameters:
-        blocksize: int, size of the Blocks for discrete cosine transform
-        trainable: bool, if True parameters of the loss are trained and dropout is enabled.
-        reduction: 'sum' or 'none', determines return format
-        """
-        super().__init__()
-        self.trainable = trainable
-
-        # input mapping
-        blocksize = torch.as_tensor(blocksize)
-
-        # module to perform 2D blockwise rFFT
-        self.add_module("fft", Rfft2d(blocksize=blocksize.item(), interleaving=False))
-
-        # parameters
-        self.weight_size = (blocksize, blocksize // 2 + 1)
-        self.blocksize = nn.Parameter(blocksize, requires_grad=False)
-        # init with uniform QM
-        self.t_tild = nn.Parameter(
-            torch.zeros(self.weight_size), requires_grad=trainable
-        )
-        self.alpha = nn.Parameter(
-            torch.tensor(0.1), requires_grad=trainable
-        )  # luminance masking
-        w = torch.tensor(0.2)  # contrast masking
-        self.w_tild = nn.Parameter(
-            torch.log(w / (1 - w)), requires_grad=trainable
-        )  # inverse of sigmoid
-        self.beta = nn.Parameter(torch.tensor(1.0), requires_grad=trainable)  # pooling
-
-        # phase weights
-        self.w_phase_tild = nn.Parameter(
-            torch.zeros(self.weight_size) - 2.0, requires_grad=trainable
-        )
-
-        # dropout for training
-        self.dropout = nn.Dropout(0.5 if trainable else 0)
-
-        # reduction
-        self.reduction = reduction
-        if reduction not in ["sum", "none"]:
-            raise Exception(
-                'Reduction "{}" not supported. Valid values are: "sum", "none".'.format(
-                    reduction
-                )
-            )
-
-    @property
-    def t(self):
-        # returns QM
-        qm = torch.exp(self.t_tild)
-        return qm
-
-    @property
-    def w(self):
-        # return luminance masking parameter
-        return torch.sigmoid(self.w_tild)
-
-    @property
-    def w_phase(self):
-        # return weights for phase
-        w_phase = torch.exp(self.w_phase_tild)
-        # set weights of non-phases to 0
-        if not self.trainable:
-            w_phase[0, 0] = 0.0
-            w_phase[0, self.weight_size[1] - 1] = 0.0
-            w_phase[self.weight_size[1] - 1, self.weight_size[1] - 1] = 0.0
-            w_phase[self.weight_size[1] - 1, 0] = 0.0
-        return w_phase
-
-    def forward(self, input, target):
-        # fft
-        c0 = self.fft(target)
-        c1 = self.fft(input)
-
-        N, K, H, W, _ = c0.shape
-
-        # get amplitudes
-        c0_amp = torch.norm(c0 + EPS, p="fro", dim=4)
-        c1_amp = torch.norm(c1 + EPS, p="fro", dim=4)
-
-        # luminance masking
-        avg_lum = torch.mean(c0_amp[:, :, 0, 0])
-        t_l = self.t.view(1, 1, H, W).expand(N, K, H, W)
-        t_l = t_l * (((c0_amp[:, :, 0, 0] + EPS) / (avg_lum + EPS)) ** self.alpha).view(
-            N, K, 1, 1
-        )
-
-        # contrast masking
-        s = softmax(t_l, (c0_amp.abs() + EPS) ** self.w * t_l ** (1 - self.w))
-
-        # pooling
-        watson_dist = (((c0_amp - c1_amp) / s).abs() + EPS) ** self.beta
-        watson_dist = self.dropout(watson_dist) + EPS
-        watson_dist = torch.sum(watson_dist, dim=(1, 2, 3))
-        watson_dist = watson_dist ** (1 / self.beta)
-
-        # get phases
-        c0_phase = torch.atan2(c0[:, :, :, :, 1], c0[:, :, :, :, 0] + EPS)
-        c1_phase = torch.atan2(c1[:, :, :, :, 1], c1[:, :, :, :, 0] + EPS)
-
-        # angular distance
-        phase_dist = (
-            torch.acos(torch.cos(c0_phase - c1_phase) * (1 - EPS * 10**3))
-            * self.w_phase
-        )  # we multiply with a factor ->1 to prevent taking the gradient of acos(-1) or acos(1). The gradient in this case would be -/+ inf
-        phase_dist = self.dropout(phase_dist)
-        phase_dist = torch.sum(phase_dist, dim=(1, 2, 3))
-
-        # perceptual distance
-        distance = watson_dist + phase_dist
-
-        # reduce
-        if self.reduction == "sum":
-            distance = torch.sum(distance)
-
-        return distance
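For reference, the removed WatsonDistanceFft was a drop-in nn.Module loss. The sketch below is illustrative only: it assumes the pre-commit import path (kit/metrics/watson/watson_fft.py with its Rfft2d dependency) and single-channel inputs in [0, 1]; neither convention is documented in the file itself.

# Minimal usage sketch (assumptions: pre-commit module path is importable,
# greyscale images in [0, 1]).
import torch
from kit.metrics.watson.watson_fft import WatsonDistanceFft

loss_fn = WatsonDistanceFft(blocksize=8, trainable=False, reduction="sum")

original = torch.rand(4, 1, 256, 256)                                 # batch of greyscale images
attacked = (original + 0.02 * torch.randn_like(original)).clamp(0, 1)  # perturbed copies

with torch.no_grad():
    dist = loss_fn(attacked, original)   # scalar, since reduction="sum"
print(float(dist))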
kit/metrics/watson/watson_vgg.py DELETED
@@ -1,202 +0,0 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torchvision
-
-EPS = 1e-10
-
-
-class VggFeatureExtractor(nn.Module):
-    def __init__(self):
-        super(VggFeatureExtractor, self).__init__()
-
-        # download vgg
-        vgg16 = torchvision.models.vgg16(pretrained=True).features
-
-        # set non trainable
-        for param in vgg16.parameters():
-            param.requires_grad = False
-
-        # slice model
-        self.slice1 = torch.nn.Sequential()
-        self.slice2 = torch.nn.Sequential()
-        self.slice3 = torch.nn.Sequential()
-        self.slice4 = torch.nn.Sequential()
-        self.slice5 = torch.nn.Sequential()
-
-        for x in range(4):  # conv relu conv relu
-            self.slice1.add_module(str(x), vgg16[x])
-        for x in range(4, 9):  # max conv relu conv relu
-            self.slice2.add_module(str(x), vgg16[x])
-        for x in range(9, 16):  # max cov relu conv relu conv relu
-            self.slice3.add_module(str(x), vgg16[x])
-        for x in range(16, 23):  # conv relu max conv relu conv relu
-            self.slice4.add_module(str(x), vgg16[x])
-        for x in range(23, 30):  # conv relu conv relu max conv relu
-            self.slice5.add_module(str(x), vgg16[x])
-
-    def forward(self, X):
-        h = self.slice1(X)
-        h_relu1_2 = h
-        h = self.slice2(h)
-        h_relu2_2 = h
-        h = self.slice3(h)
-        h_relu3_3 = h
-        h = self.slice4(h)
-        h_relu4_3 = h
-        h = self.slice5(h)
-        h_relu5_3 = h
-
-        return [h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3]
-
-
-def normalize_tensor(t):
-    # norms a tensor over the channel dimension to an euclidean length of 1.
-    N, C, H, W = t.shape
-    norm_factor = torch.sqrt(torch.sum(t**2, dim=1)).view(N, 1, H, W)
-    return t / (norm_factor.expand_as(t) + EPS)
-
-
-def softmax(a, b, factor=1):
-    concat = torch.cat([a.unsqueeze(-1), b.unsqueeze(-1)], dim=-1)
-    softmax_factors = F.softmax(concat * factor, dim=-1)
-    return a * softmax_factors[:, :, :, :, 0] + b * softmax_factors[:, :, :, :, 1]
-
-
-class WatsonDistanceVgg(nn.Module):
-    """
-    Loss function based on Watsons perceptual distance.
-    Based on deep feature extraction
-    """
-
-    def __init__(self, trainable=False, reduction="sum"):
-        """
-        Parameters:
-        trainable: bool, if True parameters of the loss are trained and dropout is enabled.
-        reduction: 'sum' or 'none', determines return format
-        """
-        super().__init__()
-
-        # module to perform feature extraction
-        self.add_module("vgg", VggFeatureExtractor())
-
-        # imagenet-normalization
-        self.shift = nn.Parameter(
-            torch.Tensor([-0.030, -0.088, -0.188]).view(1, 3, 1, 1), requires_grad=False
-        )
-        self.scale = nn.Parameter(
-            torch.Tensor([0.458, 0.448, 0.450]).view(1, 3, 1, 1), requires_grad=False
-        )
-
-        # channel dimensions
-        self.L = 5
-        self.channels = [64, 128, 256, 512, 512]
-
-        # sensitivity parameters
-        self.t0_tild = nn.Parameter(
-            torch.zeros((self.channels[0])), requires_grad=trainable
-        )
-        self.t1_tild = nn.Parameter(
-            torch.zeros((self.channels[1])), requires_grad=trainable
-        )
-        self.t2_tild = nn.Parameter(
-            torch.zeros((self.channels[2])), requires_grad=trainable
-        )
-        self.t3_tild = nn.Parameter(
-            torch.zeros((self.channels[3])), requires_grad=trainable
-        )
-        self.t4_tild = nn.Parameter(
-            torch.zeros((self.channels[4])), requires_grad=trainable
-        )
-
-        # other default parameters
-        w = torch.tensor(0.2)  # contrast masking
-        self.w0_tild = nn.Parameter(
-            torch.log(w / (1 - w)), requires_grad=trainable
-        )  # inverse of sigmoid
-        self.w1_tild = nn.Parameter(torch.log(w / (1 - w)), requires_grad=trainable)
-        self.w2_tild = nn.Parameter(torch.log(w / (1 - w)), requires_grad=trainable)
-        self.w3_tild = nn.Parameter(torch.log(w / (1 - w)), requires_grad=trainable)
-        self.w4_tild = nn.Parameter(torch.log(w / (1 - w)), requires_grad=trainable)
-        self.beta = nn.Parameter(torch.tensor(1.0), requires_grad=trainable)  # pooling
-
-        # dropout for training
-        self.dropout = nn.Dropout(0.5 if trainable else 0)
-
-        # reduction
-        self.reduction = reduction
-        if reduction not in ["sum", "none"]:
-            raise Exception(
-                'Reduction "{}" not supported. Valid values are: "sum", "none".'.format(
-                    reduction
-                )
-            )
-
-    @property
-    def t(self):
-        return [
-            torch.exp(t)
-            for t in [
-                self.t0_tild,
-                self.t1_tild,
-                self.t2_tild,
-                self.t3_tild,
-                self.t4_tild,
-            ]
-        ]
-
-    @property
-    def w(self):
-        # return luminance masking parameter
-        return [
-            torch.sigmoid(w)
-            for w in [
-                self.w0_tild,
-                self.w1_tild,
-                self.w2_tild,
-                self.w3_tild,
-                self.w4_tild,
-            ]
-        ]
-
-    def forward(self, input, target):
-        # normalization
-        input = (input - self.shift.expand_as(input)) / self.scale.expand_as(input)
-        target = (target - self.shift.expand_as(target)) / self.scale.expand_as(target)
-
-        # feature extraction
-        c0 = self.vgg(target)
-        c1 = self.vgg(input)
-
-        # norm over channels
-        for l in range(self.L):
-            c0[l] = normalize_tensor(c0[l])
-            c1[l] = normalize_tensor(c1[l])
-
-        # contrast masking
-        t = self.t
-        w = self.w
-        s = []
-        for l in range(self.L):
-            N, C_l, H_l, W_l = c0[l].shape
-            t_l = t[l].view(1, C_l, 1, 1).expand(N, C_l, H_l, W_l)
-            s.append(softmax(t_l, (c0[l].abs() + EPS) ** w[l] * t_l ** (1 - w[l])))
-
-        # pooling
-        watson_dist = 0
-        for l in range(self.L):
-            _, _, H_l, W_l = c0[l].shape
-            layer_dist = (((c0[l] - c1[l]) / s[l]).abs() + EPS) ** self.beta
-            layer_dist = self.dropout(layer_dist) + EPS
-            layer_dist = torch.sum(
-                layer_dist, dim=(1, 2, 3)
-            )  # sum over dimensions of layer
-            layer_dist = (1 / (H_l * W_l)) * layer_dist  # normalize by layer size
-            watson_dist += layer_dist  # sum over layers
-        watson_dist = watson_dist ** (1 / self.beta)
-
-        # reduction
-        if self.reduction == "sum":
-            watson_dist = torch.sum(watson_dist)
-
-        return watson_dist
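Likewise, the removed WatsonDistanceVgg wraps a frozen VGG-16 feature extractor. The sketch below is a rough illustration under two assumptions that the file does not state: the pre-commit import path, and 3-channel inputs scaled to [-1, 1] (the shift/scale constants above follow the LPIPS normalization convention).

# Minimal usage sketch (assumptions: pre-commit module path, RGB inputs in [-1, 1]).
import torch
from kit.metrics.watson.watson_vgg import WatsonDistanceVgg

loss_fn = WatsonDistanceVgg(trainable=False, reduction="sum")  # downloads VGG-16 weights on first use

img_a = torch.rand(2, 3, 256, 256) * 2 - 1                     # reference images
img_b = (img_a + 0.05 * torch.randn_like(img_a)).clamp(-1, 1)  # perturbed copies

with torch.no_grad():
    dist = loss_fn(img_b, img_a)   # scalar, since reduction="sum"
print(float(dist))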
kit/models/stable_signature.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b58841ab09f23e89acf5aedade09c7f65908ae33437c5242ad987d99b5cd2c1
+size 1228161
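The three added lines are a Git LFS pointer, not the model itself; the actual ~1.2 MB stable_signature.onnx is fetched by `git lfs pull`. A quick sanity check that the pointer resolved to real weights is sketched below; input names and shapes are read from the model rather than assumed, and onnxruntime is an assumed dependency.

# Sketch only: confirm the LFS-tracked ONNX file loads (run `git lfs pull` first).
import onnxruntime as ort

sess = ort.InferenceSession("kit/models/stable_signature.onnx")
for inp in sess.get_inputs():
    print(inp.name, inp.shape)   # inspect expected inputs before running inference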
requirements.txt CHANGED
@@ -5,6 +5,7 @@ torchvision
 transformers
 open_clip_torch
 numpy
+scipy
 Pillow
 redis
 plotly