mterris committed on
Commit ed95f9b · 1 Parent(s): 9037f29
Files changed (5)
  1. model_factory.py +6 -136
  2. models/blocks.py +0 -924
  3. models/heads.py +0 -270
  4. models/ram.py +854 -0
  5. models/unext_wip.py +0 -1238
model_factory.py CHANGED
@@ -1,103 +1,7 @@
1
  import torch
2
- import torch.nn as nn
3
- import deepinv as dinv
4
 
5
- from models.unext_wip import UNeXt
6
- from physics.multiscale import Pad
7
-
8
-
9
- class ArtifactRemoval(nn.Module):
10
- r"""
11
- Artifact removal architecture :math:`\phi(A^{\top}y)`.
12
-
13
- This differs from the dinv.models.ArtifactRemoval in that it allows to forward the physics.
14
-
15
- In the end we should not use this for unext !!
16
- """
17
-
18
- def __init__(self, backbone_net, pinv=False, ckpt_path=None, device=None, fm_mode=False):
19
- super(ArtifactRemoval, self).__init__()
20
- self.pinv = pinv
21
- self.backbone_net = backbone_net
22
- self.fm_mode = fm_mode
23
-
24
- if ckpt_path is not None:
25
- self.backbone_net.load_state_dict(torch.load(ckpt_path), strict=True)
26
- self.backbone_net.eval()
27
-
28
- if type(self.backbone_net).__name__ == "UNetRes":
29
- for _, v in self.backbone_net.named_parameters():
30
- v.requires_grad = False
31
- self.backbone_net = self.backbone_net.to(device)
32
-
33
-
34
- def forward_basic(self, y=None, physics=None, x_in=None, t=None, **kwargs):
35
- r"""
36
- Reconstructs a signal estimate from measurements y
37
-
38
- :param torch.tensor y: measurements
39
- :param deepinv.physics.Physics physics: forward operator
40
- """
41
- if physics is None:
42
- physics = dinv.physics.Denoising(noise_model=dinv.physics.GaussianNoise(sigma=0.), device=y.device)
43
-
44
- if not self.training:
45
- x_temp = physics.A_adjoint(y)
46
- pad = (-x_temp.size(-2) % 8, -x_temp.size(-1) % 8)
47
- physics = Pad(physics, pad)
48
-
49
- x_in = physics.A_adjoint(y) if not self.pinv else physics.A_dagger(y)
50
-
51
- if hasattr(physics.noise_model, "sigma"):
52
- sigma = physics.noise_model.sigma
53
- else:
54
- sigma = 1e-5 # WARNING: this is a default value that we may not want to use?
55
-
56
- if hasattr(physics.noise_model, "gain"):
57
- gamma = physics.noise_model.gain
58
- else:
59
- gamma = 1e-5 # WARNING: this is a default value that we may not want to use?
60
-
61
- out = self.backbone_net(x_in, physics=physics, y=y, sigma=sigma, gamma=gamma, t=t)
62
-
63
- if not self.training:
64
- out = physics.remove_pad(out)
65
-
66
- return out
67
-
68
- def forward(self, y=None, physics=None, x_in=None, **kwargs):
69
- if 'unext' in type(self.backbone_net).__name__.lower():
70
- return self.forward_basic(physics=physics, y=y, x_in=x_in, **kwargs)
71
- else:
72
- return self.backbone_net(physics=physics, y=y, **kwargs)
73
-
74
-
75
- def get_model(
76
- model_name="unext_emb_physics_config_C",
77
- device="cpu",
78
- in_channels=[1, 2, 3],
79
- grayscale=False,
80
- conv_type="base",
81
- pool_type="base",
82
- layer_scale_init_value=1e-6,
83
- init_type="ortho",
84
- gain_init_conv=1.0,
85
- gain_init_linear=1.0,
86
- drop_prob=0.0,
87
- replk=False,
88
- mult_fact=4,
89
- antialias="gaussian",
90
- nc_base=64,
91
- cond_type="base",
92
- blind=False,
93
- pretrained_pth=None,
94
- weight_tied=True,
95
- N=4,
96
- c_mult=1,
97
- depth_encoding=1,
98
- relu_in_encoding=False,
99
- skip_in_encoding=True,
100
- ):
101
  """
102
  Load the model.
103
 
@@ -107,41 +11,7 @@ def get_model(
107
  :param bool train: if True, the model is trained
108
  :return: model
109
  """
110
- model_name = model_name.lower()
111
-
112
- if model_name == "unext_emb_physics_config_c":
113
- n_chan = [1, 2, 3] # 6 for old head grayscale, complex and color = 1 + 2 + 3
114
- residual = True if "residual" in model_name else False
115
- nc = [nc_base * 2**i for i in range(4)]
116
-
117
-
118
- model = UNeXt(
119
- in_channels=in_channels,
120
- out_channels=in_channels,
121
- device=device,
122
- residual=residual,
123
- conv_type=conv_type,
124
- pool_type=pool_type,
125
- layer_scale_init_value=layer_scale_init_value,
126
- init_type=init_type,
127
- gain_init_conv=gain_init_conv,
128
- gain_init_linear=gain_init_linear,
129
- drop_prob=drop_prob,
130
- replk=replk,
131
- mult_fact=mult_fact,
132
- antialias=antialias,
133
- nc=nc,
134
- cond_type=cond_type,
135
- emb_physics=True,
136
- config="C",
137
- pretrained_pth=pretrained_pth,
138
- N=N,
139
- c_mult=c_mult,
140
- depth_encoding=depth_encoding,
141
- relu_in_encoding=relu_in_encoding,
142
- skip_in_encoding=skip_in_encoding,
143
- ).to(device)
144
- return ArtifactRemoval(model, pinv=False, device=device)
145
-
146
- else:
147
- raise ValueError(f"Model {model_name} is not supported.")
 
1
  import torch
2
+ from models.ram import RAM
 
3
 
4
+ def get_model():
5
  """
6
  Load the model.
7
 
 
11
  :param bool train: if True, the model is trained
12
  :return: model
13
  """
14
+ model = RAM()
15
+ state_dict = torch.load('ckpt/ram.pth.tar')
16
+ model.load_state_dict(state_dict)
17
+ return model
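
A minimal usage sketch for the new factory (a sketch under assumptions: the
checkpoint exists at ckpt/ram.pth.tar as hard-coded above, and the toy image
and noise level are made up for illustration):

    import torch
    import deepinv as dinv
    from model_factory import get_model

    model = get_model().eval()

    x = torch.rand(1, 3, 64, 64)  # toy RGB image in [0, 1]
    physics = dinv.physics.Denoising(noise_model=dinv.physics.GaussianNoise(sigma=0.1))
    y = physics(x)  # simulated noisy measurement

    with torch.no_grad():
        x_hat = model(y=y, physics=physics)  # matches RAM.forward(y, physics) below
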
models/blocks.py DELETED
@@ -1,924 +0,0 @@
1
- import math
2
-
3
- import numpy as np
4
- import torch
5
- import torch.nn as nn
6
- import torch.nn.functional as F
7
- from deepinv.models.unet import BFBatchNorm2d
8
- from deepinv.physics.blur import gaussian_blur
9
- from deepinv.physics.functional import conv2d
10
- from deepinv.utils import TensorList
11
-
12
- from timm.models.layers import trunc_normal_, DropPath
13
-
14
-
15
- def normalize(x, dim=None, eps=1e-4):
16
- if dim is None:
17
- dim = list(range(1, x.ndim))
18
- norm = torch.linalg.vector_norm(x, dim=dim, keepdim=True, dtype=torch.float32)
19
- norm = torch.add(eps, norm, alpha=np.sqrt(norm.numel() / x.numel()))
20
- return x / norm.to(x.dtype)
21
-
22
-
23
- class TimestepEmbedding(nn.Module):
24
- def __init__(self, hidden_size, frequency_embedding_size=256):
25
- super().__init__()
26
- self.mlp = nn.Sequential(
27
- nn.Linear(frequency_embedding_size, hidden_size),
28
- nn.SiLU(),
29
- nn.Linear(hidden_size, hidden_size),
30
- )
31
- self.frequency_embedding_size = frequency_embedding_size
32
-
33
- @staticmethod
34
- def timestep_embedding(t, dim, max_period=10000):
35
- half = dim // 2
36
- freqs = torch.exp(
37
- -math.log(max_period) * torch.arange(start=0, end=half) / half
38
- ).to(t.device)
39
- args = t[:, None] * freqs[None]
40
- embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1)
41
- if dim % 2:
42
- embedding = torch.cat(
43
- [embedding, torch.zeros_like(embedding[:, :1])], dim=-1
44
- )
45
- return embedding
46
-
47
- def forward(self, t):
48
- t_freq = self.timestep_embedding(t, self.frequency_embedding_size).to(
49
- dtype=next(self.parameters()).dtype
50
- )
51
- t_emb = self.mlp(t_freq)
52
- return t_emb
53
-
54
-
55
- class MPConv(torch.nn.Module):
56
- def __init__(self, in_channels, out_channels, kernel):
57
- super().__init__()
58
- self.out_channels = out_channels
59
- self.weight = torch.nn.Parameter(torch.randn(out_channels, in_channels, *kernel))
60
-
61
- def forward(self, x, gain=1):
62
- w = self.weight.to(torch.float32)
63
- if self.training:
64
- with torch.no_grad():
65
- self.weight.copy_(normalize(w)) # forced weight normalization
66
- w = normalize(w) # traditional weight normalization
67
- w = w * (gain / np.sqrt(w[0].numel())) # magnitude-preserving scaling
68
- w = w.to(x.dtype)
69
- if w.ndim == 2:
70
- return x @ w.t()
71
- assert w.ndim == 4
72
- return F.conv2d(x, w, padding=(w.shape[-1] // 2,))
73
-
74
-
75
- # --------------------------------------------------------------------------------------
76
- def mp_silu(x):
77
- return torch.nn.functional.silu(x) / 0.596
78
-
79
-
80
- class MPFourier(torch.nn.Module):
81
- def __init__(self, num_channels, bandwidth=1, device="cpu"):
82
- super().__init__()
83
- self.register_buffer(
84
- "freqs", 2 * np.pi * torch.rand(num_channels, device=device) * bandwidth
85
- )
86
- self.register_buffer(
87
- "phases", 2 * np.pi * torch.rand(num_channels, device=device)
88
- )
89
-
90
- def forward(self, x):
91
- y = x.to(torch.float32)
92
- y = y.ger(self.freqs.to(torch.float32))
93
- y = y + self.phases.to(torch.float32)
94
- y = y.cos() * np.sqrt(2)
95
- return y.to(x.dtype)
96
-
97
-
98
- class NoiseEmbedding(torch.nn.Module):
99
- def __init__(self, num_channels=1, emb_channels=512, device="cpu", biasfree=True):
100
- super().__init__()
101
- self.emb_fourier = MPFourier(num_channels, device=device)
102
- self.emb_noise = MPConv(num_channels, emb_channels, kernel=[])
103
- self.biasfree = biasfree
104
-
105
- def forward(self, y, physics, factor):
106
- if hasattr(physics, "noise_model") and not callable(physics.noise_model):
107
- sigma = getattr(physics.noise_model, "sigma", 0.0)
108
- else:
109
- sigma = 0.0
110
-
111
- if isinstance(y, TensorList):
112
- sigma = sigma / (y[0].abs().reshape(y[0].size(0),-1).mean(1) + 1e-8) / factor
113
- else:
114
- sigma = sigma / (y.abs().reshape(y.size(0),-1).mean(1) + 1e-8) / factor
115
- emb_four = self.emb_fourier(sigma)
116
- emb = self.emb_noise(emb_four)
117
- if self.biasfree:
118
- emb = F.relu(emb)
119
- else:
120
- emb = mp_silu(emb)
121
- return emb.unsqueeze(-1).unsqueeze(-1)
122
-
123
-
124
- # --------------------------------------------------------------------------------------
125
- class AffineConv2d(nn.Conv2d):
126
- def __init__(
127
- self,
128
- in_channels,
129
- out_channels,
130
- kernel_size,
131
- mode="affine",
132
- bias=False,
133
- stride=1,
134
- padding=0,
135
- dilation=1,
136
- groups=1,
137
- padding_mode="circular",
138
- blind=True,
139
- ):
140
- if mode == "affine": # f(a*x + 1) = a*f(x) + 1
141
- bias = False
142
- super().__init__(
143
- in_channels,
144
- out_channels,
145
- kernel_size,
146
- bias=bias,
147
- stride=stride,
148
- padding=padding,
149
- dilation=dilation,
150
- groups=groups,
151
- padding_mode=padding_mode,
152
- )
153
- self.blind = blind
154
- self.mode = mode
155
-
156
- def affine(self, w):
157
- """returns new kernels that encode affine combinations"""
158
- return (
159
- w.view(self.out_channels, -1).roll(1, 1).view(w.size())
160
- - w
161
- + 1 / w[0, ...].numel()
162
- )
163
-
164
- def forward(self, x):
165
- if self.mode != "affine":
166
- return super().forward(x)
167
- else:
168
- kernel = (
169
- self.affine(self.weight)
170
- if self.blind
171
- else torch.cat(
172
- (self.affine(self.weight[:, :-1, :, :]), self.weight[:, -1:, :, :]),
173
- dim=1,
174
- )
175
- )
176
- padding = tuple(
177
- elt for elt in reversed(self.padding) for _ in range(2)
178
- ) # used to translate padding arg used by Conv module to the ones used by F.pad
179
- padding_mode = (
180
- self.padding_mode if self.padding_mode != "zeros" else "constant"
181
- ) # used to translate padding_mode arg used by Conv module to the ones used by F.pad
182
- return F.conv2d(
183
- F.pad(x, padding, mode=padding_mode),
184
- kernel,
185
- stride=self.stride,
186
- dilation=self.dilation,
187
- groups=self.groups,
188
- )
189
-
190
-
191
- # --------------------------------------------------------------------------------------
192
- def kaiser_window(beta, length):
193
- """Return the Kaiser window of length `length` and shape parameter `beta`."""
194
- if beta < 0:
195
- raise ValueError("beta must be greater than 0")
196
- if length < 1:
197
- raise ValueError("length must be greater than 0")
198
- if length == 1:
199
- return torch.tensor([1.0])
200
- half = (length - 1) / 2
201
- n = torch.arange(length)
202
- beta = torch.tensor(beta)
203
- return torch.i0(beta * torch.sqrt(1 - ((n - half) / half) ** 2)) / torch.i0(beta)
204
-
205
-
206
- def sinc_filter(factor=2, length=11, windowed=True):
207
- r"""
208
- Anti-aliasing sinc filter multiplied by a Kaiser window.
209
-
210
- :param float factor: Downsampling factor.
211
- :param int length: Length of the filter.
212
- """
213
- deltaf = 1 / factor
214
-
215
- n = torch.arange(length) - (length - 1) / 2
216
- filter = torch.sinc(n / factor)
217
-
218
- if windowed:
219
- A = 2.285 * (length - 1) * 3.14 * deltaf + 7.95
220
- if A <= 21:
221
- beta = 0
222
- elif A <= 50:
223
- beta = 0.5842 * (A - 21) ** 0.4 + 0.07886 * (A - 21)
224
- else:
225
- beta = 0.1102 * (A - 8.7)
226
-
227
- filter = filter * kaiser_window(beta, length)
228
-
229
- filter = filter.unsqueeze(0)
230
- filter = filter * filter.T
231
- filter = filter.unsqueeze(0).unsqueeze(0)
232
- filter = filter / filter.sum()
233
- return filter
234
-
235
-
236
- class EquivMaxPool(nn.Module):
237
- r"""
238
- Max pooling layer that is equivariant to translations.
239
-
240
- :param int kernel_size: size of the pooling window.
241
- :param int stride: stride of the pooling operation.
242
- :param int padding: padding to apply before pooling.
243
- :param bool circular_padding: circular padding for the convolutional layers.
244
- """
245
-
246
- def __init__(
247
- self,
248
- antialias="gaussian",
249
- factor=2,
250
- device="cuda",
251
- in_channels=64,
252
- out_channels=64,
253
- bias=False,
254
- padding_mode="circular",
255
- ):
256
- super(EquivMaxPool, self).__init__()
257
- self.antialias = antialias
258
- if antialias == "gaussian":
259
- self.antialias_kernel = gaussian_blur(factor / 3.14).to(device)
260
- elif antialias == "sinc":
261
- self.antialias_kernel = sinc_filter(
262
- factor=factor, length=11, windowed=True
263
- ).to(device)
264
-
265
- self.conv_down = AffineConv2d(
266
- in_channels,
267
- out_channels,
268
- kernel_size=3,
269
- stride=1,
270
- padding=1,
271
- bias=bias,
272
- padding_mode=padding_mode,
273
- groups=1,
274
- )
275
-
276
- self.conv_up = AffineConv2d(
277
- out_channels,
278
- in_channels,
279
- kernel_size=3,
280
- stride=1,
281
- padding=1,
282
- bias=bias,
283
- padding_mode=padding_mode,
284
- groups=1,
285
- )
286
-
287
- def forward(self, x):
288
- return self.downscale(x)
289
-
290
- def downscale(self, x):
291
- r"""
292
- Apply the equivariant pooling.
293
-
294
- :param torch.Tensor x: input tensor.
295
- """
296
- B, C, H, W = x.shape
297
-
298
- x = self.conv_down(x)
299
-
300
- if self.antialias == "gaussian" or self.antialias == "sinc":
301
- x = conv2d(x, self.antialias_kernel, padding="circular")
302
-
303
- x1 = x[:, :, ::2, ::2].unsqueeze(0)
304
- x2 = x[:, :, ::2, 1::2].unsqueeze(0)
305
- x3 = x[:, :, 1::2, ::2].unsqueeze(0)
306
- x4 = x[:, :, 1::2, 1::2].unsqueeze(0)
307
- out = torch.cat([x1, x2, x3, x4], dim=0) # (4, B, C, H/2, W/2)
308
- ind = torch.norm(out, dim=(2, 3, 4), p=2) # (4, B)
309
- ind = torch.argmax(ind, dim=0) # (B)
310
- out = out[ind, torch.arange(B), ...] # (B, C, H/2, W/2)
311
- self.ind = ind
312
-
313
- return out
314
-
315
- def upscale(self, x):
316
- B, C, H, W = x.shape
317
-
318
- out = torch.zeros((B, C, H * 2, W * 2), device=x.device)
319
- out[:, :, ::2, ::2] = x
320
- ind = self.ind
321
- filter = torch.zeros((B, 1, 2, 2), device=x.device)
322
- filter[ind == 0, :, 0, 0] = 1
323
- filter[ind == 1, :, 0, 1] = 1
324
- filter[ind == 2, :, 1, 0] = 1
325
- filter[ind == 3, :, 1, 1] = 1
326
- out = conv2d(out, filter, padding="constant")
327
-
328
- if self.antialias == "gaussian" or self.antialias == "sinc":
329
- out = conv2d(out, self.antialias_kernel, padding="circular")
330
-
331
- out = self.conv_up(out)
332
- return out
333
-
334
-
335
- # --------------------------------------------------------------------------------------
336
- class ConvNextBaseBlock(nn.Module):
337
- r"""
338
- ConvNeXt Block mimicking DRUNet base layer (Conv + Relu + Conv)
339
-
340
- Args:
341
- in_channels (int): Number of input channels.
342
- out_channels (int): Number of output channels.
343
- mode (str): Mode for the AffineConv2d (if needed, else ignored).
344
- bias (bool): Whether to use bias in convolutions. Default: False.
345
- ksize (int): Kernel size for the convolutions. Default: 7.
346
- padding_mode (str): Padding mode for convolutions. Default: 'circular'.
347
- mult_fact (int): Multiplier factor for expanding the number of channels.
348
- residual (bool): Whether to use a residual connection. Default: False.
349
- """
350
-
351
- def __init__(
352
- self,
353
- in_channels,
354
- out_channels,
355
- mode="",
356
- bias=False,
357
- ksize=7,
358
- padding_mode="circular",
359
- mult_fact=1,
360
- residual=False,
361
- ):
362
- super().__init__()
363
-
364
- ### DEPTHWISE SEPARABLE CONVOLUTION: (N,C,H,W) -> (N,4*C,H,W)
365
- # depthwise conv with big kernel
366
- self.dwconv_a = AffineConv2d(
367
- in_channels,
368
- in_channels,
369
- kernel_size=ksize,
370
- padding=ksize // 2,
371
- groups=in_channels,
372
- padding_mode=padding_mode,
373
- bias=bias,
374
- mode=mode,
375
- )
376
- # depthwise conv with small kernel
377
- self.dwconv_a_small = AffineConv2d(
378
- in_channels,
379
- in_channels,
380
- kernel_size=3,
381
- padding=3 // 2,
382
- groups=in_channels,
383
- padding_mode=padding_mode,
384
- bias=bias,
385
- mode=mode,
386
- )
387
- # pointwise conv to change number of channels
388
- self.pwconv_a1 = AffineConv2d(
389
- in_channels,
390
- mult_fact * in_channels,
391
- kernel_size=1,
392
- stride=1,
393
- padding=0,
394
- mode=mode,
395
- bias=bias,
396
- padding_mode=padding_mode,
397
- groups=1,
398
- )
399
-
400
- ### ACTIVATION
401
- self.act_a = nn.ReLU()
402
-
403
- ### POINTWISE CONVOLUTION: (N,4*C,H,W) -> (N,O,H,W)
404
- self.pwconv_a2 = AffineConv2d(
405
- mult_fact * in_channels,
406
- out_channels,
407
- kernel_size=1,
408
- stride=1,
409
- padding=0,
410
- bias=bias,
411
- padding_mode=padding_mode,
412
- groups=1,
413
- )
414
-
415
- ### Needed to match the number of channels : (N,C,H,W) -> (C,O,H,W)
416
- self.residual = residual
417
- if self.residual:
418
- self.residual_conv = AffineConv2d(
419
- in_channels,
420
- out_channels,
421
- kernel_size=1,
422
- stride=1,
423
- padding=0,
424
- groups=1,
425
- padding_mode=padding_mode,
426
- bias=bias,
427
- mode=mode,
428
- )
429
-
430
- def forward(self, x_in, stream1=None, stream2=None):
431
- """Forward with GPU parallelization using multiple cuda streams."""
432
-
433
- if stream1 is not None and stream2 is not None:
434
- # Use the streams
435
- with torch.cuda.stream(stream1):
436
- output_a = self.dwconv_a(x_in) # Run the first convolution in stream1
437
-
438
- with torch.cuda.stream(stream2):
439
- output_a_small = self.dwconv_a_small(
440
- x_in
441
- ) # Run the second convolution in stream2
442
-
443
- # Ensure the streams are synchronized before adding the results
444
- torch.cuda.synchronize()
445
- x = self.pwconv_a(output_a + output_a_small)
446
-
447
- else:
448
- x = self.dwconv_a(x_in) + self.dwconv_a_small(x_in) # replk 7x7 with 3x3
449
- x = self.pwconv_a1(x)
450
-
451
- x = self.act_a(x)
452
- x = self.pwconv_a2(x) # (N,O,H,W)
453
-
454
- if self.residual:
455
- x = self.residual_conv(x_in) + x
456
-
457
- return x
458
-
459
-
460
- class ConvNextBlock2(nn.Module):
461
- r"""
462
- ConvNeXt Block mimicking DRUNet base layer (Conv + Relu + Conv)
463
-
464
- Args:
465
- ???
466
- """
467
-
468
- def __init__(
469
- self,
470
- in_channels,
471
- out_channels,
472
- mode="affine",
473
- bias=False,
474
- ksize=7,
475
- padding_mode="circular",
476
- mult_fact=4,
477
- s1=None,
478
- s2=None,
479
- ):
480
- super().__init__()
481
- self.block_0 = ConvNextBaseBlock(
482
- in_channels,
483
- out_channels,
484
- mode=mode,
485
- bias=bias,
486
- ksize=ksize,
487
- padding_mode=padding_mode,
488
- mult_fact=mult_fact,
489
- )
490
- self.block_1 = ConvNextBaseBlock(
491
- in_channels,
492
- out_channels,
493
- mode=mode,
494
- bias=bias,
495
- ksize=ksize,
496
- padding_mode=padding_mode,
497
- mult_fact=mult_fact,
498
- )
499
- # self.relu = nn.ReLU(inplace=True) # issue with the network when working in FP16 ???
500
- self.relu = nn.ReLU()
501
-
502
- # cuda stream to parallelize execution of ConvNextBaseBlock
503
- self.s1 = s1
504
- self.s2 = s2
505
-
506
- def forward(self, input, emb_sigma=None):
507
- if self.s1 is not None and self.s2 is not None:
508
- x = self.block_0(input, self.s1, self.s2)
509
- else:
510
- x = self.block_0(input)
511
-
512
- x = self.relu(x)
513
-
514
- if self.s1 is not None and self.s2 is not None:
515
- x = self.block_1(x, self.s1, self.s2)
516
- else:
517
- x = self.block_1(x)
518
- return x + input
519
-
520
-
521
- class CondResBlock(nn.Module):
522
- def __init__(
523
- self,
524
- in_channels=64,
525
- out_channels=64,
526
- kernel_size=3,
527
- stride=1,
528
- padding=1,
529
- bias=False,
530
- emb_channels=512,
531
- ):
532
- super(CondResBlock, self).__init__()
533
-
534
- assert in_channels == out_channels, "Only support in_channels==out_channels."
535
-
536
- self.gain = torch.nn.Parameter(torch.tensor([1.0]), requires_grad=True)
537
- self.emb_linear = MPConv(emb_channels, out_channels, kernel=[3, 3])
538
- self.conv1 = nn.Conv2d(
539
- in_channels, out_channels, kernel_size, stride, padding, bias=bias
540
- )
541
- self.conv2 = nn.Conv2d(
542
- out_channels, out_channels, kernel_size, stride, padding, bias=bias
543
- )
544
-
545
- def forward(self, x, emb_sigma):
546
- # u = self.conv1(mp_silu(x))
547
- u = self.conv1(F.relu((x)))
548
- c = self.emb_linear(emb_sigma, gain=self.gain) + 1
549
- # y = mp_silu(u * c.unsqueeze(2).unsqueeze(3).to(u.dtype))
550
- y = F.relu(u * c.unsqueeze(2).unsqueeze(3).to(u.dtype))
551
- y = self.conv2(y)
552
- return x + y
553
-
554
-
555
- """
556
- Functional blocks below
557
- """
558
- from collections import OrderedDict
559
- import torch
560
- import torch.nn as nn
561
-
562
-
563
- """
564
- # --------------------------------------------
565
- # Advanced nn.Sequential
566
- # https://github.com/xinntao/BasicSR
567
- # --------------------------------------------
568
- """
569
-
570
-
571
- def sequential(*args):
572
- """Advanced nn.Sequential.
573
- Args:
574
- nn.Sequential, nn.Module
575
- Returns:
576
- nn.Sequential
577
- """
578
- if len(args) == 1:
579
- if isinstance(args[0], OrderedDict):
580
- raise NotImplementedError("sequential does not support OrderedDict input.")
581
- return args[0] # No sequential is needed.
582
- modules = []
583
- for module in args:
584
- if isinstance(module, nn.Sequential):
585
- for submodule in module.children():
586
- modules.append(submodule)
587
- elif isinstance(module, nn.Module):
588
- modules.append(module)
589
- return nn.Sequential(*modules)
590
-
591
-
592
- """
593
- # --------------------------------------------
594
- # Useful blocks
595
- # https://github.com/xinntao/BasicSR
596
- # --------------------------------
597
- # conv + normaliation + relu (conv)
598
- # (PixelUnShuffle)
599
- # (ConditionalBatchNorm2d)
600
- # concat (ConcatBlock)
601
- # sum (ShortcutBlock)
602
- # resblock (ResBlock)
603
- # Channel Attention (CA) Layer (CALayer)
604
- # Residual Channel Attention Block (RCABlock)
605
- # Residual Channel Attention Group (RCAGroup)
606
- # Residual Dense Block (ResidualDenseBlock_5C)
607
- # Residual in Residual Dense Block (RRDB)
608
- # --------------------------------------------
609
- """
610
-
611
-
612
- # --------------------------------------------
613
- # return nn.Sequantial of (Conv + BN + ReLU)
614
- # --------------------------------------------
615
- def conv(
616
- in_channels=64,
617
- out_channels=64,
618
- kernel_size=3,
619
- stride=1,
620
- padding=1,
621
- bias=True,
622
- mode="CBR",
623
- negative_slope=0.2,
624
- ):
625
- L = []
626
- for t in mode:
627
- if t == "C":
628
- L.append(
629
- nn.Conv2d(
630
- in_channels=in_channels,
631
- out_channels=out_channels,
632
- kernel_size=kernel_size,
633
- stride=stride,
634
- padding=padding,
635
- bias=bias,
636
- )
637
- )
638
- elif t == "T":
639
- L.append(
640
- nn.ConvTranspose2d(
641
- in_channels=in_channels,
642
- out_channels=out_channels,
643
- kernel_size=kernel_size,
644
- stride=stride,
645
- padding=padding,
646
- bias=bias,
647
- )
648
- )
649
- elif t == "B":
650
- L.append(nn.BatchNorm2d(out_channels, momentum=0.9, eps=1e-04, affine=True))
651
- elif t == "I":
652
- L.append(nn.InstanceNorm2d(out_channels, affine=True))
653
- elif t == "R":
654
- L.append(nn.ReLU(inplace=True))
655
- elif t == "r":
656
- L.append(nn.ReLU(inplace=False))
657
- elif t == "L":
658
- L.append(nn.LeakyReLU(negative_slope=negative_slope, inplace=True))
659
- elif t == "l":
660
- L.append(nn.LeakyReLU(negative_slope=negative_slope, inplace=False))
661
- elif t == "E":
662
- L.append(nn.ELU(inplace=False))
663
- elif t == "s":
664
- L.append(nn.Softplus())
665
- elif t == "2":
666
- L.append(nn.PixelShuffle(upscale_factor=2))
667
- elif t == "3":
668
- L.append(nn.PixelShuffle(upscale_factor=3))
669
- elif t == "4":
670
- L.append(nn.PixelShuffle(upscale_factor=4))
671
- elif t == "U":
672
- L.append(nn.Upsample(scale_factor=2, mode="nearest"))
673
- elif t == "u":
674
- L.append(nn.Upsample(scale_factor=3, mode="nearest"))
675
- elif t == "v":
676
- L.append(nn.Upsample(scale_factor=4, mode="nearest"))
677
- elif t == "M":
678
- L.append(nn.MaxPool2d(kernel_size=kernel_size, stride=stride, padding=0))
679
- elif t == "A":
680
- L.append(nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=0))
681
- else:
682
- raise NotImplementedError("Undefined type: ".format(t))
683
- return sequential(*L)
684
-
685
-
686
- """
687
- # --------------------------------------------
688
- # Upsampler
689
- # Kai Zhang, https://github.com/cszn/KAIR
690
- # --------------------------------------------
691
- # upsample_pixelshuffle
692
- # upsample_upconv
693
- # upsample_convtranspose
694
- # --------------------------------------------
695
- """
696
-
697
-
698
- # --------------------------------------------
699
- # conv + subp (+ relu)
700
- # --------------------------------------------
701
- def upsample_pixelshuffle(
702
- in_channels=64,
703
- out_channels=3,
704
- kernel_size=3,
705
- stride=1,
706
- padding=1,
707
- bias=True,
708
- mode="2R",
709
- negative_slope=0.2,
710
- ):
711
- assert len(mode) < 4 and mode[0] in [
712
- "2",
713
- "3",
714
- "4",
715
- ], "mode examples: 2, 2R, 2BR, 3, ..., 4BR."
716
- up1 = conv(
717
- in_channels,
718
- out_channels * (int(mode[0]) ** 2),
719
- kernel_size,
720
- stride,
721
- padding,
722
- bias,
723
- mode="C" + mode,
724
- negative_slope=negative_slope,
725
- )
726
- return up1
727
-
728
-
729
- # --------------------------------------------
730
- # nearest_upsample + conv (+ R)
731
- # --------------------------------------------
732
- def upsample_upconv(
733
- in_channels=64,
734
- out_channels=3,
735
- kernel_size=3,
736
- stride=1,
737
- padding=1,
738
- bias=True,
739
- mode="2R",
740
- negative_slope=0.2,
741
- ):
742
- assert len(mode) < 4 and mode[0] in [
743
- "2",
744
- "3",
745
- "4",
746
- ], "mode examples: 2, 2R, 2BR, 3, ..., 4BR"
747
- if mode[0] == "2":
748
- uc = "UC"
749
- elif mode[0] == "3":
750
- uc = "uC"
751
- elif mode[0] == "4":
752
- uc = "vC"
753
- mode = mode.replace(mode[0], uc)
754
- up1 = conv(
755
- in_channels,
756
- out_channels,
757
- kernel_size,
758
- stride,
759
- padding,
760
- bias,
761
- mode=mode,
762
- negative_slope=negative_slope,
763
- )
764
- return up1
765
-
766
-
767
- # --------------------------------------------
768
- # convTranspose (+ relu)
769
- # --------------------------------------------
770
- def upsample_convtranspose(
771
- in_channels=64,
772
- out_channels=3,
773
- kernel_size=2,
774
- stride=2,
775
- padding=0,
776
- bias=True,
777
- mode="2R",
778
- negative_slope=0.2,
779
- ):
780
- assert len(mode) < 4 and mode[0] in [
781
- "2",
782
- "3",
783
- "4",
784
- "8",
785
- ], "mode examples: 2, 2R, 2BR, 3, ..., 4BR."
786
- kernel_size = int(mode[0])
787
- stride = int(mode[0])
788
- mode = mode.replace(mode[0], "T")
789
- up1 = conv(
790
- in_channels,
791
- out_channels,
792
- kernel_size,
793
- stride,
794
- padding,
795
- bias,
796
- mode,
797
- negative_slope,
798
- )
799
- return up1
800
-
801
-
802
- """
803
- # --------------------------------------------
804
- # Downsampler
805
- # Kai Zhang, https://github.com/cszn/KAIR
806
- # --------------------------------------------
807
- # downsample_strideconv
808
- # downsample_maxpool
809
- # downsample_avgpool
810
- # --------------------------------------------
811
- """
812
-
813
-
814
- # --------------------------------------------
815
- # strideconv (+ relu)
816
- # --------------------------------------------
817
- def downsample_strideconv(
818
- in_channels=64,
819
- out_channels=64,
820
- kernel_size=2,
821
- stride=2,
822
- padding=0,
823
- bias=True,
824
- mode="2R",
825
- negative_slope=0.2,
826
- ):
827
- assert len(mode) < 4 and mode[0] in [
828
- "2",
829
- "3",
830
- "4",
831
- "8",
832
- ], "mode examples: 2, 2R, 2BR, 3, ..., 4BR."
833
- kernel_size = int(mode[0])
834
- stride = int(mode[0])
835
- mode = mode.replace(mode[0], "C")
836
- down1 = conv(
837
- in_channels,
838
- out_channels,
839
- kernel_size,
840
- stride,
841
- padding,
842
- bias,
843
- mode,
844
- negative_slope,
845
- )
846
- return down1
847
-
848
-
849
- # --------------------------------------------
850
- # maxpooling + conv (+ relu)
851
- # --------------------------------------------
852
- def downsample_maxpool(
853
- in_channels=64,
854
- out_channels=64,
855
- kernel_size=3,
856
- stride=1,
857
- padding=0,
858
- bias=True,
859
- mode="2R",
860
- negative_slope=0.2,
861
- ):
862
- assert len(mode) < 4 and mode[0] in [
863
- "2",
864
- "3",
865
- ], "mode examples: 2, 2R, 2BR, 3, ..., 3BR."
866
- kernel_size_pool = int(mode[0])
867
- stride_pool = int(mode[0])
868
- mode = mode.replace(mode[0], "MC")
869
- pool = conv(
870
- kernel_size=kernel_size_pool,
871
- stride=stride_pool,
872
- mode=mode[0],
873
- negative_slope=negative_slope,
874
- )
875
- pool_tail = conv(
876
- in_channels,
877
- out_channels,
878
- kernel_size,
879
- stride,
880
- padding,
881
- bias,
882
- mode=mode[1:],
883
- negative_slope=negative_slope,
884
- )
885
- return sequential(pool, pool_tail)
886
-
887
-
888
- # --------------------------------------------
889
- # averagepooling + conv (+ relu)
890
- # --------------------------------------------
891
- def downsample_avgpool(
892
- in_channels=64,
893
- out_channels=64,
894
- kernel_size=3,
895
- stride=1,
896
- padding=1,
897
- bias=True,
898
- mode="2R",
899
- negative_slope=0.2,
900
- ):
901
- assert len(mode) < 4 and mode[0] in [
902
- "2",
903
- "3",
904
- ], "mode examples: 2, 2R, 2BR, 3, ..., 3BR."
905
- kernel_size_pool = int(mode[0])
906
- stride_pool = int(mode[0])
907
- mode = mode.replace(mode[0], "AC")
908
- pool = conv(
909
- kernel_size=kernel_size_pool,
910
- stride=stride_pool,
911
- mode=mode[0],
912
- negative_slope=negative_slope,
913
- )
914
- pool_tail = conv(
915
- in_channels,
916
- out_channels,
917
- kernel_size,
918
- stride,
919
- padding,
920
- bias,
921
- mode=mode[1:],
922
- negative_slope=negative_slope,
923
- )
924
- return sequential(pool, pool_tail)
 
models/heads.py DELETED
@@ -1,270 +0,0 @@
1
- import torch
2
- from models.blocks import AffineConv2d, downsample_strideconv, upsample_convtranspose
3
-
4
- class InHead(torch.nn.Module):
5
- def __init__(self, in_channels_list, out_channels, mode="", bias=False, input_layer=False):
6
- super(InHead, self).__init__()
7
- self.in_channels_list = in_channels_list
8
- self.input_layer = input_layer
9
- for i, in_channels in enumerate(in_channels_list):
10
- conv = AffineConv2d(
11
- in_channels=in_channels,
12
- out_channels=out_channels,
13
- bias=bias,
14
- mode=mode,
15
- kernel_size=3,
16
- stride=1,
17
- padding=1,
18
- padding_mode="zeros",
19
- )
20
- setattr(self, f"conv{i}", conv)
21
-
22
- def forward(self, x):
23
- in_channels = x.size(1) - 1 if self.input_layer else x.size(1)
24
-
25
- # find index
26
- i = self.in_channels_list.index(in_channels)
27
- x = getattr(self, f"conv{i}")(x)
28
-
29
- return x
30
-
31
- class OutTail(torch.nn.Module):
32
- def __init__(self, in_channels, out_channels_list, mode="", bias=False):
33
- super(OutTail, self).__init__()
34
- self.in_channels = in_channels
35
- self.out_channels_list = out_channels_list
36
- for i, out_channels in enumerate(out_channels_list):
37
- conv = AffineConv2d(
38
- in_channels=in_channels,
39
- out_channels=out_channels,
40
- bias=bias,
41
- mode=mode,
42
- kernel_size=3,
43
- stride=1,
44
- padding=1,
45
- padding_mode="zeros",
46
- )
47
- setattr(self, f"conv{i}", conv)
48
-
49
- def forward(self, x, out_channels):
50
- i = self.out_channels_list.index(out_channels)
51
- x = getattr(self, f"conv{i}")(x)
52
-
53
- return x
54
-
55
- # TODO: check that the heads are compatible with the old implementation
56
- class Heads(torch.nn.Module):
57
- def __init__(self, in_channels_list, out_channels, depth=2, scale=1, bias=True, mode="bilinear", c_mult=1, c_add=0, relu_in=False, skip_in=False):
58
- super(Heads, self).__init__()
59
- self.in_channels_list = [c * (c_mult + c_add) for c in in_channels_list]
60
- self.scale = scale
61
- self.mode = mode
62
- for i, in_channels in enumerate(self.in_channels_list):
63
- setattr(self, f"head{i}", HeadBlock(in_channels, out_channels, depth=depth, bias=bias, relu_in=relu_in, skip_in=skip_in))
64
-
65
- if self.mode == "":
66
- self.nl = torch.nn.ReLU(inplace=False)
67
- if self.scale != 1:
68
- for i, in_channels in enumerate(in_channels_list):
69
- setattr(self, f"down{i}", downsample_strideconv(in_channels, in_channels, bias=False, mode=str(self.scale)))
70
-
71
- def forward(self, x):
72
- in_channels = x.size(1)
73
- i = self.in_channels_list.index(in_channels)
74
-
75
- if self.scale != 1:
76
- if self.mode == "bilinear":
77
- x = torch.nn.functional.interpolate(x, scale_factor=1/self.scale, mode='bilinear', align_corners=False)
78
- else:
79
- x = getattr(self, f"down{i}")(x)
80
- x = self.nl(x)
81
-
82
- # find index
83
- x = getattr(self, f"head{i}")(x)
84
-
85
- return x
86
-
87
- class Tails(torch.nn.Module):
88
- def __init__(self, in_channels, out_channels_list, depth=2, scale=1, bias=True, mode="bilinear", c_mult=1, relu_in=False, skip_in=False):
89
- super(Tails, self).__init__()
90
- self.out_channels_list = out_channels_list
91
- self.scale = scale
92
- for i, out_channels in enumerate(out_channels_list):
93
- setattr(self, f"tail{i}", HeadBlock(in_channels, out_channels * c_mult, depth=depth, bias=bias, relu_in=relu_in, skip_in=skip_in))
94
-
95
- self.mode = mode
96
- if self.mode == "":
97
- self.nl = torch.nn.ReLU(inplace=False)
98
- if self.scale != 1:
99
- # self.up = upsample_convtranspose(out_channels, out_channels, bias=True, mode=str(self.scale))
100
- for i, out_channels in enumerate(out_channels_list):
101
- setattr(self, f"up{i}", upsample_convtranspose(out_channels * c_mult, out_channels * c_mult, bias=bias, mode=str(self.scale)))
102
-
103
- def forward(self, x, out_channels):
104
- i = self.out_channels_list.index(out_channels)
105
- x = getattr(self, f"tail{i}")(x)
106
- # find index
107
- if self.scale != 1:
108
- if self.mode == "bilinear":
109
- x = torch.nn.functional.interpolate(x, scale_factor=self.scale, mode='bilinear', align_corners=False)
110
- else:
111
- x = getattr(self, f"up{i}")(x)
112
-
113
- return x
114
-
115
- class ConvChannels(torch.nn.Module):
116
- """
117
- TODO: remplace this with convconv
118
- A method that only performs convolutional operations on the appropriate channels dim.
119
- """
120
- def __init__(self, channels_list, depth=2, bias=False, residual=False):
121
- super(ConvChannels, self).__init__()
122
- self.channels_list = channels_list
123
- self.residual = residual
124
- for i, channels in enumerate(channels_list):
125
- setattr(self, f"conv{i}_1", torch.nn.Conv2d(channels, channels, 3, bias=bias, padding=1))
126
- setattr(self, f"nl{i}", torch.nn.ReLU())
127
- setattr(self, f"conv{i}_2", torch.nn.Conv2d(channels, channels, 3, bias=bias, padding=1))
128
-
129
- def forward(self, x):
130
- i = self.channels_list.index(x.shape[1])
131
- u = getattr(self, f"conv{i}_1")(x)
132
- u = getattr(self, f"nl{i}")(u)
133
- u = getattr(self, f"conv{i}_2")(u)
134
- if self.residual:
135
- u = x + u
136
- return u
137
-
138
- class HeadBlock(torch.nn.Module):
139
- def __init__(self, in_channels, out_channels, kernel_size=3, bias=True, depth=2, relu_in=False, skip_in=False):
140
- super(HeadBlock, self).__init__()
141
-
142
- padding = kernel_size // 2
143
-
144
- c = out_channels if depth < 2 else in_channels
145
-
146
- self.convin = torch.nn.Conv2d(in_channels, c, kernel_size, padding=padding, bias=bias)
147
- self.zero_conv_skip = torch.nn.Conv2d(in_channels, c, 1, bias=False)
148
- self.depth = depth
149
- self.nl_1 = torch.nn.ReLU(inplace=False)
150
- self.nl_2 = torch.nn.ReLU(inplace=False)
151
- self.relu_in = relu_in
152
- self.skip_in = skip_in
153
-
154
- for i in range(depth-1):
155
- if i < depth - 2:
156
- c_in, c = in_channels, in_channels
157
- else:
158
- c_in, c = in_channels, out_channels
159
-
160
- setattr(self, f"conv1{i}", torch.nn.Conv2d(c_in, c_in, kernel_size, padding=padding, bias=bias))
161
- setattr(self, f"conv2{i}", torch.nn.Conv2d(c_in, c, kernel_size, padding=padding, bias=bias))
162
- setattr(self, f"skipconv{i}", torch.nn.Conv2d(c_in, c, 1, bias=False))
163
-
164
-
165
- def forward(self, x):
166
-
167
- if self.skip_in and self.relu_in:
168
- x = self.nl_1(self.convin(x)) + self.zero_conv_skip(x)
169
- elif self.skip_in and not self.relu_in:
170
- x = self.convin(x) + self.zero_conv_skip(x)
171
- else:
172
- x = self.convin(x)
173
-
174
- for i in range(self.depth-1):
175
- aux = getattr(self, f"conv1{i}")(x)
176
- aux = self.nl_2(aux)
177
- aux_0 = getattr(self, f"conv2{i}")(aux)
178
- aux_1 = getattr(self, f"skipconv{i}")(x)
179
- x = aux_0 + aux_1
180
-
181
- return x
182
-
183
-
184
- class SNRModule(torch.nn.Module):
185
- """
186
- A method that only performs convolutional operations on the appropriate channels dim.
187
- """
188
- def __init__(self, channels_list, out_channels, bias=False, residual=False, features=64):
189
- super(SNRModule, self).__init__()
190
- self.channels_list = channels_list
191
- self.residual = residual
192
- for i, channels in enumerate(channels_list):
193
- setattr(self, f"conv{i}_1", torch.nn.Conv2d(channels + 1, features, 3, bias=bias, padding=1))
194
- setattr(self, f"nl{i}", torch.nn.ReLU())
195
- setattr(self, f"conv{i}_2", torch.nn.Conv2d(features, out_channels, 3, bias=bias, padding=1))
196
-
197
- def forward(self, x0, sigma):
198
- i = self.channels_list.index(x0.shape[1])
199
-
200
- noise_level_map = (torch.ones((x0.size(0), 1, x0.size(2), x0.size(3)), device=x0.device) * sigma)
201
- x = torch.cat((x0, noise_level_map), 1)
202
-
203
- u = getattr(self, f"conv{i}_1")(x)
204
- u = getattr(self, f"nl{i}")(u)
205
- u = getattr(self, f"conv{i}_2")(u)
206
-
207
- den = u.pow(2).mean(dim=-1, keepdim=True).mean(dim=-2, keepdim=True).sqrt()
208
- u = u.abs() / (den + 1e-8)
209
-
210
- return u.mean(dim=-1, keepdim=True).mean(dim=-2, keepdim=True)
211
-
212
-
213
- class EquivConvModule(torch.nn.Module):
214
- """
215
- A method that only performs convolutional operations on the appropriate channels dim.
216
- """
217
- def __init__(self, channels_list, out_channels, bias=False, residual=False, features=64, N=1):
218
- super(EquivConvModule, self).__init__()
219
- self.channels_list = [c * N for c in channels_list]
220
- self.residual = residual
221
- for i, channels in enumerate(channels_list):
222
- setattr(self, f"conv{i}_1", torch.nn.Conv2d(channels * N, channels * N, 3, bias=bias, padding=1))
223
- setattr(self, f"nl{i}", torch.nn.ReLU())
224
- setattr(self, f"conv{i}_2", torch.nn.Conv2d(channels * N, out_channels, 3, bias=bias, padding=1))
225
-
226
- def forward(self, x):
227
-
228
- i = self.channels_list.index(x.shape[1])
229
-
230
- u = getattr(self, f"conv{i}_1")(x)
231
- u = getattr(self, f"nl{i}")(u)
232
- u = getattr(self, f"conv{i}_2")(u)
233
-
234
- return u
235
-
236
-
237
- class EquivHeads(torch.nn.Module):
238
- def __init__(self, in_channels_list, out_channels, depth=2, scale=1, bias=True, mode="bilinear"):
239
- super(EquivHeads, self).__init__()
240
- self.in_channels_list = in_channels_list
241
- self.scale = scale
242
- self.mode = mode
243
- for i, in_channels in enumerate(in_channels_list):
244
- setattr(self, f"head{i}", HeadBlock(in_channels + 1, out_channels, depth=depth, bias=bias))
245
-
246
- if self.mode == "":
247
- self.nl = torch.nn.ReLU(inplace=False)
248
- if self.scale != 1:
249
- for i, in_channels in enumerate(in_channels_list):
250
- setattr(self, f"down{i}", downsample_strideconv(in_channels, in_channels, bias=False, mode=str(self.scale)))
251
-
252
- def forward(self, x, sigma):
253
- in_channels = x.size(1)
254
- i = self.in_channels_list.index(in_channels)
255
-
256
- if self.scale != 1:
257
- if self.mode == "bilinear":
258
- x = torch.nn.functional.interpolate(x, scale_factor=1/self.scale, mode='bilinear', align_corners=False)
259
- else:
260
- x = getattr(self, f"down{i}")(x)
261
- x = self.nl(x)
262
-
263
- # concat noise level map
264
- noise_level_map = (torch.ones((x.size(0), 1, x.size(2), x.size(3)), device=x.device) * sigma)
265
- x = torch.cat((x, noise_level_map), 1)
266
-
267
- # find index
268
- x = getattr(self, f"head{i}")(x)
269
-
270
- return x
 
models/ram.py ADDED
@@ -0,0 +1,854 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ import deepinv as dinv
6
+ from deepinv.physics import Physics, LinearPhysics, Downsampling
7
+ from deepinv.utils import TensorList
9
+
10
+ from huggingface_hub import hf_hub_download
11
+
12
+ cuda = torch.cuda.is_available()
13
+ Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
14
+
15
+ class RAM(nn.Module):
16
+ r"""
17
+ RAM model
18
+
19
+ This model is a convolutional neural network (CNN) designed for image reconstruction tasks.
20
+
21
+ :param in_channels: Number of input channels. If a list is provided, the model will have separate heads for each channel.
22
+ :param device: Device to which the model should be moved. If None, the model will be created on the default device.
23
+ :param pretrained: If True, the model will be initialized with pretrained weights.
24
+ """
25
+
26
+ def __init__(
27
+ self,
28
+ in_channels=[1, 2, 3],
29
+ device=None,
30
+ pretrained=True,
31
+ ):
32
+ super(RAM, self).__init__()
33
+
34
+ nc = [64, 128, 256, 512] # number of channels in the network
35
+ self.in_channels = in_channels
36
+ self.fact_realign = torch.nn.Parameter(torch.tensor([1.0], device=device))
37
+
38
+ self.separate_head = isinstance(in_channels, list)
39
+
40
+ if isinstance(in_channels, list):
41
+ in_channels_first = []
42
+ for i in range(len(in_channels)):
43
+ in_channels_first.append(in_channels[i] + 2)
44
+
45
+ # check if in_channels is a list
46
+ self.m_head = InHead(in_channels_first, nc[0])
47
+
48
+ self.m_down1 = BaseEncBlock(nc[0], nc[0], img_channels=in_channels, decode_upscale=1)
49
+ self.m_down2 = BaseEncBlock(nc[1], nc[1], img_channels=in_channels, decode_upscale=2)
50
+ self.m_down3 = BaseEncBlock(nc[2], nc[2], img_channels=in_channels, decode_upscale=4)
51
+ self.m_body = BaseEncBlock(nc[3], nc[3], img_channels=in_channels, decode_upscale=8)
52
+ self.m_up3 = BaseEncBlock(nc[2], nc[2], img_channels=in_channels, decode_upscale=4)
53
+ self.m_up2 = BaseEncBlock(nc[1], nc[1], img_channels=in_channels, decode_upscale=2)
54
+ self.m_up1 = BaseEncBlock(nc[0], nc[0], img_channels=in_channels, decode_upscale=1)
55
+
56
+ self.pool1 = downsample_strideconv(nc[0], nc[1], bias=False, mode="2")
57
+ self.pool2 = downsample_strideconv(nc[1], nc[2], bias=False, mode="2")
58
+ self.pool3 = downsample_strideconv(nc[2], nc[3], bias=False, mode="2")
59
+ self.up3 = upsample_convtranspose(nc[3], nc[2], bias=False, mode="2")
60
+ self.up2 = upsample_convtranspose(nc[2], nc[1], bias=False, mode="2")
61
+ self.up1 = upsample_convtranspose(nc[1], nc[0], bias=False, mode="2")
62
+
63
+ self.m_tail = OutTail(nc[0], in_channels)
64
+
65
+ # load pretrained weights from hugging face
66
+ if pretrained:
67
+ self.load_state_dict(
68
+ torch.load(hf_hub_download(repo_id="mterris/ram", filename="ram.pth.tar"), map_location=device))
69
+
70
+ if device is not None:
71
+ self.to(device)
72
+
73
+ def constant2map(self, value, x):
74
+ r"""
75
+ Converts a constant value to a map of the same size as the input tensor x.
76
+
77
+ :params float value: constant value
78
+ :params torch.Tensor x: input tensor
79
+ """
80
+ if isinstance(value, torch.Tensor):
81
+ if value.ndim > 0:
82
+ value_map = value.view(x.size(0), 1, 1, 1)
83
+ value_map = value_map.expand(-1, 1, x.size(2), x.size(3))
84
+ else:
85
+ value_map = torch.ones(
86
+ (x.size(0), 1, x.size(2), x.size(3)), device=x.device
87
+ ) * value[None, None, None, None].to(x.device)
88
+ else:
89
+ value_map = (
90
+ torch.ones((x.size(0), 1, x.size(2), x.size(3)), device=x.device)
91
+ * value
92
+ )
93
+ return value_map
94
+
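# Shape sketch for constant2map (toy sizes, assumed): a Python float and a
# per-batch tensor both become a (B, 1, H, W) map that can be concatenated to
# the image as an extra conditioning channel, e.g.
#   x = torch.rand(2, 3, 32, 32)
#   self.constant2map(0.05, x).shape                        # (2, 1, 32, 32)
#   self.constant2map(torch.tensor([0.05, 0.10]), x).shape  # (2, 1, 32, 32)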
95
+ def base_conditioning(self, x, sigma, gamma):
96
+ noise_level_map = self.constant2map(sigma, x)
97
+ gamma_map = self.constant2map(gamma, x)
98
+ return torch.cat((x, noise_level_map, gamma_map), 1)
99
+
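# Note (sketch): after the concatenation above, the tensor carries
# img_channels + 2 channels (image + noise-level map + gain map), which is
# why __init__ builds the input heads with in_channels[i] + 2.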
100
+ def realign_input(self, x, physics, y):
101
+ r"""
102
+ Realign the input x based on the measurements y and the physics model.
103
+ Applies the proximity operator of the L2 norm with respect to the physics model.
104
+
105
+ :params torch.Tensor x: Input tensor
106
+ :params deepinv.physics.Physics physics: Physics model
107
+ :params torch.Tensor y: Measurements
108
+ """
109
+ if hasattr(physics, "factor"):
110
+ f = physics.factor
111
+ elif hasattr(physics, "base") and hasattr(physics.base, "factor"):
112
+ f = physics.base.factor
113
+ elif hasattr(physics, "base") and hasattr(physics.base, "base") and hasattr(physics.base.base, "factor"):
114
+ f = physics.base.base.factor
115
+ else:
116
+ f = 1.0
117
+
118
+ sigma = 1e-6 # default value
119
+ if hasattr(physics.noise_model, 'sigma'):
120
+ sigma = physics.noise_model.sigma
121
+ if hasattr(physics, 'base') and hasattr(physics.base, 'noise_model') and hasattr(physics.base.noise_model,
122
+ 'sigma'):
123
+ sigma = physics.base.noise_model.sigma
124
+ if hasattr(physics, 'base') and hasattr(physics.base, 'base') and hasattr(physics.base.base,
125
+ 'noise_model') and hasattr(
126
+ physics.base.base.noise_model, 'sigma'):
127
+ sigma = physics.base.base.noise_model.sigma
128
+
129
+ if isinstance(y, TensorList):
130
+ num = (y[0].reshape(y[0].shape[0], -1).abs().mean(1))
131
+ else:
132
+ num = (y.reshape(y.shape[0], -1).abs().mean(1))
133
+
134
+ snr = num / (sigma + 1e-4) # SNR equivariant
135
+ gamma = 1 / (1e-4 + 1 / (
136
+ snr * f ** 2)) # TODO: check square-root / mean / check if we need to add a factor in front ?
137
+ gamma = gamma[(...,) + (None,) * (x.dim() - 1)]
138
+ model_input = physics.prox_l2(x, y, gamma=gamma * self.fact_realign)
139
+
140
+ return model_input
141
+
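# What realign_input returns (a sketch, assuming deepinv's definition of
# prox_l2 for linear operators): the proximity operator of the L2 data
# fidelity evaluated at x,
#   model_input = argmin_u (gamma/2) * ||A u - y||^2 + (1/2) * ||u - x||^2,
# with gamma increasing with the estimated SNR, so cleaner measurements pull
# the input harder towards consistency with y.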
142
+ def forward_unet(self, x0, sigma=None, gamma=None, physics=None, y=None):
143
+ r"""
144
+ Forward pass of the UNet model.
145
+
146
+ :params torch.Tensor x0: init image
147
+ :params float sigma: Gaussian noise level
148
+ :params float gamma: Poisson noise gain
149
+ :params deepinv.physics.Physics physics: physics measurement operator
150
+ :params torch.Tensor y: measurements
151
+ """
152
+ img_channels = x0.shape[1]
153
+ physics = MultiScaleLinearPhysics(physics, x0.shape[-3:], device=x0.device)
154
+
155
+ if self.separate_head and img_channels not in self.in_channels:
156
+ raise ValueError(
157
+ f"Input image has {img_channels} channels, but the network only have heads for {self.in_channels} channels.")
158
+
159
+ if y is not None:
160
+ x0 = self.realign_input(x0, physics, y)
161
+
162
+ x0 = self.base_conditioning(x0, sigma, gamma)
163
+
164
+ x1 = self.m_head(x0)
165
+
166
+ x1_ = self.m_down1(x1, physics=physics, y=y, img_channels=img_channels, scale=0)
167
+ x2 = self.pool1(x1_)
168
+
169
+ x3_ = self.m_down2(x2, physics=physics, y=y, img_channels=img_channels, scale=1)
170
+ x3 = self.pool2(x3_)
171
+
172
+ x4_ = self.m_down3(x3, physics=physics, y=y, img_channels=img_channels, scale=2)
173
+ x4 = self.pool3(x4_)
174
+
175
+ x = self.m_body(x4, physics=physics, y=y, img_channels=img_channels, scale=3)
176
+
177
+ x = self.up3(x + x4)
178
+ x = self.m_up3(x, physics=physics, y=y, img_channels=img_channels, scale=2)
179
+
180
+ x = self.up2(x + x3)
181
+ x = self.m_up2(x, physics=physics, y=y, img_channels=img_channels, scale=1)
182
+
183
+ x = self.up1(x + x2)
184
+ x = self.m_up1(x, physics=physics, y=y, img_channels=img_channels, scale=0)
185
+
186
+ x = self.m_tail(x + x1, img_channels)
187
+
188
+ return x
189
+
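# Data-flow sketch: each encoder/decoder stage re-injects the measurements at
# its own resolution (scale 0..3). BaseEncBlock forwards physics and y to its
# ResBlocks, whose MeasCondBlock calls physics.set_scale(scale) before
# computing the Krylov embeddings (see below).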
190
+ def forward(self, y=None, physics=None):
191
+ r"""
192
+ Reconstructs a signal estimate from measurements y
193
+ :param torch.Tensor y: measurements
194
+ :param deepinv.physics.Physics physics: forward operator
195
+ """
196
+ if physics is None:
197
+ physics = dinv.physics.Denoising(noise_model=dinv.physics.GaussianNoise(sigma=0.), device=y.device)
198
+
199
+ x_temp = physics.A_adjoint(y)
200
+ pad = (-x_temp.size(-2) % 8, -x_temp.size(-1) % 8)
201
+ physics = Pad(physics, pad)
202
+
203
+ x_in = physics.A_adjoint(y)
204
+
205
+ sigma = physics.noise_model.sigma if hasattr(physics.noise_model, "sigma") else 1e-3
206
+ gamma = physics.noise_model.gain if hasattr(physics.noise_model, "gain") else 1e-3
207
+
208
+ out = self.forward_unet(x_in, sigma=sigma, gamma=gamma, physics=physics, y=y)
209
+
210
+ out = physics.remove_pad(out)
211
+
212
+ return out
213
+
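# Padding sketch: three stride-2 poolings mean spatial sizes must be
# multiples of 8, so inputs are padded up front and cropped afterwards, e.g.
#   H, W = 65, 100
#   pad = (-H % 8, -W % 8)   # -> (7, 4), i.e. a 72 x 104 padded input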
214
+
215
+ ### --------------- MODEL ---------------
216
+ class BaseEncBlock(nn.Module):
217
+ def __init__(self, in_channels, out_channels, bias=False, nb=4, img_channels=None, decode_upscale=None):
218
+ super(BaseEncBlock, self).__init__()
219
+ self.enc = nn.ModuleList(
220
+ [
221
+ ResBlock(
222
+ in_channels,
223
+ out_channels,
224
+ bias=bias,
225
+ img_channels=img_channels,
226
+ decode_upscale=decode_upscale,
227
+ )
228
+ for _ in range(nb)
229
+ ]
230
+ )
231
+
232
+ def forward(self, x, physics=None, y=None, img_channels=None, scale=0):
233
+ for i in range(len(self.enc)):
234
+ x = self.enc[i](x, physics=physics, y=y, img_channels=img_channels, scale=scale)
235
+ return x
236
+
237
+
238
+ def krylov_embeddings(y, p, factor, v=None, N=4, x_init=None):
239
+ r"""
240
+ Krylov subspace embedding computation.
241
+
242
+ :params torch.Tensor y: Input tensor.
243
+ :params p: An object with A and A_adjoint methods (linear operator).
244
+ :params float factor: Scaling factor.
245
+ :params torch.Tensor v: Precomputed values to subtract from Krylov sequence. Defaults to None.
246
+ :params int N: Number of Krylov iterations. Defaults to 4.
247
+ :params torch.Tensor x_init: Initial guess. Defaults to None.
248
+ """
249
+
250
+ if x_init is None:
251
+ x = p.A_adjoint(y)
252
+ else:
253
+ x = x_init.clone()  # start from the provided initial guess
254
+
255
+ norm = factor ** 2 # Precompute normalization factor
256
+ AtA = lambda u: p.A_adjoint(p.A(u)) * norm # Define the linear operator
257
+
258
+ v = v if v is not None else torch.zeros_like(x)
259
+
260
+ out = x.clone()
261
+ # Compute Krylov basis
262
+ x_k = x.clone()
263
+ for i in range(N - 1):
264
+ x_k = AtA(x_k) - v
265
+ out = torch.cat([out, x_k], dim=1)
266
+
267
+ return out
268
+
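# Krylov embedding sketch: with N = 4, v = 0 and x = A^T y of shape
# (B, C, H, W), the output stacks the Krylov basis [x, Mx, M^2x, M^3x] along
# the channel dimension, where M u = factor**2 * A^T(A(u)), giving a
# (B, 4 * C, H, W) tensor.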
269
+
270
+ class MeasCondBlock(nn.Module):
271
+ r"""
272
+ Measurement conditioning block for the RAM model.
273
+
274
+ :param out_channels: Number of output channels.
275
+ :param img_channels: Number of input channels. If a list is provided, the model will have separate heads for each channel.
276
+ :param decode_upscale: Upscaling factor for the decoding convolution.
277
+ :param N: Number of Krylov iterations.
278
+ :param depth_encoding: Depth of the encoding convolution.
279
+ :param c_mult: Multiplier for the number of channels.
280
+ """
281
+
282
+ def __init__(self, out_channels=64, img_channels=None, decode_upscale=None, N=4, depth_encoding=1, c_mult=1):
283
+ super(MeasCondBlock, self).__init__()
284
+
285
+ self.separate_head = isinstance(img_channels, list)
286
+
287
+ assert img_channels is not None, "img_channels should be provided"
288
+ assert decode_upscale is not None, "decode_upscale should be provided"
289
+
290
+ self.N = N
291
+ self.c_mult = c_mult
292
+ self.relu_encoding = nn.ReLU(inplace=False)
293
+ self.decoding_conv = Tails(out_channels, img_channels, depth=1, scale=1, bias=False, c_mult=self.c_mult)
294
+ self.encoding_conv = Heads(img_channels, out_channels, depth=depth_encoding, scale=1, bias=False,
295
+ c_mult=self.c_mult * N, c_add=N, relu_in=False, skip_in=True)
296
+
297
+ self.gain = torch.nn.Parameter(torch.tensor([1.0]), requires_grad=True)
298
+ self.gain_gradx = torch.nn.Parameter(torch.tensor([1e-2]), requires_grad=True)
299
+ self.gain_grady = torch.nn.Parameter(torch.tensor([1e-2]), requires_grad=True)
300
+ self.gain_pinvx = torch.nn.Parameter(torch.tensor([1e-2]), requires_grad=True)
301
+ self.gain_pinvy = torch.nn.Parameter(torch.tensor([1e-2]), requires_grad=True)
302
+
303
+ def forward(self, x, y, physics, img_channels=None, scale=1):
304
+ physics.set_scale(scale)
305
+ dec = self.decoding_conv(x, img_channels)
306
+ factor = 2 ** (scale)
307
+ meas_y = krylov_embeddings(y, physics, factor, N=self.N)
308
+ meas_dec = krylov_embeddings(y, physics, factor, N=self.N, x_init=dec[:, :img_channels, ...])
309
+ for c in range(1, self.c_mult):
310
+ meas_cur = krylov_embeddings(y, physics, factor, N=self.N,
311
+ x_init=dec[:, img_channels * c:img_channels * (c + 1)])
312
+ meas_dec = torch.cat([meas_dec, meas_cur], dim=1)
313
+ meas = torch.cat([meas_y, meas_dec], dim=1)
314
+ cond = self.encoding_conv(meas)
315
+ emb = self.relu_encoding(cond)
316
+ return emb
317
+
318
+
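+ # Channel bookkeeping (illustrative): with C = img_channels, `meas_y` carries
+ # N*C channels and `meas_dec` carries c_mult*N*C, so the encoder input has
+ # C*N*(c_mult + 1) channels, matching Heads(..., c_mult=c_mult*N, c_add=N):
+ #
+ # >>> C, N, c_mult = 3, 4, 1
+ # >>> C * N * (c_mult + 1)
+ # 24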
319
+ class ResBlock(nn.Module):
320
+ r"""
321
+ Convolutional residual block.
322
+
323
+ :param in_channels: Number of input channels.
324
+ :param out_channels: Number of output channels.
325
+ :param kernel_size: Size of the convolution kernel.
326
+ :param stride: Stride of the convolution.
327
+ :param padding: Padding for the convolution.
328
+ :param bias: Whether to use bias in the convolution.
329
+ :param img_channels: Number of image channels. If a list is provided, a separate head is built for each channel count.
330
+ :param decode_upscale: Upscaling factor for the decoding convolution.
331
+ :param head: Whether this is a head block.
332
+ :param tail: Whether this is a tail block.
333
+ :param N: Number of Krylov iterations.
334
+ :param c_mult: Multiplier for the number of channels.
335
+ :param depth_encoding: Depth of the encoding convolution.
336
+ """
337
+
338
+ def __init__(
339
+ self,
340
+ in_channels=64,
341
+ out_channels=64,
342
+ kernel_size=3,
343
+ stride=1,
344
+ padding=1,
345
+ bias=True,
346
+ img_channels=None,
347
+ decode_upscale=None,
348
+ head=False,
349
+ tail=False,
350
+ N=2,
351
+ c_mult=2,
352
+ depth_encoding=2,
353
+ ):
354
+ super(ResBlock, self).__init__()
355
+
356
+ if not head and not tail:
357
+ assert in_channels == out_channels, "Only in_channels == out_channels is supported."
358
+ self.separate_head = isinstance(img_channels, list)
359
+ self.is_head = head
360
+ self.is_tail = tail
361
+
362
+ if self.is_head:
363
+ self.head = InHead(img_channels, out_channels, input_layer=True)
364
+
365
+ if not self.is_head and not self.is_tail:
366
+ self.conv1 = conv(
367
+ in_channels,
368
+ out_channels,
369
+ kernel_size,
370
+ stride,
371
+ padding,
372
+ bias,
373
+ "C",
374
+ )
375
+ self.nl = nn.ReLU(inplace=True)
376
+ self.conv2 = conv(
377
+ out_channels,
378
+ out_channels,
379
+ kernel_size,
380
+ stride,
381
+ padding,
382
+ bias,
383
+ "C",
384
+ )
385
+
386
+ self.gain = torch.nn.Parameter(torch.tensor([1.0]), requires_grad=True)
387
+ self.PhysicsBlock = MeasCondBlock(out_channels=out_channels, c_mult=c_mult,
388
+ img_channels=img_channels, decode_upscale=decode_upscale,
389
+ N=N, depth_encoding=depth_encoding)
390
+
391
+ def forward(self, x, physics=None, y=None, img_channels=None, scale=0):
392
+ u = self.conv1(x)
393
+ u = self.nl(u)
394
+ u_2 = self.conv2(u)
395
+ emb_grad = self.PhysicsBlock(u, y, physics, img_channels=img_channels, scale=scale)
396
+ u_1 = self.gain * emb_grad
397
+ return x + u_2 + u_1
398
+
399
+
400
+ class InHead(torch.nn.Module):
401
+ def __init__(self, in_channels_list, out_channels, mode="", bias=False, input_layer=False):
402
+ super(InHead, self).__init__()
403
+ self.in_channels_list = in_channels_list
404
+ self.input_layer = input_layer
405
+ for i, in_channels in enumerate(in_channels_list):
406
+ conv = AffineConv2d(
407
+ in_channels=in_channels,
408
+ out_channels=out_channels,
409
+ bias=bias,
410
+ mode=mode,
411
+ kernel_size=3,
412
+ stride=1,
413
+ padding=1,
414
+ padding_mode="zeros",
415
+ )
416
+ setattr(self, f"conv{i}", conv)
417
+
418
+ def forward(self, x):
419
+ in_channels = x.size(1) - 1 if self.input_layer else x.size(1)
420
+
421
+ # find index
422
+ i = self.in_channels_list.index(in_channels)
423
+ x = getattr(self, f"conv{i}")(x)
424
+
425
+ return x
426
+
427
+
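+ # Usage sketch (illustrative): the convolution is selected from the channel
+ # count of the input (minus one when `input_layer=True`, e.g. to discount a
+ # conditioning channel):
+ #
+ # >>> head = InHead([1, 2, 3], 64)
+ # >>> head(torch.randn(1, 2, 16, 16)).shape  # routed to conv1
+ # torch.Size([1, 64, 16, 16])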
428
+ class OutTail(torch.nn.Module):
429
+ def __init__(self, in_channels, out_channels_list, mode="", bias=False):
430
+ super(OutTail, self).__init__()
431
+ self.in_channels = in_channels
432
+ self.out_channels_list = out_channels_list
433
+ for i, out_channels in enumerate(out_channels_list):
434
+ conv = AffineConv2d(
435
+ in_channels=in_channels,
436
+ out_channels=out_channels,
437
+ bias=bias,
438
+ mode=mode,
439
+ kernel_size=3,
440
+ stride=1,
441
+ padding=1,
442
+ padding_mode="zeros",
443
+ )
444
+ setattr(self, f"conv{i}", conv)
445
+
446
+ def forward(self, x, out_channels):
447
+ i = self.out_channels_list.index(out_channels)
448
+ x = getattr(self, f"conv{i}")(x)
449
+
450
+ return x
451
+
452
+
453
+ class Heads(torch.nn.Module):
454
+ def __init__(self, in_channels_list, out_channels, depth=2, scale=1, bias=True, mode="bilinear", c_mult=1, c_add=0,
455
+ relu_in=False, skip_in=False):
456
+ super(Heads, self).__init__()
457
+ self.in_channels_list = [c * (c_mult + c_add) for c in in_channels_list]
458
+ self.scale = scale
459
+ self.mode = mode
460
+ for i, in_channels in enumerate(self.in_channels_list):
461
+ setattr(self, f"head{i}",
462
+ HeadBlock(in_channels, out_channels, depth=depth, bias=bias, relu_in=relu_in, skip_in=skip_in))
463
+
464
+ if self.mode == "":
465
+ self.nl = torch.nn.ReLU(inplace=False)
466
+ if self.scale != 1:
467
+ for i, in_channels in enumerate(self.in_channels_list):
468
+ setattr(self, f"down{i}",
469
+ downsample_strideconv(in_channels, in_channels, bias=False, mode=str(self.scale)))
470
+
471
+ def forward(self, x):
472
+ in_channels = x.size(1)
473
+ i = self.in_channels_list.index(in_channels)
474
+
475
+ if self.scale != 1:
476
+ if self.mode == "bilinear":
477
+ x = torch.nn.functional.interpolate(x, scale_factor=1 / self.scale, mode='bilinear',
478
+ align_corners=False)
479
+ else:
480
+ x = getattr(self, f"down{i}")(x)
481
+ x = self.nl(x)
482
+
483
+ # apply the head matching the input channel count
484
+ x = getattr(self, f"head{i}")(x)
485
+
486
+ return x
487
+
488
+
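+ # Usage sketch (illustrative): the branch is picked from the channel count of
+ # the stacked input; with c_mult=8 and c_add=4, a 3-channel modality expects
+ # 3 * (8 + 4) = 36 channels:
+ #
+ # >>> h = Heads([3], 64, c_mult=8, c_add=4)
+ # >>> h(torch.randn(1, 36, 16, 16)).shape
+ # torch.Size([1, 64, 16, 16])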
489
+ class Tails(torch.nn.Module):
490
+ def __init__(self, in_channels, out_channels_list, depth=2, scale=1, bias=True, mode="bilinear", c_mult=1,
491
+ relu_in=False, skip_in=False):
492
+ super(Tails, self).__init__()
493
+ self.out_channels_list = out_channels_list
494
+ self.scale = scale
495
+ for i, out_channels in enumerate(out_channels_list):
496
+ setattr(self, f"tail{i}",
497
+ HeadBlock(in_channels, out_channels * c_mult, depth=depth, bias=bias, relu_in=relu_in,
498
+ skip_in=skip_in))
499
+
500
+ self.mode = mode
501
+ if self.mode == "":
502
+ self.nl = torch.nn.ReLU(inplace=False)
503
+ if self.scale != 1:
504
+ for i, out_channels in enumerate(out_channels_list):
505
+ setattr(self, f"up{i}",
506
+ upsample_convtranspose(out_channels * c_mult, out_channels * c_mult, bias=bias,
507
+ mode=str(self.scale)))
508
+
509
+ def forward(self, x, out_channels):
510
+ i = self.out_channels_list.index(out_channels)
511
+ x = getattr(self, f"tail{i}")(x)
512
+ # optional upscaling back to the target resolution
513
+ if self.scale != 1:
514
+ if self.mode == "bilinear":
515
+ x = torch.nn.functional.interpolate(x, scale_factor=self.scale, mode='bilinear', align_corners=False)
516
+ else:
517
+ x = getattr(self, f"up{i}")(x)
518
+
519
+ return x
520
+
521
+
522
+ class HeadBlock(torch.nn.Module):
523
+ def __init__(self, in_channels, out_channels, kernel_size=3, bias=True, depth=2, relu_in=False, skip_in=False):
524
+ super(HeadBlock, self).__init__()
525
+
526
+ padding = kernel_size // 2
527
+
528
+ c = out_channels if depth < 2 else in_channels
529
+
530
+ self.convin = torch.nn.Conv2d(in_channels, c, kernel_size, padding=padding, bias=bias)
531
+ self.zero_conv_skip = torch.nn.Conv2d(in_channels, c, 1, bias=False)
532
+ self.depth = depth
533
+ self.nl_1 = torch.nn.ReLU(inplace=False)
534
+ self.nl_2 = torch.nn.ReLU(inplace=False)
535
+ self.relu_in = relu_in
536
+ self.skip_in = skip_in
537
+
538
+ for i in range(depth - 1):
539
+ if i < depth - 2:
540
+ c_in, c = in_channels, in_channels
541
+ else:
542
+ c_in, c = in_channels, out_channels
543
+
544
+ setattr(self, f"conv1{i}", torch.nn.Conv2d(c_in, c_in, kernel_size, padding=padding, bias=bias))
545
+ setattr(self, f"conv2{i}", torch.nn.Conv2d(c_in, c, kernel_size, padding=padding, bias=bias))
546
+ setattr(self, f"skipconv{i}", torch.nn.Conv2d(c_in, c, 1, bias=False))
547
+
548
+ def forward(self, x):
549
+
550
+ if self.skip_in and self.relu_in:
551
+ x = self.nl_1(self.convin(x)) + self.zero_conv_skip(x)
552
+ elif self.skip_in and not self.relu_in:
553
+ x = self.convin(x) + self.zero_conv_skip(x)
554
+ else:
555
+ x = self.convin(x)
556
+
557
+ for i in range(self.depth - 1):
558
+ aux = getattr(self, f"conv1{i}")(x)
559
+ aux = self.nl_2(aux)
560
+ aux_0 = getattr(self, f"conv2{i}")(aux)
561
+ aux_1 = getattr(self, f"skipconv{i}")(x)
562
+ x = aux_0 + aux_1
563
+
564
+ return x
565
+
566
+
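+ # Usage sketch (illustrative): depth=2 gives the input convolution followed by
+ # one conv-ReLU-conv stage with a 1x1 skip:
+ #
+ # >>> hb = HeadBlock(12, 64, depth=2)
+ # >>> hb(torch.randn(1, 12, 16, 16)).shape
+ # torch.Size([1, 64, 16, 16])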
567
+ # --------------------------------------------------------------------------------------
568
+ class AffineConv2d(nn.Conv2d):
569
+ def __init__(
570
+ self,
571
+ in_channels,
572
+ out_channels,
573
+ kernel_size,
574
+ mode="affine",
575
+ bias=False,
576
+ stride=1,
577
+ padding=0,
578
+ dilation=1,
579
+ groups=1,
580
+ padding_mode="circular",
581
+ blind=True,
582
+ ):
583
+ if mode == "affine": # f(a*x + b) = a*f(x) + b
584
+ bias = False
585
+ super().__init__(
586
+ in_channels,
587
+ out_channels,
588
+ kernel_size,
589
+ bias=bias,
590
+ stride=stride,
591
+ padding=padding,
592
+ dilation=dilation,
593
+ groups=groups,
594
+ padding_mode=padding_mode,
595
+ )
596
+ self.blind = blind
597
+ self.mode = mode
598
+
599
+ def affine(self, w):
600
+ """returns new kernels that encode affine combinations"""
601
+ return (
602
+ w.view(self.out_channels, -1).roll(1, 1).view(w.size())
603
+ - w
604
+ + 1 / w[0, ...].numel()
605
+ )
606
+
607
+ def forward(self, x):
608
+ if self.mode != "affine":
609
+ return super().forward(x)
610
+ else:
611
+ kernel = (
612
+ self.affine(self.weight)
613
+ if self.blind
614
+ else torch.cat(
615
+ (self.affine(self.weight[:, :-1, :, :]), self.weight[:, -1:, :, :]),
616
+ dim=1,
617
+ )
618
+ )
619
+ padding = tuple(
620
+ elt for elt in reversed(self.padding) for _ in range(2)
621
+ ) # used to translate padding arg used by Conv module to the ones used by F.pad
622
+ padding_mode = (
623
+ self.padding_mode if self.padding_mode != "zeros" else "constant"
624
+ ) # used to translate padding_mode arg used by Conv module to the ones used by F.pad
625
+ return F.conv2d(
626
+ F.pad(x, padding, mode=padding_mode),
627
+ kernel,
628
+ stride=self.stride,
629
+ dilation=self.dilation,
630
+ groups=self.groups,
631
+ )
632
+
633
+
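+ # Equivariance check (illustrative): in "affine" mode, with the default
+ # circular padding, the layer commutes with affine intensity changes:
+ #
+ # >>> layer = AffineConv2d(3, 3, 3, mode="affine", padding=1)
+ # >>> x = torch.randn(1, 3, 8, 8)
+ # >>> a, b = 2.0, 0.5
+ # >>> torch.allclose(layer(a * x + b), a * layer(x) + b, atol=1e-5)
+ # True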
634
+ """
635
+ Functional blocks below
636
+
637
+ Parts of code borrowed from
638
+ https://github.com/cszn/DPIR/tree/master/models
639
+ https://github.com/xinntao/BasicSR
640
+ """
641
+ from collections import OrderedDict
642
+ import torch
643
+ import torch.nn as nn
644
+
645
+ """
646
+ # --------------------------------------------
647
+ # Advanced nn.Sequential
648
+ # https://github.com/xinntao/BasicSR
649
+ # --------------------------------------------
650
+ """
651
+
652
+
653
+ def sequential(*args):
654
+ """Advanced nn.Sequential.
655
+ Args:
656
+ nn.Sequential, nn.Module
657
+ Returns:
658
+ nn.Sequential
659
+ """
660
+ if len(args) == 1:
661
+ if isinstance(args[0], OrderedDict):
662
+ raise NotImplementedError("sequential does not support OrderedDict input.")
663
+ return args[0] # No sequential is needed.
664
+ modules = []
665
+ for module in args:
666
+ if isinstance(module, nn.Sequential):
667
+ for submodule in module.children():
668
+ modules.append(submodule)
669
+ elif isinstance(module, nn.Module):
670
+ modules.append(module)
671
+ return nn.Sequential(*modules)
672
+
673
+
674
+ def conv(
675
+ in_channels=64,
676
+ out_channels=64,
677
+ kernel_size=3,
678
+ stride=1,
679
+ padding=1,
680
+ bias=True,
681
+ mode="CBR",
682
+ ):
683
+ L = []
684
+ for t in mode:
685
+ if t == "C":
686
+ L.append(
687
+ nn.Conv2d(
688
+ in_channels=in_channels,
689
+ out_channels=out_channels,
690
+ kernel_size=kernel_size,
691
+ stride=stride,
692
+ padding=padding,
693
+ bias=bias,
694
+ )
695
+ )
696
+ elif t == "T":
697
+ L.append(
698
+ nn.ConvTranspose2d(
699
+ in_channels=in_channels,
700
+ out_channels=out_channels,
701
+ kernel_size=kernel_size,
702
+ stride=stride,
703
+ padding=padding,
704
+ bias=bias,
705
+ )
706
+ )
707
+ elif t == "R":
708
+ L.append(nn.ReLU(inplace=True))
709
+ else:
710
+ raise NotImplementedError("Undefined type: {}".format(t))
711
+ return sequential(*L)
712
+
713
+
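+ # Usage sketch (illustrative): the mode string assembles layers in order, e.g.
+ # "CRC" builds Conv -> ReLU -> Conv:
+ #
+ # >>> block = conv(64, 64, mode="CRC")
+ # >>> block(torch.randn(1, 64, 8, 8)).shape
+ # torch.Size([1, 64, 8, 8])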
714
+ # --------------------------------------------
715
+ # convTranspose (+ relu)
716
+ # --------------------------------------------
717
+ def upsample_convtranspose(
718
+ in_channels=64,
719
+ out_channels=3,
720
+ padding=0,
721
+ bias=True,
722
+ mode="2R",
723
+ ):
724
+ assert len(mode) < 4 and mode[0] in [
725
+ "2",
726
+ "3",
727
+ "4",
728
+ "8",
729
+ ], "mode examples: 2, 2R, 2BR, 3, ..., 4BR."
730
+ kernel_size = int(mode[0])
731
+ stride = int(mode[0])
732
+ mode = mode.replace(mode[0], "T")
733
+ up1 = conv(
734
+ in_channels,
735
+ out_channels,
736
+ kernel_size,
737
+ stride,
738
+ padding,
739
+ bias,
740
+ mode,
741
+ )
742
+ return up1
743
+
744
+
745
+ def downsample_strideconv(
746
+ in_channels=64,
747
+ out_channels=64,
748
+ padding=0,
749
+ bias=True,
750
+ mode="2R",
751
+ ):
752
+ assert len(mode) < 4 and mode[0] in [
753
+ "2",
754
+ "3",
755
+ "4",
756
+ "8",
757
+ ], "mode examples: 2, 2R, 2BR, 3, ..., 4BR."
758
+ kernel_size = int(mode[0])
759
+ stride = int(mode[0])
760
+ mode = mode.replace(mode[0], "C")
761
+ down1 = conv(
762
+ in_channels,
763
+ out_channels,
764
+ kernel_size,
765
+ stride,
766
+ padding,
767
+ bias,
768
+ mode,
769
+ )
770
+ return down1
771
+
772
+
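+ # Usage sketch (illustrative): mode "2" sets kernel_size = stride = 2, giving
+ # an exact 2x resampling pair:
+ #
+ # >>> down = downsample_strideconv(64, 128, mode="2")
+ # >>> up = upsample_convtranspose(128, 64, mode="2")
+ # >>> up(down(torch.randn(1, 64, 32, 32))).shape
+ # torch.Size([1, 64, 32, 32])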
773
+ class Upsampling(Downsampling):
774
+ def A(self, x, **kwargs):
775
+ return super().A_adjoint(x, **kwargs)
776
+
777
+ def A_adjoint(self, y, **kwargs):
778
+ return super().A(y, **kwargs)
779
+
780
+ def prox_l2(self, z, y, gamma, **kwargs):
781
+ return super().prox_l2(z, y, gamma, **kwargs)
782
+
783
+
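+ # Illustrative sketch, assuming deepinv's Downsampling(img_size, filter, factor)
+ # signature: swapping A and A_adjoint makes A interpolate from the coarse grid
+ # to the fine one:
+ #
+ # >>> up = Upsampling(img_size=(3, 32, 32), filter="gaussian", factor=2)
+ # >>> up.A(torch.randn(1, 3, 16, 16)).shape
+ # torch.Size([1, 3, 32, 32])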
784
+ class MultiScalePhysics(Physics):
785
+ def __init__(self, physics, img_shape, filter="sinc", scales=[2, 4, 8], device='cpu', **kwargs):
786
+ super().__init__(noise_model=physics.noise_model, **kwargs)
787
+ self.base = physics
788
+ self.scales = scales
789
+ self.img_shape = img_shape
790
+ self.Upsamplings = [Upsampling(img_size=img_shape, filter=filter, factor=factor, device=device) for factor in
791
+ scales]
792
+ self.scale = 0
793
+
794
+ def set_scale(self, scale):
795
+ if scale is not None:
796
+ self.scale = scale
797
+
798
+ def A(self, x, scale=None, **kwargs):
799
+ self.set_scale(scale)
800
+ if self.scale == 0:
801
+ return self.base.A(x, **kwargs)
802
+ else:
803
+ return self.base.A(self.Upsamplings[self.scale - 1].A(x), **kwargs)
804
+
805
+ def downsample(self, x, scale=None):
806
+ self.set_scale(scale)
807
+ if self.scale == 0:
808
+ return x
809
+ else:
810
+ return self.Upsamplings[self.scale - 1].A_adjoint(x)
811
+
812
+ def upsample(self, x, scale=None):
813
+ self.set_scale(scale)
814
+ if self.scale == 0:
815
+ return x
816
+ else:
817
+ return self.Upsamplings[self.scale - 1].A(x)
818
+
819
+ def update_parameters(self, **kwargs):
820
+ self.base.update_parameters(**kwargs)
821
+
822
+
823
+ class MultiScaleLinearPhysics(MultiScalePhysics, LinearPhysics):
824
+ def __init__(self, physics, img_shape, filter="sinc", scales=[2, 4, 8], **kwargs):
825
+ super().__init__(physics=physics, img_shape=img_shape, filter=filter, scales=scales, **kwargs)
826
+
827
+ def A_adjoint(self, y, scale=None, **kwargs):
828
+ self.set_scale(scale)
829
+ y = self.base.A_adjoint(y, **kwargs)
830
+ if self.scale == 0:
831
+ return y
832
+ else:
833
+ return self.Upsamplings[self.scale - 1].A_adjoint(y)
834
+
835
+
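+ # Note: scale s > 0 composes the base operator with a 2^s upsampling, i.e.
+ # A_s = A o U_s and A_s^T = U_s^T o A^T, so the same measurements y can be
+ # re-used at every resolution level of the UNet (cf. `scale` in ResBlock).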
836
+ class Pad(LinearPhysics):
837
+ def __init__(self, physics, pad):
838
+ super().__init__(noise_model=physics.noise_model)
839
+ self.base = physics
840
+ self.pad = pad
841
+
842
+ def A(self, x):
843
+ return self.base.A(x[..., self.pad[0]:, self.pad[1]:])
844
+
845
+ def A_adjoint(self, y):
846
+ y = self.base.A_adjoint(y)
847
+ y = torch.nn.functional.pad(y, (self.pad[1], 0, self.pad[0], 0))
848
+ return y
849
+
850
+ def remove_pad(self, x):
851
+ return x[..., self.pad[0]:, self.pad[1]:]
852
+
853
+ def update_parameters(self, **kwargs):
854
+ self.base.update_parameters(**kwargs)
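+
+ # Usage sketch (illustrative; assumes deepinv, imported as dinv): wraps a
+ # physics so the adjoint lives on a grid whose sides are multiples of 8
+ # (cf. the forward pass at the top of this file):
+ #
+ # >>> base = dinv.physics.Denoising(noise_model=dinv.physics.GaussianNoise(sigma=0.1))
+ # >>> y = torch.randn(1, 3, 30, 29)
+ # >>> p = Pad(base, (-y.size(-2) % 8, -y.size(-1) % 8))
+ # >>> p.A_adjoint(y).shape
+ # torch.Size([1, 3, 32, 32])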
models/unext_wip.py DELETED
@@ -1,1238 +0,0 @@
1
- # Code borrowed from Kai Zhang https://github.com/cszn/DPIR/tree/master/models
2
- import re
3
- import math
4
- import functools
5
-
6
- import deepinv as dinv
7
- from deepinv.utils import plot, TensorList
8
-
9
- import torch
10
- from torch.func import vmap
11
- import torch.nn as nn
12
- import torch.nn.functional as F
13
- from torchvision import transforms
14
- from deepinv.optim.utils import conjugate_gradient
15
-
16
- from physics.multiscale import MultiScaleLinearPhysics, Pad
17
- from models.blocks import EquivMaxPool, AffineConv2d, ConvNextBlock2, NoiseEmbedding, MPConv, TimestepEmbedding, conv, downsample_strideconv, upsample_convtranspose
18
- from models.heads import Heads, Tails, InHead, OutTail, ConvChannels, SNRModule, EquivConvModule, EquivHeads
19
-
20
- cuda = True if torch.cuda.is_available() else False
21
- Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
22
-
23
-
24
- ### --------------- MODEL ---------------
25
- class BaseEncBlock(nn.Module):
26
- def __init__(
27
- self,
28
- in_channels,
29
- out_channels,
30
- bias=False,
31
- mode="CRC",
32
- nb=2,
33
- embedding=False,
34
- emb_channels=None,
35
- emb_physics=False,
36
- img_channels=None,
37
- decode_upscale=None,
38
- config='A',
39
- N=4,
40
- c_mult=1,
41
- depth_encoding=1,
42
- relu_in_encoding=False,
43
- skip_in_encoding=True,
44
- ):
45
- super(BaseEncBlock, self).__init__()
46
- self.config = config
47
- self.enc = nn.ModuleList(
48
- [
49
- ResBlock(
50
- in_channels,
51
- out_channels,
52
- bias=bias,
53
- mode=mode,
54
- embedding=embedding,
55
- emb_channels=emb_channels,
56
- emb_physics=emb_physics,
57
- img_channels=img_channels,
58
- decode_upscale=decode_upscale,
59
- config=config,
60
- N=N,
61
- c_mult=c_mult,
62
- depth_encoding=depth_encoding,
63
- relu_in_encoding=relu_in_encoding,
64
- skip_in_encoding=skip_in_encoding,
65
- )
66
- for _ in range(nb)
67
- ]
68
- )
69
-
70
- def forward(self, x, emb_sigma=None, physics=None, t=None, y=None, emb_in=None, img_channels=None, scale=0):
71
- for i in range(len(self.enc)):
72
- x = self.enc[i](x, emb_sigma=emb_sigma, physics=physics, t=t, y=y, img_channels=img_channels, scale=scale)
73
- return x
74
-
75
-
76
- class NextEncBlock(nn.Module):
77
- def __init__(
78
- self, in_channels, out_channels, bias=False, mode="", mult_fact=4, nb=2
79
- ):
80
- super(NextEncBlock, self).__init__()
81
- self.enc = nn.ModuleList(
82
- [
83
- ConvNextBlock2(
84
- in_channels=in_channels,
85
- out_channels=out_channels,
86
- bias=bias,
87
- mode=mode,
88
- mult_fact=mult_fact,
89
- )
90
- for _ in range(nb)
91
- ]
92
- )
93
-
94
- def forward(self, x, emb_sigma=None):
95
- for i in range(len(self.enc)):
96
- x = self.enc[i](x, emb_sigma)
97
- return x
98
-
99
-
100
- class UNeXt(nn.Module):
101
- r"""
102
- DRUNet denoiser network.
103
-
104
- The network architecture is based on the paper
105
- `Learning deep CNN denoiser prior for image restoration <https://arxiv.org/abs/1704.03264>`_,
106
- and has a U-Net like structure, with convolutional blocks in the encoder and decoder parts.
107
-
108
- The network takes into account the noise level of the input image, which is encoded as an additional input channel.
109
-
110
- A pretrained network for (in_channels=out_channels=1 or in_channels=out_channels=3)
111
- can be downloaded via setting ``pretrained='download'``.
112
-
113
- :param int in_channels: number of channels of the input.
114
- :param int out_channels: number of channels of the output.
115
- :param list nc: number of convolutional layers.
116
- :param int nb: number of convolutional blocks per layer.
117
- :param int nf: number of channels per convolutional layer.
118
- :param str act_mode: activation mode, "R" for ReLU, "L" for LeakyReLU "E" for ELU and "S" for Softplus.
119
- :param str downsample_mode: Downsampling mode, "avgpool" for average pooling, "maxpool" for max pooling, and
120
- "strideconv" for convolution with stride 2.
121
- :param str upsample_mode: Upsampling mode, "convtranspose" for convolution transpose, "pixelsuffle" for pixel
122
- shuffling, and "upconv" for nearest neighbour upsampling with additional convolution.
123
- :param str, None pretrained: use a pretrained network. If ``pretrained=None``, the weights will be initialized at random
124
- using Pytorch's default initialization. If ``pretrained='download'``, the weights will be downloaded from an
125
- online repository (only available for the default architecture with 3 or 1 input/output channels).
126
- Finally, ``pretrained`` can also be set as a path to the user's own pretrained weights.
127
- See :ref:`pretrained-weights <pretrained-weights>` for more details.
128
- :param bool train: training or testing mode.
129
- :param str device: gpu or cpu.
130
-
131
- """
132
-
133
- def __init__(
134
- self,
135
- in_channels=[1, 2, 3],
136
- out_channels=[1, 2, 3],
137
- nc=[64, 128, 256, 512],
138
- nb=4, # 4 in DRUNet but out of memory
139
- conv_type="next", # should be 'base' or 'next'
140
- pool_type="next", # should be 'base' or 'next'
141
- cond_type="base", # conditioning, should be 'base' or 'edm'
142
- device=None,
143
- bias=False,
144
- mode="",
145
- residual=False,
146
- act_mode="R",
147
- layer_scale_init_value=1e-6,
148
- init_type="ortho",
149
- gain_init_conv=1.0,
150
- gain_init_linear=1.0,
151
- drop_prob=0.0,
152
- replk=False,
153
- mult_fact=4,
154
- antialias="gaussian",
155
- emb_physics=False,
156
- config='A',
157
- pretrained_pth=None,
158
- N=4,
159
- c_mult=1,
160
- depth_encoding=1,
161
- relu_in_encoding=False,
162
- skip_in_encoding=True,
163
- ):
164
- super(UNeXt, self).__init__()
165
-
166
- self.residual = residual
167
- self.conv_type = conv_type
168
- self.pool_type = pool_type
169
- self.emb_physics = emb_physics
170
- self.config = config
171
- self.in_channels = in_channels
172
- self.fact_realign = torch.nn.Parameter(torch.tensor([1.0], device=device))
173
-
174
- self.separate_head = isinstance(in_channels, list)
175
-
176
- assert cond_type in ["base", "edm"], "cond_type should be 'base' or 'edm'"
177
- self.cond_type = cond_type
178
-
179
- if self.cond_type == "base":
180
- if self.config != 'E':
181
- if isinstance(in_channels, list):
182
- in_channels_first = []
183
- for i in range(len(in_channels)):
184
- in_channels_first.append(in_channels[i] + 2)
185
- else: # old head
186
- in_channels_first = in_channels + 1
187
- else:
188
- in_channels_first = in_channels
189
- else:
190
- in_channels_first = in_channels
191
- self.noise_embedding = NoiseEmbedding(
192
- num_channels=in_channels, emb_channels=max(nc), device=device
193
- )
194
-
195
- self.timestep_embedding = lambda x: x
196
-
197
- # check if in_channels is a list
198
- self.m_head = InHead(in_channels_first, nc[0])
199
-
200
- if conv_type == "next":
201
- self.m_down1 = NextEncBlock(
202
- nc[0], nc[0], bias=bias, mode=mode, mult_fact=mult_fact, nb=nb
203
- )
204
- self.m_down2 = NextEncBlock(
205
- nc[1], nc[1], bias=bias, mode=mode, mult_fact=mult_fact, nb=nb
206
- )
207
- self.m_down3 = NextEncBlock(
208
- nc[2], nc[2], bias=bias, mode=mode, mult_fact=mult_fact, nb=nb
209
- )
210
- self.m_body = NextEncBlock(
211
- nc[3], nc[3], bias=bias, mode=mode, mult_fact=mult_fact, nb=nb
212
- )
213
- self.m_up3 = NextEncBlock(
214
- nc[2], nc[2], bias=bias, mode=mode, mult_fact=mult_fact, nb=nb
215
- )
216
- self.m_up2 = NextEncBlock(
217
- nc[1], nc[1], bias=bias, mode=mode, mult_fact=mult_fact, nb=nb
218
- )
219
- self.m_up1 = NextEncBlock(
220
- nc[0], nc[0], bias=bias, mode=mode, mult_fact=mult_fact, nb=nb
221
- )
222
-
223
- elif conv_type == "base":
224
- embedding = (
225
- False if cond_type == "base" else True
226
- )
227
- emb_channels = max(nc)
228
- self.m_down1 = BaseEncBlock(
229
- nc[0],
230
- nc[0],
231
- bias=False,
232
- mode="CRC",
233
- nb=nb,
234
- embedding=embedding,
235
- emb_channels=emb_channels,
236
- emb_physics=emb_physics,
237
- img_channels=in_channels,
238
- decode_upscale=1,
239
- config=config,
240
- N=N,
241
- c_mult=c_mult,
242
- depth_encoding=depth_encoding,
243
- relu_in_encoding=relu_in_encoding,
244
- skip_in_encoding=skip_in_encoding,
245
- )
246
- self.m_down2 = BaseEncBlock(
247
- nc[1],
248
- nc[1],
249
- bias=False,
250
- mode="CRC",
251
- nb=nb,
252
- embedding=embedding,
253
- emb_channels=emb_channels,
254
- emb_physics=emb_physics,
255
- img_channels=in_channels,
256
- decode_upscale=2,
257
- config=config,
258
- N=N,
259
- c_mult=c_mult,
260
- depth_encoding=depth_encoding,
261
- relu_in_encoding=relu_in_encoding,
262
- skip_in_encoding=skip_in_encoding,
263
- )
264
- self.m_down3 = BaseEncBlock(
265
- nc[2],
266
- nc[2],
267
- bias=False,
268
- mode="CRC",
269
- nb=nb,
270
- embedding=embedding,
271
- emb_channels=emb_channels,
272
- emb_physics=emb_physics,
273
- img_channels=in_channels,
274
- decode_upscale=4,
275
- config=config,
276
- N=N,
277
- c_mult=c_mult,
278
- depth_encoding=depth_encoding,
279
- relu_in_encoding=relu_in_encoding,
280
- skip_in_encoding=skip_in_encoding,
281
- )
282
- self.m_body = BaseEncBlock(
283
- nc[3],
284
- nc[3],
285
- bias=False,
286
- mode="CRC",
287
- nb=nb,
288
- embedding=embedding,
289
- emb_channels=emb_channels,
290
- emb_physics=emb_physics,
291
- img_channels=in_channels,
292
- decode_upscale=8,
293
- config=config,
294
- N=N,
295
- c_mult=c_mult,
296
- depth_encoding=depth_encoding,
297
- relu_in_encoding=relu_in_encoding,
298
- skip_in_encoding=skip_in_encoding,
299
- )
300
- self.m_up3 = BaseEncBlock(
301
- nc[2],
302
- nc[2],
303
- bias=False,
304
- mode="CRC",
305
- nb=nb,
306
- embedding=embedding,
307
- emb_channels=emb_channels,
308
- emb_physics=emb_physics,
309
- img_channels=in_channels,
310
- decode_upscale=4,
311
- config=config,
312
- N=N,
313
- c_mult=c_mult,
314
- depth_encoding=depth_encoding,
315
- relu_in_encoding=relu_in_encoding,
316
- skip_in_encoding=skip_in_encoding,
317
- )
318
- self.m_up2 = BaseEncBlock(
319
- nc[1],
320
- nc[1],
321
- bias=False,
322
- mode="CRC",
323
- nb=nb,
324
- embedding=embedding,
325
- emb_channels=emb_channels,
326
- emb_physics=emb_physics,
327
- img_channels=in_channels,
328
- decode_upscale=2,
329
- config=config,
330
- N=N,
331
- c_mult=c_mult,
332
- depth_encoding=depth_encoding,
333
- relu_in_encoding=relu_in_encoding,
334
- skip_in_encoding=skip_in_encoding,
335
- )
336
- self.m_up1 = BaseEncBlock(
337
- nc[0],
338
- nc[0],
339
- bias=False,
340
- mode="CRC",
341
- nb=nb,
342
- embedding=embedding,
343
- emb_channels=emb_channels,
344
- emb_physics=emb_physics,
345
- img_channels=in_channels,
346
- decode_upscale=1,
347
- config=config,
348
- N=N,
349
- c_mult=c_mult,
350
- depth_encoding=depth_encoding,
351
- relu_in_encoding=relu_in_encoding,
352
- skip_in_encoding=skip_in_encoding,
353
- )
354
-
355
- else:
356
- raise NotImplementedError("conv_type should be 'base' or 'next'")
357
-
358
- if pool_type == "next_max":
359
- self.pool1 = EquivMaxPool(
360
- antialias=antialias,
361
- in_channels=nc[0],
362
- out_channels=nc[1],
363
- device=device,
364
- )
365
- self.pool2 = EquivMaxPool(
366
- antialias=antialias,
367
- in_channels=nc[1],
368
- out_channels=nc[2],
369
- device=device,
370
- )
371
- self.pool3 = EquivMaxPool(
372
- antialias=antialias,
373
- in_channels=nc[2],
374
- out_channels=nc[3],
375
- device=device,
376
- )
377
- elif pool_type == "base":
378
- self.pool1 = downsample_strideconv(nc[0], nc[1], bias=False, mode="2")
379
- self.pool2 = downsample_strideconv(nc[1], nc[2], bias=False, mode="2")
380
- self.pool3 = downsample_strideconv(nc[2], nc[3], bias=False, mode="2")
381
- self.up3 = upsample_convtranspose(nc[3], nc[2], bias=False, mode="2")
382
- self.up2 = upsample_convtranspose(nc[2], nc[1], bias=False, mode="2")
383
- self.up1 = upsample_convtranspose(nc[1], nc[0], bias=False, mode="2")
384
- else:
385
- raise NotImplementedError("pool_type should be 'base' or 'next'")
386
-
387
- self.m_tail = OutTail(nc[0], in_channels)
388
-
389
- if conv_type == "base":
390
- init_func = functools.partial(
391
- weights_init_unext, init_type="ortho", gain_conv=0.2
392
- )
393
- self.apply(init_func)
394
- else:
395
- init_func = functools.partial(
396
- weights_init_unext,
397
- init_type=init_type,
398
- gain_conv=gain_init_conv,
399
- gain_linear=gain_init_linear,
400
- )
401
- self.apply(init_func)
402
-
403
- if pretrained_pth=='jz':
404
- pth = '/lustre/fswork/projects/rech/nyd/commun/mterris/base_checkpoints/drunet_deepinv_color_finetune_22k.pth'
405
- self.load_drunet_weights(pth)
406
- elif pretrained_pth is not None:
407
- self.load_drunet_weights(pretrained_pth)
408
-
409
- if self.config == 'D':
410
- # deactivate grad for layers that do not contain the string "PhysicsBlock" or "gain" or "fact_realign"
411
- for name, param in self.named_parameters():
412
- if 'PhysicsBlock' not in name and 'gain' not in name and 'fact_realign' not in name and "m_head" not in name and "m_tail" not in name:
413
- param.requires_grad = False
414
-
415
- if device is not None:
416
- self.to(device)
417
-
418
- def load_drunet_weights(self, ckpt_pth):
419
- state_dict = torch.load(ckpt_pth, map_location=lambda storage, loc: storage)
420
-
421
- new_state_dict = {}
422
- matched_keys = [] # List to store successfully matched keys
423
- unmatched_keys = [] # List to store keys that were not matched or excluded
424
- excluded_keys = [] # List to store excluded keys
425
-
426
- # Define patterns to exclude
427
- exclude_patterns = ["head", "tail"]
428
-
429
- # Dealing with regular keys
430
- for old_key, value in state_dict.items():
431
- # Skip keys containing any of the excluded patterns
432
- if any(excluded in old_key for excluded in exclude_patterns):
433
- excluded_keys.append(old_key)
434
- continue # Skip further processing for this key
435
-
436
- new_key = old2new(old_key)
437
-
438
- if new_key is not None:
439
- matched_keys.append((old_key, new_key)) # Record the matched keys
440
- new_state_dict[new_key] = value
441
- else:
442
- unmatched_keys.append(old_key) # Record unmatched keys
443
-
444
- # TODO: clean this
445
- for excluded_key in excluded_keys:
446
- if isinstance(self.in_channels, list):
447
- for i, in_channel in enumerate(self.in_channels):
448
- # print('Dealing with conv ', i)
449
- new_key = f"m_head.conv{i}.weight"
450
- if 'head' in excluded_key:
451
- new_key = f"m_head.conv{i}.weight"
452
- # new_key = f"m_head.head.conv{i}.weight"
453
- if 'tail' in excluded_key:
454
- new_key = f"m_tail.conv{i}.weight"
455
- # DEBUG print all keys of state dict:
456
- # print(state_dict.keys())
457
- # print(self.state_dict().keys())
458
- conditioning = 'base'
459
- # if self.config == 'E':
460
- # conditioning = False
461
- new_kv = update_keyvals_headtail(excluded_key,
462
- state_dict[excluded_key],
463
- init_value=self.state_dict()[new_key],
464
- new_key_name=new_key,
465
- conditioning=conditioning)
466
- new_state_dict.update(new_kv)
467
- # print(new_kv.keys())
468
- else:
469
- new_kv = update_keyvals_headtail(excluded_key, state_dict[excluded_key])
470
- new_state_dict.update(new_kv)
471
-
472
- # Display matched keys
473
- print("Matched keys:")
474
- for old_key, new_key in matched_keys:
475
- print(f"{old_key} -> {new_key}")
476
-
477
- # Load updated state dict into the model
478
- self.load_state_dict(new_state_dict, strict=False)
479
-
480
- # Display unmatched keys
481
- print("\nUnmatched keys:")
482
- for unmatched_key in unmatched_keys:
483
- print(unmatched_key)
484
-
485
- print("Weights loaded from ", ckpt_pth)
486
-
487
- def constant2map(self, value, x):
488
- if isinstance(value, torch.Tensor):
489
- if value.ndim > 0:
490
- value_map = value.view(x.size(0), 1, 1, 1)
491
- value_map = value_map.expand(-1, 1, x.size(2), x.size(3))
492
- else:
493
- value_map = torch.ones(
494
- (x.size(0), 1, x.size(2), x.size(3)), device=x.device
495
- ) * value[None, None, None, None].to(x.device)
496
- else:
497
- value_map = (
498
- torch.ones((x.size(0), 1, x.size(2), x.size(3)), device=x.device)
499
- * value
500
- )
501
- return value_map
502
-
503
- def base_conditioning(self, x, sigma, gamma):
504
- noise_level_map = self.constant2map(sigma, x)
505
- gamma_map = self.constant2map(gamma, x)
506
- return torch.cat((x, noise_level_map, gamma_map), 1)
507
-
508
- def realign_input(self, x, physics, y):
509
-
510
- if hasattr(physics, "factor"):
511
- f = physics.factor
512
- elif hasattr(physics, "base") and hasattr(physics.base, "factor"):
513
- f = physics.base.factor
514
- elif hasattr(physics, "base") and hasattr(physics.base, "base") and hasattr(physics.base.base, "factor"):
515
- f = physics.base.base.factor
516
- else:
517
- f = 1.0
518
-
519
- sigma = 1e-6 # default value
520
- if hasattr(physics.noise_model, 'sigma'):
521
- sigma = physics.noise_model.sigma
522
- if hasattr(physics, 'base') and hasattr(physics.base, 'noise_model') and hasattr(physics.base.noise_model, 'sigma'):
523
- sigma = physics.base.noise_model.sigma
524
- if hasattr(physics, 'base') and hasattr(physics.base, 'base') and hasattr(physics.base.base, 'noise_model') and hasattr(physics.base.base.noise_model, 'sigma'):
525
- sigma = physics.base.base.noise_model.sigma
526
-
527
- if isinstance(y, TensorList):
528
- num = (y[0].reshape(y[0].shape[0], -1).abs().mean(1))
529
- else:
530
- num = (y.reshape(y.shape[0], -1).abs().mean(1))
531
-
532
- snr = num / (sigma + 1e-4) # SNR equivariant
533
- gamma = 1 / (1e-4 + 1 / (snr * f **2 )) # TODO: check square-root / mean / check if we need to add a factor in front ?
534
- gamma = gamma[(...,) + (None,) * (x.dim() - 1)]
535
- model_input = physics.prox_l2(x, y, gamma=gamma * self.fact_realign)
536
-
537
- return model_input
538
-
539
- def forward_unet(self, x0, sigma=None, gamma=None, physics=None, t=None, y=None, img_channels=None):
540
-
541
- # list_values = []
542
-
543
- if self.cond_type == "base":
544
- # if self.config != 'E':
545
- x0 = self.base_conditioning(x0, sigma, gamma)
546
- emb_sigma = None
547
- else:
548
- emb_sigma = self.noise_embedding(
549
- sigma
550
- ) # This only if the embedding is the non-basic one from drunet
551
-
552
- emb_timestep = self.timestep_embedding(t)
553
-
554
- x1 = self.m_head(x0) # old
555
- # x1 = self.m_head(x0, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels)
556
- # list_values.append(x1.abs().mean())
557
-
558
- if self.config == 'G':
559
- x1_, emb1_ = self.m_down1(x1, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels)
560
- else:
561
- x1_ = self.m_down1(x1, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels, scale=0)
562
- x2 = self.pool1(x1_)
563
- # list_values.append(x2.abs().mean())
564
-
565
- if self.config == 'G':
566
- x3_, emb3_ = self.m_down2(x2, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels)
567
- else:
568
- x3_ = self.m_down2(x2, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels, scale=1)
569
- x3 = self.pool2(x3_)
570
-
571
- # list_values.append(x3.abs().mean())
572
- if self.config == 'G':
573
- x4_, emb4_ = self.m_down3(x3, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels)
574
- else:
575
- x4_ = self.m_down3(x3, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels, scale=2)
576
- x4 = self.pool3(x4_)
577
-
578
- # issue: https://github.com/matthieutrs/ram_project/issues/1
579
- # solution 1: using .contiguous() below
580
- # solution 2: using a print statement that magically solves the issue
581
- ###print(x4.is_contiguous())
582
-
583
- # list_values.append(x4.abs().mean())
584
- if self.config == 'G':
585
- x, _ = self.m_body(x4, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels)
586
- else:
587
- x = self.m_body(x4, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels, scale=3)
588
-
589
- # list_values.append(x.abs().mean())
590
- if self.pool_type == "next" or self.pool_type == "next_max":
591
- x = self.pool3.upscale(x + x4)
592
- else:
593
- x = self.up3(x + x4)
594
-
595
- if self.config == 'G':
596
- x, _ = self.m_up3(x, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, emb_in=emb4_, img_channels=img_channels)
597
- else:
598
- x = self.m_up3(x, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels, scale=2)
599
-
600
- # list_values.append(x.abs().mean())
601
- if self.pool_type == "next" or self.pool_type == "next_max":
602
- x = self.pool2.upscale(x + x3)
603
- else:
604
- x = self.up2(x + x3)
605
-
606
- if self.config == 'G':
607
- x, _ = self.m_up2(x, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, emb_in=emb3_, img_channels=img_channels)
608
- else:
609
- x = self.m_up2(x, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels, scale=1)
610
-
611
- # list_values.append(x.abs().mean())
612
- if self.pool_type == "next" or self.pool_type == "next_max":
613
- x = self.pool1.upscale(x + x2)
614
- else:
615
- x = self.up1(x + x2)
616
-
617
- if self.config == 'G':
618
- x, _ = self.m_up1(x, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, emb_in=emb1_, img_channels=img_channels)
619
- else:
620
- x = self.m_up1(x, emb_sigma=emb_sigma, physics=physics, t=emb_timestep, y=y, img_channels=img_channels, scale=0)
621
-
622
- # list_values.append(x.abs().mean())
623
- if self.separate_head:
624
- x = self.m_tail(x + x1, img_channels)
625
- else:
626
- x = self.m_tail(x + x1)
627
-
628
- return x
629
-
630
- def forward(self, x, sigma=None, gamma=None, physics=None, t=None, y=None):
631
- r"""
632
- Run the denoiser on image with noise level :math:`\sigma`.
633
-
634
- :param torch.Tensor x: noisy image
635
- :param float, torch.Tensor sigma: noise level. If ``sigma`` is a float, it is used for all images in the batch.
636
- If ``sigma`` is a tensor, it must be of shape ``(batch_size,)``.
637
- """
638
- img_channels = x.shape[1] # x_n_chan = x.shape[1]
639
- if self.emb_physics:
640
- physics = MultiScaleLinearPhysics(physics, x.shape[-3:], device=x.device)
641
-
642
- if self.separate_head and img_channels not in self.in_channels:
643
- raise ValueError(f"Input image has {img_channels} channels, but the network only have heads for {self.in_channels} channels.")
644
-
645
- if y is not None:
646
- x = self.realign_input(x, physics, y)
647
-
648
- x = self.forward_unet(x, sigma=sigma, gamma=gamma, physics=physics, t=t, y=y, img_channels=img_channels)
649
-
650
- return x
651
-
652
-
653
- def krylov_embeddings_old(y, p, factor, v=None, N=4, feat_size=1, x_init=None, img_channels=3):
654
-
655
- if x_init is None:
656
- x = p.A_adjoint(y)
657
- else:
658
- x = x_init[:, :img_channels, ...]
659
-
660
- if feat_size > 1:
661
- _, C, _, _ = x.shape
662
- if v is None:
663
- v = torch.zeros_like(x).repeat(1, N-1, 1, 1)
664
- out = x - v[:, :C, ...]
665
- norm = factor ** 2
666
- A = lambda u: p.A_adjoint(p.A(u)) * norm
667
- for i in range(N-1):
668
- x = A(x) - v[:, (i+1) * C:(i+2) * C, ...]
669
- out = torch.cat([out, x], dim=1)
670
- else:
671
- if v is None:
672
- v = torch.zeros_like(x)
673
- out = x - v
674
- norm = factor ** 2
675
- A = lambda u: p.A_adjoint(p.A(u)) * norm
676
- for i in range(N-1):
677
- x = A(x) - v
678
- out = torch.cat([out, x], dim=1)
679
- return out
680
-
681
- def krylov_embeddings(y, p, factor, v=None, N=4, x_init=None, img_channels=3):
682
- """
683
- Efficient Krylov subspace embedding computation with parallel processing.
684
-
685
- Args:
686
- y (torch.Tensor): The input tensor.
687
- p: An object with A and A_adjoint methods (linear operator).
688
- factor (float): Scaling factor.
689
- v (torch.Tensor, optional): Precomputed values to subtract from Krylov sequence. Defaults to None.
690
- N (int, optional): Number of Krylov iterations. Defaults to 4.
691
- feat_size (int, optional): Feature expansion size. Defaults to 1.
692
- x_init (torch.Tensor, optional): Initial guess. Defaults to None.
693
- img_channels (int, optional): Number of image channels. Defaults to 3.
694
-
695
- Returns:
696
- torch.Tensor: The Krylov embeddings.
697
- """
698
-
699
- if x_init is None:
700
- x = p.A_adjoint(y)
701
- else:
702
- x = x_init.clone() # Extract the first img_channels
703
-
704
- norm = factor ** 2 # Precompute normalization factor
705
- AtA = lambda u: p.A_adjoint(p.A(u)) * norm # Define the linear operator
706
-
707
- v = v if v is not None else torch.zeros_like(x)
708
-
709
- out = x.clone()
710
- # Compute Krylov basis
711
- x_k = x.clone()
712
- for i in range(N-1):
713
- x_k = AtA(x_k) - v
714
- out = torch.cat([out, x_k], dim=1)
715
-
716
- return out
717
-
718
-
719
- def grad_embeddings(y, p, factor, v=None, N=4, feat_size=1):
720
- Aty = p.A_adjoint(y)
721
- if feat_size > 1:
722
- _, C, _, _ = Aty.shape
723
- if v is None:
724
- v = torch.zeros_like(Aty).repeat(1, N-1, 1, 1)
725
- out = v[:, :C, ...] - Aty
726
- norm = factor ** 2
727
- A = lambda u: p.A_adjoint(p.A(u)) * norm
728
- for i in range(N-1):
729
- x = A(v[:, (i+1) * C:(i+2) * C, ...]) - Aty
730
- out = torch.cat([out, x], dim=1)
731
- else:
732
- if v is None:
733
- v = torch.zeros_like(Aty)
734
- out = v - Aty
735
- norm = factor ** 2
736
- A = lambda u: p.A_adjoint(p.A(u)) * norm
737
- for i in range(N-1):
738
- x = A(v) - Aty
739
- out = torch.cat([out, x], dim=1)
740
- return out
741
-
742
-
743
- def prox_embeddings(y, p, factor, v=None, N=4):
744
- x = p.A_adjoint(y)
745
- B, C, H, W = x.shape
746
-
747
- if v is None:
748
- v = torch.zeros_like(x)
749
-
750
- v = v.repeat(1, N - 1, 1, 1)
751
-
752
- gamma = torch.logspace(-4, -1, N-1, device=x.device).repeat_interleave(C).unsqueeze(0).unsqueeze(-1).unsqueeze(-1)
753
- norm = factor ** 2
754
- A_sub = lambda u: torch.cat([p.A_adjoint(p.A(u[:, i * C:(i+1) * C, ...])) * norm for i in range(N-1)], dim=1)
755
- A = lambda u: A_sub(u) + (u - v) * gamma
756
-
757
- u_hat = conjugate_gradient(A, x.repeat(1, N-1, 1, 1), max_iter=3, tol=1e-3)
758
- u_hat = torch.cat([u_hat, x], dim=1)
759
-
760
- return u_hat
761
-
762
- # --------------------------------------------
763
- # Res Block: x + conv(relu(conv(x)))
764
- # --------------------------------------------
765
- class MeasCondBlock(nn.Module):
766
- def __init__(
767
- self,
768
- out_channels=64,
769
- img_channels=None,
770
- decode_upscale=None,
771
- config = 'A',
772
- N=4,
773
- depth_encoding=1,
774
- relu_in_encoding=False,
775
- skip_in_encoding=True,
776
- c_mult=1,
777
- ):
778
- super(MeasCondBlock, self).__init__()
779
-
780
- self.separate_head = isinstance(img_channels, list)
781
- self.config = config
782
-
783
- assert img_channels is not None, "decode_dimensions should be provided"
784
- assert decode_upscale is not None, "decode_upscale should be provided"
785
-
786
- # if self.separate_head:
787
- if self.config == 'A':
788
- self.relu_encoding = nn.ReLU(inplace=False)
789
- self.N = N
790
- self.c_mult = c_mult
791
- self.encoding_conv = Heads(img_channels, out_channels, depth=depth_encoding, scale=1, bias=False, c_mult=self.c_mult, relu_in=relu_in_encoding, skip_in=skip_in_encoding)
792
- if self.config == 'B':
793
- self.N = N
794
- self.c_mult = c_mult
795
- self.relu_encoding = nn.ReLU(inplace=False)
796
- self.decoding_conv = Tails(out_channels, img_channels, depth=1, scale=1, bias=False, c_mult=self.c_mult)
797
- self.encoding_conv = Heads(img_channels, out_channels, depth=depth_encoding, scale=1, bias=False, c_mult=self.c_mult, relu_in=relu_in_encoding, skip_in=skip_in_encoding)
798
- if self.config == 'C':
799
- self.N = N
800
- self.c_mult = c_mult
801
- self.relu_encoding = nn.ReLU(inplace=False)
802
- self.decoding_conv = Tails(out_channels, img_channels, depth=1, scale=1, bias=False, c_mult=self.c_mult)
803
- self.encoding_conv = Heads(img_channels, out_channels, depth=depth_encoding, scale=1, bias=False, c_mult=self.c_mult*N, c_add=N, relu_in=relu_in_encoding, skip_in=skip_in_encoding)
804
- elif self.config == 'D':
805
- self.N = N
806
- self.c_mult = c_mult
807
- self.relu_encoding = nn.ReLU(inplace=False)
808
- self.decoding_conv = Tails(out_channels, img_channels, depth=1, scale=1, bias=False, c_mult=self.c_mult)
809
- self.encoding_conv = Heads(img_channels, out_channels, depth=depth_encoding, scale=1, bias=False, c_mult=self.c_mult*N, c_add=N, relu_in=relu_in_encoding, skip_in=skip_in_encoding)
810
-
811
- self.gain = torch.nn.Parameter(torch.tensor([1.0]), requires_grad=True)
812
- self.gain_gradx = torch.nn.Parameter(torch.tensor([1e-2]), requires_grad=True)
813
- self.gain_grady = torch.nn.Parameter(torch.tensor([1e-2]), requires_grad=True)
814
- self.gain_pinvx = torch.nn.Parameter(torch.tensor([1e-2]), requires_grad=True)
815
- self.gain_pinvy = torch.nn.Parameter(torch.tensor([1e-2]), requires_grad=True)
816
-
817
- def forward(self, x, y, physics, t, emb_in=None, img_channels=None, scale=1):
818
- if self.config == 'A':
819
- return self.measurement_conditioning_config_A(x, y, physics, img_channels=img_channels, scale=scale)
820
- elif self.config == 'F':
821
- return self.measurement_conditioning_config_F(x, y, physics, img_channels=img_channels, scale=scale)
822
- elif self.config == 'B':
823
- return self.measurement_conditioning_config_B(x, y, physics, img_channels=img_channels, scale=scale)
824
- elif self.config == 'C':
825
- return self.measurement_conditioning_config_C(x, y, physics, img_channels=img_channels, scale=scale)
826
- elif self.config == 'D':
827
- return self.measurement_conditioning_config_D(x, y, physics, img_channels=img_channels, scale=scale)
828
- elif self.config == 'E':
829
- return self.measurement_conditioning_config_E(x, y, physics, img_channels=img_channels, scale=scale)
830
- else:
831
- raise NotImplementedError('Config not implemented')
832
-
833
- def measurement_conditioning_config_A(self, x, y, physics, img_channels, scale=0):
834
- physics.set_scale(scale)
835
- factor = 2**(scale)
836
- meas = krylov_embeddings(y, physics, factor, N=self.N, img_channels=img_channels)
837
- cond = self.encoding_conv(meas)
838
- emb = self.relu_encoding(cond)
839
- return emb
840
-
841
- def measurement_conditioning_config_B(self, x, y, physics, img_channels, scale=0):
842
- physics.set_scale(scale)
843
- dec = self.decoding_conv(x, img_channels)
844
- factor = 2**(scale)
845
- meas = krylov_embeddings(y, physics, factor, v=dec, N=self.N, img_channels=img_channels)
846
- cond = self.encoding_conv(meas)
847
- emb = self.relu_encoding(cond)
848
- return emb # * sigma_emb
849
-
850
- def measurement_conditioning_config_C(self, x, y, physics, img_channels, scale=0):
851
- physics.set_scale(scale)
852
- dec = self.decoding_conv(x, img_channels)
853
- factor = 2**(scale)
854
- meas_y = krylov_embeddings(y, physics, factor, N=self.N, img_channels=img_channels)
855
- meas_dec = krylov_embeddings(y, physics, factor, N=self.N, x_init=dec[:, :img_channels, ...], img_channels=img_channels)
856
- for c in range(1, self.c_mult):
857
- meas_cur = krylov_embeddings(y, physics, factor, N=self.N, x_init=dec[:, img_channels*c:img_channels*(c+1)],
858
- img_channels=img_channels)
859
- meas_dec = torch.cat([meas_dec, meas_cur], dim=1)
860
- meas = torch.cat([meas_y, meas_dec], dim=1)
861
- cond = self.encoding_conv(meas)
862
- emb = self.relu_encoding(cond)
863
- return emb
864
-
865
- def measurement_conditioning_config_D(self, x, y, physics, img_channels, scale=0):
866
- physics.set_scale(scale)
867
- dec = self.decoding_conv(x, img_channels)
868
- factor = 2**(scale)
869
- meas_y = krylov_embeddings(y, physics, factor, N=self.N, img_channels=img_channels)
870
- meas_dec = krylov_embeddings(y, physics, factor, N=self.N, x_init=dec[:, :img_channels, ...], img_channels=img_channels)
871
- for c in range(1, self.c_mult):
872
- meas_cur = krylov_embeddings(y, physics, factor, N=self.N, x_init=dec[:, img_channels*c:img_channels*(c+1)],
873
- img_channels=img_channels)
874
- meas_dec = torch.cat([meas_dec, meas_cur], dim=1)
875
- meas = torch.cat([meas_y, meas_dec], dim=1)
876
- cond = self.encoding_conv(meas)
877
- emb = self.relu_encoding(cond)
878
- return cond
879
-
880
- def measurement_conditioning_config_F(self, x, y, physics, img_channels):
881
- dec_large = self.decoding_conv(x, img_channels) # go from shape = (B, C, H, W) to (B, 64, 64, 64) (independent of modality)
882
- dec = self.relu_decoding(dec_large)
883
-
884
- Adec = physics.A(dec)
885
-
886
- grad = physics.A_adjoint(self.gain_gradx ** 2 * Adec - self.gain_grady ** 2 * y) # TODO: check if we need to have L2 (depending on noise nature, can be automated)
887
-
888
- if 'tomography' in physics.__class__.__name__.lower(): # or 'pansharp' in physics.__class__.__name__.lower():
889
- pinv = physics.prox_l2(dec, self.gain_pinvx ** 2 * Adec - self.gain_pinvy ** 2 * y, gamma=1e9)
890
- else:
891
- pinv = physics.A_dagger(self.gain_pinvx ** 2 * Adec - self.gain_pinvy ** 2 * y) # TODO: do we set this to gain_gradx ? To get 0 during training too?? Better for denoising I guess
892
-
893
- # Mix grad and pinv
894
- emb = grad - pinv # will be 0 in the case of denoising, but also inpainting
895
- im_emb = dec - physics.A_adjoint_A(dec) # will be 0 in the case of denoising, but not inpainting # TODO: add gains here too
896
- grad_large = emb + im_emb
897
-
898
- emb_grad = self.encoding_conv(grad_large)
899
- emb_grad = self.relu_encoding(emb_grad)
900
- return emb_grad
901
-
902
- def measurement_conditioning_config_E(self, x, y, physics, img_channels, scale=1):
903
- dec = self.decoding_conv(x, img_channels) # go from shape = (B, C, H, W) to (B, 64, 64, 64) (independent of modality)
904
-
905
- physics.set_scale(scale)
906
-
907
- # TODO: check things are batched
908
- f = physics.factor if hasattr(physics, "factor") else 1.0
909
- err = (physics.A_adjoint(physics.A(dec) - y))
910
- # snr = self.snr_module(err)
911
- snr = dec.reshape(dec.shape[0], -1).abs().mean(dim=1) / (err.reshape(err.shape[0], -1).abs().mean(dim=1) + 1e-4)
912
-
913
- gamma = 1 / (1e-4 + 1 / (snr * f ** 2 + 1)) # TODO: check square-root / mean / check if we need to add a factor in front
914
- gamma_est = gamma[(...,) + (None,) * (dec.dim() - 1)]
915
-
916
- prox = physics.prox_l2(dec, y, gamma=gamma_est * self.fact_prox)
917
- emb = self.fact_prox_skip_1 * prox + self.fact_prox_skip_2 * dec
918
-
919
- emb_grad = self.encoding_conv(emb)
920
- emb_grad = self.relu_encoding(emb_grad)
921
- return emb_grad
922
-
923
-
924
- class ResBlock(nn.Module):
925
- def __init__(
926
- self,
927
- in_channels=64,
928
- out_channels=64,
929
- kernel_size=3,
930
- stride=1,
931
- padding=1,
932
- bias=True,
933
- mode="CRC",
934
- negative_slope=0.2,
935
- embedding=False,
936
- emb_channels=None,
937
- emb_physics=False,
938
- img_channels=None,
939
- decode_upscale=None,
940
- config = 'A',
941
- head=False,
942
- tail=False,
943
- N=4,
944
- c_mult=1,
945
- depth_encoding=1,
946
- relu_in_encoding=False,
947
- skip_in_encoding=True,
948
- ):
949
- super(ResBlock, self).__init__()
950
-
951
- if not head and not tail:
952
- assert in_channels == out_channels, "Only support in_channels==out_channels."
953
- self.separate_head = isinstance(img_channels, list)
954
- self.config = config
955
- self.is_head = head
956
- self.is_tail = tail
957
-
958
- if self.is_head:
959
- self.head = InHead(img_channels, out_channels, input_layer=True)
960
-
961
- # if self.is_tail:
962
- # self.tail = OutTail(in_channels, out_channels)
963
-
964
- if not self.is_head and not self.is_tail:
965
- self.conv1 = conv(
966
- in_channels,
967
- out_channels,
968
- kernel_size,
969
- stride,
970
- padding,
971
- bias,
972
- "C",
973
- negative_slope,
974
- )
975
- self.nl = nn.ReLU(inplace=True)
976
- self.conv2 = conv(
977
- out_channels,
978
- out_channels,
979
- kernel_size,
980
- stride,
981
- padding,
982
- bias,
983
- "C",
984
- negative_slope,
985
- )
986
-
987
- if embedding:
988
- self.gain = torch.nn.Parameter(torch.tensor([1.0]), requires_grad=True)
989
- self.emb_linear = MPConv(emb_channels, out_channels, kernel=[])
990
-
991
- self.emb_physics = emb_physics
992
-
993
- if self.emb_physics:
994
- self.gain = torch.nn.Parameter(torch.tensor([1.0]), requires_grad=True)
995
- self.PhysicsBlock = MeasCondBlock(out_channels=out_channels, config=config, c_mult=c_mult,
996
- img_channels=img_channels, decode_upscale=decode_upscale,
997
- N=N, depth_encoding=depth_encoding,
998
- relu_in_encoding=relu_in_encoding, skip_in_encoding=skip_in_encoding)
999
-
1000
- def forward(self, x, emb_sigma=None, physics=None, t=None, y=None, emb_in=None, img_channels=None, scale=0):
1001
- u = self.conv1(x)
1002
- u = self.nl(u)
1003
- u_2 = self.conv2(u) # Should we sum this with below?
1004
- if self.emb_physics: # TODO: add a factor (1+gain) to the emb_meas? that depends on the input snr
1005
- emb_grad = self.PhysicsBlock(u, y, physics, t, img_channels=img_channels, scale=scale)
1006
- u_1 = self.gain * emb_grad # x - grad (sign does not matter)
1007
- else:
1008
- u_1 = 0
1009
- return x + u_2 + u_1
1010
-
1011
-
1012
-
1013
-
1014
- def calculate_fan_in_and_fan_out(tensor, pytorch_style: bool = True):
1015
- """
1016
- from https://github.com/megvii-research/basecls/blob/main/basecls/layers/wrapper.py#L77
1017
- """
1018
- if len(tensor.shape) not in (2, 4, 5):
1019
- raise ValueError(
1020
- "fan_in and fan_out can only be computed for tensor with 2/4/5 "
1021
- "dimensions"
1022
- )
1023
- if len(tensor.shape) == 5:
1024
- # `GOIKK` to `OIKK`
1025
- tensor = tensor.reshape(-1, *tensor.shape[2:]) if pytorch_style else tensor[0]
1026
-
1027
- num_input_fmaps = tensor.shape[1]
1028
- num_output_fmaps = tensor.shape[0]
1029
- receptive_field_size = 1
1030
- if len(tensor.shape) > 2:
1031
- receptive_field_size = functools.reduce(lambda x, y: x * y, tensor.shape[2:], 1)
1032
- fan_in = num_input_fmaps * receptive_field_size
1033
- fan_out = num_output_fmaps * receptive_field_size
1034
- return fan_in, fan_out
1035
-
1036
-
1037
- def weights_init_unext(m, gain_conv=1.0, gain_linear=1.0, init_type="ortho"):
1038
- if hasattr(m, "modules"):
1039
- for submodule in m.modules():
1040
- if not 'skip' in str(submodule):
1041
- if isinstance(submodule, nn.Conv2d) or isinstance(
1042
- submodule, nn.ConvTranspose2d
1043
- ):
1044
- # nn.init.orthogonal_(submodule.weight.data, gain=1.0)
1045
- k_shape = submodule.weight.data.shape[-1]
1046
- if k_shape < 4:
1047
- nn.init.orthogonal_(submodule.weight.data, gain=0.2)
1048
- else:
1049
- _, fan_out = calculate_fan_in_and_fan_out(submodule.weight)
1050
- std = math.sqrt(2 / fan_out)
1051
- nn.init.normal_(submodule.weight, 0, std)
1052
- # if init_type == 'ortho':
1053
- # nn.init.orthogonal_(submodule.weight.data, gain=gain_conv)
1054
- # elif init_type == 'kaiming':
1055
- # nn.init.kaiming_normal_(submodule.weight.data, a=0, mode='fan_in')
1056
- # elif init_type == 'xavier':
1057
- # nn.init.xavier_normal_(submodule.weight.data, gain=gain_conv)
1058
- elif isinstance(submodule, nn.Linear):
1059
- nn.init.normal_(submodule.weight.data, std=0.01)
1060
- elif 'skip' in str(submodule):
1061
- if isinstance(submodule, nn.Conv2d) or isinstance(
1062
- submodule, nn.ConvTranspose2d
1063
- ):
1064
- nn.init.ones_(submodule.weight.data)
1065
- # else:
1066
- # classname = submodule.__class__.__name__
1067
- # # print('WARNING: no init for ', classname)
1068
-
1069
- def old2new(old_key):
-     """
-     Convert old DRUNet state-dict keys to the new UNExt-style keys.
-
-     Patterns matched:
-     1. Downsampling blocks:
-        - residual (non-downsampling) convolutions:
-          m_down3.2.res.0.weight -> m_down3.enc.2.conv1.weight
-        - strided downsampling convolutions:
-          m_down3.4.weight -> pool3.weight
-     2. Upsampling blocks:
-        - transposed upsampling convolutions:
-          m_up3.0.weight -> up3.weight
-        - residual convolutions (note the index shift by one):
-          m_up3.2.res.0.weight -> m_up3.enc.1.conv1.weight
-     3. Body blocks:
-          m_body.0.res.2.weight -> m_body.enc.0.conv2.weight
-
-     Args:
-         old_key (str): The old key from the state dictionary.
-
-     Returns:
-         str or None: The new key if a pattern matched, otherwise None.
-     """
-     # Residual blocks inside downsampling stages
-     match_residual = re.search(r"(m_down\d+)\.(\d+)\.res\.(\d+)", old_key)
-     if match_residual:
-         prefix = match_residual.group(1)  # e.g., "m_down2"
-         index = match_residual.group(2)  # e.g., "3"
-         conv_index = int(match_residual.group(3))  # e.g., 0
-         # Map the old conv index to the new one: 0 -> 1, 2 -> 2
-         new_conv_index = 1 if conv_index == 0 else 2
-         return f"{prefix}.enc.{index}.conv{new_conv_index}.weight"
-
-     # Residual blocks inside upsampling stages (enc index shifts down by one)
-     match_residual = re.search(r"(m_up\d+)\.(\d+)\.res\.(\d+)", old_key)
-     if match_residual:
-         prefix = match_residual.group(1)  # e.g., "m_up2"
-         index = int(match_residual.group(2))  # e.g., 3
-         conv_index = int(match_residual.group(3))  # e.g., 0
-         new_conv_index = 1 if conv_index == 0 else 2
-         return f"{prefix}.enc.{index - 1}.conv{new_conv_index}.weight"
-
-     # Strided downsampling convolutions
-     match_pool_downsample = re.search(r"m_down(\d+)\.4\.weight", old_key)
-     if match_pool_downsample:
-         index = match_pool_downsample.group(1)  # e.g., "1" or "2"
-         return f"pool{index}.weight"
-
-     # Transposed upsampling convolutions
-     match_upsample = re.search(r"m_up(\d+)\.0\.weight", old_key)
-     if match_upsample:
-         index = match_upsample.group(1)  # e.g., "1" or "2"
-         return f"up{index}.weight"
-
-     # Body blocks
-     match_body = re.search(r"(m_body)\.(\d+)\.res\.(\d+)\.weight", old_key)
-     if match_body:
-         prefix = match_body.group(1)  # "m_body"
-         index = match_body.group(2)  # e.g., "0"
-         conv_index = int(match_body.group(3))  # e.g., 2
-         new_conv_index = 1 if conv_index == 0 else 2
-         return f"{prefix}.enc.{index}.conv{new_conv_index}.weight"
-
-     # No pattern matched
-     return None
-
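# Illustrative conversions, derived directly from the regexes above:
assert old2new("m_down3.2.res.0.weight") == "m_down3.enc.2.conv1.weight"
assert old2new("m_up3.2.res.0.weight") == "m_up3.enc.1.conv1.weight"
assert old2new("m_down2.4.weight") == "pool2.weight"
assert old2new("m_up1.0.weight") == "up1.weight"
assert old2new("m_body.0.res.2.weight") == "m_body.enc.0.conv2.weight"
assert old2new("h.weight") is None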
- def update_keyvals_headtail(old_key, old_value, init_value=None, new_key_name='m_head.conv0.weight', conditioning='base'):
-     """
-     Convert old DRUNet head/tail weights to the new UNExt-style weights.
-
-     The keys themselves do not change, but the weights need to be zero-padded
-     to the new channel counts.
-
-     Args:
-         old_key (str): The old key from the state dictionary.
-         old_value (torch.Tensor): The old weight tensor for that key.
-         init_value (torch.Tensor): A freshly initialized weight with the target shape.
-         new_key_name (str): Key under which the padded weight is returned.
-         conditioning (str): Conditioning mode ('base' or other).
-     """
-     if 'head' in old_key:
-         c_in = init_value.shape[1]
-         new_value = torch.zeros_like(init_value.detach())
-         if conditioning == 'base':
-             # Duplicate the old conditioning channel into the two trailing
-             # slots and copy the remaining channels into the leading slots.
-             new_value[:, -2:-1, ...] = old_value[:, -1:, ...]
-             new_value[:, -1:, ...] = old_value[:, -1:, ...]
-             new_value[:, :c_in - 2, ...] = old_value[:, :c_in - 2, ...]
-         else:
-             # Keep the first c_in channels, then place the old conditioning
-             # channel (the last channel of old_value) in the trailing slot.
-             new_value[:, ...] = old_value[:, :c_in, ...]
-             new_value[:, -1:, ...] = old_value[:, -1:, ...]
-         return {new_key_name: new_value}
-     elif 'tail' in old_key:
-         c_out = init_value.shape[0]
-         c_out_old = old_value.shape[0]
-         new_value = torch.zeros_like(init_value.detach())
-         if c_out == c_out_old:
-             new_value = old_value.detach()
-         elif c_out < c_out_old:
-             # Keep only the first c_out output channels of the old tail.
-             new_value[:, ...] = old_value[:c_out, ...]
-         return {new_key_name: new_value}
-     else:
-         print(f"Key {old_key} does not contain 'head' or 'tail'.")
-
-
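# Hypothetical example: pad an old 4-channel DRUNet head (3 image channels plus
# one noise-level channel) into a new 5-channel head with two conditioning slots.
_old_w = torch.randn(64, 4, 3, 3)
_init_w = torch.zeros(64, 5, 3, 3)
_padded = update_keyvals_headtail('m_head.weight', _old_w, init_value=_init_w,
                                  new_key_name='m_head.conv0.weight')
assert _padded['m_head.conv0.weight'].shape == _init_w.shape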
- # quick smoke test
- if __name__ == "__main__":
-     net = UNeXt()
-     x = torch.randn(1, 3, 128, 128)
-     y = net(x, 0.1)
-     print(y.shape)
-
-
- # Design notes kept for reference: candidate measurement embeddings for
- # diagonal physics operators.
- #
- # IDEA 1: kill the signal in the image of A
- #     im_emb = dec - physics.A_adjoint_A(dec)
- #     (zero for denoising, non-zero for e.g. inpainting)
- #
- # IDEA 2: weight the embedding by the norm of the signal in ker(A)
- #     normker = (dec - physics.A_adjoint_A(dec)).norm() / (dec.norm() + 1e-4)
- #     im_emb = normker * physics.A_adjoint(self.gain_diag_x * physics.A(dec) - self.gain_diag_y * y)
- #
- # IDEA 3: as IDEA 2, but add a pseudo-inverse term as well
- #     normker = (dec - physics.A_adjoint_A(dec)).norm() / (dec.norm() + 1e-4)
- #     grad_term = physics.A_adjoint(self.gain_diag_x * physics.A(dec) - self.gain_diag_y * y)
- #     if 'tomography' in physics.__class__.__name__.lower():
- #         pinv_term = physics.prox_l2(dec, self.gain_diagpinv_x ** 2 * Adec - self.gain_diagpinv_y ** 2 * y, gamma=1e9)
- #     else:
- #         pinv_term = physics.A_dagger(self.gain_diagpinv_x ** 2 * Adec - self.gain_diagpinv_y ** 2 * y)
- #     im_emb = normker * (grad_term + pinv_term)
- #
- # Mixing the embedding with an SNR-dependent gain:
- #     if hasattr(physics.noise_model, 'sigma'):
- #         sigma = physics.noise_model.sigma
- #         snr = y.abs().mean() / (sigma + 1e-4)  # SNR-equivariant scaling
- #         snr = snr[(...,) + (None,) * (im_emb.dim() - 1)]
- #     else:
- #         snr = 1e4
- #     grad_large = emb + self.gain_diag * (1 + self.gain_noise / snr) * im_emb
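# A minimal numeric sketch of IDEA 2 above, using a hand-rolled diagonal
# (masking) operator in place of a deepinv physics object; `gain_x`/`gain_y`
# are hypothetical scalar gains standing in for gain_diag_x/gain_diag_y.
import torch

mask = (torch.rand(1, 3, 8, 8) > 0.5).float()
A = lambda v: mask * v       # diagonal forward operator
A_adjoint = A                # self-adjoint for a 0/1 mask
dec = torch.randn(1, 3, 8, 8)
y = A(torch.randn(1, 3, 8, 8))

gain_x, gain_y = 1.0, 1.0
# fraction of the decoded signal living in ker(A); 0 for pure denoising (mask == 1)
normker = (dec - A_adjoint(A(dec))).norm() / (dec.norm() + 1e-4)
im_emb = normker * A_adjoint(gain_x * A(dec) - gain_y * y)
assert im_emb.shape == dec.shape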