Spaces:

vfontech
/

Multi-Input-Res-Diffusion-VFI

Sleeping

App Files Files Community

vfontech committed 15 days ago

Commit

587665f

verified ·

1 Parent(s): 6a0795d

Uploading the app

Browse files

Files changed (32) hide show

.gitattributes +37 -35
.gitignore +26 -0
README.md +14 -14
_data/.gitkeep +0 -0
_data/example_images/frame1.png +3 -0
_data/example_images/frame3.png +3 -0
app.py +76 -0
config/confg.yaml +64 -0
model/hub.py +12 -0
model/model.py +220 -0
model/train_pipline.py +177 -0
modules/basic_layers.py +313 -0
modules/cupy_module/correlation.py +402 -0
modules/cupy_module/cupy_utils.py +7 -0
modules/cupy_module/nedt.py +129 -0
modules/cupy_module/softsplat.py +368 -0
modules/feature_extactor.py +87 -0
modules/flow_models/flow_models.py +102 -0
modules/flow_models/raft/LICENSE +29 -0
modules/flow_models/raft/corr.py +56 -0
modules/flow_models/raft/extractor.py +342 -0
modules/flow_models/raft/rfr_new.py +235 -0
modules/flow_models/raft/update.py +139 -0
modules/flow_models/raft/utils.py +81 -0
modules/half_warper.py +129 -0
modules/synthesizer.py +277 -0
requirements.txt +42 -0
utils/ema.py +32 -0
utils/inter_frame_idx.py +123 -0
utils/raft.py +20 -0
utils/uncertainty.py +49 -0
utils/utils.py +83 -0

.gitattributes CHANGED Viewed

@@ -1,35 +1,37 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+_data/example_images/frame1.png filter=lfs diff=lfs merge=lfs -text
+_data/example_images/frame3.png filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,26 @@

+*.png
+*.jpg
+*.jpeg
+*.gif
+*.bmp
+*.tiff
+*.ico
+!_data/example_images/frame1.png
+!_data/example_images/frame3.png
+__pycache__
+*.pyc
+*.pyo
+*.pyd
+*.pyw
+*.pyz
+*.ckpt
+*.pt
+*.pth
+!metrics/flolpips/weights/v0.1/alex.pth
+*.ipynb

README.md CHANGED Viewed

@@ -1,14 +1,14 @@
----
-title: Multi Input Res Diffusion VFI
-emoji: 🚀
-colorFrom: blue
-colorTo: green
-sdk: gradio
-sdk_version: 5.25.2
-app_file: app.py
-pinned: false
-license: mit
-short_description: Gradio demo for Multi-Input ResShift Diffusion VFI
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Multi Input Res Diffusion VFI
+emoji: 🚀
+colorFrom: blue
+colorTo: green
+sdk: gradio
+sdk_version: 5.25.2
+app_file: app.py
+pinned: false
+license: mit
+short_description: Gradio demo for Multi-Input ResShift Diffusion VFI
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

_data/.gitkeep ADDED Viewed

File without changes

_data/example_images/frame1.png ADDED Viewed

Git LFS Details

SHA256: 6109ca9d74f3bf034fa74fcd744750c704310738e50c345802d539028c31738a
Pointer size: 132 Bytes
Size of remote file: 2.23 MB

_data/example_images/frame3.png ADDED Viewed

Git LFS Details

SHA256: e2d7226ea4642e45a00ea1af1cb8b1e6bd1209deef0a956a360a7eaf848b25dc
Pointer size: 132 Bytes
Size of remote file: 2.28 MB

app.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import gradio as gr
+from PIL import Image
+from torchvision.transforms import Compose, ToTensor, Resize, Normalize
+import numpy as np
+import imageio
+import tempfile
+from utils.utils import denorm
+from model.hub import MultiInputResShiftHub
+# Load the pretrained interpolation model once, at import time.
+model = MultiInputResShiftHub.from_pretrained("vfontech/Multiple-Input-Resshift-VFI")
+# Inference only: freeze the parameters, move the model to the GPU (a CUDA
+# device is required) and switch to eval mode.
+model.requires_grad_(False).cuda().eval()
+# Preprocessing shared by both input frames: resize to 256x448, convert to a
+# tensor, then normalise every channel with mean 0.5 and std 0.5.
+transform = Compose([
+    Resize((256, 448)),
+    ToTensor(),
+    Normalize(mean=[0.5]*3, std=[0.5]*3),
+])
+def to_numpy(img_tensor):
+    img_np = denorm(img_tensor, mean=[0.5]*3, std=[0.5]*3).squeeze().permute(1, 2, 0).cpu().numpy()
+    img_np = np.clip(img_np, 0, 1)
+    return (img_np * 255).astype(np.uint8)
+def interpolate(img0_pil, img2_pil, tau, num_samples):
+    img0 = transform(img0_pil.convert("RGB")).unsqueeze(0).cuda()
+    img2 = transform(img2_pil.convert("RGB")).unsqueeze(0).cuda()
+    if num_samples == 1:
+        # Unique image
+        img1 = model.reverse_process([img0, img2], tau)
+        return Image.fromarray(to_numpy(img1)), None
+    else:
+        # Múltiples imágenes → video
+        frames = [to_numpy(img0)]
+        for t in np.linspace(0, 1, num_samples):
+            img = model.reverse_process([img0, img2], float(t))
+            frames.append(to_numpy(img))
+        frames.append(to_numpy(img2))
+        temp_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
+        imageio.mimsave(temp_path, frames, fps=8)
+        return None, temp_path
+demo = gr.Interface(
+    fn=interpolate,
+    inputs=[
+        gr.Image(type="pil", label="Initial Image (frame1)"),
+        gr.Image(type="pil", label="Final Image (frame3)"),
+        gr.Slider(0.0, 1.0, step=0.05, value=0.5, label="Tau Value (only if Num Samples = 1)"),
+        gr.Slider(1, 15, step=1, value=1, label="Number of Samples"),
+    ],
+    outputs=[
+        gr.Image(label="Interpolated Image (if num_samples = 1)"),
+        gr.Video(label="Interpolation in video (if num_samples > 1)"),
+    ],
+    title="Multi-Input ResShift Diffusion VFI",
+    description=(
+        "📄 [arXiv Paper](https://arxiv.org/pdf/2504.05402) • "
+        "🤗 [Model](https://huggingface.co/vfontech/Multiple-Input-Resshift-VFI) • "
+        "🧪 [Colab](https://colab.research.google.com/drive/1MGYycbNMW6Mxu5MUqw_RW_xxiVeHK5Aa#scrollTo=EKaYCioiP3tQ) • "
+        "🌐 [GitHub](https://github.com/VicFonch/Multi-Input-Resshift-Diffusion-VFI)\n\n"
+        "Video interpolation using Conditional Residual Diffusion.\n"
+        "- All images are resized to 256x448.\n"
+        "- If `Number of Samples` = 1, generates only one intermediate image with the given Tau value.\n"
+        "- If `Number of Samples` > 1, ignores Tau and generates a sequence of interpolated images."
+    ),
+    examples=[
+        ["_data/example_images/frame1.png", "_data/example_images/frame3.png", 0.5],
+    ],
+)
+if __name__ == "__main__":
+    demo.queue(max_size=12)
+    demo.launch(max_threads=1)

config/confg.yaml ADDED Viewed

	@@ -0,0 +1,64 @@

+# Data settings.
+data_confg:
+  train_batch_size: 6
+  val_batch_size: 6
+  test_batch_size: 6
+  flow_method: raft
+  data_domain: animation
+  datamodule_confg:
+    # NOTE(review): TrainPipline reads confg["data_confg"]["mean"] / ["sd"],
+    # whereas here both keys are nested one level deeper - confirm how the
+    # config is reshaped before it reaches the training pipeline.
+    mean: [0.5, 0.5, 0.5]
+    sd: [0.5, 0.5, 0.5]
+    # Same 256x448 resolution the demo app resizes its inputs to.
+    size: [256, 448]
+    amount_augmentations: 1
+    horizontal_flip: 0.5
+    time_flip: True
+    rotation: 0
+    brightness: 0.2
+    contrast: 0.2
+    saturation: 0.2
+    hue: 0.1
+# Presumably forwarded to the Lightning Trainer - confirm against the training script.
+trainer_confg:
+  accumulate_grad_batches: 5
+  gradient_clip_val: 1.0
+  max_epochs: 500
+  num_nodes: 1
+  devices: 2
+  accelerator: gpu
+  strategy: ddp_find_unused_parameters_true
+# Unpacked as keyword arguments in TrainPipline.configure_optimizers.
+optim_confg:
+  optimizer_confg: # AdamW
+    lr: 1.0e-4
+    betas: [0.9, 0.999]
+    eps: 1.0e-8
+  scheduler_confg: # ReduceLROnPlateau
+    mode: min
+    factor: 0.5
+    patience: 3
+    verbose: True
+pretrained_model_path: null # Fine-tune model path
+# Unpacked as keyword arguments into MultiInputResShift(**model_confg).
+model_confg:
+  kappa: 2.0
+  timesteps: 20
+  p: 0.3
+  etas_end: 0.99
+  min_noise_level: 0.04
+  # 'raft' or 'pwc'; any other value raises ValueError in MultiInputResShift.
+  flow_model: raft
+  flow_kwargs:
+    pretrained_path: null #_pretrain_models/anime_interp_full.ckpt
+  # Keyword arguments for FeatureWarper.
+  warping_kwargs:
+    in_channels: 3
+    channels: [128, 256, 384, 512]
+  # Keyword arguments for Synthesis.
+  synthesis_kwargs:
+    in_channels: 3
+    channels: [128, 256, 384, 512]
+    temb_channels: 512
+    heads: 1
+    window_size: 8
+    window_attn: True
+    grid_attn: True
+    expansion_rate: 1.5
+    num_conv_blocks: 1
+    dropout: 0.0

model/hub.py ADDED Viewed

	@@ -0,0 +1,12 @@

+from model.model import MultiInputResShift
+from huggingface_hub import PyTorchModelHubMixin
+class MultiInputResShiftHub(
+    MultiInputResShift,
+    PyTorchModelHubMixin,
+    repo_url="https://github.com/VicFonch/Multi-Input-Resshift-Diffusion-VFI",
+    paper_url="https://arxiv.org/pdf/2504.05402",
+    language="en",
+):
+    """MultiInputResShift combined with the Hugging Face hub mixin.
+
+    The class keywords attach repository, paper and language metadata; the
+    demo app loads weights through ``from_pretrained`` on this class.
+    """
+    def __init__(self, *args, **kwargs):
+        # Pure pass-through: every argument goes unchanged to the parent
+        # initialisers (MultiInputResShift comes first in the MRO).
+        super().__init__(*args, **kwargs)

model/model.py ADDED Viewed

	@@ -0,0 +1,220 @@

+import torch
+import torch.nn as nn
+from torch.nn.functional import interpolate
+import math
+from tqdm import tqdm
+from modules.feature_extactor import Extractor
+from modules.half_warper import HalfWarper
+from modules.cupy_module.nedt import NEDT
+from modules.flow_models.flow_models import (
+    RAFTFineFlow,
+    PWCFineFlow
+)
+from modules.synthesizer import Synthesis
+class FeatureWarper(nn.Module):
+    def __init__(
+        self,
+        in_channels: int = 3,
+        channels: list[int] = [32, 64, 128, 256],
+    ):
+        super().__init__()
+        channels = [in_channels + 1] + channels
+        self.half_warper = HalfWarper()
+        self.feature_extractor = Extractor(channels)
+        self.nedt = NEDT()
+    def forward(
+        self,
+        I0: torch.Tensor,
+        I1: torch.Tensor,
+        flow0to1: torch.Tensor,
+        flow1to0: torch.Tensor,
+        tau: torch.Tensor = None
+    ) -> tuple[list[torch.Tensor], list[torch.Tensor]]:
+        assert tau.shape == (I0.shape[0], 2), "tau shape must be (batch, 2)"
+        flow0tot = tau[:, 0][:, None, None, None] * flow0to1
+        flow1tot = tau[:, 1][:, None, None, None] * flow1to0
+        I0 = torch.cat([I0, self.nedt(I0)], dim=1)
+        I1 = torch.cat([I1, self.nedt(I1)], dim=1)
+        z0to1, z1to0 = HalfWarper.z_metric(I0, I1, flow0to1, flow1to0)
+        base0, base1 = self.half_warper(I0, I1, flow0tot, flow1tot, z0to1, z1to0)
+        warped0, warped1 = [base0], [base1]
+        features0 = self.feature_extractor(I0)
+        features1 = self.feature_extractor(I1)
+        for feat0, feat1 in zip(features0, features1):
+            f0 = interpolate(flow0tot, size=feat0.shape[2:], mode='bilinear', align_corners=False)
+            f1 = interpolate(flow1tot, size=feat0.shape[2:], mode='bilinear', align_corners=False)
+            z0 = interpolate(z0to1, size=feat0.shape[2:], mode='bilinear', align_corners=False)
+            z1 = interpolate(z1to0, size=feat0.shape[2:], mode='bilinear', align_corners=False)
+            w0, w1 = self.half_warper(feat0, feat1, f0, f1, z0, z1)
+            warped0.append(w0)
+            warped1.append(w1)
+        return warped0, warped1
+class MultiInputResShift(nn.Module):
+    def __init__(
+        self,
+        kappa: float=2.0,
+        p: float =0.3,
+        min_noise_level: float=0.04,
+        etas_end: float=0.99,
+        timesteps: int=15,
+        flow_model: str = 'raft',
+        flow_kwargs: dict = {},
+        warping_kwargs: dict = {},
+        synthesis_kwargs: dict = {}
+    ):
+        super().__init__()
+        self.timesteps = timesteps
+        self.kappa = kappa
+        self.eta_partition = None
+        sqrt_eta_1 = min(min_noise_level / kappa, min_noise_level, math.sqrt(0.001))
+        b0 = math.exp(1/float(timesteps - 1) * math.log(etas_end/sqrt_eta_1))
+        base = torch.ones(timesteps)*b0
+        beta = ((torch.linspace(0,1,timesteps))**p)*(timesteps-1)
+        sqrt_eta = torch.pow(base, beta) * sqrt_eta_1
+        self.register_buffer("sqrt_sum_eta", sqrt_eta)
+        self.register_buffer("sum_eta", sqrt_eta**2)
+        sum_prev_eta = torch.roll(self.sum_eta, 1)
+        sum_prev_eta[0] = 0
+        self.register_buffer("sum_prev_eta", sum_prev_eta)
+        self.register_buffer("sum_alpha", self.sum_eta - self.sum_prev_eta)
+        self.register_buffer("backward_mean_c1", self.sum_prev_eta / self.sum_eta)
+        self.register_buffer("backward_mean_c2", self.sum_alpha / self.sum_eta)
+        self.register_buffer("backward_std", self.kappa*torch.sqrt(self.sum_prev_eta*self.sum_alpha/self.sum_eta))
+        if flow_model == 'raft':
+            self.flow_model = RAFTFineFlow(**flow_kwargs)
+        elif flow_model == 'pwc':
+            self.flow_model = PWCFineFlow(**flow_kwargs)
+        else:
+            raise ValueError(f"Flow model {flow_model} not supported")
+        self.feature_warper = FeatureWarper(**warping_kwargs)
+        self.synthesis = Synthesis(**synthesis_kwargs)
+    def forward_process(
+        self,
+        x: torch.Tensor | None,
+        Y: list[torch.Tensor],
+        tau: torch.Tensor | float | None,
+        t: torch.Tensor | int
+    ) -> torch.Tensor:
+        if tau is None:
+            tau: torch.Tensor = torch.full((x.shape[0], len(Y)), 0.5, device=x.device, dtype=x.dtype)
+        elif isinstance(tau, float):
+            assert tau >= 0 and tau <= 1, "tau must be between 0 and 1"
+            tau: torch.Tensor = torch.cat([
+                torch.full((x.shape[0], 1), tau, device=x.device, dtype=x.dtype),
+                torch.full((x.shape[0], 1), 1 - tau, device=x.device, dtype=x.dtype)
+            ], dim=1)
+        if not torch.is_tensor(t):
+            t: torch.Tensor = torch.tensor([t], device=x.device, dtype=torch.long)
+        if x is None:
+            x: torch.Tensor = torch.zeros_like(Y[0])
+        eta = self.sum_eta[t][:, None] * tau
+        eta = eta[:, :, None, None, None].transpose(0, 1)
+        e_i = torch.stack([y - x for y in Y])
+        mean = x + (eta*e_i).sum(dim=0)
+        sqrt_sum_eta = self.sqrt_sum_eta[t][:, None, None, None]
+        std = self.kappa*sqrt_sum_eta
+        epsilon = torch.randn_like(x)
+        return mean + std*epsilon
+    @torch.inference_mode()
+    def reverse_process(
+        self,
+        Y: list[torch.Tensor],
+        tau: torch.Tensor | float,
+        flows: list[torch.Tensor] | None = None,
+    ) -> torch.Tensor:
+        y = Y[0]
+        batch, device, dtype = y.shape[0], y.device, y.dtype
+        if isinstance(tau, float):
+            assert tau >= 0 and tau <= 1, "tau must be between 0 and 1"
+            tau: torch.Tensor = torch.cat([
+                torch.full((batch, 1), tau, device=device, dtype=dtype),
+                torch.full((batch, 1), 1 - tau, device=device, dtype=dtype)
+            ], dim=1)
+        if flows is None:
+           flow0to1, flow1to0 = self.flow_model(Y[0], Y[1])
+        else:
+            flow0to1, flow1to0 = flows
+        warp0to1, warp1to0 = self.feature_warper(Y[0], Y[1], flow0to1, flow1to0, tau)
+        T = torch.tensor([self.timesteps-1,] * batch, device=device, dtype=torch.long)
+        x = self.forward_process(torch.zeros_like(Y[0]), [warp0to1[0][:, :3], warp1to0[0][:, :3]], tau, T)
+        pbar = tqdm(total=self.timesteps, desc="Reversing Process")
+        for i in reversed(range(self.timesteps)):
+            t = torch.ones(batch, device = device, dtype=torch.long) * i
+            predicted_x0 = self.synthesis(x, warp0to1, warp1to0, t)
+            mean_c1 = self.backward_mean_c1[t][:, None, None, None]
+            mean_c2 = self.backward_mean_c2[t][:, None, None, None]
+            std = self.backward_std[t][:, None, None, None]
+            eta = self.sum_eta[t][:, None] * tau
+            prev_eta = self.sum_prev_eta[t][:, None] * tau
+            eta = eta[:, :, None, None, None].transpose(0, 1)
+            prev_eta = prev_eta[:, :, None, None, None].transpose(0, 1)
+            e_i = torch.stack([y - predicted_x0 for y in Y])
+            mean = (
+                mean_c1*(x + (eta*e_i).sum(dim=0))
+                + mean_c2*predicted_x0
+                - (prev_eta*e_i).sum(dim=0)
+            )
+            x = mean + std*torch.randn_like(x)
+            pbar.update(1)
+        pbar.close()
+        return x
+    # Training Step Only
+    def forward(
+        self,
+        I0: torch.Tensor,
+        It: torch.Tensor,
+        I1: torch.Tensor,
+        flow1to0: torch.Tensor | None = None,
+        flow0to1: torch.Tensor | None = None,
+        tau: torch.Tensor | None = None,
+        t: torch.Tensor | None = None
+    ) -> torch.Tensor:
+        if tau is None:
+            tau = torch.full((It.shape[0], 2), 0.5, device=It.device, dtype=It.dtype)
+        if flow0to1 is None or flow1to0 is None:
+            flow0to1, flow1to0 = self.flow_model(I0, I1)
+        if t is None:
+            t = torch.randint(low=1, high=self.timesteps, size=(It.shape[0],), device=It.device, dtype=torch.long)
+        warp0to1, warp1to0 = self.feature_warper(I0, I1, flow0to1, flow1to0, tau)
+        x_t = self.forward_process(It, [warp0to1[0][:, :3], warp1to0[0][:, :3]], tau, t)
+        predicted_It = self.synthesis(x_t, warp0to1, warp1to0, t)
+        return predicted_It

model/train_pipline.py ADDED Viewed

	@@ -0,0 +1,177 @@

+import os
+import copy
+import matplotlib.pyplot as plt
+from typing import Any
+import torch
+from torch.optim.lr_scheduler import ReduceLROnPlateau
+from torch.optim import AdamW, Optimizer
+from torch.utils.data import DataLoader
+from lightning import LightningModule
+from torchmetrics import MetricCollection
+from torchmetrics.image import PeakSignalNoiseRatio as PSNR
+from torchmetrics.image import StructuralSimilarityIndexMeasure as SSIM
+from torchmetrics.image import LearnedPerceptualImagePatchSimilarity as LPIPS
+from model.model import MultiInputResShift
+from utils.utils import denorm, make_grid_images#, save_triplet
+from utils.ema import EMA
+from utils.inter_frame_idx import get_inter_frame_temp_index
+from utils.raft import raft_flow
+class TrainPipline(LightningModule):
+    def __init__(self,
+                 confg: dict,
+                 test_dataloader: DataLoader):
+        super(TrainPipline, self).__init__()
+        self.test_dataloader = test_dataloader
+        self.confg = confg
+        self.mean, self.sd = confg["data_confg"]["mean"], confg["data_confg"]["sd"]
+        self.model = MultiInputResShift(**confg["model_confg"])
+        self.model.flow_model.requires_grad_(False).eval()
+        self.ema = EMA(beta=0.995)
+        self.ema_model = copy.deepcopy(self.model).eval().requires_grad_(False)
+        self.charbonnier_loss = lambda x, y: torch.mean(torch.sqrt((x - y)**2 + 1e-6))
+        self.lpips_loss = LPIPS(net_type='vgg')
+        self.train_metrics = MetricCollection({
+            "train_lpips": LPIPS(net_type='alex'),
+            "train_psnr": PSNR(),
+            "train_ssim": SSIM()
+        })
+        self.val_metrics = MetricCollection({
+            "val_lpips": LPIPS(net_type='alex'),
+            "val_psnr": PSNR(),
+            "val_ssim": SSIM()
+        })
+    def loss_fn(self,
+                x: torch.Tensor,
+                predicted_x: torch.Tensor) -> torch.Tensor:
+        percep_loss = 0.2 * self.lpips_loss(x, predicted_x.clamp(-1, 1))
+        pix2pix_loss = self.charbonnier_loss(x, predicted_x)
+        return percep_loss + pix2pix_loss
+    def sample_t(self,
+                 shape: tuple[int, ...],
+                 max_t: int,
+                 device: torch.device) -> torch.Tensor:
+        p = torch.linspace(1, max_t, steps=max_t, device=device) ** 2
+        p = p / p.sum()
+        t = torch.multinomial(p, num_samples=shape[0], replacement=True)
+        return t
+    def forward(self,
+                I0: torch.Tensor,
+                It: torch.Tensor,
+                I1: torch.Tensor) -> torch.Tensor:
+        flow0tot = raft_flow(I0, It, 'animation')
+        flow1tot = raft_flow(I1, It, 'animation')
+        mid_idx = get_inter_frame_temp_index(I0, It, I1, flow0tot, flow1tot).to(It.dtype)
+        tau = torch.stack([mid_idx, 1 - mid_idx], dim=1)
+        if self.current_epoch > 5:
+            t = torch.randint(low=1, high=self.model.timesteps, size=(It.shape[0],), device=It.device, dtype=torch.long)
+        else:
+            t = self.sample_t(shape=(It.shape[0],), max_t=self.model.timesteps, device=It.device)
+        predicted_It = self.model(I0, It, I1, tau=tau, t=t)
+        return predicted_It
+    def get_step_plt_images(self,
+                            It: torch.Tensor,
+                            predicted_It: torch.Tensor) -> plt.Figure:
+        fig, ax = plt.subplots(1, 2, figsize=(20, 10))
+        ax[0].imshow(denorm(predicted_It.clamp(-1, 1), self.mean, self.sd)[0].permute(1, 2, 0).cpu().numpy())
+        ax[0].axis("off")
+        ax[0].set_title("Predicted")
+        ax[1].imshow(denorm(It, self.mean, self.sd)[0].permute(1, 2, 0).cpu().numpy())
+        ax[1].axis("off")
+        ax[1].set_title("Ground Truth")
+        plt.tight_layout()
+        #img_path = "step_image.png"
+        #fig.savefig(img_path, dpi=300, bbox_inches='tight')
+        plt.close(fig)
+        return fig
+    def training_step(self, batch: tuple[torch.Tensor, ...], _) -> torch.Tensor:
+        I0, It, I1 = batch
+        predicted_It = self(I0, It, I1)
+        loss = self.loss_fn(It, predicted_It)
+        self.log("lr", self.trainer.optimizers[0].param_groups[0]["lr"], prog_bar=True, on_step=True, on_epoch=False, sync_dist=True)
+        self.log("train_loss", loss, prog_bar=True, on_step=True, on_epoch=False, sync_dist=True)
+        self.ema.step_ema(self.ema_model, self.model)
+        with torch.inference_mode():
+            fig = self.get_step_plt_images(It, predicted_It)
+            self.logger.experiment.add_figure("Train Predictions", fig, self.global_step)
+            mets = self.train_metrics(It, predicted_It.clamp(-1, 1))
+            self.log_dict(mets, prog_bar=True, on_step=True,on_epoch=False)
+        return loss
+    @torch.no_grad()
+    def validation_step(self,  batch: tuple[torch.Tensor, ...], _) -> None:
+        I0, It, I1 = batch
+        predicted_It = self(I0, It, I1)
+        loss = self.loss_fn(It, predicted_It)
+        self.log("val_loss", loss, prog_bar=True, on_step=False, on_epoch=True, sync_dist=True)
+        mets = self.val_metrics(It, predicted_It.clamp(-1, 1))
+        self.log_dict(mets, prog_bar=True, on_step=False, on_epoch=True)
+    @torch.inference_mode()
+    def on_train_epoch_end(self) -> None:
+        torch.save(self.ema_model.state_dict(),
+                   os.path.join("_checkpoint", f"resshift_diff_{self.current_epoch}.pth"))
+        batch = next(iter(self.test_dataloader))
+        I0, It, I1 = batch
+        I0, It, I1 = I0.to(self.device), It.to(self.device), I1.to(self.device)
+        flow0tot = raft_flow(I0, It, 'animation')
+        flow1tot = raft_flow(I1, It, 'animation')
+        mid_idx = get_inter_frame_temp_index(I0, It, I1, flow0tot, flow1tot).to(It.dtype)
+        tau = torch.stack([mid_idx, 1 - mid_idx], dim=1)
+        predicted_It = self.ema_model.reverse_process([I0, I1], tau)
+        I0 = denorm(I0, self.mean, self.sd)
+        I1 = denorm(I1, self.mean, self.sd)
+        It = denorm(It, self.mean, self.sd)
+        predicted_It = denorm(predicted_It.clamp(-1, 1), self.mean, self.sd)
+        #save_triplet([I0, It, predicted_It, I1], f"./_output/target_{self.current_epoch}.png", nrow=1)
+        grid = make_grid_images([I0, It, predicted_It, I1], nrow=1)
+        self.logger.experiment.add_image("Predicted Images", grid, self.global_step)
+    def configure_optimizers(self) -> tuple[list[Optimizer], list[dict[str, Any]]]:
+        optimizer = [AdamW(
+                        self.model.parameters(),
+                        **self.confg["optim_confg"]['optimizer_confg']
+                    )]
+        scheduler = [{
+            'scheduler': ReduceLROnPlateau(
+                optimizer[0],
+                **self.confg["optim_confg"]['scheduler_confg']
+            ),
+            'monitor': 'val_loss',
+            'interval': 'epoch',
+            'frequency': 1,
+            'strict': True,
+        }]
+        return optimizer, scheduler

modules/basic_layers.py ADDED Viewed

	@@ -0,0 +1,313 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from einops import rearrange
+from einops.layers.torch import Rearrange
+class GroupNorm(nn.Module):
+    def __init__(self, in_channels: int, num_groups: int = 32):
+        super(GroupNorm, self).__init__()
+        self.gn = nn.GroupNorm(num_groups=num_groups, num_channels=in_channels, eps=1e-6, affine=True)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.gn(x)
+class AdaLayerNorm(nn.Module):
+    def __init__(self, channels: int, cond_channels: int = 0, return_scale_shift: bool = True):
+        super(AdaLayerNorm, self).__init__()
+        self.norm = nn.LayerNorm(channels)
+        self.return_scale_shift = return_scale_shift
+        if cond_channels != 0:
+            if return_scale_shift:
+                self.proj = nn.Linear(cond_channels, channels * 3, bias=False)
+            else:
+                self.proj = nn.Linear(cond_channels, channels * 2, bias=False)
+            nn.init.xavier_uniform_(self.proj.weight)
+    def expand_dims(self, tensor: torch.Tensor, dims: list[int]) -> torch.Tensor:
+        for dim in dims:
+            tensor = tensor.unsqueeze(dim)
+        return tensor
+    def forward(self, x: torch.Tensor, cond: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.norm(x)
+        if cond is None:
+            return x
+        dims = list(range(1, len(x.shape) - 1))
+        if self.return_scale_shift:
+            gamma, beta, sigma = self.proj(cond).chunk(3, dim=-1)
+            gamma, beta, sigma = [self.expand_dims(t, dims) for t in (gamma, beta, sigma)]
+            return x * (1 + gamma) + beta, sigma
+        else:
+            gamma, beta = self.proj(cond).chunk(2, dim=-1)
+            gamma, beta = [self.expand_dims(t, dims) for t in (gamma, beta)]
+            return x * (1 + gamma) + beta
+class SinusoidalPositionalEmbedding(nn.Module):
+    def __init__(self, emb_dim: int = 256):
+        super(SinusoidalPositionalEmbedding, self).__init__()
+        self.channels = emb_dim
+    def forward(self, t: torch.Tensor) -> torch.Tensor:
+        inv_freq = 1.0 / (
+            10000
+            ** (torch.arange(0, self.channels, 2, device=t.device).float() / self.channels)
+        )
+        pos_enc_a = torch.sin(t.repeat(1, self.channels // 2) * inv_freq)
+        pos_enc_b = torch.cos(t.repeat(1, self.channels // 2) * inv_freq)
+        pos_enc = torch.cat([pos_enc_a, pos_enc_b], dim=-1)
+        return pos_enc
+class GatedConv2d(nn.Module):
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 kernel_size: int = 3,
+                 padding: int = 1,
+                 bias: bool = False):
+        super(GatedConv2d, self).__init__()
+        self.gate_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
+        self.feature_conv = nn.Conv2d(in_channels,
+                                      out_channels,
+                                      kernel_size=kernel_size,
+                                      padding=padding,
+                                      bias=bias)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        gate = torch.sigmoid(self.gate_conv(x))
+        feature = F.silu(self.feature_conv(x))
+        return gate * feature
+class ResGatedBlock(nn.Module):
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 mid_channels: int | None = None,
+                 num_groups: int = 32,
+                 residual: bool = True,
+                 emb_channels: int | None = None,
+                 gated_conv: bool = False):
+        super().__init__()
+        self.residual = residual
+        self.emb_channels = emb_channels
+        if not mid_channels:
+            mid_channels = out_channels
+        if gated_conv: conv2d = GatedConv2d
+        else: conv2d = nn.Conv2d
+        self.conv1 = conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False)
+        self.norm1 = GroupNorm(mid_channels, num_groups=num_groups)
+        self.nonlienrity = nn.SiLU()
+        if emb_channels:
+            self.emb_proj = nn.Linear(emb_channels, mid_channels)
+        self.conv2 = conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False)
+        self.norm2 = GroupNorm(out_channels, num_groups=num_groups)
+        if in_channels != out_channels:
+            self.skip = conv2d(in_channels, out_channels, kernel_size=1, padding=0)
+    def double_conv(self, x: torch.Tensor, emb: torch.Tensor | None = None) -> torch.Tensor:
+        x = self.conv1(x)
+        x = self.norm1(x)
+        x = self.nonlienrity(x)
+        if emb is not None and self.emb_channels is not None:
+            x = x + self.emb_proj(emb)[:,:,None,None]
+        x = self.conv2(x)
+        return self.norm2(x)
+    def forward(self, x: torch.Tensor, emb: torch.Tensor | None = None) -> torch.Tensor:
+        if self.residual:
+            if hasattr(self, 'skip'):
+                return F.silu(self.skip(x) + self.double_conv(x, emb))
+            return F.silu(x + self.double_conv(x, emb))
+        else:
+            return self.double_conv(x, emb)
+class Downsample(nn.Module):
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 use_conv: bool=True):
+        super().__init__()
+        self.use_conv = use_conv
+        if use_conv:
+            self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=0)
+        else:
+            assert in_channels == out_channels
+            self.conv = nn.AvgPool2d(kernel_size=2, stride=2)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        pad = (0, 1, 0, 1)
+        hidden_states = F.pad(x, pad, mode="constant", value=0)
+        return self.conv(hidden_states) if self.use_conv else self.conv(x)
+class Upsample(nn.Module):
+    def __init__(self,
+                 in_channels: int,
+                 out_channels: int,
+                 use_conv: bool=True):
+        super().__init__()
+        self.use_conv = use_conv
+        if use_conv:
+            self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = F.interpolate(x,
+                          scale_factor = (2, 2) if x.dim() == 4 else (1, 2, 2),
+                          mode='nearest')
+        return self.conv(x) if self.use_conv else x
+class FeedForward(nn.Module):
+    def __init__(self,
+                 dim: int,
+                 emb_channels: int,
+                 expansion_rate: int = 4,
+                 dropout: float = 0.0):
+        super().__init__()
+        inner_dim = int(dim * expansion_rate)
+        self.norm = AdaLayerNorm(dim, emb_channels)
+        self.net = nn.Sequential(
+            nn.Linear(dim, inner_dim),
+            nn.SiLU(),
+            nn.Dropout(dropout),
+            nn.Linear(inner_dim, dim),
+            nn.Dropout(dropout)
+        )
+        self.__init_weights()
+    def __init_weights(self):
+        nn.init.xavier_uniform_(self.net[0].weight)
+        nn.init.xavier_uniform_(self.net[3].weight)
+    def forward(self, x: torch.Tensor, emb: torch.Tensor | None = None) -> torch.Tensor:
+        x, sigma = self.norm(x, emb)
+        return self.net(x) * sigma
+class Attention(nn.Module):
+    def __init__(
+        self,
+        dim: int,
+        emb_channels: int = 512,
+        dim_head: int = 32,
+        dropout: float = 0.,
+        window_size: int = 7
+    ):
+        super().__init__()
+        assert (dim % dim_head) == 0, 'dimension should be divisible by dimension per head'
+        self.heads = dim // dim_head
+        self.scale = dim_head ** -0.5
+        self.norm = AdaLayerNorm(dim, emb_channels)
+        self.to_q = nn.Linear(dim, dim, bias = False)
+        self.to_k = nn.Linear(dim, dim, bias = False)
+        self.to_v = nn.Linear(dim, dim, bias = False)
+        self.attend = nn.Sequential(
+            nn.Softmax(dim = -1),
+            nn.Dropout(dropout)
+        )
+        self.to_out = nn.Sequential(
+            nn.Linear(dim, dim, bias = False),
+            nn.Dropout(dropout)
+        )
+        self.rel_pos_bias = nn.Embedding((2 * window_size - 1) ** 2, self.heads)
+        pos = torch.arange(window_size)
+        grid = torch.stack(torch.meshgrid(pos, pos, indexing = 'ij'))
+        grid = rearrange(grid, 'c i j -> (i j) c')
+        rel_pos = rearrange(grid, 'i ... -> i 1 ...') - rearrange(grid, 'j ... -> 1 j ...')
+        rel_pos += window_size - 1
+        rel_pos_indices = (rel_pos * torch.tensor([2 * window_size - 1, 1])).sum(dim = -1)
+        self.register_buffer('rel_pos_indices', rel_pos_indices, persistent = False)
+    def forward(self, x: torch.Tensor, emb: torch.Tensor | None = None) -> torch.Tensor:
+        batch, height, width, window_height, window_width, _, device, h = *x.shape, x.device, self.heads
+        x, sigma = self.norm(x, emb)
+        x = rearrange(x, 'b x y w1 w2 d -> (b x y) (w1 w2) d')
+        q = self.to_q(x)
+        k = self.to_k(x)
+        v = self.to_v(x)
+        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = h), (q, k, v)) # split heads
+        q = q * self.scale
+        sim = torch.einsum('b h i d, b h j d -> b h i j', q, k) # sim
+        bias = self.rel_pos_bias(self.rel_pos_indices)
+        sim = sim + rearrange(bias, 'i j h -> h i j')# add positional bias
+        attn = self.attend(sim) # attention
+        out = torch.einsum('b h i j, b h j d -> b h i d', attn, v) # aggregate
+        out = rearrange(out, 'b h (w1 w2) d -> b w1 w2 (h d)', w1 = window_height, w2 = window_width) # merge heads
+        out = self.to_out(out) # combine heads out
+        return rearrange(out, '(b x y) ... -> b x y ...', x = height, y = width) * sigma
+class MaxViTBlock(nn.Module):
+    def __init__(
+        self,
+        channels: int,
+        emb_channels: int = 512,
+        heads: int = 1,
+        window_size: int = 8,
+        window_attn: bool = True,
+        grid_attn: bool = True,
+        expansion_rate: int = 4,
+        dropout: float = 0.0,
+    ):
+        super(MaxViTBlock, self).__init__()
+        dim_head = channels // heads
+        layer_dim = dim_head * heads
+        w = window_size
+        self.window_attn = window_attn
+        self.grid_attn = grid_attn
+        if window_attn:
+            self.wind_rearrange_forward = Rearrange('b d (x w1) (y w2) -> b x y w1 w2 d', w1 = w, w2 = w)  # block-like attention
+            self.wind_attn = Attention(
+                dim = layer_dim,
+                emb_channels = emb_channels,
+                dim_head = dim_head,
+                dropout = dropout,
+                window_size = w
+            )
+            self.wind_ff = FeedForward(dim = layer_dim,
+                                       emb_channels = emb_channels,
+                                       expansion_rate = expansion_rate,
+                                       dropout = dropout)
+            self.wind_rearrange_backward = Rearrange('b x y w1 w2 d -> b d (x w1) (y w2)')
+        if grid_attn:
+            self.grid_rearrange_forward = Rearrange('b d (w1 x) (w2 y) -> b x y w1 w2 d', w1 = w, w2 = w)  # grid-like attention
+            self.grid_attn = Attention(
+                dim = layer_dim,
+                emb_channels = emb_channels,
+                dim_head = dim_head,
+                dropout = dropout,
+                window_size = w
+            )
+            self.grid_ff = FeedForward(dim = layer_dim,
+                                       emb_channels = emb_channels,
+                                       expansion_rate = expansion_rate,
+                                       dropout = dropout)
+            self.grid_rearrange_backward = Rearrange('b x y w1 w2 d -> b d (w1 x) (w2 y)')
+    def forward(self, x: torch.Tensor, emb: torch.Tensor | None = None) -> torch.Tensor:
+        if self.window_attn:
+            x = self.wind_rearrange_forward(x)
+            x = x + self.wind_attn(x, emb = emb)
+            x = x + self.wind_ff(x, emb = emb)
+            x = self.wind_rearrange_backward(x)
+        if self.grid_attn:
+            x = self.grid_rearrange_forward(x)
+            x = x + self.grid_attn(x, emb = emb)
+            x = x + self.grid_ff(x, emb = emb)
+            x = self.grid_rearrange_backward(x)
+        return x

modules/cupy_module/correlation.py ADDED Viewed

	@@ -0,0 +1,402 @@

+#!/usr/bin/env python
+import cupy
+import os
+import re
+import torch
+# Code taken from https://github.com/sniklaus/softmax-splatting/blob/master/correlation/correlation.py
+# CUDA source (template) for the "rearrange" kernel. Each thread copies one element
+# of `input`, addressed as (sample = blockIdx.z, channel = blockIdx.y, flat pixel
+# index), into `output` with the channel as the innermost index and both spatial
+# coordinates shifted by 4, i.e. into the interior of a buffer that is 8 wider.
+# The SIZE_n(tensor) placeholders are replaced with concrete sizes by cupy_kernel()
+# before the source is compiled.
+kernel_Correlation_rearrange = '''
+    extern "C" __global__ void kernel_Correlation_rearrange(
+        const int n,
+        const float* input,
+        float* output
+    ) {
+      int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x;
+      if (intIndex >= n) {
+        return;
+      }
+      int intSample = blockIdx.z;
+      int intChannel = blockIdx.y;
+      float fltValue = input[(((intSample * SIZE_1(input)) + intChannel) * SIZE_2(input) * SIZE_3(input)) + intIndex];
+      __syncthreads();
+      int intPaddedY = (intIndex / SIZE_3(input)) + 4;
+      int intPaddedX = (intIndex % SIZE_3(input)) + 4;
+      int intRearrange = ((SIZE_3(input) + 8) * intPaddedY) + intPaddedX;
+      output[(((intSample * SIZE_1(output) * SIZE_2(output)) + intRearrange) * SIZE_1(input)) + intChannel] = fltValue;
+    }
+'''
+# CUDA source (template) for the forward correlation kernel. One thread block handles
+# one output position (blockIdx.x, blockIdx.y) of one sample (blockIdx.z). The channel
+# vector of rbot0 at that position is first staged in dynamic shared memory; then, for
+# every output channel, the displacement (top_channel % 9 - 4, top_channel / 9 - 4) is
+# applied to rbot1 and the per-channel products are accumulated. Threads stride over
+# channels in steps of 32 and thread 0 adds up the 32 partial sums, so the kernel is
+# written for a block of 32 threads. The result is divided by the channel count.
+# SIZE_n(tensor) placeholders are expanded by cupy_kernel() before compilation.
+kernel_Correlation_updateOutput = '''
+    extern "C" __global__ void kernel_Correlation_updateOutput(
+      const int n,
+      const float* rbot0,
+      const float* rbot1,
+      float* top
+    ) {
+      extern __shared__ char patch_data_char[];
+      float *patch_data = (float *)patch_data_char;
+      // First (upper left) position of kernel upper-left corner in current center position of neighborhood in image 1
+      int x1 = blockIdx.x + 4;
+      int y1 = blockIdx.y + 4;
+      int item = blockIdx.z;
+      int ch_off = threadIdx.x;
+      // Load 3D patch into shared shared memory
+      for (int j = 0; j < 1; j++) { // HEIGHT
+        for (int i = 0; i < 1; i++) { // WIDTH
+          int ji_off = (j + i) * SIZE_3(rbot0);
+          for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS
+            int idx1 = ((item * SIZE_1(rbot0) + y1+j) * SIZE_2(rbot0) + x1+i) * SIZE_3(rbot0) + ch;
+            int idxPatchData = ji_off + ch;
+            patch_data[idxPatchData] = rbot0[idx1];
+          }
+        }
+      }
+      __syncthreads();
+      __shared__ float sum[32];
+      // Compute correlation
+      for (int top_channel = 0; top_channel < SIZE_1(top); top_channel++) {
+        sum[ch_off] = 0;
+        int s2o = top_channel % 9 - 4;
+        int s2p = top_channel / 9 - 4;
+        for (int j = 0; j < 1; j++) { // HEIGHT
+          for (int i = 0; i < 1; i++) { // WIDTH
+            int ji_off = (j + i) * SIZE_3(rbot0);
+            for (int ch = ch_off; ch < SIZE_3(rbot0); ch += 32) { // CHANNELS
+              int x2 = x1 + s2o;
+              int y2 = y1 + s2p;
+              int idxPatchData = ji_off + ch;
+              int idx2 = ((item * SIZE_1(rbot0) + y2+j) * SIZE_2(rbot0) + x2+i) * SIZE_3(rbot0) + ch;
+              sum[ch_off] += patch_data[idxPatchData] * rbot1[idx2];
+            }
+          }
+        }
+        __syncthreads();
+        if (ch_off == 0) {
+          float total_sum = 0;
+          for (int idx = 0; idx < 32; idx++) {
+            total_sum += sum[idx];
+          }
+          const int sumelems = SIZE_3(rbot0);
+          const int index = ((top_channel*SIZE_2(top) + blockIdx.y)*SIZE_3(top))+blockIdx.x;
+          top[index + item*SIZE_1(top)*SIZE_2(top)*SIZE_3(top)] = total_sum / (float)sumelems;
+        }
+      }
+    }
+'''
+# CUDA source (template) for the gradient with respect to the first input. For sample
+# `intSample`, each loop iteration handles one (channel n, column l, row m) element of
+# gradOne: it sums gradOutput[displacement channel, y, x] * rbot1[m+p, l+o, n] over the
+# 9x9 displacements (o, p) in [-4, 4], divides by the channel count and stores the
+# result. With the round-off terms cancelling, xmin == xmax == l - 4 and
+# ymin == ymax == m - 4 before clamping. `gradTwo` is declared but not used here.
+# SIZE_n(tensor) placeholders are expanded by cupy_kernel() before compilation.
+kernel_Correlation_updateGradOne = '''
+    #define ROUND_OFF 50000
+    extern "C" __global__ void kernel_Correlation_updateGradOne(
+      const int n,
+      const int intSample,
+      const float* rbot0,
+      const float* rbot1,
+      const float* gradOutput,
+      float* gradOne,
+      float* gradTwo
+    ) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
+      int n = intIndex % SIZE_1(gradOne); // channels
+      int l = (intIndex / SIZE_1(gradOne)) % SIZE_3(gradOne) + 4; // w-pos
+      int m = (intIndex / SIZE_1(gradOne) / SIZE_3(gradOne)) % SIZE_2(gradOne) + 4; // h-pos
+      // round_off is a trick to enable integer division with ceil, even for negative numbers
+      // We use a large offset, for the inner part not to become negative.
+      const int round_off = ROUND_OFF;
+      const int round_off_s1 = round_off;
+      // We add round_off before_s1 the int division and subtract round_off after it, to ensure the formula matches ceil behavior:
+      int xmin = (l - 4 + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4)
+      int ymin = (m - 4 + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4)
+      // Same here:
+      int xmax = (l - 4 + round_off_s1) - round_off; // floor (l - 4)
+      int ymax = (m - 4 + round_off_s1) - round_off; // floor (m - 4)
+      float sum = 0;
+      if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {
+        xmin = max(0,xmin);
+        xmax = min(SIZE_3(gradOutput)-1,xmax);
+        ymin = max(0,ymin);
+        ymax = min(SIZE_2(gradOutput)-1,ymax);
+        for (int p = -4; p <= 4; p++) {
+          for (int o = -4; o <= 4; o++) {
+            // Get rbot1 data:
+            int s2o = o;
+            int s2p = p;
+            int idxbot1 = ((intSample * SIZE_1(rbot0) + (m+s2p)) * SIZE_2(rbot0) + (l+s2o)) * SIZE_3(rbot0) + n;
+            float bot1tmp = rbot1[idxbot1]; // rbot1[l+s2o,m+s2p,n]
+            // Index offset for gradOutput in following loops:
+            int op = (p+4) * 9 + (o+4); // index[o,p]
+            int idxopoffset = (intSample * SIZE_1(gradOutput) + op);
+            for (int y = ymin; y <= ymax; y++) {
+              for (int x = xmin; x <= xmax; x++) {
+                int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]
+                sum += gradOutput[idxgradOutput] * bot1tmp;
+              }
+            }
+          }
+        }
+      }
+      const int sumelems = SIZE_1(gradOne);
+      const int bot0index = ((n * SIZE_2(gradOne)) + (m-4)) * SIZE_3(gradOne) + (l-4);
+      gradOne[bot0index + intSample*SIZE_1(gradOne)*SIZE_2(gradOne)*SIZE_3(gradOne)] = sum / (float)sumelems;
+    } }
+'''
+# CUDA source (template) for the gradient with respect to the second input. For sample
+# `intSample`, each loop iteration handles one (channel n, column l, row m) element of
+# gradTwo: for every displacement (o, p) in [-4, 4] it reads rbot0 at (m-p, l-o, n) and
+# gradOutput at the position shifted back by that displacement (skipped when it falls
+# outside gradOutput), accumulates the products, divides by the channel count and stores
+# the result. `gradOne` is declared but not used here.
+# SIZE_n(tensor) placeholders are expanded by cupy_kernel() before compilation.
+kernel_Correlation_updateGradTwo = '''
+    #define ROUND_OFF 50000
+    extern "C" __global__ void kernel_Correlation_updateGradTwo(
+      const int n,
+      const int intSample,
+      const float* rbot0,
+      const float* rbot1,
+      const float* gradOutput,
+      float* gradOne,
+      float* gradTwo
+    ) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
+      int n = intIndex % SIZE_1(gradTwo); // channels
+      int l = (intIndex / SIZE_1(gradTwo)) % SIZE_3(gradTwo) + 4; // w-pos
+      int m = (intIndex / SIZE_1(gradTwo) / SIZE_3(gradTwo)) % SIZE_2(gradTwo) + 4; // h-pos
+      // round_off is a trick to enable integer division with ceil, even for negative numbers
+      // We use a large offset, for the inner part not to become negative.
+      const int round_off = ROUND_OFF;
+      const int round_off_s1 = round_off;
+      float sum = 0;
+      for (int p = -4; p <= 4; p++) {
+        for (int o = -4; o <= 4; o++) {
+          int s2o = o;
+          int s2p = p;
+          //Get X,Y ranges and clamp
+          // We add round_off before_s1 the int division and subtract round_off after it, to ensure the formula matches ceil behavior:
+          int xmin = (l - 4 - s2o + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4 - s2o)
+          int ymin = (m - 4 - s2p + round_off_s1 - 1) + 1 - round_off; // ceil (l - 4 - s2o)
+          // Same here:
+          int xmax = (l - 4 - s2o + round_off_s1) - round_off; // floor (l - 4 - s2o)
+          int ymax = (m - 4 - s2p + round_off_s1) - round_off; // floor (m - 4 - s2p)
+          if (xmax>=0 && ymax>=0 && (xmin<=SIZE_3(gradOutput)-1) && (ymin<=SIZE_2(gradOutput)-1)) {
+            xmin = max(0,xmin);
+            xmax = min(SIZE_3(gradOutput)-1,xmax);
+            ymin = max(0,ymin);
+            ymax = min(SIZE_2(gradOutput)-1,ymax);
+            // Get rbot0 data:
+            int idxbot0 = ((intSample * SIZE_1(rbot0) + (m-s2p)) * SIZE_2(rbot0) + (l-s2o)) * SIZE_3(rbot0) + n;
+            float bot0tmp = rbot0[idxbot0]; // rbot1[l+s2o,m+s2p,n]
+            // Index offset for gradOutput in following loops:
+            int op = (p+4) * 9 + (o+4); // index[o,p]
+            int idxopoffset = (intSample * SIZE_1(gradOutput) + op);
+            for (int y = ymin; y <= ymax; y++) {
+              for (int x = xmin; x <= xmax; x++) {
+                int idxgradOutput = (idxopoffset * SIZE_2(gradOutput) + y) * SIZE_3(gradOutput) + x; // gradOutput[x,y,o,p]
+                sum += gradOutput[idxgradOutput] * bot0tmp;
+              }
+            }
+          }
+        }
+      }
+      const int sumelems = SIZE_1(gradTwo);
+      const int bot1index = ((n * SIZE_2(gradTwo)) + (m-4)) * SIZE_3(gradTwo) + (l-4);
+      gradTwo[bot1index + intSample*SIZE_1(gradTwo)*SIZE_2(gradTwo)*SIZE_3(gradTwo)] = sum / (float)sumelems;
+    } }
+'''
+def cupy_kernel(strFunction, objVariables):
+    strKernel = globals()[strFunction]
+    while True:
+        objMatch = re.search('(SIZE_)([0-4])(\()([^\)]*)(\))', strKernel)
+        if objMatch is None:
+            break
+        # end
+        intArg = int(objMatch.group(2))
+        strTensor = objMatch.group(4)
+        intSizes = objVariables[strTensor].size()
+        strKernel = strKernel.replace(objMatch.group(), str(intSizes[intArg] if torch.is_tensor(intSizes[intArg]) == False else intSizes[intArg].item()))
+    while True:
+        objMatch = re.search('(VALUE_)([0-4])(\()([^\)]+)(\))', strKernel)
+        if objMatch is None:
+            break
+        # end
+        intArgs = int(objMatch.group(2))
+        strArgs = objMatch.group(4).split(',')
+        strTensor = strArgs[0]
+        intStrides = objVariables[strTensor].stride()
+        strIndex = [ '((' + strArgs[intArg + 1].replace('{', '(').replace('}', ')').strip() + ')*' + str(intStrides[intArg] if torch.is_tensor(intStrides[intArg]) == False else intStrides[intArg].item()) + ')' for intArg in range(intArgs) ]
+        strKernel = strKernel.replace(objMatch.group(0), strTensor + '[' + str('+').join(strIndex) + ']')
+    # end
+    return strKernel
+# end
+@cupy.memoize(for_each_device=True)
+def cupy_launch(strFunction, strKernel):
+    if 'CUDA_HOME' not in os.environ:
+        os.environ['CUDA_HOME'] = cupy.cuda.get_cuda_path()
+    # end
+    return cupy.RawKernel(strKernel, strFunction, tuple(['-I ' + os.environ['CUDA_HOME'], '-I ' + os.environ['CUDA_HOME'] + '/include']))
+# end
+class _FunctionCorrelation(torch.autograd.Function):
+    @staticmethod
+    def forward(self, one, two):
+        rbot0 = one.new_zeros([ one.shape[0], one.shape[2] + 8, one.shape[3] + 8, one.shape[1] ])
+        rbot1 = one.new_zeros([ one.shape[0], one.shape[2] + 8, one.shape[3] + 8, one.shape[1] ])
+        one = one.contiguous(); assert(one.is_cuda == True)
+        two = two.contiguous(); assert(two.is_cuda == True)
+        output = one.new_zeros([ one.shape[0], 81, one.shape[2], one.shape[3] ])
+        if one.is_cuda == True:
+            n = one.shape[2] * one.shape[3]
+            cupy_launch('kernel_Correlation_rearrange', cupy_kernel('kernel_Correlation_rearrange', {
+                'input': one,
+                'output': rbot0
+            }))(
+                grid=tuple([ int((n + 16 - 1) / 16), one.shape[1], one.shape[0] ]),
+                block=tuple([ 16, 1, 1 ]),
+                args=[ cupy.int32(n), one.data_ptr(), rbot0.data_ptr() ]
+            )
+            n = two.shape[2] * two.shape[3]
+            cupy_launch('kernel_Correlation_rearrange', cupy_kernel('kernel_Correlation_rearrange', {
+                'input': two,
+                'output': rbot1
+            }))(
+                grid=tuple([ int((n + 16 - 1) / 16), two.shape[1], two.shape[0] ]),
+                block=tuple([ 16, 1, 1 ]),
+                args=[ cupy.int32(n), two.data_ptr(), rbot1.data_ptr() ]
+            )
+            n = output.shape[1] * output.shape[2] * output.shape[3]
+            cupy_launch('kernel_Correlation_updateOutput', cupy_kernel('kernel_Correlation_updateOutput', {
+                'rbot0': rbot0,
+                'rbot1': rbot1,
+                'top': output
+            }))(
+                grid=tuple([ output.shape[3], output.shape[2], output.shape[0] ]),
+                block=tuple([ 32, 1, 1 ]),
+                shared_mem=one.shape[1] * 4,
+                args=[ cupy.int32(n), rbot0.data_ptr(), rbot1.data_ptr(), output.data_ptr() ]
+            )
+        elif one.is_cuda == False:
+            raise NotImplementedError()
+        # end
+        self.save_for_backward(one, two, rbot0, rbot1)
+        return output
+    # end
+    @staticmethod
+    def backward(self, gradOutput):
+        one, two, rbot0, rbot1 = self.saved_tensors
+        gradOutput = gradOutput.contiguous(); assert(gradOutput.is_cuda == True)
+        gradOne = one.new_zeros([ one.shape[0], one.shape[1], one.shape[2], one.shape[3] ]) if self.needs_input_grad[0] == True else None
+        gradTwo = one.new_zeros([ one.shape[0], one.shape[1], one.shape[2], one.shape[3] ]) if self.needs_input_grad[1] == True else None
+        if one.is_cuda == True:
+            if gradOne is not None:
+                for intSample in range(one.shape[0]):
+                    n = one.shape[1] * one.shape[2] * one.shape[3]
+                    cupy_launch('kernel_Correlation_updateGradOne', cupy_kernel('kernel_Correlation_updateGradOne', {
+                        'rbot0': rbot0,
+                        'rbot1': rbot1,
+                        'gradOutput': gradOutput,
+                        'gradOne': gradOne,
+                        'gradTwo': None
+                    }))(
+                        grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
+                        block=tuple([ 512, 1, 1 ]),
+                        args=[ cupy.int32(n), intSample, rbot0.data_ptr(), rbot1.data_ptr(), gradOutput.data_ptr(), gradOne.data_ptr(), None ]
+                    )
+                # end
+            # end
+            if gradTwo is not None:
+                for intSample in range(one.shape[0]):
+                    n = one.shape[1] * one.shape[2] * one.shape[3]
+                    cupy_launch('kernel_Correlation_updateGradTwo', cupy_kernel('kernel_Correlation_updateGradTwo', {
+                        'rbot0': rbot0,
+                        'rbot1': rbot1,
+                        'gradOutput': gradOutput,
+                        'gradOne': None,
+                        'gradTwo': gradTwo
+                    }))(
+                        grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
+                        block=tuple([ 512, 1, 1 ]),
+                        args=[ cupy.int32(n), intSample, rbot0.data_ptr(), rbot1.data_ptr(), gradOutput.data_ptr(), None, gradTwo.data_ptr() ]
+                    )
+                # end
+            # end
+        elif one.is_cuda == False:
+            raise NotImplementedError()
+        # end
+        return gradOne, gradTwo
+    # end
+# end
+def FunctionCorrelation(tenOne, tenTwo):
+    """Functional entry point: apply ``_FunctionCorrelation`` to the two tensors and return its output."""
+    return _FunctionCorrelation.apply(tenOne, tenTwo)
+# end
+class ModuleCorrelation(torch.nn.Module):
+    """``nn.Module`` wrapper around ``_FunctionCorrelation``; it defines no parameters of its own."""
+    def __init__(self):
+        super().__init__()
+    # end
+    def forward(self, tenOne, tenTwo):
+        """Return ``_FunctionCorrelation.apply(tenOne, tenTwo)``."""
+        return _FunctionCorrelation.apply(tenOne, tenTwo)
+    # end
+# end

modules/cupy_module/cupy_utils.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import cupy
+#@cupy.memoize(for_each_device=True)
+def cupy_launch(strFunction, strKernel):
+    """Wrap the CUDA source ``strKernel`` in a ``cupy.RawKernel`` for the function ``strFunction``.
+
+    No memoization is applied here, so every call constructs a new
+    ``RawKernel`` object.
+    """
+    # return cupy.cuda.compile_with_cache(strKernel).get_function(strFunction)
+    return cupy.RawKernel(strKernel, strFunction)
+# end

modules/cupy_module/nedt.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import torch
+import cupy
+import kornia
+import torch.nn as nn
+from modules.cupy_module.cupy_utils import cupy_launch
+# Code taken from https://github.com/ShuhongChen/eisai-anime-interpolator
+# (function name, CUDA source) pair for one 1-D pass of the distance transform.
+# Each thread handles one element (batch pb, row pi, column pj) of a (bs, h, w)
+# buffer and writes min over j of data[pb, pi, j] + (pj - j)^2, i.e. the cheapest
+# cost reachable along the same row, never exceeding diam2.
+_batch_edt_kernel = ('kernel_dt', '''
+    extern "C" __global__ void kernel_dt(
+        const int bs,
+        const int h,
+        const int w,
+        const float diam2,
+        float* data,
+        float* output
+    ) {
+        int idx = blockIdx.x * blockDim.x + threadIdx.x;
+        if (idx >= bs*h*w) {
+            return;
+        }
+        int pb = idx / (h*w);
+        int pi = (idx - h*w*pb) / w;
+        int pj = (idx - h*w*pb - w*pi);
+        float cost;
+        float mincost = diam2;
+        for (int j = 0; j < w; j++) {
+            cost = data[h*w*pb + w*pi + j] + (pj-j)*(pj-j);
+            if (cost < mincost) {
+                mincost = cost;
+            }
+        }
+        output[idx] = mincost;
+        return;
+    }
+''')
+class NEDT(nn.Module):
+    def __init__(self):
+        super().__init__()
+    def batch_edt(self, img, block=1024):
+        # must initialize cuda/cupy after forking
+        _batch_edt = cupy_launch(*_batch_edt_kernel)
+        # bookkeeppingg
+        if len(img.shape)==4:
+            assert img.shape[1]==1
+            img = img.squeeze(1)
+            expand = True
+        else:
+            expand = False
+        bs,h,w = img.shape
+        diam2 = h**2 + w**2
+        odtype = img.dtype
+        grid = (img.nelement()+block-1) // block
+        # first pass, y-axis
+        data = ((1-img.type(torch.float32)) * diam2).contiguous()
+        intermed = torch.zeros_like(data)
+        _batch_edt(
+            grid=(grid, 1, 1),
+            block=(block, 1, 1),  # < 1024
+            args=[
+                cupy.int32(bs),
+                cupy.int32(h),
+                cupy.int32(w),
+                cupy.float32(diam2),
+                data.data_ptr(),
+                intermed.data_ptr(),
+            ],
+        )
+        # second pass, x-axis
+        intermed = intermed.permute(0,2,1).contiguous()
+        out = torch.zeros_like(intermed)
+        _batch_edt(
+            grid=(grid, 1, 1),
+            block=(block, 1, 1),
+            args=[
+                cupy.int32(bs),
+                cupy.int32(w),
+                cupy.int32(h),
+                cupy.float32(diam2),
+                intermed.data_ptr(),
+                out.data_ptr(),
+            ],
+        )
+        ans = out.permute(0,2,1).sqrt()
+        ans = ans.type(odtype) if odtype!=ans.dtype else ans
+        if expand:
+            ans = ans.unsqueeze(1)
+        return ans
+    def batch_dog(self, img, t=1.0, sigma=1.0, k=1.6, epsilon=0.01, kernel_factor=4, clip=True):
+        # to grayscale if needed
+        bs,ch,h,w = img.shape
+        if ch in [3,4]:
+            img = kornia.color.rgb_to_grayscale(img[:,:3])
+        else:
+            assert ch==1
+        # calculate dog
+        kern0 = max(2*int(sigma*kernel_factor)+1, 3)
+        kern1 = max(2*int(sigma*k*kernel_factor)+1, 3)
+        g0 = kornia.filters.gaussian_blur2d(
+            img, (kern0,kern0), (sigma,sigma), border_type='replicate',
+        )
+        g1 = kornia.filters.gaussian_blur2d(
+            img, (kern1,kern1), (sigma*k,sigma*k), border_type='replicate',
+        )
+        out = 0.5 + t*(g1 - g0) - epsilon
+        out = out.clip(0,1) if clip else out
+        return out
+    def forward(
+        self, img, t=2.0, sigma_factor=1/540,
+        k=1.6, epsilon=0.01,
+        kernel_factor=4, exp_factor=540/15
+    ):
+        dog = self.batch_dog(
+            img, t=t, sigma=img.shape[-2]*sigma_factor, k=k,
+            epsilon=epsilon, kernel_factor=kernel_factor, clip=False,
+        )
+        edt = self.batch_edt((dog > 0.5).float())
+        out = 1 - (-edt*exp_factor / max(edt.shape[-2:])).exp()
+        return out

modules/cupy_module/softsplat.py ADDED Viewed

	@@ -0,0 +1,368 @@

+import torch
+import re
+import cupy
+from modules.cupy_module.cupy_utils import cupy_launch
+# Code from https://github.com/sniklaus/softmax-splatting/blob/master/softsplat.py
+# CUDA source (template) for forward splatting. Every input element (n, c, y, x) is
+# moved to (x + flow[n, 0, y, x], y + flow[n, 1, y, x]) and atomically added to the four
+# surrounding integer positions of `output` with bilinear weights; neighbours that fall
+# outside the output are skipped. SIZE_n / VALUE_n / OFFSET_n are textual placeholders
+# that are presumably expanded by this module's kernel preprocessing before
+# compilation (not shown here; confirm).
+kernel_Softsplat_updateOutput = '''
+    extern "C" __global__ void kernel_Softsplat_updateOutput(
+        const int n,
+        const float* input,
+        const float* flow,
+        float* output
+    ) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
+        const int intN = ( intIndex / SIZE_3(output) / SIZE_2(output) / SIZE_1(output) ) % SIZE_0(output);
+        const int intC = ( intIndex / SIZE_3(output) / SIZE_2(output)                  ) % SIZE_1(output);
+        const int intY = ( intIndex / SIZE_3(output)                                   ) % SIZE_2(output);
+        const int intX = ( intIndex                                                    ) % SIZE_3(output);
+        float fltOutputX = (float) (intX) + VALUE_4(flow, intN, 0, intY, intX);
+        float fltOutputY = (float) (intY) + VALUE_4(flow, intN, 1, intY, intX);
+        int intNorthwestX = (int) (floor(fltOutputX));
+        int intNorthwestY = (int) (floor(fltOutputY));
+        int intNortheastX = intNorthwestX + 1;
+        int intNortheastY = intNorthwestY;
+        int intSouthwestX = intNorthwestX;
+        int intSouthwestY = intNorthwestY + 1;
+        int intSoutheastX = intNorthwestX + 1;
+        int intSoutheastY = intNorthwestY + 1;
+        float fltNorthwest = ((float) (intSoutheastX) - fltOutputX) * ((float) (intSoutheastY) - fltOutputY);
+        float fltNortheast = (fltOutputX - (float) (intSouthwestX)) * ((float) (intSouthwestY) - fltOutputY);
+        float fltSouthwest = ((float) (intNortheastX) - fltOutputX) * (fltOutputY - (float) (intNortheastY));
+        float fltSoutheast = (fltOutputX - (float) (intNorthwestX)) * (fltOutputY - (float) (intNorthwestY));
+        if ((intNorthwestX >= 0) & (intNorthwestX < SIZE_3(output)) & (intNorthwestY >= 0) & (intNorthwestY < SIZE_2(output))) {
+            atomicAdd(&output[OFFSET_4(output, intN, intC, intNorthwestY, intNorthwestX)], VALUE_4(input, intN, intC, intY, intX) * fltNorthwest);
+        }
+        if ((intNortheastX >= 0) & (intNortheastX < SIZE_3(output)) & (intNortheastY >= 0) & (intNortheastY < SIZE_2(output))) {
+            atomicAdd(&output[OFFSET_4(output, intN, intC, intNortheastY, intNortheastX)], VALUE_4(input, intN, intC, intY, intX) * fltNortheast);
+        }
+        if ((intSouthwestX >= 0) & (intSouthwestX < SIZE_3(output)) & (intSouthwestY >= 0) & (intSouthwestY < SIZE_2(output))) {
+            atomicAdd(&output[OFFSET_4(output, intN, intC, intSouthwestY, intSouthwestX)], VALUE_4(input, intN, intC, intY, intX) * fltSouthwest);
+        }
+        if ((intSoutheastX >= 0) & (intSoutheastX < SIZE_3(output)) & (intSoutheastY >= 0) & (intSoutheastY < SIZE_2(output))) {
+            atomicAdd(&output[OFFSET_4(output, intN, intC, intSoutheastY, intSoutheastX)], VALUE_4(input, intN, intC, intY, intX) * fltSoutheast);
+        }
+    } }
+'''
+# CUDA source (template) for the gradient of the splat with respect to `input`. For
+# every input element (n, c, y, x) it recomputes the displaced position and the four
+# bilinear weights exactly as in the forward kernel, then gathers gradOutput at the four
+# neighbours (skipping those outside gradOutput) and stores the weighted sum in
+# gradInput. `input` and `gradFlow` are declared but not used in this kernel.
+# SIZE_n / VALUE_n are textual placeholders that are presumably expanded by this
+# module's kernel preprocessing before compilation (not shown here; confirm).
+kernel_Softsplat_updateGradInput = '''
+    extern "C" __global__ void kernel_Softsplat_updateGradInput(
+        const int n,
+        const float* input,
+        const float* flow,
+        const float* gradOutput,
+        float* gradInput,
+        float* gradFlow
+    ) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
+        const int intN = ( intIndex / SIZE_3(gradInput) / SIZE_2(gradInput) / SIZE_1(gradInput) ) % SIZE_0(gradInput);
+        const int intC = ( intIndex / SIZE_3(gradInput) / SIZE_2(gradInput)                     ) % SIZE_1(gradInput);
+        const int intY = ( intIndex / SIZE_3(gradInput)                                         ) % SIZE_2(gradInput);
+        const int intX = ( intIndex                                                             ) % SIZE_3(gradInput);
+        float fltGradInput = 0.0;
+        float fltOutputX = (float) (intX) + VALUE_4(flow, intN, 0, intY, intX);
+        float fltOutputY = (float) (intY) + VALUE_4(flow, intN, 1, intY, intX);
+        int intNorthwestX = (int) (floor(fltOutputX));
+        int intNorthwestY = (int) (floor(fltOutputY));
+        int intNortheastX = intNorthwestX + 1;
+        int intNortheastY = intNorthwestY;
+        int intSouthwestX = intNorthwestX;
+        int intSouthwestY = intNorthwestY + 1;
+        int intSoutheastX = intNorthwestX + 1;
+        int intSoutheastY = intNorthwestY + 1;
+        float fltNorthwest = ((float) (intSoutheastX) - fltOutputX) * ((float) (intSoutheastY) - fltOutputY);
+        float fltNortheast = (fltOutputX - (float) (intSouthwestX)) * ((float) (intSouthwestY) - fltOutputY);
+        float fltSouthwest = ((float) (intNortheastX) - fltOutputX) * (fltOutputY - (float) (intNortheastY));
+        float fltSoutheast = (fltOutputX - (float) (intNorthwestX)) * (fltOutputY - (float) (intNorthwestY));
+        if ((intNorthwestX >= 0) & (intNorthwestX < SIZE_3(gradOutput)) & (intNorthwestY >= 0) & (intNorthwestY < SIZE_2(gradOutput))) {
+            fltGradInput += VALUE_4(gradOutput, intN, intC, intNorthwestY, intNorthwestX) * fltNorthwest;
+        }
+        if ((intNortheastX >= 0) & (intNortheastX < SIZE_3(gradOutput)) & (intNortheastY >= 0) & (intNortheastY < SIZE_2(gradOutput))) {
+            fltGradInput += VALUE_4(gradOutput, intN, intC, intNortheastY, intNortheastX) * fltNortheast;
+        }
+        if ((intSouthwestX >= 0) & (intSouthwestX < SIZE_3(gradOutput)) & (intSouthwestY >= 0) & (intSouthwestY < SIZE_2(gradOutput))) {
+            fltGradInput += VALUE_4(gradOutput, intN, intC, intSouthwestY, intSouthwestX) * fltSouthwest;
+        }
+        if ((intSoutheastX >= 0) & (intSoutheastX < SIZE_3(gradOutput)) & (intSoutheastY >= 0) & (intSoutheastY < SIZE_2(gradOutput))) {
+            fltGradInput += VALUE_4(gradOutput, intN, intC, intSoutheastY, intSoutheastX) * fltSoutheast;
+        }
+        gradInput[intIndex] = fltGradInput;
+    } }
+'''
+# CUDA source template for the flow-gradient kernel launched from
+# _FunctionSoftsplat.backward. The SIZE_n(...) / VALUE_n(...) placeholders are
+# expanded by cupy_kernel() using the sizes and strides of the tensors it is given.
+# Each loop iteration handles one element (n, c, y, x) of gradFlow: the pixel is
+# displaced by its flow vector, the four integer neighbours of the landing point
+# are located, and for every channel `input * gradOutput` at each in-bounds
+# neighbour is accumulated, weighted by the derivative of that neighbour's
+# bilinear weight with respect to the x (c == 0) or y (c == 1) flow component.
+kernel_Softsplat_updateGradFlow = '''
+    extern "C" __global__ void kernel_Softsplat_updateGradFlow(
+        const int n,
+        const float* input,
+        const float* flow,
+        const float* gradOutput,
+        float* gradInput,
+        float* gradFlow
+    ) { for (int intIndex = (blockIdx.x * blockDim.x) + threadIdx.x; intIndex < n; intIndex += blockDim.x * gridDim.x) {
+        float fltGradFlow = 0.0;
+        const int intN = ( intIndex / SIZE_3(gradFlow) / SIZE_2(gradFlow) / SIZE_1(gradFlow) ) % SIZE_0(gradFlow);
+        const int intC = ( intIndex / SIZE_3(gradFlow) / SIZE_2(gradFlow)                    ) % SIZE_1(gradFlow);
+        const int intY = ( intIndex / SIZE_3(gradFlow)                                       ) % SIZE_2(gradFlow);
+        const int intX = ( intIndex                                                          ) % SIZE_3(gradFlow);
+        float fltOutputX = (float) (intX) + VALUE_4(flow, intN, 0, intY, intX);
+        float fltOutputY = (float) (intY) + VALUE_4(flow, intN, 1, intY, intX);
+        int intNorthwestX = (int) (floor(fltOutputX));
+        int intNorthwestY = (int) (floor(fltOutputY));
+        int intNortheastX = intNorthwestX + 1;
+        int intNortheastY = intNorthwestY;
+        int intSouthwestX = intNorthwestX;
+        int intSouthwestY = intNorthwestY + 1;
+        int intSoutheastX = intNorthwestX + 1;
+        int intSoutheastY = intNorthwestY + 1;
+        float fltNorthwest = 0.0;
+        float fltNortheast = 0.0;
+        float fltSouthwest = 0.0;
+        float fltSoutheast = 0.0;
+        if (intC == 0) {
+            fltNorthwest = ((float) (-1.0)) * ((float) (intSoutheastY) - fltOutputY);
+            fltNortheast = ((float) (+1.0)) * ((float) (intSouthwestY) - fltOutputY);
+            fltSouthwest = ((float) (-1.0)) * (fltOutputY - (float) (intNortheastY));
+            fltSoutheast = ((float) (+1.0)) * (fltOutputY - (float) (intNorthwestY));
+        } else if (intC == 1) {
+            fltNorthwest = ((float) (intSoutheastX) - fltOutputX) * ((float) (-1.0));
+            fltNortheast = (fltOutputX - (float) (intSouthwestX)) * ((float) (-1.0));
+            fltSouthwest = ((float) (intNortheastX) - fltOutputX) * ((float) (+1.0));
+            fltSoutheast = (fltOutputX - (float) (intNorthwestX)) * ((float) (+1.0));
+        }
+        for (int intChannel = 0; intChannel < SIZE_1(gradOutput); intChannel += 1) {
+            float fltInput = VALUE_4(input, intN, intChannel, intY, intX);
+            if ((intNorthwestX >= 0) & (intNorthwestX < SIZE_3(gradOutput)) & (intNorthwestY >= 0) & (intNorthwestY < SIZE_2(gradOutput))) {
+                fltGradFlow += fltInput * VALUE_4(gradOutput, intN, intChannel, intNorthwestY, intNorthwestX) * fltNorthwest;
+            }
+            if ((intNortheastX >= 0) & (intNortheastX < SIZE_3(gradOutput)) & (intNortheastY >= 0) & (intNortheastY < SIZE_2(gradOutput))) {
+                fltGradFlow += fltInput * VALUE_4(gradOutput, intN, intChannel, intNortheastY, intNortheastX) * fltNortheast;
+            }
+            if ((intSouthwestX >= 0) & (intSouthwestX < SIZE_3(gradOutput)) & (intSouthwestY >= 0) & (intSouthwestY < SIZE_2(gradOutput))) {
+                fltGradFlow += fltInput * VALUE_4(gradOutput, intN, intChannel, intSouthwestY, intSouthwestX) * fltSouthwest;
+            }
+            if ((intSoutheastX >= 0) & (intSoutheastX < SIZE_3(gradOutput)) & (intSoutheastY >= 0) & (intSoutheastY < SIZE_2(gradOutput))) {
+                fltGradFlow += fltInput * VALUE_4(gradOutput, intN, intChannel, intSoutheastY, intSoutheastX) * fltSoutheast;
+            }
+        }
+        gradFlow[intIndex] = fltGradFlow;
+    } }
+'''
+def cupy_kernel(strFunction, objVariables):
+    strKernel = globals()[strFunction]
+    while True:
+        objMatch = re.search('(SIZE_)([0-4])(\()([^\)]*)(\))', strKernel)
+        if objMatch is None:
+            break
+        # end
+        intArg = int(objMatch.group(2))
+        strTensor = objMatch.group(4)
+        intSizes = objVariables[strTensor].size()
+        strKernel = strKernel.replace(objMatch.group(), str(intSizes[intArg]))
+    # end
+    while True:
+        objMatch = re.search('(OFFSET_)([0-4])(\()([^\)]+)(\))', strKernel)
+        if objMatch is None:
+            break
+        # end
+        intArgs = int(objMatch.group(2))
+        strArgs = objMatch.group(4).split(',')
+        strTensor = strArgs[0]
+        intStrides = objVariables[strTensor].stride()
+        strIndex = [ '((' + strArgs[intArg + 1].replace('{', '(').replace('}', ')').strip() + ')*' + str(intStrides[intArg]) + ')' for intArg in range(intArgs) ]
+        strKernel = strKernel.replace(objMatch.group(0), '(' + str.join('+', strIndex) + ')')
+    # end
+    while True:
+        objMatch = re.search('(VALUE_)([0-4])(\()([^\)]+)(\))', strKernel)
+        if objMatch is None:
+            break
+        # end
+        intArgs = int(objMatch.group(2))
+        strArgs = objMatch.group(4).split(',')
+        strTensor = strArgs[0]
+        intStrides = objVariables[strTensor].stride()
+        strIndex = [ '((' + strArgs[intArg + 1].replace('{', '(').replace('}', ')').strip() + ')*' + str(intStrides[intArg]) + ')' for intArg in range(intArgs) ]
+        strKernel = strKernel.replace(objMatch.group(0), strTensor + '[' + str.join('+', strIndex) + ']')
+    # end
+    return strKernel
+# end
+class _FunctionSoftsplat(torch.autograd.Function):
+    @staticmethod
+    def forward(self, input, flow):
+        intSamples = input.shape[0]
+        intInputDepth, intInputHeight, intInputWidth = input.shape[1], input.shape[2], input.shape[3]
+        intFlowDepth, intFlowHeight, intFlowWidth = flow.shape[1], flow.shape[2], flow.shape[3]
+        assert(intFlowDepth == 2)
+        assert(intInputHeight == intFlowHeight)
+        assert(intInputWidth == intFlowWidth)
+        input = input.contiguous(); assert(input.is_cuda == True)
+        flow = flow.contiguous(); assert(flow.is_cuda == True)
+        output = input.new_zeros([ intSamples, intInputDepth, intInputHeight, intInputWidth ])
+        if input.is_cuda == True:
+            n = output.nelement()
+            cupy_launch('kernel_Softsplat_updateOutput', cupy_kernel('kernel_Softsplat_updateOutput', {
+                'input': input,
+                'flow': flow,
+                'output': output
+            }))(
+                grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
+                block=tuple([ 512, 1, 1 ]),
+                args=[ cupy.int32(n), input.data_ptr(), flow.data_ptr(), output.data_ptr() ]
+            )
+        elif input.is_cuda == False:
+            raise NotImplementedError()
+        # end
+        self.save_for_backward(input, flow)
+        return output
+    # end
+    @staticmethod
+    def backward(self, gradOutput):
+        input, flow = self.saved_tensors
+        intSamples = input.shape[0]
+        intInputDepth, intInputHeight, intInputWidth = input.shape[1], input.shape[2], input.shape[3]
+        intFlowDepth, intFlowHeight, intFlowWidth = flow.shape[1], flow.shape[2], flow.shape[3]
+        assert(intFlowDepth == 2)
+        assert(intInputHeight == intFlowHeight)
+        assert(intInputWidth == intFlowWidth)
+        gradOutput = gradOutput.contiguous(); assert(gradOutput.is_cuda == True)
+        gradInput = input.new_zeros([ intSamples, intInputDepth, intInputHeight, intInputWidth ]) if self.needs_input_grad[0] == True else None
+        gradFlow = input.new_zeros([ intSamples, intFlowDepth, intFlowHeight, intFlowWidth ]) if self.needs_input_grad[1] == True else None
+        if input.is_cuda == True:
+            if gradInput is not None:
+                n = gradInput.nelement()
+                cupy_launch('kernel_Softsplat_updateGradInput', cupy_kernel('kernel_Softsplat_updateGradInput', {
+                    'input': input,
+                    'flow': flow,
+                    'gradOutput': gradOutput,
+                    'gradInput': gradInput,
+                    'gradFlow': gradFlow
+                }))(
+                    grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
+                    block=tuple([ 512, 1, 1 ]),
+                    args=[ cupy.int32(n), input.data_ptr(), flow.data_ptr(), gradOutput.data_ptr(), gradInput.data_ptr(), None ]
+                )
+            # end
+            if gradFlow is not None:
+                n = gradFlow.nelement()
+                cupy_launch('kernel_Softsplat_updateGradFlow', cupy_kernel('kernel_Softsplat_updateGradFlow', {
+                    'input': input,
+                    'flow': flow,
+                    'gradOutput': gradOutput,
+                    'gradInput': gradInput,
+                    'gradFlow': gradFlow
+                }))(
+                    grid=tuple([ int((n + 512 - 1) / 512), 1, 1 ]),
+                    block=tuple([ 512, 1, 1 ]),
+                    args=[ cupy.int32(n), input.data_ptr(), flow.data_ptr(), gradOutput.data_ptr(), None, gradFlow.data_ptr() ]
+                )
+            # end
+        elif input.is_cuda == False:
+            raise NotImplementedError()
+        # end
+        return gradInput, gradFlow
+    # end
+# end
+def FunctionSoftsplat(tenInput, tenFlow, tenMetric, strType):
+    assert(tenMetric is None or tenMetric.shape[1] == 1)
+    assert(strType in ['summation', 'average', 'linear', 'softmax'])
+    if strType == 'average':
+        tenInput = torch.cat([ tenInput, tenInput.new_ones(tenInput.shape[0], 1, tenInput.shape[2], tenInput.shape[3]) ], 1)
+    elif strType == 'linear':
+        tenInput = torch.cat([ tenInput * tenMetric, tenMetric ], 1)
+    elif strType == 'softmax':
+        tenInput = torch.cat([ tenInput * tenMetric.exp(), tenMetric.exp() ], 1)
+    # end
+    tenOutput = _FunctionSoftsplat.apply(tenInput, tenFlow)
+    if strType != 'summation':
+        tenNormalize = tenOutput[:, -1:, :, :]
+        tenNormalize[tenNormalize == 0.0] = 1.0
+        tenOutput = tenOutput[:, :-1, :, :] / tenNormalize
+    # end
+    return tenOutput
+# end
+class ModuleSoftsplat(torch.nn.Module):
+    def __init__(self, strType):
+        super().__init__()
+        self.strType = strType
+    # end
+    def forward(self, tenInput, tenFlow, tenMetric):
+        return FunctionSoftsplat(tenInput, tenFlow, tenMetric, self.strType)
+    # end
+# end

modules/feature_extactor.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import torch
+import torch.nn as nn
+import torchvision.models as models
+from modules.basic_layers import GroupNorm
+class Extractor(nn.Module):
+    def __init__(self, channels: list[int], num_groups: int = 32, use_residual: bool = True):
+        super().__init__()
+        self.use_residual = use_residual
+        self.layers = nn.ModuleList([
+            nn.Sequential(
+                nn.Conv2d(in_channels=channels[i], out_channels=channels[i + 1], kernel_size=3, stride=2, padding=1),
+                GroupNorm(channels[i + 1], num_groups = num_groups),
+                nn.SiLU(),
+                nn.Conv2d(in_channels=channels[i + 1], out_channels=channels[i + 1], kernel_size=3, stride=1, padding=1),
+                GroupNorm(channels[i + 1], num_groups = num_groups),
+                nn.SiLU()
+            ) for i in range(len(channels) - 1)
+        ])
+        if self.use_residual:
+            self.residual = nn.ModuleList([
+                nn.Sequential(
+                    nn.Conv2d(in_channels=channels[i], out_channels=channels[i + 1], kernel_size=3, stride=2, padding=1),
+                ) for i in range(len(channels) - 1)
+            ])
+    def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
+        features = []
+        for residual, layer in zip(self.residual, self.layers):
+            if self.use_residual:
+                x = layer(x) + residual(x)
+            else:
+                x = layer(x)
+            features.append(x)
+        return features
+class ResNetExtractor(nn.Module):
+    def __init__(self, pretrained: bool = True, layers_to_extract: list[str] = ["layer1", "layer2", "layer3"]):
+        super(ResNetExtractor, self).__init__()
+        resnet = models.resnet18(pretrained=pretrained)
+        self.initial_layers = nn.Sequential(
+            resnet.conv1,
+            resnet.bn1,
+            resnet.relu
+        )
+        self.layers = nn.ModuleDict({
+            "layer1": resnet.layer1,
+            "layer2": resnet.layer2,
+            "layer3": resnet.layer3,
+        })
+        self.layers_to_extract = layers_to_extract
+    def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
+        features = []
+        x = self.initial_layers(x)
+        for name, layer in self.layers.items():
+            x = layer(x)
+            if name in self.layers_to_extract:
+                features.append(x)
+        return features
+class VGGExtractor(nn.Module):
+    def __init__(self, layers_to_extract: list[int] = [8, 15, 22, 29]):
+        super(VGGExtractor, self).__init__()
+        self.vgg = models.vgg16(pretrained=True).features
+        self.layers_to_extract = layers_to_extract
+        self.selected_layers = [self.vgg[i] for i in layers_to_extract]
+    def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
+        features = []
+        for i, layer in enumerate(self.vgg):
+            x = layer(x)
+            if i in self.layers_to_extract:
+                features.append(x)
+        return features

modules/flow_models/flow_models.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import torch
+import torch.nn as nn
+from torch.nn.functional import interpolate
+from modules.cupy_module import correlation
+from modules.half_warper import HalfWarper
+from modules.feature_extactor import Extractor
+from modules.flow_models.raft.rfr_new import RAFT
+class Decoder(nn.Module):
+    def __init__(self, in_channels: int):
+        super().__init__()
+        self.syntesis = nn.Sequential(
+            nn.Conv2d(in_channels=in_channels, out_channels=128, kernel_size=3, stride=1, padding=1),
+            nn.SiLU(),
+            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
+            nn.SiLU(),
+            nn.Conv2d(in_channels=128, out_channels=96, kernel_size=3, stride=1, padding=1),
+            nn.SiLU(),
+            nn.Conv2d(in_channels=96, out_channels=64, kernel_size=3, stride=1, padding=1),
+            nn.SiLU(),
+            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1),
+            nn.SiLU(),
+            nn.Conv2d(in_channels=32, out_channels=2, kernel_size=3, stride=1, padding=1)
+        )
+    def forward(self, img1: torch.Tensor, img2: torch.Tensor, residual: torch.Tensor | None) -> torch.Tensor:
+        width = img1.shape[3] and img2.shape[3]
+        height = img1.shape[2] and img2.shape[2]
+        if residual is None:
+            corr = correlation.FunctionCorrelation(tenOne=img1, tenTwo=img2)
+            main = torch.cat([img1, corr], dim=1)
+        else:
+            flow = interpolate(input=residual,
+                               size=(height, width),
+                               mode='bilinear',
+                               align_corners=False) / \
+                                float(residual.shape[3]) * float(width)
+            backwarp_img = HalfWarper.backward_wrapping(img=img2, flow=flow)
+            corr = correlation.FunctionCorrelation(tenOne=img1, tenTwo=backwarp_img)
+            main = torch.cat([img1, corr, flow], dim=1)
+        return self.syntesis(main)
+class PWCFineFlow(nn.Module):
+    def __init__(self, pretrained_path: str | None = None):
+        super().__init__()
+        self.feature_extractor = Extractor([3, 16, 32, 64, 96, 128, 192], num_groups=16)
+        self.decoders = nn.ModuleList([
+            Decoder(16 + 81 + 2),
+            Decoder(32 + 81 + 2),
+            Decoder(64 + 81 + 2),
+            Decoder(96 + 81 + 2),
+            Decoder(128 + 81 + 2),
+            Decoder(192 + 81)
+        ])
+        if pretrained_path is not None:
+            self.load_state_dict(torch.load(pretrained_path))
+    def forward(self, img1: torch.Tensor, img2: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        width = img1.shape[3] and img2.shape[3]
+        height = img1.shape[2] and img2.shape[2]
+        feats1 = self.feature_extractor(img1)
+        feats2 = self.feature_extractor(img2)
+        forward = None
+        backward = None
+        for i in reversed(range(len(feats1))):
+            forward = self.decoders[i](feats1[i], feats2[i], forward)
+            backward = self.decoders[i](feats2[i], feats1[i], backward)
+        forward = interpolate(input=forward,
+                              size=(height, width),
+                              mode='bilinear',
+                              align_corners=False) * \
+                                 (float(width) / float(forward.shape[3]))
+        backward = interpolate(input=backward,
+                                 size=(height, width),
+                                 mode='bilinear',
+                                 align_corners=False) * \
+                                  (float(width) / float(backward.shape[3]))
+        return forward, backward
+class RAFTFineFlow(nn.Module):
+    def __init__(self, pretrained_path: str | None = None):
+        super().__init__()
+        self.raft = RAFT(pretrained_path)
+    def forward(self, img1: torch.Tensor, img2: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        forward = self.raft(img1, img2)
+        backward = self.raft(img2, img1)
+        return forward, backward

modules/flow_models/raft/LICENSE ADDED Viewed

	@@ -0,0 +1,29 @@

+BSD 3-Clause License
+Copyright (c) 2020, princeton-vl
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

modules/flow_models/raft/corr.py ADDED Viewed

	@@ -0,0 +1,56 @@

+import torch
+import torch.nn.functional as F
+from .utils import bilinear_sampler, coords_grid
+class CorrBlock:
+    def __init__(self, fmap1, fmap2, num_levels=4, radius=4):
+        self.num_levels = num_levels
+        self.radius = radius
+        self.corr_pyramid = []
+        # all pairs correlation
+        corr = CorrBlock.corr(fmap1, fmap2)
+        batch, h1, w1, dim, h2, w2 = corr.shape
+        corr = corr.reshape(batch*h1*w1, dim, h2, w2)
+        self.corr_pyramid.append(corr)
+        for i in range(self.num_levels-1):
+            corr = F.avg_pool2d(corr, 2, stride=2)
+            self.corr_pyramid.append(corr)
+    def __call__(self, coords):
+        r = self.radius
+        coords = coords.permute(0, 2, 3, 1)
+        batch, h1, w1, _ = coords.shape
+        out_pyramid = []
+        for i in range(self.num_levels):
+            corr = self.corr_pyramid[i]
+            dx = torch.linspace(-r, r, 2*r+1)
+            dy = torch.linspace(-r, r, 2*r+1)
+            delta = torch.stack(torch.meshgrid(dy, dx), dim=-1).to(coords.device)
+            centroid_lvl = coords.reshape(batch*h1*w1, 1, 1, 2) / 2**i
+            delta_lvl = delta.view(1, 2*r+1, 2*r+1, 2)
+            coords_lvl = centroid_lvl + delta_lvl
+            corr = bilinear_sampler(corr, coords_lvl)
+            corr = corr.view(batch, h1, w1, -1)
+            out_pyramid.append(corr)
+        out = torch.cat(out_pyramid, dim=-1)
+        return out.permute(0, 3, 1, 2).contiguous().float()
+    @staticmethod
+    def corr(fmap1, fmap2):
+        batch, dim, ht, wd = fmap1.shape
+        fmap1 = fmap1.view(batch, dim, ht*wd)
+        fmap2 = fmap2.view(batch, dim, ht*wd)
+        corr = torch.matmul(fmap1.transpose(1,2), fmap2)
+        corr = corr.view(batch, ht, wd, 1, ht, wd)
+        return corr  / torch.sqrt(torch.tensor(dim).float())

modules/flow_models/raft/extractor.py ADDED Viewed

	@@ -0,0 +1,342 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class ResidualBlock(nn.Module):
+    def __init__(self, in_planes, planes, norm_fn='group', stride=1):
+        super(ResidualBlock, self).__init__()
+        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, stride=stride)
+        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1)
+        self.relu = nn.ReLU(inplace=True)
+        num_groups = planes // 8
+        if norm_fn == 'group':
+            self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
+            self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
+            if not stride == 1:
+                self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
+        elif norm_fn == 'batch':
+            self.norm1 = nn.BatchNorm2d(planes)
+            self.norm2 = nn.BatchNorm2d(planes)
+            if not stride == 1:
+                self.norm3 = nn.BatchNorm2d(planes)
+        elif norm_fn == 'instance':
+            self.norm1 = nn.InstanceNorm2d(planes)
+            self.norm2 = nn.InstanceNorm2d(planes)
+            if not stride == 1:
+                self.norm3 = nn.InstanceNorm2d(planes)
+        elif norm_fn == 'none':
+            self.norm1 = nn.Sequential()
+            self.norm2 = nn.Sequential()
+            if not stride == 1:
+                self.norm3 = nn.Sequential()
+        if stride == 1:
+            self.downsample = None
+        else:
+            self.downsample = nn.Sequential(
+                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm3)
+    def forward(self, x):
+        y = x
+        y = self.relu(self.norm1(self.conv1(y)))
+        y = self.relu(self.norm2(self.conv2(y)))
+        if self.downsample is not None:
+            x = self.downsample(x)
+        return self.relu(x+y)
+class BottleneckBlock(nn.Module):
+    def __init__(self, in_planes, planes, norm_fn='group', stride=1):
+        super(BottleneckBlock, self).__init__()
+        self.conv1 = nn.Conv2d(in_planes, planes//4, kernel_size=1, padding=0)
+        self.conv2 = nn.Conv2d(planes//4, planes//4, kernel_size=3, padding=1, stride=stride)
+        self.conv3 = nn.Conv2d(planes//4, planes, kernel_size=1, padding=0)
+        self.relu = nn.ReLU(inplace=True)
+        num_groups = planes // 8
+        if norm_fn == 'group':
+            self.norm1 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
+            self.norm2 = nn.GroupNorm(num_groups=num_groups, num_channels=planes//4)
+            self.norm3 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
+            if not stride == 1:
+                self.norm4 = nn.GroupNorm(num_groups=num_groups, num_channels=planes)
+        elif norm_fn == 'batch':
+            self.norm1 = nn.BatchNorm2d(planes//4)
+            self.norm2 = nn.BatchNorm2d(planes//4)
+            self.norm3 = nn.BatchNorm2d(planes)
+            if not stride == 1:
+                self.norm4 = nn.BatchNorm2d(planes)
+        elif norm_fn == 'instance':
+            self.norm1 = nn.InstanceNorm2d(planes//4)
+            self.norm2 = nn.InstanceNorm2d(planes//4)
+            self.norm3 = nn.InstanceNorm2d(planes)
+            if not stride == 1:
+                self.norm4 = nn.InstanceNorm2d(planes)
+        elif norm_fn == 'none':
+            self.norm1 = nn.Sequential()
+            self.norm2 = nn.Sequential()
+            self.norm3 = nn.Sequential()
+            if not stride == 1:
+                self.norm4 = nn.Sequential()
+        if stride == 1:
+            self.downsample = None
+        else:
+            self.downsample = nn.Sequential(
+                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride), self.norm4)
+    def forward(self, x):
+        y = x
+        y = self.relu(self.norm1(self.conv1(y)))
+        y = self.relu(self.norm2(self.conv2(y)))
+        y = self.relu(self.norm3(self.conv3(y)))
+        if self.downsample is not None:
+            x = self.downsample(x)
+        return self.relu(x+y)
+class BasicEncoder(nn.Module):
+    def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
+        super(BasicEncoder, self).__init__()
+        self.norm_fn = norm_fn
+        if self.norm_fn == 'group':
+            self.norm1 = nn.GroupNorm(num_groups=8, num_channels=64)
+        elif self.norm_fn == 'batch':
+            self.norm1 = nn.BatchNorm2d(64)
+        elif self.norm_fn == 'instance':
+            self.norm1 = nn.InstanceNorm2d(64)
+        elif self.norm_fn == 'none':
+            self.norm1 = nn.Sequential()
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
+        self.relu1 = nn.ReLU(inplace=True)
+        self.in_planes = 64
+        self.layer1 = self._make_layer(64,  stride=1)
+        self.layer2 = self._make_layer(96, stride=2)
+        self.layer3 = self._make_layer(128, stride=2)
+        # output convolution
+        self.conv2 = nn.Conv2d(128, output_dim, kernel_size=1)
+        self.dropout = None
+        if dropout > 0:
+            self.dropout = nn.Dropout2d(p=dropout)
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
+                if m.weight is not None:
+                    nn.init.constant_(m.weight, 1)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+    def _make_layer(self, dim, stride=1):
+        layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride)
+        layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1)
+        layers = (layer1, layer2)
+        self.in_planes = dim
+        return nn.Sequential(*layers)
+    def forward(self, x):
+        # if input is list, combine batch dimension
+        is_list = isinstance(x, tuple) or isinstance(x, list)
+        if is_list:
+            batch_dim = x[0].shape[0]
+            x = torch.cat(x, dim=0)
+        x = self.conv1(x)
+        x = self.norm1(x)
+        x = self.relu1(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.conv2(x)
+        if self.training and self.dropout is not None:
+            x = self.dropout(x)
+        if is_list:
+            x = torch.split(x, [batch_dim, batch_dim], dim=0)
+        return x
+class BasicEncoder1(nn.Module):
+    def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
+        super(BasicEncoder1, self).__init__()
+        self.norm_fn = norm_fn
+        if self.norm_fn == 'group':
+            self.norm1 = nn.GroupNorm(num_groups=8, num_channels=64)
+        elif self.norm_fn == 'batch':
+            self.norm1 = nn.BatchNorm2d(64)
+        elif self.norm_fn == 'instance':
+            self.norm1 = nn.InstanceNorm2d(64)
+        elif self.norm_fn == 'none':
+            self.norm1 = nn.Sequential()
+        self.conv1 = nn.Conv2d(2, 64, kernel_size=7, stride=2, padding=3)
+        self.relu1 = nn.ReLU(inplace=True)
+        self.in_planes = 64
+        self.layer1 = self._make_layer(64,  stride=1)
+        self.layer2 = self._make_layer(96, stride=2)
+        self.layer3 = self._make_layer(128, stride=2)
+        # output convolution
+        self.conv2 = nn.Conv2d(128, output_dim, kernel_size=1)
+        self.dropout = None
+        if dropout > 0:
+            self.dropout = nn.Dropout2d(p=dropout)
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
+                if m.weight is not None:
+                    nn.init.constant_(m.weight, 1)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+    def _make_layer(self, dim, stride=1):
+        layer1 = ResidualBlock(self.in_planes, dim, self.norm_fn, stride=stride)
+        layer2 = ResidualBlock(dim, dim, self.norm_fn, stride=1)
+        layers = (layer1, layer2)
+        self.in_planes = dim
+        return nn.Sequential(*layers)
+    def forward(self, x):
+        # if input is list, combine batch dimension
+        is_list = isinstance(x, tuple) or isinstance(x, list)
+        if is_list:
+            batch_dim = x[0].shape[0]
+            x = torch.cat(x, dim=0)
+        x = self.conv1(x)
+        x = self.norm1(x)
+        x = self.relu1(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.conv2(x)
+        if self.training and self.dropout is not None:
+            x = self.dropout(x)
+        if is_list:
+            x = torch.split(x, [batch_dim, batch_dim], dim=0)
+        return x
+class SmallEncoder(nn.Module):
+    """Compact convolutional encoder built from three two-block BottleneckBlock stages.
+    A stride-2 7x7 stem (3 -> 32 channels) is followed by stages created with
+    32, 64 and 96 channels (strides 1, 2, 2) and a 1x1 projection to
+    ``output_dim`` channels.
+    Args:
+        output_dim: number of channels produced by the final 1x1 convolution.
+        norm_fn: 'group', 'batch', 'instance' or 'none'; selects the stem
+            normalisation and is forwarded to every BottleneckBlock.
+        dropout: Dropout2d probability; the layer is only created when > 0
+            and only applied in training mode.
+    Raises:
+        ValueError: if ``norm_fn`` is not one of the supported names.
+    """
+    def __init__(self, output_dim=128, norm_fn='batch', dropout=0.0):
+        super().__init__()
+        self.norm_fn = norm_fn
+        if self.norm_fn == 'group':
+            self.norm1 = nn.GroupNorm(num_groups=8, num_channels=32)
+        elif self.norm_fn == 'batch':
+            self.norm1 = nn.BatchNorm2d(32)
+        elif self.norm_fn == 'instance':
+            self.norm1 = nn.InstanceNorm2d(32)
+        elif self.norm_fn == 'none':
+            self.norm1 = nn.Sequential()
+        else:
+            # Fail at construction instead of leaving ``norm1`` undefined and
+            # crashing later inside forward().
+            raise ValueError(
+                f"Unsupported norm_fn {norm_fn!r}; expected 'group', 'batch', 'instance' or 'none'")
+        self.conv1 = nn.Conv2d(3, 32, kernel_size=7, stride=2, padding=3)
+        self.relu1 = nn.ReLU(inplace=True)
+        # ``in_planes`` tracks the channel count fed to the next stage.
+        self.in_planes = 32
+        self.layer1 = self._make_layer(32, stride=1)
+        self.layer2 = self._make_layer(64, stride=2)
+        self.layer3 = self._make_layer(96, stride=2)
+        self.dropout = None
+        if dropout > 0:
+            self.dropout = nn.Dropout2d(p=dropout)
+        self.conv2 = nn.Conv2d(96, output_dim, kernel_size=1)
+        # Kaiming init for convolutions; unit scale / zero shift for affine norms.
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+            elif isinstance(m, (nn.BatchNorm2d, nn.InstanceNorm2d, nn.GroupNorm)):
+                if m.weight is not None:
+                    nn.init.constant_(m.weight, 1)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+    def _make_layer(self, dim, stride=1):
+        """Build a stage of two BottleneckBlocks and set ``self.in_planes`` to ``dim``."""
+        layer1 = BottleneckBlock(self.in_planes, dim, self.norm_fn, stride=stride)
+        layer2 = BottleneckBlock(dim, dim, self.norm_fn, stride=1)
+        self.in_planes = dim
+        return nn.Sequential(layer1, layer2)
+    def forward(self, x):
+        """Encode a tensor, or a list/tuple of tensors batched together.
+        A list/tuple input is concatenated along dim 0, encoded in one pass and
+        returned as a tuple with one encoded tensor per input, in order.
+        """
+        is_list = isinstance(x, (tuple, list))
+        if is_list:
+            # Keep each input's batch size so any number of inputs can be
+            # split back out (previously exactly two equal batches).
+            batch_sizes = [t.shape[0] for t in x]
+            x = torch.cat(x, dim=0)
+        x = self.conv1(x)
+        x = self.norm1(x)
+        x = self.relu1(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.conv2(x)
+        if self.training and self.dropout is not None:
+            x = self.dropout(x)
+        if is_list:
+            x = torch.split(x, batch_sizes, dim=0)
+        return x

modules/flow_models/raft/rfr_new.py ADDED Viewed

	@@ -0,0 +1,235 @@

+##################################################
+#  RFR is implemented based on RAFT optical flow #
+##################################################
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from argparse import Namespace
+import numpy as np
+from .update import BasicUpdateBlock, SmallUpdateBlock
+from .extractor import BasicEncoder, SmallEncoder
+from .corr import CorrBlock
+from .utils import bilinear_sampler, coords_grid, upflow8
+try:
+    autocast = torch.amp.autocast
+except AttributeError:
+    # Fallback for PyTorch builds that do not expose ``torch.amp.autocast``:
+    # a no-op context manager accepting the call shape used in this module,
+    # i.e. ``autocast("cuda", enabled=...)``.
+    class autocast:
+        def __init__(self, *args, **kwargs):
+            pass
+        def __enter__(self):
+            pass
+        def __exit__(self, *args):
+            pass
+def backwarp(img, flow):
+    """Backward-warp ``img`` with a per-pixel displacement field (bilinear sampling).
+    Args:
+        img: tensor unpacked as (N, C, H, W).
+        flow: tensor read as ``flow[:, 0]`` (added to the column index) and
+            ``flow[:, 1]`` (added to the row index), in pixels; its spatial
+            size must match ``img``.
+    Returns:
+        ``img`` sampled at ``(x + flow_x, y + flow_y)`` for every output pixel.
+    """
+    _, _, H, W = img.size()
+    u = flow[:, 0, :, :]
+    v = flow[:, 1, :, :]
+    # Build the pixel grid on the flow's own device (CPU or any GPU) instead
+    # of a hard-coded CUDA device; broadcasting replaces the NumPy meshgrid.
+    cols = torch.arange(W, device=flow.device, dtype=torch.float32).view(1, 1, W)
+    rows = torch.arange(H, device=flow.device, dtype=torch.float32).view(1, H, 1)
+    x = cols + u
+    y = rows + v
+    # range -1 to 1 (matches align_corners=True below)
+    x = 2*(x/(W-1) - 0.5)
+    y = 2*(y/(H-1) - 0.5)
+    # stacking X and Y
+    grid = torch.stack((x, y), dim=3)
+    # Sample pixels using bilinear interpolation.
+    imgOut = torch.nn.functional.grid_sample(img, grid, align_corners=True)
+    return imgOut
+class ErrorAttention(nn.Module):
+    """A three-layer network for predicting mask.
+    Args:
+        input: number of channels of the tensor passed to ``forward``.
+        output: number of channels of the predicted map.
+    """
+    def __init__(self, input, output):
+        super().__init__()
+        self.conv1 = nn.Conv2d(input, 32, 5, padding=2)
+        self.conv2 = nn.Conv2d(32, 32, 3, padding=1)
+        # conv3 consumes conv2's 32 channels concatenated with the raw input
+        # (see forward), hence 32 + input (38 for the 6-channel default use).
+        self.conv3 = nn.Conv2d(32 + input, output, 3, padding=1)
+        self.prelu1 = nn.PReLU()
+        self.prelu2 = nn.PReLU()
+    def forward(self, x1):
+        """Return the ``output``-channel map for ``x1`` at the same spatial size."""
+        x = self.prelu1(self.conv1(x1))
+        # Skip connection: re-attach the raw input before the last convolution.
+        x = self.prelu2(torch.cat([self.conv2(x), x1], dim=1))
+        x = self.conv3(x)
+        return x
+class RFR(nn.Module):
+    """Recurrent flow refinement network implemented on top of RAFT components.
+    ``args`` is a namespace; ``corr_levels``, ``corr_radius`` and ``dropout``
+    are overwritten on it here, ``mixed_precision`` is read in
+    ``forward_pred`` and the optional flags ``not_use_rfr_mask`` /
+    ``requires_sq_flow`` are read in ``forward``.
+    """
+    def __init__(self, args):
+        super().__init__()
+        self.attention2 = ErrorAttention(6, 1)
+        self.hidden_dim = hdim = 128
+        self.context_dim = 128
+        # These settings are forced onto the caller's namespace.
+        args.corr_levels = 4
+        args.corr_radius = 4
+        args.dropout = 0
+        self.args = args
+        # Feature network and update block. There is no separate context
+        # network: forward_pred() reuses ``fnet`` for the context features.
+        self.fnet = BasicEncoder(output_dim=256, norm_fn='none', dropout=args.dropout)
+        self.update_block = BasicUpdateBlock(self.args, hidden_dim=hdim)
+    def freeze_bn(self):
+        """Put every BatchNorm2d submodule into eval mode."""
+        for m in self.modules():
+            if isinstance(m, nn.BatchNorm2d):
+                m.eval()
+    def initialize_flow(self, img):
+        """ Flow is represented as difference between two coordinate grids flow = coords1 - coords0"""
+        N, C, H, W = img.shape
+        coords0 = coords_grid(N, H//8, W//8).to(img.device)
+        coords1 = coords_grid(N, H//8, W//8).to(img.device)
+        # optical flow computed as difference: flow = coords1 - coords0
+        return coords0, coords1
+    def upsample_flow(self, flow, mask):
+        """ Upsample flow field [H/8, W/8, 2] -> [H, W, 2] using convex combination """
+        N, _, H, W = flow.shape
+        mask = mask.view(N, 1, 9, 8, 8, H, W)
+        # Softmax over the 9 neighbours gives convex-combination weights.
+        mask = torch.softmax(mask, dim=2)
+        up_flow = F.unfold(8 * flow, [3,3], padding=1)
+        up_flow = up_flow.view(N, 2, 9, 1, 1, H, W)
+        up_flow = torch.sum(mask * up_flow, dim=2)
+        up_flow = up_flow.permute(0, 1, 4, 2, 5, 3)
+        return up_flow.reshape(N, 2, 8*H, 8*W)
+    def forward(self, image1, image2, iters=12, flow_init=None, upsample=True, test_mode=False):
+        """Estimate flow between ``image1`` and ``image2``, optionally from ``flow_init``.
+        Inputs are resized so height and width are multiples of 8, the flow is
+        predicted by ``forward_pred`` and rescaled to the original size.
+        Returns:
+            * ``args.requires_sq_flow`` set: the list of per-iteration flows
+              in training mode, otherwise ``([last_flow], f12_init)``.
+            * otherwise: ``(flow, f12_init, error21)`` where ``f12_init`` is
+              the initial flow handed to ``forward_pred`` (``None`` without
+              ``flow_init``) and ``error21`` is the 1/8-resolution warping
+              error map (all zeros when it is not computed).
+        """
+        N = image1.size()[0]
+        H, W = image1.size()[2:4]
+        H8 = H // 8 * 8
+        W8 = W // 8 * 8
+        if flow_init is not None:
+            flow_init_resize = F.interpolate(flow_init, size=(H8//8, W8//8), mode='nearest')
+            # Rescale the displacement values to the 1/8-resolution grid.
+            flow_init_resize[:, :1] = flow_init_resize[:, :1].clone() * (W8 // 8 *1.0) / flow_init.size()[3]
+            flow_init_resize[:, 1:] = flow_init_resize[:, 1:].clone() * (H8 // 8*1.0) / flow_init.size()[2]
+            if not getattr(self.args, 'not_use_rfr_mask', False):
+                im18 = F.interpolate(image1, size=(H8//8, W8//8), mode='bilinear')
+                im28 = F.interpolate(image2, size=(H8//8, W8//8), mode='bilinear')
+                warp21 = backwarp(im28, flow_init_resize)
+                error21 = torch.sum(torch.abs(warp21 - im18), dim=1, keepdim=True)
+                # NOTE(review): this attention-masked initial flow is computed but
+                # never used below -- forward_pred() receives the unmasked
+                # ``flow_init_resize``. Kept as-is because the pretrained weights
+                # were presumably produced with this behaviour; confirm upstream.
+                f12init = torch.exp(- self.attention2(torch.cat([im18, error21, flow_init_resize], dim=1)) ** 2) * flow_init_resize
+            else:
+                # Previously ``error21`` was left undefined on this path and the
+                # final return raised; mirror the no-init branch with zeros.
+                error21 = torch.zeros(N, 1, H8//8, W8//8, device=image1.device)
+        else:
+            flow_init_resize = None
+            # Allocate on the input's device rather than a hard-coded CUDA device.
+            error21 = torch.zeros(N, 1, H//8, W//8, device=image1.device)
+        image1 = F.interpolate(image1, size=(H8, W8), mode='bilinear')
+        image2 = F.interpolate(image2, size=(H8, W8), mode='bilinear')
+        f12s, f12, f12_init = self.forward_pred(image1, image2, iters, flow_init_resize, upsample, test_mode)
+        if getattr(self.args, 'requires_sq_flow', False):
+            # Resize every intermediate prediction back to the input size and
+            # rescale its displacement values accordingly.
+            for ii, pred in enumerate(f12s):
+                pred = F.interpolate(pred, size=(H, W), mode='bilinear')
+                pred[:, :1] = pred[:, :1].clone() / (1.0*W8) * W
+                pred[:, 1:] = pred[:, 1:].clone() / (1.0*H8) * H
+                f12s[ii] = pred
+            if self.training:
+                return f12s
+            else:
+                return [f12s[-1]], f12_init
+        else:
+            f12[:, :1] = f12[:, :1].clone() / (1.0*W8) * W
+            f12[:, 1:] = f12[:, 1:].clone() / (1.0*H8) * H
+            f12 = F.interpolate(f12, size=(H, W), mode='bilinear')
+            return f12, f12_init, error21,
+    def forward_pred(self, image1, image2, iters=12, flow_init=None, upsample=True, test_mode=False):
+        """ Estimate optical flow between pair of frames.
+        Returns ``(flow_predictions, flow_up, flow_init)``: the upsampled flow
+        after every iteration, the last of them, and the ``flow_init``
+        argument unchanged. ``upsample`` and ``test_mode`` are unused, and
+        ``iters`` must be >= 1 (``flow_up`` is only assigned inside the loop).
+        """
+        image1 = image1.contiguous()
+        image2 = image2.contiguous()
+        hdim = self.hidden_dim
+        cdim = self.context_dim
+        # run the feature network
+        with autocast("cuda", enabled=self.args.mixed_precision):
+            fmap1, fmap2 = self.fnet([image1, image2])
+        fmap1 = fmap1.float()
+        fmap2 = fmap2.float()
+        corr_fn = CorrBlock(fmap1, fmap2, radius=self.args.corr_radius)
+        # run the context network (the feature network is reused; its 256
+        # output channels are split into hidden state and context input)
+        with autocast("cuda", enabled=self.args.mixed_precision):
+            cnet = self.fnet(image1)
+            net, inp = torch.split(cnet, [hdim, cdim], dim=1)
+            net = torch.tanh(net)
+            inp = torch.relu(inp)
+        coords0, coords1 = self.initialize_flow(image1)
+        if flow_init is not None:
+            coords1 = coords1 + flow_init
+        flow_predictions = []
+        for itr in range(iters):
+            coords1 = coords1.detach()
+            if itr == 0:
+                # NOTE(review): together with the addition before the loop this
+                # applies ``flow_init`` twice. Left untouched because changing it
+                # would alter the outputs of already-trained weights; confirm
+                # against the upstream implementation.
+                if flow_init is not None:
+                    coords1 = coords1 + flow_init
+            corr = corr_fn(coords1) # index correlation volume
+            flow = coords1 - coords0
+            with autocast("cuda", enabled=self.args.mixed_precision):
+                net, up_mask, delta_flow = self.update_block(net, inp, corr, flow)
+            # F(t+1) = F(t) + \Delta(t)
+            coords1 = coords1 + delta_flow
+            # upsample predictions
+            if up_mask is None:
+                flow_up = upflow8(coords1 - coords0)
+            else:
+                flow_up = self.upsample_flow(coords1 - coords0, up_mask)
+            flow_predictions.append(flow_up)
+        return flow_predictions, flow_up, flow_init
+class RAFT(nn.Module):
+    """Wrapper that builds an ``RFR`` network and optionally loads its weights.
+    Args:
+        path: checkpoint file whose ``'model_state_dict'`` entry holds keys
+            prefixed with ``'module.flownet.'``; pass ``None`` to skip loading.
+    """
+    def __init__(self, path='./_pretrain_models/anime_interp_full.ckpt'):
+        super().__init__()
+        self.raft = RFR(Namespace(
+            small=False,
+            mixed_precision=False,
+        ))
+        if path is not None:
+            # Load onto the CPU so a checkpoint saved from a GPU also works on
+            # CPU-only hosts; load_state_dict copies into the module's tensors.
+            sd = torch.load(path, map_location='cpu')['model_state_dict']
+            prefix = 'module.flownet.'
+            # Keep only the flow-network weights and strip their prefix;
+            # strict=False tolerates keys that do not line up.
+            self.raft.load_state_dict({
+                k[len(prefix):]: v
+                for k, v in sd.items()
+                if k.startswith(prefix)
+            }, strict=False)
+    def forward(self, img0, img1, flow0=None, iters=12, return_more=False):
+        """Return the flow from ``img0`` to ``img1``.
+        ``flow0`` (optional initial flow) and the result have their two
+        channels swapped relative to the wrapped network. ``return_more`` is
+        accepted but unused.
+        """
+        if flow0 is not None:
+            flow0 = flow0.flip(dims=(1,))
+        out = self.raft(img0, img1, iters=iters, flow_init=flow0)
+        return out[0].flip(dims=(1,))

modules/flow_models/raft/update.py ADDED Viewed

	@@ -0,0 +1,139 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class FlowHead(nn.Module):
+    """Two 3x3 convolutions with a ReLU in between, producing a 2-channel map."""
+    def __init__(self, input_dim=128, hidden_dim=256):
+        super().__init__()
+        self.conv1 = nn.Conv2d(input_dim, hidden_dim, kernel_size=3, padding=1)
+        self.conv2 = nn.Conv2d(hidden_dim, 2, kernel_size=3, padding=1)
+        self.relu = nn.ReLU(inplace=True)
+    def forward(self, x):
+        """Map ``x`` (``input_dim`` channels) to 2 channels at the same spatial size."""
+        hidden = self.relu(self.conv1(x))
+        return self.conv2(hidden)
+class ConvGRU(nn.Module):
+    """Convolutional GRU cell using 3x3 convolutions for all three gates."""
+    def __init__(self, hidden_dim=128, input_dim=192+128):
+        super().__init__()
+        gate_in = hidden_dim + input_dim
+        self.convz = nn.Conv2d(gate_in, hidden_dim, 3, padding=1)
+        self.convr = nn.Conv2d(gate_in, hidden_dim, 3, padding=1)
+        self.convq = nn.Conv2d(gate_in, hidden_dim, 3, padding=1)
+    def forward(self, h, x):
+        """Return the next hidden state for hidden state ``h`` and input ``x``."""
+        joint = torch.cat([h, x], dim=1)
+        update_gate = torch.sigmoid(self.convz(joint))
+        reset_gate = torch.sigmoid(self.convr(joint))
+        candidate = torch.tanh(self.convq(torch.cat([reset_gate*h, x], dim=1)))
+        return (1-update_gate) * h + update_gate * candidate
+class SepConvGRU(nn.Module):
+    """GRU cell applied twice: first with 1x5 (horizontal) then 5x1 (vertical) convolutions."""
+    def __init__(self, hidden_dim=128, input_dim=192+128):
+        super().__init__()
+        gate_in = hidden_dim + input_dim
+        self.convz1 = nn.Conv2d(gate_in, hidden_dim, (1,5), padding=(0,2))
+        self.convr1 = nn.Conv2d(gate_in, hidden_dim, (1,5), padding=(0,2))
+        self.convq1 = nn.Conv2d(gate_in, hidden_dim, (1,5), padding=(0,2))
+        self.convz2 = nn.Conv2d(gate_in, hidden_dim, (5,1), padding=(2,0))
+        self.convr2 = nn.Conv2d(gate_in, hidden_dim, (5,1), padding=(2,0))
+        self.convq2 = nn.Conv2d(gate_in, hidden_dim, (5,1), padding=(2,0))
+    @staticmethod
+    def _gru_pass(h, x, convz, convr, convq):
+        """Run one GRU update of ``h`` given ``x`` with the supplied gate convolutions."""
+        joint = torch.cat([h, x], dim=1)
+        update_gate = torch.sigmoid(convz(joint))
+        reset_gate = torch.sigmoid(convr(joint))
+        candidate = torch.tanh(convq(torch.cat([reset_gate*h, x], dim=1)))
+        return (1-update_gate) * h + update_gate * candidate
+    def forward(self, h, x):
+        """Return the hidden state after the horizontal and then the vertical pass."""
+        # horizontal
+        h = self._gru_pass(h, x, self.convz1, self.convr1, self.convq1)
+        # vertical
+        h = self._gru_pass(h, x, self.convz2, self.convr2, self.convq2)
+        return h
+class SmallMotionEncoder(nn.Module):
+    """Fuse correlation features and the current flow into an 82-channel motion feature.
+    ``args`` must provide ``corr_levels`` and ``corr_radius``; the correlation
+    input is expected to have ``corr_levels * (2*corr_radius + 1)**2`` channels.
+    """
+    def __init__(self, args):
+        super().__init__()
+        window = 2*args.corr_radius + 1
+        cor_planes = args.corr_levels * window**2
+        self.convc1 = nn.Conv2d(cor_planes, 96, 1, padding=0)
+        self.convf1 = nn.Conv2d(2, 64, 7, padding=3)
+        self.convf2 = nn.Conv2d(64, 32, 3, padding=1)
+        # 96 correlation + 32 flow channels in, 80 out.
+        self.conv = nn.Conv2d(128, 80, 3, padding=1)
+    def forward(self, flow, corr):
+        """Return 80 fused channels concatenated with the raw 2-channel ``flow``."""
+        corr_feat = F.relu(self.convc1(corr))
+        flow_feat = F.relu(self.convf2(F.relu(self.convf1(flow))))
+        fused = F.relu(self.conv(torch.cat([corr_feat, flow_feat], dim=1)))
+        return torch.cat([fused, flow], dim=1)
+class BasicMotionEncoder(nn.Module):
+    """Fuse correlation features and the current flow into a 128-channel motion feature.
+    ``args`` must provide ``corr_levels`` and ``corr_radius``; the correlation
+    input is expected to have ``corr_levels * (2*corr_radius + 1)**2`` channels.
+    """
+    def __init__(self, args):
+        super().__init__()
+        window = 2*args.corr_radius + 1
+        cor_planes = args.corr_levels * window**2
+        self.convc1 = nn.Conv2d(cor_planes, 256, 1, padding=0)
+        self.convc2 = nn.Conv2d(256, 192, 3, padding=1)
+        self.convf1 = nn.Conv2d(2, 128, 7, padding=3)
+        self.convf2 = nn.Conv2d(128, 64, 3, padding=1)
+        # Two output channels are reserved for the raw flow appended in forward().
+        self.conv = nn.Conv2d(64+192, 128-2, 3, padding=1)
+    def forward(self, flow, corr):
+        """Return 126 fused channels concatenated with the raw 2-channel ``flow``."""
+        corr_feat = F.relu(self.convc2(F.relu(self.convc1(corr))))
+        flow_feat = F.relu(self.convf2(F.relu(self.convf1(flow))))
+        fused = F.relu(self.conv(torch.cat([corr_feat, flow_feat], dim=1)))
+        return torch.cat([fused, flow], dim=1)
+class SmallUpdateBlock(nn.Module):
+    """Update step made of a SmallMotionEncoder, a ConvGRU and a FlowHead (no upsampling mask)."""
+    def __init__(self, args, hidden_dim=96):
+        super().__init__()
+        self.encoder = SmallMotionEncoder(args)
+        # GRU input: 82 motion-feature channels plus 64 further channels from ``inp``.
+        self.gru = ConvGRU(hidden_dim=hidden_dim, input_dim=82+64)
+        self.flow_head = FlowHead(hidden_dim, hidden_dim=128)
+    def forward(self, net, inp, corr, flow):
+        """Return ``(new_hidden_state, None, delta_flow)``; the ``None`` stands in for a mask."""
+        gru_input = torch.cat([inp, self.encoder(flow, corr)], dim=1)
+        net = self.gru(net, gru_input)
+        return net, None, self.flow_head(net)
+class BasicUpdateBlock(nn.Module):
+    """Update step: BasicMotionEncoder -> SepConvGRU -> flow head and upsampling-mask head.
+    Args:
+        args: namespace forwarded to BasicMotionEncoder.
+        hidden_dim: channels of the recurrent hidden state ``net``.
+        input_dim: accepted for interface compatibility; currently unused
+            (the GRU input width is derived from ``hidden_dim``).
+    """
+    def __init__(self, args, hidden_dim=128, input_dim=128):
+        super().__init__()
+        self.args = args
+        self.encoder = BasicMotionEncoder(args)
+        # 128 motion-feature channels plus the channels of ``inp``.
+        self.gru = SepConvGRU(hidden_dim=hidden_dim, input_dim=128+hidden_dim)
+        self.flow_head = FlowHead(hidden_dim, hidden_dim=256)
+        # The mask head reads the hidden state, so its input width follows
+        # ``hidden_dim`` (128 by default) instead of a hard-coded constant.
+        # Output: 9 weights for each cell of an 8x8 upsampling block.
+        self.mask = nn.Sequential(
+            nn.Conv2d(hidden_dim, 256, 3, padding=1),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, 64*9, 1, padding=0))
+    def forward(self, net, inp, corr, flow, upsample=True):
+        """Return ``(new_hidden_state, mask, delta_flow)``; ``upsample`` is unused."""
+        motion_features = self.encoder(flow, corr)
+        inp = torch.cat([inp, motion_features], dim=1)
+        net = self.gru(net, inp)
+        delta_flow = self.flow_head(net)
+        # scale mask to balance gradients
+        mask = .25 * self.mask(net)
+        return net, mask, delta_flow

modules/flow_models/raft/utils.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import torch
+import torch.nn.functional as F
+import numpy as np
+from scipy import interpolate
+class InputPadder:
+    """ Pads images such that dimensions are divisible by 8 """
+    def __init__(self, dims):
+        self.ht, self.wd = dims[-2:]
+        # Amount needed to reach the next multiple of 8 (0 if already aligned).
+        extra_ht = -self.ht % 8
+        extra_wd = -self.wd % 8
+        left = extra_wd // 2
+        # F.pad order: [left, right, top, bottom]; width padding is split
+        # between both sides, height padding goes entirely to the bottom.
+        self._pad = [left, extra_wd - left, 0, extra_ht]
+    def pad(self, *inputs):
+        """Return a list with every input replicate-padded by the stored amounts."""
+        padded = []
+        for tensor in inputs:
+            padded.append(F.pad(tensor, self._pad, mode='replicate'))
+        return padded
+    def unpad(self,x):
+        """Crop the padding added by ``pad`` from the last two dimensions of ``x``."""
+        ht, wd = x.shape[-2:]
+        left, right, top, bottom = self._pad
+        return x[..., top:ht - bottom, left:wd - right]
+def forward_interpolate(flow):
+    """Push a flow field forward along itself and resample it on the regular grid.
+    ``flow`` is indexed as ``flow[0]`` (x displacement) and ``flow[1]`` (y
+    displacement), each a 2-D map. Every pixel is moved by its own
+    displacement; targets strictly inside the image keep their displacement
+    values, which are then cubically interpolated back onto the pixel grid
+    (0 where no data is available). Returns a float32 CPU tensor.
+    """
+    flow_np = flow.detach().cpu().numpy()
+    dx, dy = flow_np[0], flow_np[1]
+    ht, wd = dx.shape
+    x0, y0 = np.meshgrid(np.arange(wd), np.arange(ht))
+    # Landing position of every pixel, flattened together with its displacement.
+    x1 = (x0 + dx).reshape(-1)
+    y1 = (y0 + dy).reshape(-1)
+    dx = dx.reshape(-1)
+    dy = dy.reshape(-1)
+    valid = (x1 > 0) & (x1 < wd) & (y1 > 0) & (y1 < ht)
+    points = (x1[valid], y1[valid])
+    resampled = [
+        interpolate.griddata(points, values[valid], (x0, y0), method='cubic', fill_value=0)
+        for values in (dx, dy)
+    ]
+    return torch.from_numpy(np.stack(resampled, axis=0)).float()
+def bilinear_sampler(img, coords, mode='bilinear', mask=False):
+    """ Wrapper for grid_sample, uses pixel coordinates.
+    ``coords[..., 0]`` is x and ``coords[..., 1]`` is y, in pixels; both are
+    normalised to [-1, 1] before sampling. With ``mask=True`` a float mask of
+    coordinates strictly inside the image is returned as a second value.
+    Note that ``mode`` is accepted but not forwarded to grid_sample.
+    """
+    H, W = img.shape[-2:]
+    x_pix, y_pix = coords.split([1,1], dim=-1)
+    x_norm = 2*x_pix/(W-1) - 1
+    y_norm = 2*y_pix/(H-1) - 1
+    sampled = F.grid_sample(img, torch.cat([x_norm, y_norm], dim=-1), align_corners=True)
+    if not mask:
+        return sampled
+    inside = (x_norm > -1) & (y_norm > -1) & (x_norm < 1) & (y_norm < 1)
+    return sampled, inside.float()
+def coords_grid(batch, ht, wd):
+    """Return a (batch, 2, ht, wd) float grid of pixel coordinates.
+    Channel 0 holds the x (column) index and channel 1 the y (row) index.
+    """
+    # indexing='ij' is the layout this code always relied on; passing it
+    # explicitly avoids the torch.meshgrid deprecation warning.
+    coords = torch.meshgrid(torch.arange(ht), torch.arange(wd), indexing='ij')
+    # meshgrid yields (rows, cols); reverse so the stack is ordered (x, y).
+    coords = torch.stack(coords[::-1], dim=0).float()
+    return coords[None].repeat(batch, 1, 1, 1)
+def upflow8(flow, mode='bilinear'):
+    """Upsample ``flow`` by 8x in both spatial dimensions and scale its values by 8."""
+    height, width = flow.shape[2], flow.shape[3]
+    upsampled = F.interpolate(flow, size=(8 * height, 8 * width), mode=mode, align_corners=True)
+    return 8 * upsampled

modules/half_warper.py ADDED Viewed

	@@ -0,0 +1,129 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from kornia.color import rgb_to_lab
+from utils.utils import morph_open
+from modules.cupy_module.softsplat import FunctionSoftsplat
+class HalfWarper(nn.Module):
+    """Warp two frames towards an intermediate time with softmax splatting.
+    The static helpers take flows whose channel 0 is paired with the height
+    axis and channel 1 with the width axis (see ``backward_wrapping``).
+    """
+    def __init__(self):
+        super().__init__()
+    @staticmethod
+    def backward_wrapping(
+            img: torch.Tensor,
+            flow: torch.Tensor,
+            resample: str = 'bilinear',
+            padding_mode: str = 'border',
+            align_corners: bool = False
+        ) -> torch.Tensor:
+        """Backward-warp ``img`` by sampling it at positions displaced by ``flow``.
+        Inputs without a batch dimension get one prepended. ``resample``,
+        ``padding_mode`` and ``align_corners`` are forwarded to grid_sample.
+        """
+        if len(img.shape) != 4: img = img[None,]
+        if len(flow.shape) != 4: flow = flow[None,]
+        # Normalise the displacement: channel 0 by the height, channel 1 by the width.
+        q = 2 * flow / torch.tensor([
+            flow.shape[-2], flow.shape[-1],
+        ], device=flow.device, dtype=torch.float)[None,:,None,None]
+        # Add the identity sampling grid. indexing='ij' is the layout this
+        # code always relied on; stating it avoids the deprecation warning.
+        q = q + torch.stack(torch.meshgrid(
+            torch.linspace(-1, 1, flow.shape[-2]),
+            torch.linspace(-1, 1, flow.shape[-1]),
+            indexing='ij',
+        ))[None,].to(flow.device)
+        if img.dtype != q.dtype:
+            img = img.type(q.dtype)
+        return F.grid_sample(
+            img,
+            # grid_sample expects (x, y) in the last dimension, hence the flip.
+            q.flip(dims=(1,)).permute(0, 2, 3, 1).contiguous(),
+            mode = resample, # nearest, bicubic, bilinear
+            padding_mode = padding_mode,  # border, zeros, reflection
+            align_corners = align_corners,
+        )
+    @staticmethod
+    def forward_warpping(
+            img: torch.Tensor,
+            flow: torch.Tensor,
+            mode: str = 'softmax',
+            metric: torch.Tensor | None = None,
+            mask: bool = True
+        ) -> torch.Tensor:
+        """Forward-warp (splat) ``img`` along ``flow`` with ``FunctionSoftsplat``.
+        Inputs are cast to float32 and moved to CUDA when they live on the
+        CPU. With ``mask=True`` an all-ones channel is appended to ``img``
+        before splatting, so the last output channel is the splatted ones map.
+        """
+        if len(img.shape) != 4: img = img[None,]
+        if len(flow.shape) != 4: flow = flow[None,]
+        if metric is not None and len(metric.shape)!=4: metric = metric[None,]
+        # Swap the two flow channels into the order passed to FunctionSoftsplat.
+        flow = flow.flip(dims=(1,))
+        if img.dtype != torch.float32:
+            img = img.type(torch.float32)
+        if flow.dtype != torch.float32:
+            flow = flow.type(torch.float32)
+        if metric is not None and metric.dtype != torch.float32:
+            metric = metric.type(torch.float32)
+        assert img.device == flow.device
+        if metric is not None: assert img.device == metric.device
+        if img.device.type=='cpu':
+            img = img.to('cuda')
+            flow = flow.to('cuda')
+            if metric is not None: metric = metric.to('cuda')
+        if mask:
+            batch, _, h, w = img.shape
+            img = torch.cat([img, torch.ones(batch, 1, h, w, dtype=img.dtype, device=img.device)], dim=1)
+        return FunctionSoftsplat(img, flow, metric, mode)
+    @staticmethod
+    def z_metric(
+            img0: torch.Tensor,
+            img1: torch.Tensor,
+            flow0to1: torch.Tensor,
+            flow1to0: torch.Tensor
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Return splatting metrics ``(z0to1, z1to0)`` from warping consistency.
+        Each metric is -0.1 times the per-pixel norm of the Lab colour
+        difference between one frame and the other frame backward-warped
+        onto it. Only the first three channels of each image are used.
+        """
+        img0 = rgb_to_lab(img0[:,:3])
+        img1 = rgb_to_lab(img1[:,:3])
+        z1to0 = -0.1*(img1 - HalfWarper.backward_wrapping(img0, flow1to0)).norm(dim=1, keepdim=True)
+        z0to1 = -0.1*(img0 - HalfWarper.backward_wrapping(img1, flow0to1)).norm(dim=1, keepdim=True)
+        return z0to1, z1to0
+    def forward(
+            self,
+            I0: torch.Tensor,
+            I1: torch.Tensor,
+            flow0to1: torch.Tensor,
+            flow1to0: torch.Tensor,
+            z0to1: torch.Tensor | None = None,
+            z1to0: torch.Tensor | None = None,
+            tau: float | None = None,
+            morph_kernel_size: int = 5,
+            mask: bool = True
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Warp both frames and fill each warp's holes from the other one.
+        Args:
+            I0, I1: the two frames.
+            flow0to1, flow1to0: flows between the frames in both directions.
+            z0to1, z1to0: optional splatting metrics; computed with
+                ``z_metric`` when either is missing.
+            tau: when given, the flows are scaled by ``tau`` and ``1 - tau``.
+            morph_kernel_size: kernel size passed to ``morph_open``.
+            mask: append each validity mask as an extra channel of the outputs.
+        Returns:
+            ``(base0, base1)``: warped frame 0 completed with warped frame 1
+            and vice versa.
+        """
+        if z1to0 is None or z0to1 is None:
+            z0to1, z1to0 = self.z_metric(I0, I1, flow0to1, flow1to0)
+        if tau is not None:
+            flow0tot = tau*flow0to1
+            flow1tot = (1 - tau)*flow1to0
+        else:
+            flow0tot = flow0to1
+            flow1tot = flow1to0
+        # image warping; mask=True is required here regardless of ``mask``
+        # because the last channel is split off as the validity map below.
+        fw0to1 = HalfWarper.forward_warpping(I0, flow0tot, mode='softmax', metric=z0to1, mask=True)
+        fw1to0 = HalfWarper.forward_warpping(I1, flow1tot, mode='softmax', metric=z1to0, mask=True)
+        wrapped_image0tot = fw0to1[:,:-1]
+        wrapped_image1tot = fw1to0[:,:-1]
+        mask0tot = morph_open(fw0to1[:,-1:], k=morph_kernel_size)
+        mask1tot = morph_open(fw1to0[:,-1:], k=morph_kernel_size)
+        # Where a warp has no data, fall back to the other frame's warp.
+        base0 = mask0tot*wrapped_image0tot + (1 - mask0tot)*wrapped_image1tot
+        base1 = mask1tot*wrapped_image1tot + (1 - mask1tot)*wrapped_image0tot
+        if mask:
+            base0 = torch.cat([base0, mask0tot], dim=1)
+            base1 = torch.cat([base1, mask1tot], dim=1)
+        return base0, base1

modules/synthesizer.py ADDED Viewed

	@@ -0,0 +1,277 @@

+import torch
+import torch.nn as nn
+from modules.basic_layers import (
+    SinusoidalPositionalEmbedding,
+    ResGatedBlock,
+    MaxViTBlock,
+    Downsample,
+    Upsample
+)
+class UnetDownBlock(nn.Module):
+    """Encoder stage: downsample, fuse with two warped inputs, gated conv blocks, MaxViT block."""
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        temb_channels: int = 128,
+        heads: int = 1,
+        window_size: int = 7,
+        window_attn: bool = True,
+        grid_attn: bool = True,
+        expansion_rate: int = 4,
+        num_conv_blocks: int = 2,
+        dropout: float = 0.0
+    ):
+        super().__init__()
+        self.pool = Downsample(in_channels=in_channels, out_channels=in_channels, use_conv=True)
+        # forward() concatenates the pooled features with warp0 and warp1;
+        # the first conv block is sized for 3 * in_channels + 2 channels.
+        fused_channels = 3 * in_channels + 2
+        conv_blocks = []
+        for idx in range(num_conv_blocks):
+            block_in = fused_channels if idx == 0 else out_channels
+            conv_blocks.append(ResGatedBlock(
+                in_channels=block_in,
+                out_channels=out_channels,
+                emb_channels=temb_channels,
+                gated_conv=True
+            ))
+        self.conv = nn.ModuleList(conv_blocks)
+        self.maxvit = MaxViTBlock(
+            channels=out_channels,
+            heads=heads,
+            window_size=window_size,
+            window_attn=window_attn,
+            grid_attn=grid_attn,
+            expansion_rate=expansion_rate,
+            dropout=dropout,
+            emb_channels=temb_channels
+        )
+    def forward(
+        self,
+        x: torch.Tensor,
+        warp0: torch.Tensor,
+        warp1: torch.Tensor,
+        temb: torch.Tensor
+    ):
+        """Downsample ``x``, concatenate both warps on the channel axis and refine with ``temb``."""
+        fused = torch.cat([self.pool(x), warp0, warp1], dim=1)
+        for block in self.conv:
+            fused = block(fused, temb)
+        return self.maxvit(fused, temb)
+class UnetMiddleBlock(nn.Module):
+    """Bottleneck stage: gated conv block, MaxViT block, gated conv block."""
+    def __init__(
+        self,
+        in_channels: int,
+        mid_channels: int,
+        out_channels: int,
+        temb_channels: int = 128,
+        heads: int = 1,
+        window_size: int = 7,
+        window_attn: bool = True,
+        grid_attn: bool = True,
+        expansion_rate: int = 4,
+        dropout: float = 0.0
+    ):
+        super().__init__()
+        entry = ResGatedBlock(
+            in_channels=in_channels,
+            out_channels=mid_channels,
+            emb_channels=temb_channels,
+            gated_conv=True
+        )
+        attention = MaxViTBlock(
+            channels=mid_channels,
+            heads=heads,
+            window_size=window_size,
+            window_attn=window_attn,
+            grid_attn=grid_attn,
+            expansion_rate=expansion_rate,
+            dropout=dropout,
+            emb_channels=temb_channels
+        )
+        exit_block = ResGatedBlock(
+            in_channels=mid_channels,
+            out_channels=out_channels,
+            emb_channels=temb_channels,
+            gated_conv=True
+        )
+        self.middle_blocks = nn.ModuleList([entry, attention, exit_block])
+    def forward(self, x, temb):
+        """Apply the three blocks in order, each conditioned on ``temb``."""
+        out = x
+        for stage in self.middle_blocks:
+            out = stage(out, temb)
+        return out
+class UnetUpBlock(nn.Module):
+    """Decoder stage: fuse with the skip connection, MaxViT block, upsample, gated conv blocks."""
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int,
+        temb_channels: int = 128,
+        heads: int = 1,
+        window_size: int = 7,
+        window_attn: bool = True,
+        grid_attn: bool = True,
+        expansion_rate: int = 4,
+        num_conv_blocks: int = 2,
+        dropout: float = 0.0
+    ):
+        super().__init__()
+        # forward() concatenates the input with a skip connection, so every
+        # layer before the conv blocks works on twice ``in_channels``.
+        merged_channels = 2 * in_channels
+        self.maxvit = MaxViTBlock(
+            channels=merged_channels,
+            heads=heads,
+            window_size=window_size,
+            window_attn=window_attn,
+            grid_attn=grid_attn,
+            expansion_rate=expansion_rate,
+            dropout=dropout,
+            emb_channels=temb_channels
+        )
+        self.upsample = Upsample(in_channels=merged_channels, out_channels=merged_channels, use_conv=True)
+        conv_blocks = []
+        for idx in range(num_conv_blocks):
+            block_in = merged_channels if idx == 0 else out_channels
+            conv_blocks.append(ResGatedBlock(
+                block_in,
+                out_channels,
+                emb_channels=temb_channels,
+                gated_conv=True
+            ))
+        self.conv = nn.ModuleList(conv_blocks)
+    def forward(
+        self,
+        x: torch.Tensor,
+        skip_connection: torch.Tensor,
+        temb: torch.Tensor
+    ):
+        """Concatenate ``x`` with ``skip_connection``, attend, upsample and refine with ``temb``."""
+        merged = torch.cat([x, skip_connection], dim=1)
+        merged = self.upsample(self.maxvit(merged, temb))
+        for block in self.conv:
+            merged = block(merged, temb)
+        return merged
+class Synthesis(nn.Module):
+    """U-Net style synthesis network conditioned on a positional embedding and warped inputs.
+    ``channels`` lists the feature width of every resolution level; one down
+    block and one up block are created per consecutive pair of entries.
+    """
+    def __init__(
+        self,
+        in_channels: int,
+        channels: list[int],
+        temb_channels: int,
+        heads: int = 1,
+        window_size: int = 7,
+        window_attn: bool = True,
+        grid_attn: bool = True,
+        expansion_rate: int = 4,
+        num_conv_blocks: int = 2,
+        dropout: float = 0.0
+    ):
+        super().__init__()
+        num_levels = len(channels) - 1
+        base_width = channels[0]
+        self.t_pos_encoding = SinusoidalPositionalEmbedding(temb_channels)
+        # The stem sees x concatenated with warp0[0] and warp1[0].
+        stem_conv = nn.Conv2d(3*in_channels + 4, base_width, kernel_size=3, padding=1)
+        stem_block = ResGatedBlock(
+            in_channels=base_width,
+            out_channels=base_width,
+            emb_channels=temb_channels,
+            gated_conv=True
+        )
+        self.input_blocks = nn.ModuleList([stem_conv, stem_block])
+        down_blocks = []
+        for level in range(num_levels):
+            down_blocks.append(UnetDownBlock(
+                channels[level],
+                channels[level + 1],
+                temb_channels,
+                heads=heads,
+                window_size=window_size,
+                window_attn=window_attn,
+                grid_attn=grid_attn,
+                expansion_rate=expansion_rate,
+                num_conv_blocks=num_conv_blocks,
+                dropout=dropout,
+            ))
+        self.down_blocks = nn.ModuleList(down_blocks)
+        self.middle_block = UnetMiddleBlock(
+            in_channels=channels[-1],
+            mid_channels=channels[-1],
+            out_channels=channels[-1],
+            temb_channels=temb_channels,
+            heads=heads,
+            window_size=window_size,
+            window_attn=window_attn,
+            grid_attn=grid_attn,
+            expansion_rate=expansion_rate,
+            dropout=dropout,
+        )
+        # Decoder levels are created from the deepest level back to the first.
+        up_blocks = []
+        for level in range(num_levels - 1, -1, -1):
+            up_blocks.append(UnetUpBlock(
+                channels[level + 1],
+                channels[level],
+                temb_channels,
+                heads=heads,
+                window_size=window_size,
+                window_attn=window_attn,
+                grid_attn=grid_attn,
+                expansion_rate=expansion_rate,
+                num_conv_blocks=num_conv_blocks,
+                dropout=dropout,
+            ))
+        self.up_blocks = nn.ModuleList(up_blocks)
+        head_block = ResGatedBlock(
+            in_channels=base_width,
+            out_channels=base_width,
+            emb_channels=temb_channels,
+            gated_conv=True
+        )
+        head_conv = nn.Conv2d(base_width, in_channels, kernel_size=3, padding=1)
+        self.output_blocks = nn.ModuleList([head_block, head_conv])
+    def forward(
+        self,
+        x: torch.Tensor,
+        warp0: list[torch.Tensor],
+        warp1: list[torch.Tensor],
+        temb: torch.Tensor
+    ):
+        """Run the U-Net.
+        ``warp0[k]`` / ``warp1[k]`` are the warped inputs consumed at level
+        ``k`` (index 0 by the stem, index ``k`` by the k-th down block).
+        ``temb`` is cast to float and embedded before conditioning every block.
+        """
+        stem_conv, stem_block = self.input_blocks
+        head_block, head_conv = self.output_blocks
+        temb = self.t_pos_encoding(temb.unsqueeze(-1).type(torch.float))
+        x = stem_conv(torch.cat([x, warp0[0], warp1[0]], dim=1))
+        x = stem_block(x, temb)
+        skips = []
+        for level, down_block in enumerate(self.down_blocks, start=1):
+            x = down_block(x, warp0[level], warp1[level], temb)
+            skips.append(x)
+        x = self.middle_block(x, temb)
+        # Skip connections are consumed deepest-first.
+        for up_block, skip in zip(self.up_blocks, reversed(skips)):
+            x = up_block(x, skip, temb)
+        x = head_block(x, temb)
+        return head_conv(x)

requirements.txt ADDED Viewed

	@@ -0,0 +1,42 @@

+# Main dependencies
+torch>=2.6.0
+torchvision>=0.21.0
+lightning>=2.2.4
+numpy>=1.26.4
+matplotlib>=3.8.0
+pyyaml>=6.0.0
+# Huggingface
+huggingface-hub>=0.30.2
+# Image processing and computer vision
+kornia>=0.7.2
+opencv-python>=4.10.0.84
+opencv-contrib-python>=4.10.0.84
+einops>=0.8.0
+# Custom CUDA kernels (modules/cupy_module/)
+cupy-cuda12x>=12.0.0  # For CUDA 12.4
+# Note: CuPy wheels are built per CUDA version; install the one that matches your CUDA toolkit.
+# Examples:
+# cupy-cuda11x for CUDA 11.x
+# cupy-cuda12x for CUDA 12.x
+# cupy-cuda102 for CUDA 10.2
+# Utilities and tools
+scipy>=1.7.0
+tensorboard>=2.8.0
+# Project-Specific Dependencies
+# RAFT (Flow Estimation)
+# Note: RAFT is included in the project code; no external installation is required.
+# FLOLPIPS (Quality Metrics)
+# Note: FLOLPIPS is included in the project code; no external installation is required.
+# Gradio
+gradio>=4.34.0
+imageio>=2.34.1
+imageio-ffmpeg>=0.6.0

utils/ema.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import torch
+import torch.nn as nn
+class EMA:
+    def __init__(self, beta: float):
+        super().__init__()
+        self.beta = beta
+        self.step = 0
+    def update_model_average(self, ema_model: nn.Module, current_model: nn.Module) -> None:
+        for current_params, ema_model in zip(current_model.parameters(), ema_model.parameters()):
+            old_weight, up_weight = ema_model.data, current_params.data
+            ema_model.data = self.update_average(old_weight, up_weight)
+    def update_average(self, old: torch.Tensor | None, new: torch.Tensor) -> torch.Tensor:
+        if old is None:
+            return new
+        return old * self.beta + (1 - self.beta) * new
+    def step_ema(self, ema_model: nn.Module, model: nn.Module, step_start_ema: int = 2000) -> None:
+        if self.step < step_start_ema:
+            self.reset_parameters(ema_model, model)
+            self.step += 1
+            return
+        self.update_model_average(ema_model, model)
+        self.step += 1
+    def copy_to(self, ema_model: nn.Module, model: nn.Module) -> None:
+        model.load_state_dict(ema_model.state_dict())
+    def reset_parameters(self, ema_model: nn.Module, model: nn.Module) -> None:
+        ema_model.load_state_dict(model.state_dict())

utils/inter_frame_idx.py ADDED Viewed

	@@ -0,0 +1,123 @@

+from utils.utils import morph_open
+import torch
+from kornia.color import rgb_to_grayscale
+import cv2
+import numpy as np
+class FlowEstimation:
+    def __init__(self, flow_estimator: str = "farneback"):
+        assert flow_estimator in ["farneback", "dualtvl1"], "Flow estimator must be one of [farneback, dualtvl1]"
+        if flow_estimator == "farneback":
+            self.flow_estimator = self.OptFlow_Farneback
+        elif flow_estimator == "dualtvl1":
+            self.flow_estimator = self.OptFlow_DualTVL1
+        else:
+            raise NotImplementedError
+    def OptFlow_Farneback(self, I0: torch.Tensor, I1: torch.Tensor) -> torch.Tensor:
+        device = I0.device
+        I0 = I0.cpu().clamp(0, 1) * 255
+        I1 = I1.cpu().clamp(0, 1) * 255
+        batch_size = I0.shape[0]
+        for i in range(batch_size):
+            I0_np = I0[i].permute(1, 2, 0).numpy().astype(np.uint8)
+            I1_np = I1[i].permute(1, 2, 0).numpy().astype(np.uint8)
+            I0_gray = cv2.cvtColor(I0_np, cv2.COLOR_BGR2GRAY)
+            I1_gray = cv2.cvtColor(I1_np, cv2.COLOR_BGR2GRAY)
+            flow = cv2.calcOpticalFlowFarneback(I0_gray, I1_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
+            flow = torch.from_numpy(flow).permute(2, 0, 1).unsqueeze(0).float()
+            if i == 0:
+                flows = flow
+            else:
+                flows = torch.cat((flows, flow), dim = 0)
+        return flows.to(device)
+    def OptFlow_DualTVL1(
+        self,
+        I0: torch.Tensor,
+        I1: torch.Tensor,
+        tau: float = 0.25,
+        lambda_: float = 0.15,
+        theta: float = 0.3,
+        scales_number: int = 5,
+        warps: int = 5,
+        epsilon: float = 0.01,
+        inner_iterations: int = 30,
+        outer_iterations: int = 10,
+        scale_step: float = 0.8,
+        gamma: float = 0.0
+    ) -> torch.Tensor:
+        optical_flow = cv2.optflow.createOptFlow_DualTVL1()
+        optical_flow.setTau(tau)
+        optical_flow.setLambda(lambda_)
+        optical_flow.setTheta(theta)
+        optical_flow.setScalesNumber(scales_number)
+        optical_flow.setWarpingsNumber(warps)
+        optical_flow.setEpsilon(epsilon)
+        optical_flow.setInnerIterations(inner_iterations)
+        optical_flow.setOuterIterations(outer_iterations)
+        optical_flow.setScaleStep(scale_step)
+        optical_flow.setGamma(gamma)
+        device = I0.device
+        I0 = I0.cpu().clamp(0, 1) * 255
+        I1 = I1.cpu().clamp(0, 1) * 255
+        batch_size = I0.shape[0]
+        for i in range(batch_size):
+            I0_np = I0[i].permute(1, 2, 0).numpy().astype(np.uint8)
+            I1_np = I1[i].permute(1, 2, 0).numpy().astype(np.uint8)
+            I0_gray = cv2.cvtColor(I0_np, cv2.COLOR_BGR2GRAY)
+            I1_gray = cv2.cvtColor(I1_np, cv2.COLOR_BGR2GRAY)
+            flow = optical_flow.calc(I0_gray, I1_gray, None)
+            flow = torch.from_numpy(flow).permute(2, 0, 1).unsqueeze(0).float()
+            if i == 0:
+                flows = flow
+            else:
+                flows = torch.cat((flows, flow), dim = 0)
+        return flows.to(device)
+    def __call__(self, I1: torch.Tensor, I0: torch.Tensor) -> torch.Tensor:
+        return self.flow_estimator(I1, I0)
+def get_inter_frame_temp_index(
+    I0: torch.Tensor,
+    It: torch.Tensor,
+    I1: torch.Tensor,
+    flow0tot: torch.Tensor,
+    flow1tot: torch.Tensor,
+    k: int = 5,
+    threshold: float = 2e-2
+) -> torch.Tensor:
+    I0_gray = rgb_to_grayscale(I0)
+    It_gray = rgb_to_grayscale(It)
+    I1_gray = rgb_to_grayscale(I1)
+    mask0tot = morph_open(It_gray - I0_gray, k=k)
+    mask1tot = morph_open(I1_gray - It_gray, k=k)
+    mask0tot = (abs(mask0tot) > threshold).to(torch.uint8)
+    mask1tot = (abs(mask1tot) > threshold).to(torch.uint8)
+    flow_mag0tot = torch.sqrt(flow0tot[:, 0, :, :]**2 + flow0tot[:, 1, :, :]**2).unsqueeze(1)
+    flow_mag1tot = torch.sqrt(flow1tot[:, 0, :, :]**2 + flow1tot[:, 1, :, :]**2).unsqueeze(1)
+    norm0tot = (flow_mag0tot*mask0tot).squeeze(1)
+    norm1tot = (flow_mag1tot*mask1tot).squeeze(1)
+    d0tot = torch.sum(norm0tot, dim = (1, 2))
+    d1tot = torch.sum(norm1tot, dim = (1, 2))
+    return d0tot / (d0tot + d1tot + 1e-12)

utils/raft.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import torch
+from torchvision.models.optical_flow import raft_large
+from modules.flow_models.raft.rfr_new import RAFT
+def raft_flow(
+        I0: torch.Tensor,
+        I1: torch.Tensor,
+        data_domain: str = "animation",
+        device: str = 'cuda'
+) -> tuple[torch.Tensor, torch.Tensor]:
+    if I0.dtype != torch.float32 or I1.dtype != torch.float32:
+        I0 = I0.to(torch.float32)
+        I1 = I1.to(torch.float32)
+    if data_domain == "animation":
+        raft = RAFT().requires_grad_(False).eval().to(device)
+    elif data_domain == "photorealism":
+        raft = raft_large().requires_grad_(False).eval().to(device)
+    else:
+        raise ValueError("data_domain must be either 'animation' or 'photorealism'")
+    return raft(I0, I1) if data_domain == "animation" else raft(I0, I1)[-1]

utils/uncertainty.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import torch
+import itertools
+from torchmetrics.image import LearnedPerceptualImagePatchSimilarity as LPIPS
+from utils.utils import denorm
+def compute_lpips_variability(samples: torch.Tensor,
+                              net: str = 'alex',
+                              device: str = 'cuda'
+                              ) -> float:
+    loss_fn = LPIPS(net_type=net).to(device)
+    loss_fn.eval()
+    if samples.min() >= 0.0:
+        samples = samples * 2 - 1  # Convertir [0, 1] → [-1, 1]
+    N = samples.size(0)
+    scores = []
+    for i, j in itertools.combinations(range(N), 2):
+        x = samples[i:i+1].to(device)
+        y = samples[j:j+1].to(device)
+        dist = loss_fn(denorm(x.clamp(-1, 1)), denorm(y.clamp(-1, 1)))
+        scores.append(dist.item())
+    return sum(scores) / len(scores)
+def compute_pixelwise_correlation(samples: torch.Tensor) -> float:
+    N, C, H, W = samples.shape
+    samples_flat = samples.view(N, C, -1)  # (N, C, H*W)
+    corrs = []
+    for i, j in itertools.combinations(range(N), 2):
+        x = samples_flat[i]  # (C, HW)
+        y = samples_flat[j]  # (C, HW)
+        mean_x = x.mean(dim=1, keepdim=True)
+        mean_y = y.mean(dim=1, keepdim=True)
+        x_centered = x - mean_x
+        y_centered = y - mean_y
+        numerator = (x_centered * y_centered).sum(dim=1)
+        denominator = (x_centered.norm(dim=1) * y_centered.norm(dim=1)) + 1e-8
+        corr = numerator / denominator  # (C,)
+        corrs.append(corr.mean().item())
+    return sum(corrs) / len(corrs)
+def compute_dynamic_range(samples: torch.Tensor) -> float:
+    max_vals, _ = samples.max(dim=0)  # (C, H, W)
+    min_vals, _ = samples.min(dim=0)  # (C, H, W)
+    dynamic_range = max_vals - min_vals  # (C, H, W)
+    return dynamic_range.mean().item()

utils/utils.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import os
+import torch
+import numpy as np
+import matplotlib.pyplot as plt
+try:
+    from kornia.morphology import opening
+except ImportError:
+    from kornia.morphology import open as opening
+from torchvision import transforms
+from torchvision.utils import make_grid, save_image
+from typing import Any
+def exist(val: Any) -> bool:
+    return val is not None
+def morph_open(x: torch.Tensor, k: int) -> torch.Tensor:
+    if k==0:
+        return x
+    else:
+        with torch.no_grad():
+            return opening(x, torch.ones(k,k,device=x.device))
+def make_grid_images(images: list[torch.Tensor], **kwargs) -> torch.Tensor:
+    concatenated_images = torch.cat(images, dim=3)
+    grid_concatenated = make_grid(concatenated_images, **kwargs)
+    return grid_concatenated
+def save_images(images: tuple[torch.Tensor, torch.Tensor], path: str, **kwargs) -> None:
+    gen, real = images
+    concatenated_images = torch.cat((gen, real), dim=3)
+    grid_concatenated = make_grid(concatenated_images, **kwargs)
+    ndarr_concatenated = grid_concatenated.permute(1, 2, 0).to("cpu").numpy()
+    ndarr_concatenated = (ndarr_concatenated * 255).astype(np.uint8)
+    save_image(torch.from_numpy(ndarr_concatenated).permute(2, 0, 1) / 255, path)
+def save_triplet(images: tuple[torch.Tensor, ...], path: str, **kwargs) -> None:
+    concatenated_images = torch.cat(images, dim=3)
+    grid_concatenated = make_grid(concatenated_images, **kwargs)
+    ndarr_concatenated = grid_concatenated.permute(1, 2, 0).to("cpu").numpy()
+    ndarr_concatenated = (ndarr_concatenated * 255).astype(np.uint8)
+    save_image(torch.from_numpy(ndarr_concatenated).permute(2, 0, 1) / 255, path)
+def plot_images(images: torch.Tensor) -> None:
+    plt.figure(figsize=(32, 32))
+    plt.imshow(torch.cat([
+        torch.cat([i for i in images.cpu()], dim=-1),
+    ], dim=-2).permute(1, 2, 0).cpu())
+    plt.show()
+def make_graphic(metric_name: str, metrics: list[torch.Tensor], path: str) -> None:
+    plt.figure(figsize=(32, 32))
+    metrics = [m.cpu().numpy() for m in metrics]
+    plt.plot(metrics)
+    plt.title(metric_name)
+    plt.xlabel("Epoch")
+    plt.ylabel(metric_name)
+    path = os.path.join(path, f"{metric_name}.png")
+    plt.savefig(path)
+    plt.close()
+def norm(
+    img: torch.Tensor,
+    mean: list[float] = [0.5, 0.5, 0.5],
+    std: list[float] = [0.5, 0.5, 0.5]
+) -> torch.Tensor:
+    normalize = transforms.Normalize(mean, std)
+    return normalize(img)
+def denorm(
+    img: torch.Tensor,
+    mean: list[float] = [0.5, 0.5, 0.5],
+    std: list[float] = [0.5, 0.5, 0.5]
+) -> torch.Tensor:
+    mean = torch.tensor(mean, device=img.device)
+    std = torch.tensor(std, device=img.device)
+    return img*std[None][...,None,None] + mean[None][...,None,None]