faneggg commited on
Commit
123719b
·
1 Parent(s): d89af41
.gitignore ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /.idea/
2
+ /work_dirs*
3
+ .vscode/
4
+ /tmp
5
+ /data
6
+ # /checkpoints
7
+ *.so
8
+ *.patch
9
+ __pycache__/
10
+ *.egg-info/
11
+ /viz*
12
+ /submit*
13
+ build/
14
+ *.pyd
15
+ /cache*
16
+ *.stl
17
+ # *.pth
18
+ /venv/
19
+ .nk8s
20
+ *.mp4
21
+ .vs
22
+ /exp/
23
+ /dev/
README.md CHANGED
@@ -1,10 +1,11 @@
1
  ---
2
  title: Feat2GS
3
- emoji: 📈
4
- colorFrom: red
5
- colorTo: blue
6
  sdk: gradio
7
- sdk_version: 5.16.0
 
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
1
  ---
2
  title: Feat2GS
3
+ emoji:
4
+ colorFrom: yellow
5
+ colorTo: green
6
  sdk: gradio
7
+ sdk_version: 4.20.1
8
+ python_version: 3.10.13
9
  app_file: app.py
10
  pinned: false
11
  license: apache-2.0
arguments/__init__.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ from argparse import ArgumentParser, Namespace
13
+ import sys
14
+ import os
15
+
16
+ class GroupParams:
17
+ pass
18
+
19
+ class ParamGroup:
20
+ def __init__(self, parser: ArgumentParser, name : str, fill_none = False):
21
+ group = parser.add_argument_group(name)
22
+ for key, value in vars(self).items():
23
+ shorthand = False
24
+ if key.startswith("_"):
25
+ shorthand = True
26
+ key = key[1:]
27
+ t = type(value)
28
+ value = value if not fill_none else None
29
+ if shorthand:
30
+ if t == bool:
31
+ group.add_argument("--" + key, ("-" + key[0:1]), default=value, action="store_true")
32
+ else:
33
+ group.add_argument("--" + key, ("-" + key[0:1]), default=value, type=t)
34
+ else:
35
+ if t == bool:
36
+ group.add_argument("--" + key, default=value, action="store_true")
37
+ else:
38
+ group.add_argument("--" + key, default=value, type=t)
39
+
40
+ def extract(self, args):
41
+ group = GroupParams()
42
+ for arg in vars(args).items():
43
+ if arg[0] in vars(self) or ("_" + arg[0]) in vars(self):
44
+ setattr(group, arg[0], arg[1])
45
+ return group
46
+
47
+ class ModelParams(ParamGroup):
48
+ def __init__(self, parser, sentinel=False):
49
+ self.sh_degree = 3
50
+ self._source_path = ""
51
+ self._model_path = ""
52
+ self._images = "images"
53
+ self._resolution = -1
54
+ self._white_background = False
55
+ self.data_device = "cuda"
56
+ self.eval = False
57
+ self.feat_default_dim = {
58
+ 'iuv': 3,
59
+ 'iuvrgb': 6,
60
+ 'mast3r': 1024,
61
+ 'dust3r': 1024,
62
+ 'dift': 1280,
63
+ 'dino_b16': 768,
64
+ 'dinov2_b14': 768,
65
+ 'radio': 1280,
66
+ 'clip_b16': 512,
67
+ 'mae_b16': 768,
68
+ 'midas_l16': 1024,
69
+ 'sam_base': 768,
70
+ # 'dino16': 384,
71
+ # 'dinov2': 384,
72
+ # 'clip': 512,
73
+ # 'maskclip': 512,
74
+ # 'vit': 384,
75
+ # 'resnet50': 2048,
76
+ # 'midas': 768,
77
+ # 'mae': 1024,
78
+ }
79
+ self.gs_params_group = {
80
+ 'G':{
81
+ 'head': ['xyz', 'scaling', 'rotation', 'opacity'],
82
+ 'opt':['f_dc', 'f_rest']
83
+ },
84
+ 'T':{
85
+ 'head': ['f_dc', 'f_rest'],
86
+ 'opt':['xyz', 'scaling', 'rotation', 'opacity']
87
+ },
88
+ 'A':{
89
+ 'head': ['xyz', 'scaling', 'rotation', 'opacity', 'f_dc', 'f_rest'],
90
+ 'opt':[]
91
+ },
92
+ 'Gft':{
93
+ 'head': ['xyz', 'scaling', 'rotation', 'opacity'],
94
+ 'opt':['f_dc', 'f_rest', 'pc_feat']
95
+ },
96
+ 'Tft':{
97
+ 'head': ['f_dc', 'f_rest'],
98
+ 'opt':['xyz', 'scaling', 'rotation', 'opacity', 'pc_feat']
99
+ },
100
+ 'Aft':{
101
+ 'head': ['xyz', 'scaling', 'rotation', 'opacity', 'f_dc', 'f_rest'],
102
+ 'opt':['pc_feat']
103
+ },
104
+ }
105
+ super().__init__(parser, "Loading Parameters", sentinel)
106
+
107
+ def extract(self, args):
108
+ g = super().extract(args)
109
+ g.source_path = os.path.abspath(g.source_path)
110
+ return g
111
+
112
+ class PipelineParams(ParamGroup):
113
+ def __init__(self, parser):
114
+ self.convert_SHs_python = False
115
+ self.compute_cov3D_python = False
116
+ self.debug = False
117
+ super().__init__(parser, "Pipeline Parameters")
118
+
119
+ class DefualtOptimizationParams(ParamGroup):
120
+ def __init__(self, parser):
121
+ self.lr_multiplier = 1.
122
+ self.iterations = 30_000
123
+ self.position_lr_init = 0.00016 * self.lr_multiplier
124
+ self.position_lr_final = 0.0000016 * self.lr_multiplier
125
+ self.position_lr_delay_mult = 0.01
126
+ self.position_lr_max_steps = 30_000
127
+ self.feature_lr = 0.0025 * self.lr_multiplier
128
+ self.opacity_lr = 0.05 * self.lr_multiplier
129
+ self.scaling_lr = 0.005 * self.lr_multiplier
130
+ self.rotation_lr = 0.001 * self.lr_multiplier
131
+ self.percent_dense = 0.01
132
+ self.lambda_dssim = 0.2
133
+ self.densification_interval = 100
134
+ self.opacity_reset_interval = 3000
135
+ self.densify_from_iter = 500
136
+ self.densify_until_iter = 15_000
137
+ self.densify_grad_threshold = 0.0002
138
+ self.random_background = False
139
+ super().__init__(parser, "Optimization Parameters")
140
+
141
+
142
+ class OptimizationParams(ParamGroup):
143
+ def __init__(self, parser):
144
+ self.lr_multiplier = 0.1
145
+ self.iterations = 30_000
146
+ self.pose_lr_init = 0.0001 #0.0001
147
+ self.pose_lr_final = 0.000001 #0.0001
148
+ self.position_lr_init = 0.00016 * self.lr_multiplier #0.000001
149
+ self.position_lr_final = 0.0000016 * self.lr_multiplier #0.000001
150
+ self.position_lr_delay_mult = 0.01
151
+ self.position_lr_max_steps = 30_000
152
+ self.feature_lr = 0.0025 * self.lr_multiplier #0.001
153
+ self.feature_sh_lr = (0.0025/20.) * self.lr_multiplier #0.000001
154
+ self.opacity_lr = 0.05 * self.lr_multiplier #0.0001
155
+ self.scaling_lr = 0.005 * self.lr_multiplier # 0.001
156
+ self.rotation_lr = 0.001 * self.lr_multiplier # 0.00001
157
+ self.percent_dense = 0.01
158
+ self.lambda_dssim = 0.2
159
+ self.densification_interval = 100
160
+ self.opacity_reset_interval = 3000
161
+ # self.densify_from_iter = 500
162
+ # self.densify_until_iter = 15_000
163
+ # self.densify_grad_threshold = 0.0002
164
+ self.random_background = False
165
+ super().__init__(parser, "Optimization Parameters")
166
+
167
+ def get_combined_args(parser : ArgumentParser):
168
+ cmdlne_string = sys.argv[1:]
169
+ cfgfile_string = "Namespace()"
170
+ args_cmdline = parser.parse_args(cmdlne_string)
171
+
172
+ try:
173
+ cfgfilepath = os.path.join(args_cmdline.model_path, "cfg_args")
174
+ print("Looking for config file in", cfgfilepath)
175
+ with open(cfgfilepath) as cfg_file:
176
+ print("Config file found: {}".format(cfgfilepath))
177
+ cfgfile_string = cfg_file.read()
178
+ except TypeError:
179
+ print("Config file not found at")
180
+ pass
181
+ args_cfgfile = eval(cfgfile_string)
182
+
183
+ merged_dict = vars(args_cfgfile).copy()
184
+ for k,v in vars(args_cmdline).items():
185
+ if v != None:
186
+ merged_dict[k] = v
187
+ return Namespace(**merged_dict)
command ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ conda activate feat2gs
2
+ cd Feat2GS/
3
+
4
+ bash scripts/run_feat2gs_eval_parallel.sh
5
+ bash scripts/run_feat2gs_eval.sh
6
+ bash scripts/run_instantsplat_eval_parallel.sh
7
+ bash scripts/run_feat2gs_eval_dtu_parallel.sh
8
+
9
+ python video/generate_video.py
10
+
11
+ bash scripts/run_all_trajectories.sh
12
+ bash scripts/run_video_render.sh
13
+ bash scripts/run_video_render_instantsplat.sh
14
+ bash scripts/run_video_render_dtu.sh
15
+
16
+ tensorboard --logdir=/home/chenyue/output/Feat2gs/output/eval/ --port=7001
17
+
18
+ cd /home/chenyue/output/Feat2gs/output/eval/Tanks/Train/6_views/feat2gs-G/dust3r/
19
+ tensorboard --logdir_spec \
20
+ radio:radio,\
21
+ dust3r:dust3r,\
22
+ dino_b16:dino_b16,\
23
+ mast3r:mast3r,\
24
+ dift:dift,\
25
+ dinov2:dinov2_b14,\
26
+ clip:clip_b16,\
27
+ mae:mae_b16,\
28
+ midas:midas_l16,\
29
+ sam:sam_base,\
30
+ iuvrgb:iuvrgb \
31
+ --port 7002
32
+
33
+ CUDA_VISIBLE_DEVICES=7 gradio demo.py
gaussian_renderer/__init__.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import torch
13
+ import math
14
+ from scene.gaussian_model import GaussianModel
15
+ from utils.pose_utils import get_camera_from_tensor, quadmultiply
16
+ from utils.graphics_utils import depth_to_normal
17
+
18
+
19
+ ### if use [diff-gaussian-rasterization](https://github.com/graphdeco-inria/diff-gaussian-rasterization)
20
+
21
+ # from diff_gaussian_rasterization import (
22
+ # GaussianRasterizationSettings,
23
+ # GaussianRasterizer,
24
+ # )
25
+ # from utils.sh_utils import eval_sh
26
+
27
+ # def render(
28
+ # viewpoint_camera,
29
+ # pc: GaussianModel,
30
+ # pipe,
31
+ # bg_color: torch.Tensor,
32
+ # scaling_modifier=1.0,
33
+ # override_color=None,
34
+ # camera_pose=None,
35
+ # ):
36
+ # """
37
+ # Render the scene.
38
+
39
+ # Background tensor (bg_color) must be on GPU!
40
+ # """
41
+
42
+ # # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means
43
+ # screenspace_points = (
44
+ # torch.zeros_like(
45
+ # pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda"
46
+ # )
47
+ # + 0
48
+ # )
49
+ # try:
50
+ # screenspace_points.retain_grad()
51
+ # except:
52
+ # pass
53
+
54
+ # # Set up rasterization configuration
55
+ # tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
56
+ # tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)
57
+
58
+ # # Set camera pose as identity. Then, we will transform the Gaussians around camera_pose
59
+ # w2c = torch.eye(4).cuda()
60
+ # projmatrix = (
61
+ # w2c.unsqueeze(0).bmm(viewpoint_camera.projection_matrix.unsqueeze(0))
62
+ # ).squeeze(0)
63
+ # camera_pos = w2c.inverse()[3, :3]
64
+ # raster_settings = GaussianRasterizationSettings(
65
+ # image_height=int(viewpoint_camera.image_height),
66
+ # image_width=int(viewpoint_camera.image_width),
67
+ # tanfovx=tanfovx,
68
+ # tanfovy=tanfovy,
69
+ # bg=bg_color,
70
+ # scale_modifier=scaling_modifier,
71
+ # # viewmatrix=viewpoint_camera.world_view_transform,
72
+ # # projmatrix=viewpoint_camera.full_proj_transform,
73
+ # viewmatrix=w2c,
74
+ # projmatrix=projmatrix,
75
+ # sh_degree=pc.active_sh_degree,
76
+ # # campos=viewpoint_camera.camera_center,
77
+ # campos=camera_pos,
78
+ # prefiltered=False,
79
+ # debug=pipe.debug,
80
+ # )
81
+
82
+ # rasterizer = GaussianRasterizer(raster_settings=raster_settings)
83
+
84
+ # # means3D = pc.get_xyz
85
+ # rel_w2c = get_camera_from_tensor(camera_pose)
86
+ # # Transform mean and rot of Gaussians to camera frame
87
+ # gaussians_xyz = pc._xyz.clone()
88
+ # gaussians_rot = pc._rotation.clone()
89
+
90
+ # xyz_ones = torch.ones(gaussians_xyz.shape[0], 1).cuda().float()
91
+ # xyz_homo = torch.cat((gaussians_xyz, xyz_ones), dim=1)
92
+ # gaussians_xyz_trans = (rel_w2c @ xyz_homo.T).T[:, :3]
93
+ # gaussians_rot_trans = quadmultiply(camera_pose[:4], gaussians_rot)
94
+ # means3D = gaussians_xyz_trans
95
+ # means2D = screenspace_points
96
+ # opacity = pc.get_opacity
97
+
98
+ # # If precomputed 3d covariance is provided, use it. If not, then it will be computed from
99
+ # # scaling / rotation by the rasterizer.
100
+ # scales = None
101
+ # rotations = None
102
+ # cov3D_precomp = None
103
+ # if pipe.compute_cov3D_python:
104
+ # cov3D_precomp = pc.get_covariance(scaling_modifier)
105
+ # else:
106
+ # scales = pc.get_scaling
107
+ # rotations = gaussians_rot_trans # pc.get_rotation
108
+
109
+ # # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors
110
+ # # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer.
111
+ # shs = None
112
+ # colors_precomp = None
113
+ # if override_color is None:
114
+ # if pipe.convert_SHs_python:
115
+ # shs_view = pc.get_features.transpose(1, 2).view(
116
+ # -1, 3, (pc.max_sh_degree + 1) ** 2
117
+ # )
118
+ # dir_pp = pc.get_xyz - viewpoint_camera.camera_center.repeat(
119
+ # pc.get_features.shape[0], 1
120
+ # )
121
+ # dir_pp_normalized = dir_pp / dir_pp.norm(dim=1, keepdim=True)
122
+ # sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized)
123
+ # colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0)
124
+ # else:
125
+ # shs = pc.get_features
126
+ # else:
127
+ # colors_precomp = override_color
128
+
129
+ # # Rasterize visible Gaussians to image, obtain their radii (on screen).
130
+ # rendered_image, radii = rasterizer(
131
+ # means3D=means3D,
132
+ # means2D=means2D,
133
+ # shs=shs,
134
+ # colors_precomp=colors_precomp,
135
+ # opacities=opacity,
136
+ # scales=scales,
137
+ # rotations=rotations,
138
+ # cov3D_precomp=cov3D_precomp,
139
+ # )
140
+
141
+ # # Those Gaussians that were frustum culled or had a radius of 0 were not visible.
142
+ # # They will be excluded from value updates used in the splitting criteria.
143
+ # return {
144
+ # "render": rendered_image,
145
+ # "viewspace_points": screenspace_points,
146
+ # "visibility_filter": radii > 0,
147
+ # "radii": radii,
148
+ # }
149
+
150
+
151
+ ### if use [gsplat](https://github.com/nerfstudio-project/gsplat)
152
+
153
+ from gsplat import rasterization
154
+
155
+ def render_gsplat(
156
+ viewpoint_camera,
157
+ pc : GaussianModel,
158
+ pipe,
159
+ bg_color : torch.Tensor,
160
+ scaling_modifier = 1.0,
161
+ override_color = None,
162
+ camera_pose = None,
163
+ fov = None,
164
+ render_mode="RGB"):
165
+ """
166
+ Render the scene.
167
+
168
+ Background tensor (bg_color) must be on GPU!
169
+ """
170
+ if fov is None:
171
+ FoVx = viewpoint_camera.FoVx
172
+ FoVy = viewpoint_camera.FoVy
173
+ else:
174
+ FoVx = fov[0]
175
+ FoVy = fov[1]
176
+ tanfovx = math.tan(FoVx * 0.5)
177
+ tanfovy = math.tan(FoVy * 0.5)
178
+ focal_length_x = viewpoint_camera.image_width / (2 * tanfovx)
179
+ focal_length_y = viewpoint_camera.image_height / (2 * tanfovy)
180
+ K = torch.tensor(
181
+ [
182
+ [focal_length_x, 0, viewpoint_camera.image_width / 2.0],
183
+ [0, focal_length_y, viewpoint_camera.image_height / 2.0],
184
+ [0, 0, 1],
185
+ ],
186
+ device="cuda",
187
+ )
188
+
189
+ means3D = pc.get_xyz
190
+ opacity = pc.get_opacity
191
+ scales = pc.get_scaling * scaling_modifier
192
+ rotations = pc.get_rotation
193
+ if override_color is not None:
194
+ colors = override_color # [N, 3]
195
+ sh_degree = None
196
+ else:
197
+ colors = pc.get_features # [N, K, 3]
198
+ sh_degree = pc.active_sh_degree
199
+
200
+ if camera_pose is None:
201
+ viewmat = viewpoint_camera.world_view_transform.transpose(0, 1) # [4, 4]
202
+ else:
203
+ viewmat = get_camera_from_tensor(camera_pose)
204
+ render_colors, render_alphas, info = rasterization(
205
+ means=means3D, # [N, 3]
206
+ quats=rotations, # [N, 4]
207
+ scales=scales, # [N, 3]
208
+ opacities=opacity.squeeze(-1), # [N,]
209
+ colors=colors,
210
+ viewmats=viewmat[None], # [1, 4, 4]
211
+ Ks=K[None], # [1, 3, 3]
212
+ backgrounds=bg_color[None],
213
+ width=int(viewpoint_camera.image_width),
214
+ height=int(viewpoint_camera.image_height),
215
+ packed=False,
216
+ sh_degree=sh_degree,
217
+ render_mode=render_mode,
218
+ )
219
+
220
+ if "D" in render_mode:
221
+ if "+" in render_mode:
222
+ depth_map = render_colors[..., -1:]
223
+ else:
224
+ depth_map = render_colors
225
+
226
+ normals_surf = depth_to_normal(
227
+ depth_map, torch.inverse(viewmat[None]), K[None])
228
+ normals_surf = normals_surf * (render_alphas).detach()
229
+ render_colors = torch.cat([render_colors, normals_surf], dim=-1)
230
+
231
+ # [1, H, W, 3] -> [3, H, W]
232
+ rendered_image = render_colors[0].permute(2, 0, 1)
233
+
234
+ radii = info["radii"].squeeze(0) # [N,]
235
+ try:
236
+ info["means2d"].retain_grad() # [1, N, 2]
237
+ except:
238
+ pass
239
+
240
+ # Those Gaussians that were frustum culled or had a radius of 0 were not visible.
241
+ # They will be excluded from value updates used in the splitting criteria.
242
+ return {"render": rendered_image,
243
+ "viewspace_points": info["means2d"],
244
+ "visibility_filter" : radii > 0,
245
+ "radii": radii}
gaussian_renderer/network_gui.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import torch
13
+ import traceback
14
+ import socket
15
+ import json
16
+ from scene.cameras import MiniCam
17
+
18
+ host = "127.0.0.1"
19
+ port = 6009
20
+
21
+ conn = None
22
+ addr = None
23
+
24
+ listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
25
+
26
+ def init(wish_host, wish_port):
27
+ global host, port, listener
28
+ host = wish_host
29
+ port = wish_port
30
+ listener.bind((host, port))
31
+ listener.listen()
32
+ listener.settimeout(0)
33
+
34
+ def try_connect():
35
+ global conn, addr, listener
36
+ try:
37
+ conn, addr = listener.accept()
38
+ print(f"\nConnected by {addr}")
39
+ conn.settimeout(None)
40
+ except Exception as inst:
41
+ pass
42
+
43
+ def read():
44
+ global conn
45
+ messageLength = conn.recv(4)
46
+ messageLength = int.from_bytes(messageLength, 'little')
47
+ message = conn.recv(messageLength)
48
+ return json.loads(message.decode("utf-8"))
49
+
50
+ def send(message_bytes, verify):
51
+ global conn
52
+ if message_bytes != None:
53
+ conn.sendall(message_bytes)
54
+ conn.sendall(len(verify).to_bytes(4, 'little'))
55
+ conn.sendall(bytes(verify, 'ascii'))
56
+
57
+ def receive():
58
+ message = read()
59
+
60
+ width = message["resolution_x"]
61
+ height = message["resolution_y"]
62
+
63
+ if width != 0 and height != 0:
64
+ try:
65
+ do_training = bool(message["train"])
66
+ fovy = message["fov_y"]
67
+ fovx = message["fov_x"]
68
+ znear = message["z_near"]
69
+ zfar = message["z_far"]
70
+ do_shs_python = bool(message["shs_python"])
71
+ do_rot_scale_python = bool(message["rot_scale_python"])
72
+ keep_alive = bool(message["keep_alive"])
73
+ scaling_modifier = message["scaling_modifier"]
74
+ world_view_transform = torch.reshape(torch.tensor(message["view_matrix"]), (4, 4)).cuda()
75
+ world_view_transform[:,1] = -world_view_transform[:,1]
76
+ world_view_transform[:,2] = -world_view_transform[:,2]
77
+ full_proj_transform = torch.reshape(torch.tensor(message["view_projection_matrix"]), (4, 4)).cuda()
78
+ full_proj_transform[:,1] = -full_proj_transform[:,1]
79
+ custom_cam = MiniCam(width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform)
80
+ except Exception as e:
81
+ print("")
82
+ traceback.print_exc()
83
+ raise e
84
+ return custom_cam, do_training, do_shs_python, do_rot_scale_python, keep_alive, scaling_modifier
85
+ else:
86
+ return None, None, None, None, None, None
lpipsPyTorch/__init__.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ from .modules.lpips import LPIPS
4
+
5
+
6
+ def lpips(x: torch.Tensor,
7
+ y: torch.Tensor,
8
+ net_type: str = 'alex',
9
+ version: str = '0.1',
10
+ return_spatial_map=False):
11
+ r"""Function that measures
12
+ Learned Perceptual Image Patch Similarity (LPIPS).
13
+
14
+ Arguments:
15
+ x, y (torch.Tensor): the input tensors to compare.
16
+ net_type (str): the network type to compare the features:
17
+ 'alex' | 'squeeze' | 'vgg'. Default: 'alex'.
18
+ version (str): the version of LPIPS. Default: 0.1.
19
+ return_spatial_map (bool): whether to return the spatial map. Default: False.
20
+ """
21
+ device = x.device
22
+ criterion = LPIPS(net_type, version).to(device)
23
+ return criterion(x, y, return_spatial_map)
lpipsPyTorch/modules/lpips.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ from .networks import get_network, LinLayers
5
+ from .utils import get_state_dict
6
+
7
+
8
+ class LPIPS(nn.Module):
9
+ r"""Creates a criterion that measures
10
+ Learned Perceptual Image Patch Similarity (LPIPS).
11
+
12
+ Arguments:
13
+ net_type (str): the network type to compare the features:
14
+ 'alex' | 'squeeze' | 'vgg'. Default: 'alex'.
15
+ version (str): the version of LPIPS. Default: 0.1.
16
+ """
17
+ def __init__(self, net_type: str = 'alex', version: str = '0.1'):
18
+
19
+ assert version in ['0.1'], 'v0.1 is only supported now'
20
+
21
+ super(LPIPS, self).__init__()
22
+
23
+ # pretrained network
24
+ self.net = get_network(net_type)
25
+
26
+ # linear layers
27
+ self.lin = LinLayers(self.net.n_channels_list)
28
+ self.lin.load_state_dict(get_state_dict(net_type, version))
29
+
30
+ def forward(self, x: torch.Tensor, y: torch.Tensor, return_spatial_map=False):
31
+ feat_x, feat_y = self.net(x), self.net(y)
32
+
33
+ diff = [(fx - fy) ** 2 for fx, fy in zip(feat_x, feat_y)]
34
+ res = [l(d) for d, l in zip(diff, self.lin)]
35
+
36
+ if return_spatial_map:
37
+ target_size = (x.shape[2], x.shape[3])
38
+ res_upsampled = [torch.nn.functional.interpolate(r, size=target_size, mode='bilinear', align_corners=False)
39
+ for r in res]
40
+ spatial_map = torch.sum(torch.cat(res_upsampled, 1), 1, keepdim=True)
41
+ return spatial_map
42
+ else:
43
+ res = [r.mean((2, 3), True) for r in res]
44
+ return torch.sum(torch.cat(res, 0), 0, True)
lpipsPyTorch/modules/networks.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Sequence
2
+
3
+ from itertools import chain
4
+
5
+ import torch
6
+ import torch.nn as nn
7
+ from torchvision import models
8
+
9
+ from .utils import normalize_activation
10
+
11
+
12
+ def get_network(net_type: str):
13
+ if net_type == 'alex':
14
+ return AlexNet()
15
+ elif net_type == 'squeeze':
16
+ return SqueezeNet()
17
+ elif net_type == 'vgg':
18
+ return VGG16()
19
+ else:
20
+ raise NotImplementedError('choose net_type from [alex, squeeze, vgg].')
21
+
22
+
23
+ class LinLayers(nn.ModuleList):
24
+ def __init__(self, n_channels_list: Sequence[int]):
25
+ super(LinLayers, self).__init__([
26
+ nn.Sequential(
27
+ nn.Identity(),
28
+ nn.Conv2d(nc, 1, 1, 1, 0, bias=False)
29
+ ) for nc in n_channels_list
30
+ ])
31
+
32
+ for param in self.parameters():
33
+ param.requires_grad = False
34
+
35
+
36
+ class BaseNet(nn.Module):
37
+ def __init__(self):
38
+ super(BaseNet, self).__init__()
39
+
40
+ # register buffer
41
+ self.register_buffer(
42
+ 'mean', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
43
+ self.register_buffer(
44
+ 'std', torch.Tensor([.458, .448, .450])[None, :, None, None])
45
+
46
+ def set_requires_grad(self, state: bool):
47
+ for param in chain(self.parameters(), self.buffers()):
48
+ param.requires_grad = state
49
+
50
+ def z_score(self, x: torch.Tensor):
51
+ return (x - self.mean) / self.std
52
+
53
+ def forward(self, x: torch.Tensor):
54
+ x = self.z_score(x)
55
+
56
+ output = []
57
+ for i, (_, layer) in enumerate(self.layers._modules.items(), 1):
58
+ x = layer(x)
59
+ if i in self.target_layers:
60
+ output.append(normalize_activation(x))
61
+ if len(output) == len(self.target_layers):
62
+ break
63
+ return output
64
+
65
+
66
+ class SqueezeNet(BaseNet):
67
+ def __init__(self):
68
+ super(SqueezeNet, self).__init__()
69
+
70
+ self.layers = models.squeezenet1_1(True).features
71
+ self.target_layers = [2, 5, 8, 10, 11, 12, 13]
72
+ self.n_channels_list = [64, 128, 256, 384, 384, 512, 512]
73
+
74
+ self.set_requires_grad(False)
75
+
76
+
77
+ class AlexNet(BaseNet):
78
+ def __init__(self):
79
+ super(AlexNet, self).__init__()
80
+
81
+ self.layers = models.alexnet(True).features
82
+ self.target_layers = [2, 5, 8, 10, 12]
83
+ self.n_channels_list = [64, 192, 384, 256, 256]
84
+
85
+ self.set_requires_grad(False)
86
+
87
+
88
+ class VGG16(BaseNet):
89
+ def __init__(self):
90
+ super(VGG16, self).__init__()
91
+
92
+ self.layers = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).features
93
+ self.target_layers = [4, 9, 16, 23, 30]
94
+ self.n_channels_list = [64, 128, 256, 512, 512]
95
+
96
+ self.set_requires_grad(False)
lpipsPyTorch/modules/utils.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import OrderedDict
2
+
3
+ import torch
4
+
5
+
6
+ def normalize_activation(x, eps=1e-10):
7
+ norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True))
8
+ return x / (norm_factor + eps)
9
+
10
+
11
+ def get_state_dict(net_type: str = 'alex', version: str = '0.1'):
12
+ # build url
13
+ url = 'https://raw.githubusercontent.com/richzhang/PerceptualSimilarity/' \
14
+ + f'master/lpips/weights/v{version}/{net_type}.pth'
15
+
16
+ # download
17
+ old_state_dict = torch.hub.load_state_dict_from_url(
18
+ url, progress=True,
19
+ map_location=None if torch.cuda.is_available() else torch.device('cpu')
20
+ )
21
+
22
+ # rename keys
23
+ new_state_dict = OrderedDict()
24
+ for key, val in old_state_dict.items():
25
+ new_key = key
26
+ new_key = new_key.replace('lin', '')
27
+ new_key = new_key.replace('model.', '')
28
+ new_state_dict[new_key] = val
29
+
30
+ return new_state_dict
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch==2.5.1
2
+ torchvision==0.20.1
3
+ roma
4
+ evo
5
+ gradio>=4,<5
6
+ matplotlib
7
+ tqdm
8
+ opencv-python
9
+ scipy
10
+ einops
11
+ trimesh
12
+ tensorboard
13
+ pyglet<2
14
+ imageio
15
+ gsplat
16
+ scikit-learn
17
+ hydra-submitit-launcher
18
+ huggingface-hub[torch]==0.24
19
+ plyfile
20
+ imageio[ffmpeg]
21
+ spaces
run_video.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import matplotlib
13
+ matplotlib.use('Agg')
14
+
15
+ import math
16
+ import copy
17
+ import torch
18
+ from scene import Scene
19
+ import os
20
+ from tqdm import tqdm
21
+ from gaussian_renderer import render_gsplat
22
+ from argparse import ArgumentParser
23
+ from arguments import ModelParams, PipelineParams, get_combined_args
24
+ from gaussian_renderer import GaussianModel
25
+ from utils.pose_utils import get_tensor_from_camera
26
+ import numpy as np
27
+ import imageio.v3 as iio
28
+ from utils.graphics_utils import resize_render, make_video_divisble
29
+
30
+ from utils.trajectories import (
31
+ get_arc_w2cs,
32
+ get_avg_w2c,
33
+ get_lemniscate_w2cs,
34
+ get_spiral_w2cs,
35
+ get_wander_w2cs,
36
+ get_lookat,
37
+ )
38
+
39
+ from utils.camera_utils import generate_interpolated_path, generate_ellipse_path
40
+ from utils.camera_traj_config import trajectory_configs
41
+
42
+
43
+ def save_interpolated_pose(model_path, iter, n_views):
44
+
45
+ org_pose = np.load(model_path + f"pose/pose_{iter}.npy")
46
+ # visualizer(org_pose, ["green" for _ in org_pose], model_path + "pose/poses_optimized.png")
47
+ n_interp = int(10 * 30 / n_views) # 10second, fps=30
48
+ all_inter_pose = []
49
+ for i in range(n_views-1):
50
+ tmp_inter_pose = generate_interpolated_path(poses=org_pose[i:i+2], n_interp=n_interp)
51
+ all_inter_pose.append(tmp_inter_pose)
52
+ all_inter_pose = np.array(all_inter_pose).reshape(-1, 3, 4)
53
+
54
+ inter_pose_list = []
55
+ for p in all_inter_pose:
56
+ tmp_view = np.eye(4)
57
+ tmp_view[:3, :3] = p[:3, :3]
58
+ tmp_view[:3, 3] = p[:3, 3]
59
+ inter_pose_list.append(tmp_view)
60
+ inter_pose = np.stack(inter_pose_list, 0)
61
+ return inter_pose
62
+
63
+
64
+ def save_ellipse_pose(model_path, iter, n_views):
65
+
66
+ org_pose = np.load(model_path + f"pose/pose_{iter}.npy")
67
+ # visualizer(org_pose, ["green" for _ in org_pose], model_path + "pose/poses_optimized.png")
68
+ n_interp = int(10 * 30 / n_views) * (n_views-1) # 10second, fps=30
69
+ all_inter_pose = generate_ellipse_path(org_pose, n_interp)
70
+
71
+ inter_pose_list = []
72
+ for p in all_inter_pose:
73
+ c2w = np.eye(4)
74
+ c2w[:3, :4] = p
75
+ inter_pose_list.append(np.linalg.inv(c2w))
76
+ inter_pose = np.stack(inter_pose_list, 0)
77
+
78
+ return inter_pose
79
+
80
+ def save_traj_pose(dataset, iter, args):
81
+
82
+ traj_up = trajectory_configs.get(args.dataset, {}).get(args.scene, {}).get('up', [-1, 1]) # Use -y axis in camera space as up vector
83
+ traj_params = trajectory_configs.get(args.dataset, {}).get(args.scene, {}).get(args.cam_traj, {})
84
+
85
+ # 1. Get training camera poses and calculate trajectory
86
+ org_pose = np.load(dataset.model_path + f"pose/pose_{iter}.npy")
87
+ train_w2cs = torch.from_numpy(org_pose).cuda()
88
+
89
+ # Calculate reference camera pose
90
+ avg_w2c = get_avg_w2c(train_w2cs)
91
+ train_c2ws = torch.linalg.inv(train_w2cs)
92
+ lookat = get_lookat(train_c2ws[:, :3, -1], train_c2ws[:, :3, 2])
93
+ # up = torch.tensor([0.0, 0.0, 1.0], device="cuda")
94
+ avg_c2w = torch.linalg.inv(avg_w2c)
95
+ up = traj_up[0] * (avg_c2w[:3, traj_up[1]])
96
+ # up = traj_up[0] * (avg_c2w[:3, 0]+avg_c2w[:3, 1])/2
97
+
98
+ # Temporarily load a camera to get intrinsic parameters
99
+ tmp_args = copy.deepcopy(args)
100
+ tmp_args.get_video = False
101
+ tmp_dataset = copy.deepcopy(dataset)
102
+ tmp_dataset.eval = False
103
+ with torch.no_grad():
104
+ temp_gaussians = GaussianModel(dataset.sh_degree)
105
+ temp_scene = Scene(tmp_dataset, temp_gaussians, load_iteration=iter, opt=tmp_args, shuffle=False)
106
+
107
+ view = temp_scene.getTrainCameras()[0]
108
+ tanfovx = math.tan(view.FoVx * 0.5)
109
+ tanfovy = math.tan(view.FoVy * 0.5)
110
+ focal_length_x = view.image_width / (2 * tanfovx)
111
+ focal_length_y = view.image_height / (2 * tanfovy)
112
+
113
+ K = torch.tensor([[focal_length_x, 0, view.image_width/2],
114
+ [0, focal_length_y, view.image_height/2],
115
+ [0, 0, 1]], device="cuda")
116
+ img_wh = (view.image_width, view.image_height)
117
+
118
+ del temp_scene # Release temporary scene
119
+ del temp_gaussians # Release temporary gaussians
120
+
121
+ # Calculate bounding sphere radius
122
+ rc_train_c2ws = torch.einsum("ij,njk->nik", torch.linalg.inv(avg_w2c), train_c2ws)
123
+ rc_pos = rc_train_c2ws[:, :3, -1]
124
+ rads = (rc_pos.amax(0) - rc_pos.amin(0)) * 1.25
125
+
126
+ num_frames = int(10 * 30 / args.n_views) * (args.n_views-1)
127
+
128
+ # Generate camera poses based on trajectory type
129
+ if args.cam_traj == 'arc':
130
+ w2cs = get_arc_w2cs(
131
+ ref_w2c=avg_w2c,
132
+ lookat=lookat,
133
+ up=up,
134
+ focal_length=K[0, 0].item(),
135
+ rads=rads,
136
+ num_frames=num_frames,
137
+ degree=traj_params.get('degree', 180.0)
138
+ )
139
+ elif args.cam_traj == 'spiral':
140
+ w2cs = get_spiral_w2cs(
141
+ ref_w2c=avg_w2c,
142
+ lookat=lookat,
143
+ up=up,
144
+ focal_length=K[0, 0].item(),
145
+ rads=rads,
146
+ num_frames=num_frames,
147
+ zrate=traj_params.get('zrate', 0.5),
148
+ rots=traj_params.get('rots', 1)
149
+ )
150
+ elif args.cam_traj == 'lemniscate':
151
+ w2cs = get_lemniscate_w2cs(
152
+ ref_w2c=avg_w2c,
153
+ lookat=lookat,
154
+ up=up,
155
+ focal_length=K[0, 0].item(),
156
+ rads=rads,
157
+ num_frames=num_frames,
158
+ degree=traj_params.get('degree', 45.0)
159
+ )
160
+ elif args.cam_traj == 'wander':
161
+ w2cs = get_wander_w2cs(
162
+ ref_w2c=avg_w2c,
163
+ focal_length=K[0, 0].item(),
164
+ num_frames=num_frames,
165
+ max_disp=traj_params.get('max_disp', 48.0)
166
+ )
167
+ else:
168
+ raise ValueError(f"Unknown camera trajectory: {args.cam_traj}")
169
+
170
+ return w2cs.cpu().numpy()
171
+
172
+ def render_sets(dataset: ModelParams, iteration: int, pipeline: PipelineParams, args):
173
+ if args.cam_traj in ['interpolated', 'ellipse']:
174
+ w2cs = globals().get(f'save_{args.cam_traj}_pose')(dataset.model_path, iteration, args.n_views)
175
+ else:
176
+ w2cs = save_traj_pose(dataset, iteration, args)
177
+
178
+ # visualizer(org_pose, ["green" for _ in org_pose], dataset.model_path + f"pose/poses_optimized.png")
179
+ # visualizer(w2cs, ["blue" for _ in w2cs], dataset.model_path + f"pose/poses_{args.cam_traj}.png")
180
+ np.save(dataset.model_path + f"pose/pose_{args.cam_traj}.npy", w2cs)
181
+
182
+ # 2. Load model and scene
183
+ with torch.no_grad():
184
+ gaussians = GaussianModel(dataset.sh_degree)
185
+ scene = Scene(dataset, gaussians, load_iteration=iteration, opt=args, shuffle=False)
186
+
187
+ # bg_color = [1, 1, 1] if dataset.white_background else [0, 0, 0]
188
+ bg_color = [1, 1, 1]
189
+ background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")
190
+
191
+ # 3. Rendering
192
+ # render_path = os.path.join(dataset.model_path, args.cam_traj, f"ours_{iteration}", "renders")
193
+ # if os.path.exists(render_path):
194
+ # shutil.rmtree(render_path)
195
+ # makedirs(render_path, exist_ok=True)
196
+
197
+ video = []
198
+ for idx, w2c in enumerate(tqdm(w2cs, desc="Rendering progress")):
199
+ camera_pose = get_tensor_from_camera(w2c.transpose(0, 1))
200
+ view = scene.getTrainCameras()[0] # Use parameters from the first camera as template
201
+ if args.resize:
202
+ view = resize_render(view)
203
+
204
+ rendering = render_gsplat(
205
+ view, gaussians, pipeline, background, camera_pose=camera_pose
206
+ )["render"]
207
+
208
+ # # Save single frame image
209
+ # torchvision.utils.save_image(
210
+ # rendering, os.path.join(render_path, "{0:05d}".format(idx) + ".png")
211
+ # )
212
+
213
+ # Add to video list
214
+ # img = (rendering.detach().cpu().numpy() * 255.0).astype(np.uint8)
215
+ img = (torch.clamp(rendering, 0, 1).detach().cpu().numpy() * 255.0).round().astype(np.uint8)
216
+ video.append(img)
217
+
218
+ video = np.stack(video, 0).transpose(0, 2, 3, 1)
219
+
220
+ # Save video
221
+ if args.get_video:
222
+ video_dir = os.path.join(dataset.model_path, 'videos')
223
+ os.makedirs(video_dir, exist_ok=True)
224
+ output_video_file = os.path.join(video_dir, f'{args.scene}_{args.n_views}_view_{args.cam_traj}.mp4')
225
+ # iio.imwrite(output_video_file, make_video_divisble(video), fps=30)
226
+ iio.imwrite(
227
+ output_video_file,
228
+ make_video_divisble(video),
229
+ fps=30,
230
+ codec='libx264',
231
+ quality=None,
232
+ output_params=[
233
+ '-crf', '28', # Good quality range between 18-28
234
+ '-preset', 'veryslow',
235
+ '-pix_fmt', 'yuv420p',
236
+ '-movflags', '+faststart'
237
+ ]
238
+ )
239
+
240
+ # if args.get_video:
241
+ # image_folder = os.path.join(dataset.model_path, f'{args.cam_traj}/ours_{args.iteration}/renders')
242
+ # output_video_file = os.path.join(dataset.model_path, f'{args.scene}_{args.n_views}_view_{args.cam_traj}.mp4')
243
+ # images_to_video(image_folder, output_video_file, fps=30)
244
+
245
+
246
+ if __name__ == "__main__":
247
+ # Set up command line argument parser
248
+ parser = ArgumentParser(description="Testing script parameters")
249
+ model = ModelParams(parser, sentinel=True)
250
+ pipeline = PipelineParams(parser)
251
+ parser.add_argument("--iteration", default=-1, type=int)
252
+ parser.add_argument("--quiet", action="store_true")
253
+ parser.add_argument("--get_video", action="store_true")
254
+ parser.add_argument("--n_views", default=120, type=int)
255
+ parser.add_argument("--dataset", default=None, type=str)
256
+ parser.add_argument("--scene", default=None, type=str)
257
+ parser.add_argument("--cam_traj", default='arc', type=str,
258
+ choices=['arc', 'spiral', 'lemniscate', 'wander', 'interpolated', 'ellipse'],
259
+ help="Camera trajectory type")
260
+ parser.add_argument("--resize", action="store_true", default=True,
261
+ help="If True, resize rendering to square")
262
+ parser.add_argument("--feat_type", type=str, nargs='*', default=None,
263
+ help="Feature type(s). Multiple types can be specified for combination.")
264
+ parser.add_argument("--method", type=str, default='dust3r',
265
+ help="Method of Initialization, e.g., 'dust3r' or 'mast3r'")
266
+
267
+ args = get_combined_args(parser)
268
+ print("Rendering " + args.model_path)
269
+
270
+ render_sets(
271
+ model.extract(args),
272
+ args.iteration,
273
+ pipeline.extract(args),
274
+ args,
275
+ )
scene/__init__.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import os
13
+ import random
14
+ import json
15
+ from utils.system_utils import searchForMaxIteration
16
+ from scene.dataset_readers import sceneLoadTypeCallbacks
17
+ from scene.gaussian_model import GaussianModel, Feat2GaussianModel
18
+ from arguments import ModelParams
19
+ from utils.camera_utils import cameraList_from_camInfos, camera_to_JSON
20
+
21
+ class Scene:
22
+
23
+ gaussian : GaussianModel
24
+
25
+ def __init__(self, args : ModelParams, gaussian : GaussianModel, load_iteration=None, opt=None, shuffle=True, resolution_scales=[1.0]):
26
+ """b
27
+ :param path: Path to colmap scene main folder.
28
+ """
29
+ self.model_path = args.model_path
30
+ self.loaded_iter = None
31
+ self.gaussians = gaussian
32
+
33
+ if load_iteration:
34
+ if load_iteration == -1:
35
+ self.loaded_iter = searchForMaxIteration(os.path.join(self.model_path, "point_cloud"))
36
+ else:
37
+ self.loaded_iter = load_iteration
38
+ print("Loading trained model at iteration {}".format(self.loaded_iter))
39
+
40
+ self.train_cameras = {}
41
+ self.test_cameras = {}
42
+ # self.render_cameras = {}
43
+
44
+ if os.path.exists(os.path.join(args.source_path, "sparse")):
45
+ scene_info = sceneLoadTypeCallbacks["Colmap"](args.source_path, args.images, args.eval, args, opt)
46
+ elif os.path.exists(os.path.join(args.source_path, "transforms_train.json")):
47
+ print("Found transforms_train.json file, assuming Blender data set!")
48
+ scene_info = sceneLoadTypeCallbacks["Blender"](args.source_path, args.white_background, args.eval)
49
+ else:
50
+ assert False, "Could not recognize scene type!"
51
+
52
+ if not self.loaded_iter:
53
+ with open(scene_info.ply_path, 'rb') as src_file, open(os.path.join(self.model_path, "input.ply") , 'wb') as dest_file:
54
+ dest_file.write(src_file.read())
55
+ json_cams = []
56
+ camlist = []
57
+ if scene_info.test_cameras:
58
+ camlist.extend(scene_info.test_cameras)
59
+ if scene_info.train_cameras:
60
+ camlist.extend(scene_info.train_cameras)
61
+ # if scene_info.render_cameras:
62
+ # camlist.extend(scene_info.render_cameras)
63
+ for id, cam in enumerate(camlist):
64
+ json_cams.append(camera_to_JSON(id, cam))
65
+ with open(os.path.join(self.model_path, "cameras.json"), 'w') as file:
66
+ json.dump(json_cams, file)
67
+
68
+ if shuffle:
69
+ random.shuffle(scene_info.train_cameras) # Multi-res consistent random shuffling
70
+ random.shuffle(scene_info.test_cameras) # Multi-res consistent random shuffling
71
+
72
+ self.cameras_extent = scene_info.nerf_normalization["radius"]
73
+
74
+ for resolution_scale in resolution_scales:
75
+ print("Loading Training Cameras")
76
+ self.train_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.train_cameras, resolution_scale, args)
77
+ print('train_camera_num: ', len(self.train_cameras[resolution_scale]))
78
+ print("Loading Test Cameras")
79
+ self.test_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.test_cameras, resolution_scale, args)
80
+ print('test_camera_num: ', len(self.test_cameras[resolution_scale]))
81
+ # print("Loading Render Cameras")
82
+ # self.render_cameras[resolution_scale] = cameraList_from_camInfos(scene_info.render_cameras, resolution_scale, args)
83
+ # print('render_camera_num: ', len(self.render_cameras[resolution_scale]))
84
+
85
+ if self.loaded_iter:
86
+ self.gaussians.load_ply(os.path.join(self.model_path,
87
+ "point_cloud",
88
+ "iteration_" + str(self.loaded_iter),
89
+ "point_cloud.ply"))
90
+ else:
91
+ self.gaussians.create_from_pcd(scene_info.point_cloud, self.cameras_extent)
92
+ self.gaussians.init_RT_seq(self.train_cameras)
93
+
94
+ def save(self, iteration):
95
+ point_cloud_path = os.path.join(self.model_path, "point_cloud/iteration_{}".format(iteration))
96
+ self.gaussians.save_ply(os.path.join(point_cloud_path, "point_cloud.ply"))
97
+
98
+ def getTrainCameras(self, scale=1.0):
99
+ return self.train_cameras[scale]
100
+
101
+ def getTestCameras(self, scale=1.0):
102
+ return self.test_cameras[scale]
103
+
104
+ # def getRenderCameras(self, scale=1.0):
105
+ # return self.render_cameras[scale]
scene/cameras.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import torch
13
+ from torch import nn
14
+ import numpy as np
15
+ from utils.graphics_utils import getWorld2View2, getProjectionMatrix
16
+
17
+ class Camera(nn.Module):
18
+ def __init__(self, colmap_id, R, T, FoVx, FoVy, image, gt_alpha_mask,
19
+ image_name, uid,
20
+ trans=np.array([0.0, 0.0, 0.0]), scale=1.0, data_device = "cuda"
21
+ ):
22
+ super(Camera, self).__init__()
23
+
24
+ self.uid = uid
25
+ self.colmap_id = colmap_id
26
+ self.R = R
27
+ self.T = T
28
+ self.FoVx = FoVx
29
+ self.FoVy = FoVy
30
+ self.image_name = image_name
31
+
32
+ try:
33
+ self.data_device = torch.device(data_device)
34
+ except Exception as e:
35
+ print(e)
36
+ print(f"[Warning] Custom device {data_device} failed, fallback to default cuda device" )
37
+ self.data_device = torch.device("cuda")
38
+
39
+ self.original_image = image.clamp(0.0, 1.0).to(self.data_device)
40
+ self.image_width = self.original_image.shape[2]
41
+ self.image_height = self.original_image.shape[1]
42
+
43
+ if gt_alpha_mask is not None:
44
+ self.original_image *= gt_alpha_mask.to(self.data_device)
45
+ else:
46
+ self.original_image *= torch.ones((1, self.image_height, self.image_width), device=self.data_device)
47
+
48
+ self.zfar = 100.0
49
+ self.znear = 0.01
50
+
51
+ self.trans = trans
52
+ self.scale = scale
53
+
54
+ self.world_view_transform = torch.tensor(getWorld2View2(R, T, trans, scale)).transpose(0, 1).cuda()
55
+ self.projection_matrix = getProjectionMatrix(znear=self.znear, zfar=self.zfar, fovX=self.FoVx, fovY=self.FoVy).transpose(0,1).cuda()
56
+ self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0)
57
+ self.camera_center = self.world_view_transform.inverse()[3, :3]
58
+
59
+ class MiniCam:
60
+ def __init__(self, width, height, fovy, fovx, znear, zfar, world_view_transform, full_proj_transform):
61
+ self.image_width = width
62
+ self.image_height = height
63
+ self.FoVy = fovy
64
+ self.FoVx = fovx
65
+ self.znear = znear
66
+ self.zfar = zfar
67
+ self.world_view_transform = world_view_transform
68
+ self.full_proj_transform = full_proj_transform
69
+ view_inv = torch.inverse(self.world_view_transform)
70
+ self.camera_center = view_inv[3][:3]
71
+
scene/colmap_loader.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import numpy as np
13
+ import collections
14
+ import struct
15
+
16
+ CameraModel = collections.namedtuple(
17
+ "CameraModel", ["model_id", "model_name", "num_params"])
18
+ Camera = collections.namedtuple(
19
+ "Camera", ["id", "model", "width", "height", "params"])
20
+ BaseImage = collections.namedtuple(
21
+ "Image", ["id", "qvec", "tvec", "camera_id", "name", "xys", "point3D_ids"])
22
+ Point3D = collections.namedtuple(
23
+ "Point3D", ["id", "xyz", "rgb", "error", "image_ids", "point2D_idxs"])
24
+ CAMERA_MODELS = {
25
+ CameraModel(model_id=0, model_name="SIMPLE_PINHOLE", num_params=3),
26
+ CameraModel(model_id=1, model_name="PINHOLE", num_params=4),
27
+ CameraModel(model_id=2, model_name="SIMPLE_RADIAL", num_params=4),
28
+ CameraModel(model_id=3, model_name="RADIAL", num_params=5),
29
+ CameraModel(model_id=4, model_name="OPENCV", num_params=8),
30
+ CameraModel(model_id=5, model_name="OPENCV_FISHEYE", num_params=8),
31
+ CameraModel(model_id=6, model_name="FULL_OPENCV", num_params=12),
32
+ CameraModel(model_id=7, model_name="FOV", num_params=5),
33
+ CameraModel(model_id=8, model_name="SIMPLE_RADIAL_FISHEYE", num_params=4),
34
+ CameraModel(model_id=9, model_name="RADIAL_FISHEYE", num_params=5),
35
+ CameraModel(model_id=10, model_name="THIN_PRISM_FISHEYE", num_params=12)
36
+ }
37
+ CAMERA_MODEL_IDS = dict([(camera_model.model_id, camera_model)
38
+ for camera_model in CAMERA_MODELS])
39
+ CAMERA_MODEL_NAMES = dict([(camera_model.model_name, camera_model)
40
+ for camera_model in CAMERA_MODELS])
41
+
42
+
43
+ def qvec2rotmat(qvec):
44
+ return np.array([
45
+ [1 - 2 * qvec[2]**2 - 2 * qvec[3]**2,
46
+ 2 * qvec[1] * qvec[2] - 2 * qvec[0] * qvec[3],
47
+ 2 * qvec[3] * qvec[1] + 2 * qvec[0] * qvec[2]],
48
+ [2 * qvec[1] * qvec[2] + 2 * qvec[0] * qvec[3],
49
+ 1 - 2 * qvec[1]**2 - 2 * qvec[3]**2,
50
+ 2 * qvec[2] * qvec[3] - 2 * qvec[0] * qvec[1]],
51
+ [2 * qvec[3] * qvec[1] - 2 * qvec[0] * qvec[2],
52
+ 2 * qvec[2] * qvec[3] + 2 * qvec[0] * qvec[1],
53
+ 1 - 2 * qvec[1]**2 - 2 * qvec[2]**2]])
54
+
55
+ def rotmat2qvec(R):
56
+ Rxx, Ryx, Rzx, Rxy, Ryy, Rzy, Rxz, Ryz, Rzz = R.flat
57
+ K = np.array([
58
+ [Rxx - Ryy - Rzz, 0, 0, 0],
59
+ [Ryx + Rxy, Ryy - Rxx - Rzz, 0, 0],
60
+ [Rzx + Rxz, Rzy + Ryz, Rzz - Rxx - Ryy, 0],
61
+ [Ryz - Rzy, Rzx - Rxz, Rxy - Ryx, Rxx + Ryy + Rzz]]) / 3.0
62
+ eigvals, eigvecs = np.linalg.eigh(K)
63
+ qvec = eigvecs[[3, 0, 1, 2], np.argmax(eigvals)]
64
+ if qvec[0] < 0:
65
+ qvec *= -1
66
+ return qvec
67
+
68
+ class Image(BaseImage):
69
+ def qvec2rotmat(self):
70
+ return qvec2rotmat(self.qvec)
71
+
72
+ def read_next_bytes(fid, num_bytes, format_char_sequence, endian_character="<"):
73
+ """Read and unpack the next bytes from a binary file.
74
+ :param fid:
75
+ :param num_bytes: Sum of combination of {2, 4, 8}, e.g. 2, 6, 16, 30, etc.
76
+ :param format_char_sequence: List of {c, e, f, d, h, H, i, I, l, L, q, Q}.
77
+ :param endian_character: Any of {@, =, <, >, !}
78
+ :return: Tuple of read and unpacked values.
79
+ """
80
+ data = fid.read(num_bytes)
81
+ return struct.unpack(endian_character + format_char_sequence, data)
82
+
83
+ def read_points3D_text(path):
84
+ """
85
+ see: src/base/reconstruction.cc
86
+ void Reconstruction::ReadPoints3DText(const std::string& path)
87
+ void Reconstruction::WritePoints3DText(const std::string& path)
88
+ """
89
+ xyzs = None
90
+ rgbs = None
91
+ errors = None
92
+ num_points = 0
93
+ with open(path, "r") as fid:
94
+ while True:
95
+ line = fid.readline()
96
+ if not line:
97
+ break
98
+ line = line.strip()
99
+ if len(line) > 0 and line[0] != "#":
100
+ num_points += 1
101
+
102
+
103
+ xyzs = np.empty((num_points, 3))
104
+ rgbs = np.empty((num_points, 3))
105
+ errors = np.empty((num_points, 1))
106
+ count = 0
107
+ with open(path, "r") as fid:
108
+ while True:
109
+ line = fid.readline()
110
+ if not line:
111
+ break
112
+ line = line.strip()
113
+ if len(line) > 0 and line[0] != "#":
114
+ elems = line.split()
115
+ xyz = np.array(tuple(map(float, elems[1:4])))
116
+ rgb = np.array(tuple(map(int, elems[4:7])))
117
+ error = np.array(float(elems[7]))
118
+ xyzs[count] = xyz
119
+ rgbs[count] = rgb
120
+ errors[count] = error
121
+ count += 1
122
+
123
+ return xyzs, rgbs, errors
124
+
125
+ def read_points3D_binary(path_to_model_file):
126
+ """
127
+ see: src/base/reconstruction.cc
128
+ void Reconstruction::ReadPoints3DBinary(const std::string& path)
129
+ void Reconstruction::WritePoints3DBinary(const std::string& path)
130
+ """
131
+
132
+
133
+ with open(path_to_model_file, "rb") as fid:
134
+ num_points = read_next_bytes(fid, 8, "Q")[0]
135
+
136
+ xyzs = np.empty((num_points, 3))
137
+ rgbs = np.empty((num_points, 3))
138
+ errors = np.empty((num_points, 1))
139
+
140
+ for p_id in range(num_points):
141
+ binary_point_line_properties = read_next_bytes(
142
+ fid, num_bytes=43, format_char_sequence="QdddBBBd")
143
+ xyz = np.array(binary_point_line_properties[1:4])
144
+ rgb = np.array(binary_point_line_properties[4:7])
145
+ error = np.array(binary_point_line_properties[7])
146
+ track_length = read_next_bytes(
147
+ fid, num_bytes=8, format_char_sequence="Q")[0]
148
+ track_elems = read_next_bytes(
149
+ fid, num_bytes=8*track_length,
150
+ format_char_sequence="ii"*track_length)
151
+ xyzs[p_id] = xyz
152
+ rgbs[p_id] = rgb
153
+ errors[p_id] = error
154
+ return xyzs, rgbs, errors
155
+
156
+ def read_intrinsics_text(path):
157
+ """
158
+ Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_write_model.py
159
+ """
160
+ cameras = {}
161
+ with open(path, "r") as fid:
162
+ while True:
163
+ line = fid.readline()
164
+ if not line:
165
+ break
166
+ line = line.strip()
167
+ if len(line) > 0 and line[0] != "#":
168
+ elems = line.split()
169
+ camera_id = int(elems[0])
170
+ model = elems[1]
171
+ assert model == "PINHOLE", "While the loader support other types, the rest of the code assumes PINHOLE"
172
+ width = int(elems[2])
173
+ height = int(elems[3])
174
+ params = np.array(tuple(map(float, elems[4:])))
175
+ cameras[camera_id] = Camera(id=camera_id, model=model,
176
+ width=width, height=height,
177
+ params=params)
178
+ return cameras
179
+
180
+ def read_extrinsics_binary(path_to_model_file):
181
+ """
182
+ see: src/base/reconstruction.cc
183
+ void Reconstruction::ReadImagesBinary(const std::string& path)
184
+ void Reconstruction::WriteImagesBinary(const std::string& path)
185
+ """
186
+ images = {}
187
+ with open(path_to_model_file, "rb") as fid:
188
+ num_reg_images = read_next_bytes(fid, 8, "Q")[0]
189
+ for _ in range(num_reg_images):
190
+ binary_image_properties = read_next_bytes(
191
+ fid, num_bytes=64, format_char_sequence="idddddddi")
192
+ image_id = binary_image_properties[0]
193
+ qvec = np.array(binary_image_properties[1:5])
194
+ tvec = np.array(binary_image_properties[5:8])
195
+ camera_id = binary_image_properties[8]
196
+ image_name = ""
197
+ current_char = read_next_bytes(fid, 1, "c")[0]
198
+ while current_char != b"\x00": # look for the ASCII 0 entry
199
+ image_name += current_char.decode("utf-8")
200
+ current_char = read_next_bytes(fid, 1, "c")[0]
201
+ num_points2D = read_next_bytes(fid, num_bytes=8,
202
+ format_char_sequence="Q")[0]
203
+ x_y_id_s = read_next_bytes(fid, num_bytes=24*num_points2D,
204
+ format_char_sequence="ddq"*num_points2D)
205
+ xys = np.column_stack([tuple(map(float, x_y_id_s[0::3])),
206
+ tuple(map(float, x_y_id_s[1::3]))])
207
+ point3D_ids = np.array(tuple(map(int, x_y_id_s[2::3])))
208
+ images[image_id] = Image(
209
+ id=image_id, qvec=qvec, tvec=tvec,
210
+ camera_id=camera_id, name=image_name,
211
+ xys=xys, point3D_ids=point3D_ids)
212
+ return images
213
+
214
+
215
+ def read_intrinsics_binary(path_to_model_file):
216
+ """
217
+ see: src/base/reconstruction.cc
218
+ void Reconstruction::WriteCamerasBinary(const std::string& path)
219
+ void Reconstruction::ReadCamerasBinary(const std::string& path)
220
+ """
221
+ cameras = {}
222
+ with open(path_to_model_file, "rb") as fid:
223
+ num_cameras = read_next_bytes(fid, 8, "Q")[0]
224
+ for _ in range(num_cameras):
225
+ camera_properties = read_next_bytes(
226
+ fid, num_bytes=24, format_char_sequence="iiQQ")
227
+ camera_id = camera_properties[0]
228
+ model_id = camera_properties[1]
229
+ model_name = CAMERA_MODEL_IDS[camera_properties[1]].model_name
230
+ width = camera_properties[2]
231
+ height = camera_properties[3]
232
+ num_params = CAMERA_MODEL_IDS[model_id].num_params
233
+ params = read_next_bytes(fid, num_bytes=8*num_params,
234
+ format_char_sequence="d"*num_params)
235
+ cameras[camera_id] = Camera(id=camera_id,
236
+ model=model_name,
237
+ width=width,
238
+ height=height,
239
+ params=np.array(params))
240
+ assert len(cameras) == num_cameras
241
+ return cameras
242
+
243
+
244
+ def read_extrinsics_text(path):
245
+ """
246
+ Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_write_model.py
247
+ """
248
+ images = {}
249
+ with open(path, "r") as fid:
250
+ while True:
251
+ line = fid.readline()
252
+ if not line:
253
+ break
254
+ line = line.strip()
255
+ if len(line) > 0 and line[0] != "#":
256
+ elems = line.split()
257
+ image_id = int(elems[0])
258
+ qvec = np.array(tuple(map(float, elems[1:5])))
259
+ tvec = np.array(tuple(map(float, elems[5:8])))
260
+ camera_id = int(elems[8])
261
+ image_name = elems[9]
262
+ elems = fid.readline().split()
263
+ xys = np.column_stack([tuple(map(float, elems[0::3])),
264
+ tuple(map(float, elems[1::3]))])
265
+ point3D_ids = np.array(tuple(map(int, elems[2::3])))
266
+ images[image_id] = Image(
267
+ id=image_id, qvec=qvec, tvec=tvec,
268
+ camera_id=camera_id, name=image_name,
269
+ xys=xys, point3D_ids=point3D_ids)
270
+ return images
271
+
272
+
273
+ def read_colmap_bin_array(path):
274
+ """
275
+ Taken from https://github.com/colmap/colmap/blob/dev/scripts/python/read_dense.py
276
+
277
+ :param path: path to the colmap binary file.
278
+ :return: nd array with the floating point values in the value
279
+ """
280
+ with open(path, "rb") as fid:
281
+ width, height, channels = np.genfromtxt(fid, delimiter="&", max_rows=1,
282
+ usecols=(0, 1, 2), dtype=int)
283
+ fid.seek(0)
284
+ num_delimiter = 0
285
+ byte = fid.read(1)
286
+ while True:
287
+ if byte == b"&":
288
+ num_delimiter += 1
289
+ if num_delimiter >= 3:
290
+ break
291
+ byte = fid.read(1)
292
+ array = np.fromfile(fid, np.float32)
293
+ array = array.reshape((width, height, channels), order="F")
294
+ return np.transpose(array, (1, 0, 2)).squeeze()
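The loaders above mirror COLMAP's read_write_model.py / read_dense.py scripts: each binary image record is a fixed-size header (image id, quaternion, translation, camera id) followed by a null-terminated name and its 2D keypoints, which is exactly what the byte-level loop parses. A minimal usage sketch (illustrative only, not a file in this commit), assuming the usual sparse/0 layout and the loaders defined in scene/colmap_loader.py:

import os
import numpy as np
from scene.colmap_loader import (
    read_extrinsics_binary, read_intrinsics_binary, qvec2rotmat,
)

def load_sparse_model(model_dir):
    # Hypothetical helper: return (images, cameras) dicts keyed by COLMAP ids.
    images = read_extrinsics_binary(os.path.join(model_dir, "images.bin"))
    cameras = read_intrinsics_binary(os.path.join(model_dir, "cameras.bin"))
    return images, cameras

images, cameras = load_sparse_model("data/scene/sparse/0")   # assumed path
print(f"{len(images)} registered images, {len(cameras)} cameras")
first = next(iter(images.values()))
R = qvec2rotmat(first.qvec)            # 3x3 rotation recovered from the stored quaternion
print("det(R) =", np.linalg.det(R))    # ~1.0 for a valid rotation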
scene/dataset_readers.py ADDED
@@ -0,0 +1,382 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import os
13
+ import sys
14
+ from PIL import Image
15
+ from typing import NamedTuple
16
+ from scene.colmap_loader import read_extrinsics_text, read_intrinsics_text, qvec2rotmat, \
17
+ read_extrinsics_binary, read_intrinsics_binary, read_points3D_binary, read_points3D_text
18
+ from utils.graphics_utils import getWorld2View2, focal2fov, fov2focal
19
+ import numpy as np
20
+ import json
21
+ from pathlib import Path
22
+ from plyfile import PlyData, PlyElement
23
+ from utils.sh_utils import SH2RGB
24
+ from scene.gaussian_model import BasicPointCloud
25
+ # from utils.camera_utils import generate_ellipse_path_from_camera_infos
26
+
27
+ class CameraInfo(NamedTuple):
28
+ uid: int
29
+ R: np.array
30
+ T: np.array
31
+ FovY: np.array
32
+ FovX: np.array
33
+ image: np.array
34
+ image_path: str
35
+ image_name: str
36
+ width: int
37
+ height: int
38
+
39
+
40
+ class SceneInfo(NamedTuple):
41
+ point_cloud: BasicPointCloud
42
+ train_cameras: list
43
+ test_cameras: list
44
+ # render_cameras: list
45
+ nerf_normalization: dict
46
+ ply_path: str
47
+ train_poses: list
48
+ test_poses: list
49
+
50
+ def getNerfppNorm(cam_info):
51
+ def get_center_and_diag(cam_centers):
52
+ cam_centers = np.hstack(cam_centers)
53
+ avg_cam_center = np.mean(cam_centers, axis=1, keepdims=True)
54
+ center = avg_cam_center
55
+ dist = np.linalg.norm(cam_centers - center, axis=0, keepdims=True)
56
+ diagonal = np.max(dist)
57
+ return center.flatten(), diagonal
58
+
59
+ cam_centers = []
60
+
61
+ for cam in cam_info:
62
+ W2C = getWorld2View2(cam.R, cam.T)
63
+ C2W = np.linalg.inv(W2C)
64
+ cam_centers.append(C2W[:3, 3:4])
65
+
66
+ center, diagonal = get_center_and_diag(cam_centers)
67
+ radius = diagonal * 1.1
68
+
69
+ translate = -center
70
+
71
+ return {"translate": translate, "radius": radius}
72
+
73
+ def readColmapCameras(cam_extrinsics, cam_intrinsics, images_folder, eval):
74
+
75
+ cam_infos = []
76
+ poses=[]
77
+ for idx, key in enumerate(cam_extrinsics):
78
+ sys.stdout.write('\r')
79
+ # overwrite the same console line with reading progress:
80
+ sys.stdout.write("Reading camera {}/{}".format(idx+1, len(cam_extrinsics)))
81
+ sys.stdout.flush()
82
+
83
+ if eval:
84
+ extr = cam_extrinsics[key]
85
+ intr = cam_intrinsics[1]
86
+ uid = idx+1
87
+
88
+ else:
89
+ extr = cam_extrinsics[key]
90
+ intr = cam_intrinsics[extr.camera_id]
91
+ uid = intr.id
92
+
93
+ height = intr.height
94
+ width = intr.width
95
+ R = np.transpose(qvec2rotmat(extr.qvec))
96
+ T = np.array(extr.tvec)
97
+ pose = np.vstack((np.hstack((R, T.reshape(3,-1))),np.array([[0, 0, 0, 1]])))
98
+ poses.append(pose)
99
+ if intr.model=="SIMPLE_PINHOLE":
100
+ focal_length_x = intr.params[0]
101
+ FovY = focal2fov(focal_length_x, height)
102
+ FovX = focal2fov(focal_length_x, width)
103
+ elif intr.model=="PINHOLE":
104
+ focal_length_x = intr.params[0]
105
+ focal_length_y = intr.params[1]
106
+ FovY = focal2fov(focal_length_y, height)
107
+ FovX = focal2fov(focal_length_x, width)
108
+ else:
109
+ assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!"
110
+
111
+
112
+ if eval:
113
+ tmp = os.path.dirname(os.path.dirname(os.path.join(images_folder)))
114
+ all_images_folder = os.path.join(tmp, 'images')
115
+ image_path = os.path.join(all_images_folder, os.path.basename(extr.name))
116
+ else:
117
+ image_path = os.path.join(images_folder, os.path.basename(extr.name))
118
+ image_name = os.path.basename(image_path).split(".")[0]
119
+ image = Image.open(image_path)
120
+
121
+
122
+ cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
123
+ image_path=image_path, image_name=image_name, width=width, height=height)
124
+
125
+ cam_infos.append(cam_info)
126
+ sys.stdout.write('\n')
127
+ return cam_infos, poses
128
+
129
+ # For interpolated video: used only when rendering an interpolated camera trajectory
130
+ def readColmapCamerasInterp(cam_extrinsics, cam_intrinsics, images_folder, model_path, cam_traj):
131
+
132
+ # pose_interpolated_path = model_path + 'pose/pose_interpolated.npy'
133
+ pose_interpolated_path = model_path + f'pose/pose_{cam_traj}.npy'
134
+ pose_interpolated = np.load(pose_interpolated_path)
135
+ intr = cam_intrinsics[1]
136
+
137
+ cam_infos = []
138
+ poses=[]
139
+ for idx, pose_npy in enumerate(pose_interpolated):
140
+ sys.stdout.write('\r')
141
+ sys.stdout.write("Reading camera {}/{}".format(idx+1, pose_interpolated.shape[0]))
142
+ sys.stdout.flush()
143
+
144
+ extr = pose_npy
145
+ intr = intr
146
+ height = intr.height
147
+ width = intr.width
148
+
149
+ uid = idx
150
+ R = extr[:3, :3].transpose()
151
+ T = extr[:3, 3]
152
+ pose = np.vstack((np.hstack((R, T.reshape(3,-1))),np.array([[0, 0, 0, 1]])))
153
+ # print(uid)
154
+ # print(pose.shape)
155
+ # pose = np.linalg.inv(pose)
156
+ poses.append(pose)
157
+ if intr.model=="SIMPLE_PINHOLE":
158
+ focal_length_x = intr.params[0]
159
+ FovY = focal2fov(focal_length_x, height)
160
+ FovX = focal2fov(focal_length_x, width)
161
+ elif intr.model=="PINHOLE":
162
+ focal_length_x = intr.params[0]
163
+ focal_length_y = intr.params[1]
164
+ FovY = focal2fov(focal_length_y, height)
165
+ FovX = focal2fov(focal_length_x, width)
166
+ else:
167
+ assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!"
168
+
169
+ images_list = os.listdir(os.path.join(images_folder))
170
+ image_name_0 = images_list[0]
171
+ image_name = str(idx).zfill(4)
172
+ image = Image.open(images_folder + '/' + image_name_0)
173
+
174
+ cam_info = CameraInfo(uid=uid, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
175
+ image_path=images_folder, image_name=image_name, width=width, height=height)
176
+ cam_infos.append(cam_info)
177
+
178
+ sys.stdout.write('\n')
179
+ return cam_infos, poses
180
+
181
+
182
+ def fetchPly(path):
183
+ plydata = PlyData.read(path)
184
+ vertices = plydata['vertex']
185
+ positions = np.vstack([vertices['x'], vertices['y'], vertices['z']]).T
186
+ colors = np.vstack([vertices['red'], vertices['green'], vertices['blue']]).T / 255.0
187
+ normals = np.vstack([vertices['nx'], vertices['ny'], vertices['nz']]).T
188
+ features = None
189
+ feat_keys = [key for key in vertices.data.dtype.names if key.startswith('feat_')]
190
+ if feat_keys:
191
+ features = np.vstack([vertices[key] for key in feat_keys]).T
192
+ return BasicPointCloud(points=positions, colors=colors, normals=normals, features=features)
193
+
194
+ def storePly(path, xyz, rgb):
195
+ # Define the dtype for the structured array
196
+ dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
197
+ ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'),
198
+ ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')]
199
+
200
+ normals = np.zeros_like(xyz)
201
+
202
+ elements = np.empty(xyz.shape[0], dtype=dtype)
203
+ attributes = np.concatenate((xyz, normals, rgb), axis=1)
204
+ elements[:] = list(map(tuple, attributes))
205
+
206
+ # Create the PlyData object and write to file
207
+ vertex_element = PlyElement.describe(elements, 'vertex')
208
+ ply_data = PlyData([vertex_element])
209
+ ply_data.write(path)
210
+
211
+ def readColmapSceneInfo(path, images, eval, args, opt, llffhold=2):
212
+ # try:
213
+ # cameras_extrinsic_file = os.path.join(path, "sparse/0", "images.bin")
214
+ # cameras_intrinsic_file = os.path.join(path, "sparse/0", "cameras.bin")
215
+ # cam_extrinsics = read_extrinsics_binary(cameras_extrinsic_file)
216
+ # cam_intrinsics = read_intrinsics_binary(cameras_intrinsic_file)
217
+ # except:
218
+
219
+ ##### For initializing test pose using PCD_Registration
220
+ if eval and opt.get_video==False:
221
+ print("Loading initial test pose for evaluation.")
222
+ cameras_extrinsic_file = os.path.join(path, f"test_view/sparse/0/{opt.method}", "images.txt")
223
+ else:
224
+ cameras_extrinsic_file = os.path.join(path, f"sparse/0/{opt.method}", "images.txt")
225
+
226
+ cameras_intrinsic_file = os.path.join(path, f"sparse/0/{opt.method}", "cameras.txt")
227
+ if hasattr(opt, 'feat_type') and opt.feat_type is not None:
228
+ feat_type_str = '-'.join(opt.feat_type)
229
+ if "test_view" not in cameras_extrinsic_file:
230
+ cameras_extrinsic_file = cameras_extrinsic_file.replace("images.txt", f"{feat_type_str}/images.txt")
231
+ cameras_intrinsic_file = cameras_intrinsic_file.replace("cameras.txt", f"{feat_type_str}/cameras.txt")
232
+ cam_extrinsics = read_extrinsics_text(cameras_extrinsic_file)
233
+ cam_intrinsics = read_intrinsics_text(cameras_intrinsic_file)
234
+
235
+ reading_dir = "images" if images == None else images
236
+
237
+ if opt.get_video:
238
+ cam_infos_unsorted, poses = readColmapCamerasInterp(cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics,
239
+ images_folder=os.path.join(path, reading_dir),
240
+ model_path=args.model_path, cam_traj=opt.cam_traj)
241
+ else:
242
+ cam_infos_unsorted, poses = readColmapCameras(cam_extrinsics=cam_extrinsics, cam_intrinsics=cam_intrinsics, images_folder=os.path.join(path, reading_dir), eval=eval)
243
+ sorting_indices = sorted(range(len(cam_infos_unsorted)), key=lambda x: cam_infos_unsorted[x].image_name)
244
+ cam_infos = [cam_infos_unsorted[i] for i in sorting_indices]
245
+ sorted_poses = [poses[i] for i in sorting_indices]
246
+ cam_infos = sorted(cam_infos_unsorted.copy(), key = lambda x : x.image_name)
247
+
248
+ if eval:
249
+ # train_cam_infos = [c for idx, c in enumerate(cam_infos) if (idx+1) % llffhold != 0]
250
+ # test_cam_infos = [c for idx, c in enumerate(cam_infos) if (idx+1) % llffhold == 0]
251
+ # train_poses = [c for idx, c in enumerate(sorted_poses) if (idx+1) % llffhold != 0]
252
+ # test_poses = [c for idx, c in enumerate(sorted_poses) if (idx+1) % llffhold == 0]
253
+
254
+ train_cam_infos = cam_infos
255
+ test_cam_infos = cam_infos
256
+ train_poses = sorted_poses
257
+ test_poses = sorted_poses
258
+
259
+ else:
260
+ train_cam_infos = cam_infos
261
+ test_cam_infos = []
262
+ train_poses = sorted_poses
263
+ test_poses = []
264
+
265
+ # render_cam_infos = generate_ellipse_path_from_camera_infos(cam_infos)
266
+
267
+ nerf_normalization = getNerfppNorm(train_cam_infos)
268
+
269
+ ply_path = os.path.join(path, f"sparse/0/{opt.method}/points3D.ply")
270
+ if hasattr(opt, 'feat_type') and opt.feat_type is not None:
271
+ ply_path = ply_path.replace("points3D.ply", f"{feat_type_str}/points3D.ply")
272
+ bin_path = os.path.join(path, "sparse/0/points3D.bin")
273
+ txt_path = os.path.join(path, "sparse/0/points3D.txt")
274
+ if not os.path.exists(ply_path):
275
+ print("Converting point3d.bin to .ply, will happen only the first time you open the scene.")
276
+ try:
277
+ xyz, rgb, _ = read_points3D_binary(bin_path)
278
+ except:
279
+ xyz, rgb, _ = read_points3D_text(txt_path)
280
+ storePly(ply_path, xyz, rgb)
281
+ try:
282
+ pcd = fetchPly(ply_path)
283
+ except:
284
+ pcd = None
285
+
286
+ # np.save("poses_family.npy", sorted_poses)
287
+ # breakpoint()
288
+ # np.save("3dpoints.npy", pcd.points)
289
+ # np.save("3dcolors.npy", pcd.colors)
290
+
291
+ scene_info = SceneInfo(point_cloud=pcd,
292
+ train_cameras=train_cam_infos,
293
+ test_cameras=test_cam_infos,
294
+ # render_cameras=render_cam_infos,
295
+ nerf_normalization=nerf_normalization,
296
+ ply_path=ply_path,
297
+ train_poses=train_poses,
298
+ test_poses=test_poses)
299
+ return scene_info
300
+
301
+ def readCamerasFromTransforms(path, transformsfile, white_background, extension=".png"):
302
+ cam_infos = []
303
+
304
+ with open(os.path.join(path, transformsfile)) as json_file:
305
+ contents = json.load(json_file)
306
+ fovx = contents["camera_angle_x"]
307
+
308
+ frames = contents["frames"]
309
+ for idx, frame in enumerate(frames):
310
+ cam_name = os.path.join(path, frame["file_path"] + extension)
311
+
312
+ # NeRF 'transform_matrix' is a camera-to-world transform
313
+ c2w = np.array(frame["transform_matrix"])
314
+ # change from OpenGL/Blender camera axes (Y up, Z back) to COLMAP (Y down, Z forward)
315
+ c2w[:3, 1:3] *= -1
316
+
317
+ # get the world-to-camera transform and set R, T
318
+ w2c = np.linalg.inv(c2w)
319
+ R = np.transpose(w2c[:3,:3]) # R is stored transposed due to 'glm' in CUDA code
320
+ T = w2c[:3, 3]
321
+
322
+ image_path = os.path.join(path, cam_name)
323
+ image_name = Path(cam_name).stem
324
+ image = Image.open(image_path)
325
+
326
+ im_data = np.array(image.convert("RGBA"))
327
+
328
+ bg = np.array([1,1,1]) if white_background else np.array([0, 0, 0])
329
+
330
+ norm_data = im_data / 255.0
331
+ arr = norm_data[:,:,:3] * norm_data[:, :, 3:4] + bg * (1 - norm_data[:, :, 3:4])
332
+ image = Image.fromarray(np.array(arr*255.0, dtype=np.byte), "RGB")
333
+
334
+ fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1])
335
+ FovY = fovy
336
+ FovX = fovx
337
+
338
+ cam_infos.append(CameraInfo(uid=idx, R=R, T=T, FovY=FovY, FovX=FovX, image=image,
339
+ image_path=image_path, image_name=image_name, width=image.size[0], height=image.size[1]))
340
+
341
+ return cam_infos
342
+
343
+ def readNerfSyntheticInfo(path, white_background, eval, extension=".png"):
344
+ print("Reading Training Transforms")
345
+ train_cam_infos = readCamerasFromTransforms(path, "transforms_train.json", white_background, extension)
346
+ print("Reading Test Transforms")
347
+ test_cam_infos = readCamerasFromTransforms(path, "transforms_test.json", white_background, extension)
348
+
349
+ if not eval:
350
+ train_cam_infos.extend(test_cam_infos)
351
+ test_cam_infos = []
352
+
353
+ nerf_normalization = getNerfppNorm(train_cam_infos)
354
+
355
+ ply_path = os.path.join(path, "points3d.ply")
356
+ if not os.path.exists(ply_path):
357
+ # Since this data set has no colmap data, we start with random points
358
+ num_pts = 100_000
359
+ print(f"Generating random point cloud ({num_pts})...")
360
+
361
+ # We create random points inside the bounds of the synthetic Blender scenes
362
+ xyz = np.random.random((num_pts, 3)) * 2.6 - 1.3
363
+ shs = np.random.random((num_pts, 3)) / 255.0
364
+ pcd = BasicPointCloud(points=xyz, colors=SH2RGB(shs), normals=np.zeros((num_pts, 3)))
365
+
366
+ storePly(ply_path, xyz, SH2RGB(shs) * 255)
367
+ try:
368
+ pcd = fetchPly(ply_path)
369
+ except:
370
+ pcd = None
371
+
372
+ scene_info = SceneInfo(point_cloud=pcd,
373
+ train_cameras=train_cam_infos,
374
+ test_cameras=test_cam_infos,
375
+ nerf_normalization=nerf_normalization,
376
+ ply_path=ply_path,
+ train_poses=[], test_poses=[])  # SceneInfo defines these fields without defaults; no COLMAP poses exist for the synthetic scenes
377
+ return scene_info
378
+
379
+ sceneLoadTypeCallbacks = {
380
+ "Colmap": readColmapSceneInfo,
381
+ "Blender" : readNerfSyntheticInfo
382
+ }
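sceneLoadTypeCallbacks is a plain dispatch table. The caller (scene/__init__.py, added elsewhere in this commit and not shown here) is assumed to pick the loader from what it finds on disk; a rough sketch of that dispatch, with a hypothetical wrapper name and assuming the parsed ModelParams expose white_background:

import os
from scene.dataset_readers import sceneLoadTypeCallbacks

def load_scene_info(source_path, images, eval_mode, args, opt):
    # COLMAP-style scenes carry a sparse/ directory; Blender/NeRF-synthetic
    # scenes carry transforms_train.json.
    if os.path.exists(os.path.join(source_path, "sparse")):
        return sceneLoadTypeCallbacks["Colmap"](source_path, images, eval_mode, args, opt)
    if os.path.exists(os.path.join(source_path, "transforms_train.json")):
        return sceneLoadTypeCallbacks["Blender"](source_path, args.white_background, eval_mode)
    raise ValueError(f"Could not recognise scene type in {source_path}")

Unlike the stock 3DGS loader, readColmapSceneInfo reads text poses and intrinsics from sparse/0/<opt.method>/ and, when opt.feat_type is set, from a per-feature-type subfolder, so the dispatcher only has to hand over args and opt.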
scene/gaussian_model.py ADDED
@@ -0,0 +1,830 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import torch
13
+ # from lietorch import SO3, SE3, Sim3, LieGroupParameter
14
+ import numpy as np
15
+ from utils.general_utils import inverse_sigmoid, get_expon_lr_func, build_rotation
16
+ from torch import nn
17
+ import os
18
+ from utils.system_utils import mkdir_p
19
+ from plyfile import PlyData, PlyElement
20
+ from utils.sh_utils import RGB2SH
21
+ from simple_knn._C import distCUDA2
22
+ from utils.graphics_utils import BasicPointCloud
23
+ from utils.general_utils import strip_symmetric, build_scaling_rotation
24
+ from scipy.spatial.transform import Rotation as R
25
+ from utils.pose_utils import rotation2quad, get_tensor_from_camera
26
+ from utils.graphics_utils import getWorld2View2
27
+
28
+ import torch.nn.functional as F
29
+
30
+ def quaternion_to_rotation_matrix(quaternion):
31
+ """
32
+ Convert a quaternion to a rotation matrix.
33
+
34
+ Parameters:
35
+ - quaternion: A tensor of shape (..., 4) representing quaternions.
36
+
37
+ Returns:
38
+ - A tensor of shape (..., 3, 3) representing rotation matrices.
39
+ """
40
+ # Ensure quaternion is of float type for computation
41
+ quaternion = quaternion.float()
42
+
43
+ # Normalize the quaternion to unit length
44
+ quaternion = quaternion / quaternion.norm(p=2, dim=-1, keepdim=True)
45
+
46
+ # Extract components
47
+ w, x, y, z = quaternion[..., 0], quaternion[..., 1], quaternion[..., 2], quaternion[..., 3]
48
+
49
+ # Compute rotation matrix components
50
+ xx, yy, zz = x * x, y * y, z * z
51
+ xy, xz, yz = x * y, x * z, y * z
52
+ xw, yw, zw = x * w, y * w, z * w
53
+
54
+ # Assemble the rotation matrix
55
+ R = torch.stack([
56
+ torch.stack([1 - 2 * (yy + zz), 2 * (xy - zw), 2 * (xz + yw)], dim=-1),
57
+ torch.stack([ 2 * (xy + zw), 1 - 2 * (xx + zz), 2 * (yz - xw)], dim=-1),
58
+ torch.stack([ 2 * (xz - yw), 2 * (yz + xw), 1 - 2 * (xx + yy)], dim=-1)
59
+ ], dim=-2)
60
+
61
+ return R
62
+
63
+
64
+ class GaussianModel:
65
+
66
+ def setup_functions(self):
67
+ def build_covariance_from_scaling_rotation(scaling, scaling_modifier, rotation):
68
+ L = build_scaling_rotation(scaling_modifier * scaling, rotation)
69
+ actual_covariance = L @ L.transpose(1, 2)
70
+ symm = strip_symmetric(actual_covariance)
71
+ return symm
72
+
73
+ self.scaling_activation = torch.exp
74
+ self.scaling_inverse_activation = torch.log
75
+
76
+ self.covariance_activation = build_covariance_from_scaling_rotation
77
+
78
+ self.opacity_activation = torch.sigmoid
79
+ self.inverse_opacity_activation = inverse_sigmoid
80
+
81
+ self.rotation_activation = torch.nn.functional.normalize
82
+
83
+
84
+ def __init__(self, sh_degree : int):
85
+ # self.active_sh_degree = 0
86
+ self.active_sh_degree = sh_degree
87
+ self.max_sh_degree = sh_degree
88
+ self._xyz = torch.empty(0)
89
+ self._features_dc = torch.empty(0)
90
+ self._features_rest = torch.empty(0)
91
+ self._scaling = torch.empty(0)
92
+ self._rotation = torch.empty(0)
93
+ self._opacity = torch.empty(0)
94
+ self.max_radii2D = torch.empty(0)
95
+ self.xyz_gradient_accum = torch.empty(0)
96
+ self.denom = torch.empty(0)
97
+ self.optimizer = None
98
+ self.percent_dense = 0
99
+ self.spatial_lr_scale = 0
100
+ self.param_init = {}
101
+ self.setup_functions()
102
+
103
+ def capture(self):
104
+ return (
105
+ self.active_sh_degree,
106
+ self._xyz,
107
+ self._features_dc,
108
+ self._features_rest,
109
+ self._scaling,
110
+ self._rotation,
111
+ self._opacity,
112
+ self.max_radii2D,
113
+ self.xyz_gradient_accum,
114
+ self.denom,
115
+ self.optimizer.state_dict(),
116
+ self.spatial_lr_scale,
117
+ self.P,
118
+ )
119
+
120
+ def restore(self, model_args, training_args):
121
+ (self.active_sh_degree,
122
+ self._xyz,
123
+ self._features_dc,
124
+ self._features_rest,
125
+ self._scaling,
126
+ self._rotation,
127
+ self._opacity,
128
+ self.max_radii2D,
129
+ xyz_gradient_accum,
130
+ denom,
131
+ opt_dict,
132
+ self.spatial_lr_scale,
133
+ self.P) = model_args
134
+ self.training_setup(training_args)
135
+ self.xyz_gradient_accum = xyz_gradient_accum
136
+ self.denom = denom
137
+ self.optimizer.load_state_dict(opt_dict)
138
+
139
+ @property
140
+ def get_scaling(self):
141
+ return self.scaling_activation(self._scaling)
142
+
143
+ @property
144
+ def get_rotation(self):
145
+ return self.rotation_activation(self._rotation)
146
+
147
+ @property
148
+ def get_xyz(self):
149
+ return self._xyz
150
+
151
+ def compute_relative_world_to_camera(self, R1, t1, R2, t2):
152
+ # Create a row of zeros with a one at the end, for homogeneous coordinates
153
+ zero_row = np.array([[0, 0, 0, 1]], dtype=np.float32)
154
+
155
+ # Compute the inverse of the first extrinsic matrix
156
+ E1_inv = np.hstack([R1.T, -R1.T @ t1.reshape(-1, 1)]) # Transpose and reshape for correct dimensions
157
+ E1_inv = np.vstack([E1_inv, zero_row]) # Append the zero_row to make it a 4x4 matrix
158
+
159
+ # Compute the second extrinsic matrix
160
+ E2 = np.hstack([R2, -R2 @ t2.reshape(-1, 1)]) # No need to transpose R2
161
+ E2 = np.vstack([E2, zero_row]) # Append the zero_row to make it a 4x4 matrix
162
+
163
+ # Compute the relative transformation
164
+ E_rel = E2 @ E1_inv
165
+
166
+ return E_rel
167
+
168
+ def init_RT_seq(self, cam_list):
169
+ poses =[]
170
+ for cam in cam_list[1.0]:
171
+ p = get_tensor_from_camera(cam.world_view_transform.transpose(0, 1)) # R T -> quat t
172
+ poses.append(p)
173
+ poses = torch.stack(poses)
174
+ self.P = poses.cuda().requires_grad_(True)
175
+ # poses_ = torch.randn(poses.detach().clone().shape, device='cuda')
176
+ # self.P = poses_.cuda().requires_grad_(True)
177
+ self.param_init['pose'] = poses.detach().clone()
178
+
179
+ def get_RT(self, idx):
180
+ pose = self.P[idx]
181
+ return pose
182
+
183
+ def get_RT_test(self, idx):
184
+ pose = self.test_P[idx]
185
+ return pose
186
+
187
+ @property
188
+ def get_features(self):
189
+ features_dc = self._features_dc
190
+ features_rest = self._features_rest
191
+ return torch.cat((features_dc, features_rest), dim=1)
192
+
193
+ @property
194
+ def get_opacity(self):
195
+ return self.opacity_activation(self._opacity)
196
+
197
+ def get_covariance(self, scaling_modifier = 1):
198
+ return self.covariance_activation(self.get_scaling, scaling_modifier, self._rotation)
199
+
200
+ def oneupSHdegree(self):
201
+ if self.active_sh_degree < self.max_sh_degree:
202
+ self.active_sh_degree += 1
203
+
204
+ def create_from_pcd(self, pcd : BasicPointCloud, spatial_lr_scale : float):
205
+ self.spatial_lr_scale = spatial_lr_scale
206
+ fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda()
207
+ fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda())
208
+ features = torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2)).float().cuda()
209
+ features[:, :3, 0 ] = fused_color
210
+ features[:, 3:, 1:] = 0.0
211
+
212
+ print("Number of points at initialisation : ", fused_point_cloud.shape[0])
213
+
214
+ dist2 = torch.clamp_min(distCUDA2(torch.from_numpy(np.asarray(pcd.points)).float().cuda()), 0.0000001)
215
+ scales = torch.log(torch.sqrt(dist2))[...,None].repeat(1, 3)
216
+ rots = torch.zeros((fused_point_cloud.shape[0], 4), device="cuda")
217
+ rots[:, 0] = 1
218
+
219
+ opacities = inverse_sigmoid(0.1 * torch.ones((fused_point_cloud.shape[0], 1), dtype=torch.float, device="cuda"))
220
+
221
+ self._xyz = nn.Parameter(fused_point_cloud.requires_grad_(True))
222
+ self._features_dc = nn.Parameter(features[:,:,0:1].transpose(1, 2).contiguous().requires_grad_(True))
223
+ self._features_rest = nn.Parameter(features[:,:,1:].transpose(1, 2).contiguous().requires_grad_(True))
224
+ self._scaling = nn.Parameter(scales.requires_grad_(True))
225
+ self._rotation = nn.Parameter(rots.requires_grad_(True))
226
+ self._opacity = nn.Parameter(opacities.requires_grad_(True))
227
+ self.max_radii2D = torch.zeros((self.get_xyz.shape[0]), device="cuda")
228
+
229
+ self.param_init.update({
230
+ 'xyz': fused_point_cloud.detach().clone(),
231
+ 'f_dc': features[:,:,0:1].transpose(1, 2).contiguous().detach().clone(),
232
+ 'f_rest': features[:,:,1:].transpose(1, 2).contiguous().detach().clone(),
233
+ 'opacity': opacities.detach().clone(),
234
+ 'scaling': scales.detach().clone(),
235
+ 'rotation': rots.detach().clone(),
236
+ })
237
+ def training_setup(self, training_args):
238
+ self.percent_dense = training_args.percent_dense
239
+ self.xyz_gradient_accum = torch.zeros((self.get_xyz.shape[0], 1), device="cuda")
240
+ self.denom = torch.zeros((self.get_xyz.shape[0], 1), device="cuda")
241
+
242
+ l = [
243
+ {'params': [self._xyz], 'lr': training_args.position_lr_init * self.spatial_lr_scale, "name": "xyz"},
244
+ {'params': [self._features_dc], 'lr': training_args.feature_lr, "name": "f_dc"},
245
+ {'params': [self._features_rest], 'lr': training_args.feature_lr / 20.0, "name": "f_rest"},
246
+ {'params': [self._opacity], 'lr': training_args.opacity_lr, "name": "opacity"},
247
+ {'params': [self._scaling], 'lr': training_args.scaling_lr, "name": "scaling"},
248
+ {'params': [self._rotation], 'lr': training_args.rotation_lr, "name": "rotation"},
249
+ ]
250
+
251
+ l_cam = [{'params': [self.P],'lr': training_args.rotation_lr*0.1, "name": "pose"},]
252
+ # l_cam = [{'params': [self.P],'lr': training_args.rotation_lr, "name": "pose"},]
253
+
254
+ l += l_cam
255
+
256
+ self.optimizer = torch.optim.Adam(l, lr=0.0, eps=1e-15)
257
+ self.xyz_scheduler_args = get_expon_lr_func(lr_init=training_args.position_lr_init*self.spatial_lr_scale,
258
+ lr_final=training_args.position_lr_final*self.spatial_lr_scale,
259
+ lr_delay_mult=training_args.position_lr_delay_mult,
260
+ max_steps=training_args.position_lr_max_steps)
261
+ self.cam_scheduler_args = get_expon_lr_func(
262
+ # lr_init=0,
263
+ # lr_final=0,
264
+ lr_init=training_args.rotation_lr*0.1,
265
+ lr_final=training_args.rotation_lr*0.001,
266
+ # lr_init=training_args.position_lr_init*self.spatial_lr_scale*10,
267
+ # lr_final=training_args.position_lr_final*self.spatial_lr_scale*10,
268
+ lr_delay_mult=training_args.position_lr_delay_mult,
269
+ max_steps=1000)
270
+
271
+ def update_learning_rate(self, iteration):
272
+ ''' Learning rate scheduling per step '''
273
+ for param_group in self.optimizer.param_groups:
274
+ if param_group["name"] == "pose":
275
+ lr = self.cam_scheduler_args(iteration)
276
+ # print("pose learning rate", iteration, lr)
277
+ param_group['lr'] = lr
278
+ if param_group["name"] == "xyz":
279
+ lr = self.xyz_scheduler_args(iteration)
280
+ param_group['lr'] = lr
281
+ # return lr
282
+
283
+ def construct_list_of_attributes(self):
284
+ l = ['x', 'y', 'z', 'nx', 'ny', 'nz']
285
+ # All channels except the 3 DC
286
+ for i in range(self._features_dc.shape[1]*self._features_dc.shape[2]):
287
+ l.append('f_dc_{}'.format(i))
288
+ for i in range(self._features_rest.shape[1]*self._features_rest.shape[2]):
289
+ l.append('f_rest_{}'.format(i))
290
+ l.append('opacity')
291
+ for i in range(self._scaling.shape[1]):
292
+ l.append('scale_{}'.format(i))
293
+ for i in range(self._rotation.shape[1]):
294
+ l.append('rot_{}'.format(i))
295
+ return l
296
+
297
+ def save_ply(self, path):
298
+ mkdir_p(os.path.dirname(path))
299
+
300
+ xyz = self._xyz.detach().cpu().numpy()
301
+ normals = np.zeros_like(xyz)
302
+ f_dc = self._features_dc.detach().transpose(1, 2).flatten(start_dim=1).contiguous().cpu().numpy()
303
+ f_rest = self._features_rest.detach().transpose(1, 2).flatten(start_dim=1).contiguous().cpu().numpy()
304
+ opacities = self._opacity.detach().cpu().numpy()
305
+ scale = self._scaling.detach().cpu().numpy()
306
+ rotation = self._rotation.detach().cpu().numpy()
307
+
308
+ dtype_full = [(attribute, 'f4') for attribute in self.construct_list_of_attributes()]
309
+
310
+ elements = np.empty(xyz.shape[0], dtype=dtype_full)
311
+ attributes = np.concatenate((xyz, normals, f_dc, f_rest, opacities, scale, rotation), axis=1)
312
+ elements[:] = list(map(tuple, attributes))
313
+ el = PlyElement.describe(elements, 'vertex')
314
+ PlyData([el]).write(path)
315
+
316
+ def reset_opacity(self):
317
+ opacities_new = inverse_sigmoid(torch.min(self.get_opacity, torch.ones_like(self.get_opacity)*0.01))
318
+ optimizable_tensors = self.replace_tensor_to_optimizer(opacities_new, "opacity")
319
+ self._opacity = optimizable_tensors["opacity"]
320
+
321
+ def load_ply(self, path):
322
+ plydata = PlyData.read(path)
323
+
324
+ xyz = np.stack((np.asarray(plydata.elements[0]["x"]),
325
+ np.asarray(plydata.elements[0]["y"]),
326
+ np.asarray(plydata.elements[0]["z"])), axis=1)
327
+ opacities = np.asarray(plydata.elements[0]["opacity"])[..., np.newaxis]
328
+
329
+ features_dc = np.zeros((xyz.shape[0], 3, 1))
330
+ features_dc[:, 0, 0] = np.asarray(plydata.elements[0]["f_dc_0"])
331
+ features_dc[:, 1, 0] = np.asarray(plydata.elements[0]["f_dc_1"])
332
+ features_dc[:, 2, 0] = np.asarray(plydata.elements[0]["f_dc_2"])
333
+
334
+ extra_f_names = [p.name for p in plydata.elements[0].properties if p.name.startswith("f_rest_")]
335
+ extra_f_names = sorted(extra_f_names, key = lambda x: int(x.split('_')[-1]))
336
+ assert len(extra_f_names)==3*(self.max_sh_degree + 1) ** 2 - 3
337
+ features_extra = np.zeros((xyz.shape[0], len(extra_f_names)))
338
+ for idx, attr_name in enumerate(extra_f_names):
339
+ features_extra[:, idx] = np.asarray(plydata.elements[0][attr_name])
340
+ # Reshape (P,F*SH_coeffs) to (P, F, SH_coeffs except DC)
341
+ features_extra = features_extra.reshape((features_extra.shape[0], 3, (self.max_sh_degree + 1) ** 2 - 1))
342
+
343
+ scale_names = [p.name for p in plydata.elements[0].properties if p.name.startswith("scale_")]
344
+ scale_names = sorted(scale_names, key = lambda x: int(x.split('_')[-1]))
345
+ scales = np.zeros((xyz.shape[0], len(scale_names)))
346
+ for idx, attr_name in enumerate(scale_names):
347
+ scales[:, idx] = np.asarray(plydata.elements[0][attr_name])
348
+
349
+ rot_names = [p.name for p in plydata.elements[0].properties if p.name.startswith("rot")]
350
+ rot_names = sorted(rot_names, key = lambda x: int(x.split('_')[-1]))
351
+ rots = np.zeros((xyz.shape[0], len(rot_names)))
352
+ for idx, attr_name in enumerate(rot_names):
353
+ rots[:, idx] = np.asarray(plydata.elements[0][attr_name])
354
+
355
+ self._xyz = nn.Parameter(torch.tensor(xyz, dtype=torch.float, device="cuda").requires_grad_(True))
356
+ self._features_dc = nn.Parameter(torch.tensor(features_dc, dtype=torch.float, device="cuda").transpose(1, 2).contiguous().requires_grad_(True))
357
+ self._features_rest = nn.Parameter(torch.tensor(features_extra, dtype=torch.float, device="cuda").transpose(1, 2).contiguous().requires_grad_(True))
358
+ self._opacity = nn.Parameter(torch.tensor(opacities, dtype=torch.float, device="cuda").requires_grad_(True))
359
+ self._scaling = nn.Parameter(torch.tensor(scales, dtype=torch.float, device="cuda").requires_grad_(True))
360
+ self._rotation = nn.Parameter(torch.tensor(rots, dtype=torch.float, device="cuda").requires_grad_(True))
361
+
362
+ self.active_sh_degree = self.max_sh_degree
363
+
364
+ def replace_tensor_to_optimizer(self, tensor, name):
365
+ optimizable_tensors = {}
366
+ for group in self.optimizer.param_groups:
367
+ if group["name"] == name:
368
+ # breakpoint()
369
+ stored_state = self.optimizer.state.get(group['params'][0], None)
370
+ stored_state["exp_avg"] = torch.zeros_like(tensor)
371
+ stored_state["exp_avg_sq"] = torch.zeros_like(tensor)
372
+
373
+ del self.optimizer.state[group['params'][0]]
374
+ group["params"][0] = nn.Parameter(tensor.requires_grad_(True))
375
+ self.optimizer.state[group['params'][0]] = stored_state
376
+
377
+ optimizable_tensors[group["name"]] = group["params"][0]
378
+ return optimizable_tensors
379
+
380
+ def _prune_optimizer(self, mask):
381
+ optimizable_tensors = {}
382
+ for group in self.optimizer.param_groups:
383
+ stored_state = self.optimizer.state.get(group['params'][0], None)
384
+ if stored_state is not None:
385
+ stored_state["exp_avg"] = stored_state["exp_avg"][mask]
386
+ stored_state["exp_avg_sq"] = stored_state["exp_avg_sq"][mask]
387
+
388
+ del self.optimizer.state[group['params'][0]]
389
+ group["params"][0] = nn.Parameter((group["params"][0][mask].requires_grad_(True)))
390
+ self.optimizer.state[group['params'][0]] = stored_state
391
+
392
+ optimizable_tensors[group["name"]] = group["params"][0]
393
+ else:
394
+ group["params"][0] = nn.Parameter(group["params"][0][mask].requires_grad_(True))
395
+ optimizable_tensors[group["name"]] = group["params"][0]
396
+ return optimizable_tensors
397
+
398
+ def prune_points(self, mask):
399
+ valid_points_mask = ~mask
400
+ optimizable_tensors = self._prune_optimizer(valid_points_mask)
401
+
402
+ self._xyz = optimizable_tensors["xyz"]
403
+ self._features_dc = optimizable_tensors["f_dc"]
404
+ self._features_rest = optimizable_tensors["f_rest"]
405
+ self._opacity = optimizable_tensors["opacity"]
406
+ self._scaling = optimizable_tensors["scaling"]
407
+ self._rotation = optimizable_tensors["rotation"]
408
+
409
+ self.xyz_gradient_accum = self.xyz_gradient_accum[valid_points_mask]
410
+
411
+ self.denom = self.denom[valid_points_mask]
412
+ self.max_radii2D = self.max_radii2D[valid_points_mask]
413
+
414
+ def cat_tensors_to_optimizer(self, tensors_dict):
415
+ optimizable_tensors = {}
416
+ for group in self.optimizer.param_groups:
417
+ assert len(group["params"]) == 1
418
+ extension_tensor = tensors_dict[group["name"]]
419
+ stored_state = self.optimizer.state.get(group['params'][0], None)
420
+ if stored_state is not None:
421
+
422
+ stored_state["exp_avg"] = torch.cat((stored_state["exp_avg"], torch.zeros_like(extension_tensor)), dim=0)
423
+ stored_state["exp_avg_sq"] = torch.cat((stored_state["exp_avg_sq"], torch.zeros_like(extension_tensor)), dim=0)
424
+
425
+ del self.optimizer.state[group['params'][0]]
426
+ group["params"][0] = nn.Parameter(torch.cat((group["params"][0], extension_tensor), dim=0).requires_grad_(True))
427
+ self.optimizer.state[group['params'][0]] = stored_state
428
+
429
+ optimizable_tensors[group["name"]] = group["params"][0]
430
+ else:
431
+ group["params"][0] = nn.Parameter(torch.cat((group["params"][0], extension_tensor), dim=0).requires_grad_(True))
432
+ optimizable_tensors[group["name"]] = group["params"][0]
433
+
434
+ return optimizable_tensors
435
+
436
+ def densification_postfix(self, new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation):
437
+ d = {"xyz": new_xyz,
438
+ "f_dc": new_features_dc,
439
+ "f_rest": new_features_rest,
440
+ "opacity": new_opacities,
441
+ "scaling" : new_scaling,
442
+ "rotation" : new_rotation}
443
+
444
+ optimizable_tensors = self.cat_tensors_to_optimizer(d)
445
+ self._xyz = optimizable_tensors["xyz"]
446
+ self._features_dc = optimizable_tensors["f_dc"]
447
+ self._features_rest = optimizable_tensors["f_rest"]
448
+ self._opacity = optimizable_tensors["opacity"]
449
+ self._scaling = optimizable_tensors["scaling"]
450
+ self._rotation = optimizable_tensors["rotation"]
451
+
452
+ self.xyz_gradient_accum = torch.zeros((self.get_xyz.shape[0], 1), device="cuda")
453
+ self.denom = torch.zeros((self.get_xyz.shape[0], 1), device="cuda")
454
+ self.max_radii2D = torch.zeros((self.get_xyz.shape[0]), device="cuda")
455
+
456
+ def densify_and_split(self, grads, grad_threshold, scene_extent, N=2):
457
+ n_init_points = self.get_xyz.shape[0]
458
+ # Extract points that satisfy the gradient condition
459
+ padded_grad = torch.zeros((n_init_points), device="cuda")
460
+ padded_grad[:grads.shape[0]] = grads.squeeze()
461
+ selected_pts_mask = torch.where(padded_grad >= grad_threshold, True, False)
462
+ selected_pts_mask = torch.logical_and(selected_pts_mask,
463
+ torch.max(self.get_scaling, dim=1).values > self.percent_dense*scene_extent)
464
+
465
+ stds = self.get_scaling[selected_pts_mask].repeat(N,1)
466
+ means =torch.zeros((stds.size(0), 3),device="cuda")
467
+ samples = torch.normal(mean=means, std=stds)
468
+ rots = build_rotation(self._rotation[selected_pts_mask]).repeat(N,1,1)
469
+ new_xyz = torch.bmm(rots, samples.unsqueeze(-1)).squeeze(-1) + self.get_xyz[selected_pts_mask].repeat(N, 1)
470
+ new_scaling = self.scaling_inverse_activation(self.get_scaling[selected_pts_mask].repeat(N,1) / (0.8*N))
471
+ new_rotation = self._rotation[selected_pts_mask].repeat(N,1)
472
+ new_features_dc = self._features_dc[selected_pts_mask].repeat(N,1,1)
473
+ new_features_rest = self._features_rest[selected_pts_mask].repeat(N,1,1)
474
+ new_opacity = self._opacity[selected_pts_mask].repeat(N,1)
475
+
476
+ self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacity, new_scaling, new_rotation)
477
+
478
+ prune_filter = torch.cat((selected_pts_mask, torch.zeros(N * selected_pts_mask.sum(), device="cuda", dtype=bool)))
479
+ self.prune_points(prune_filter)
480
+
481
+ def densify_and_clone(self, grads, grad_threshold, scene_extent):
482
+ # Extract points that satisfy the gradient condition
483
+ selected_pts_mask = torch.where(torch.norm(grads, dim=-1) >= grad_threshold, True, False)
484
+ selected_pts_mask = torch.logical_and(selected_pts_mask,
485
+ torch.max(self.get_scaling, dim=1).values <= self.percent_dense*scene_extent)
486
+
487
+ new_xyz = self._xyz[selected_pts_mask]
488
+ new_features_dc = self._features_dc[selected_pts_mask]
489
+ new_features_rest = self._features_rest[selected_pts_mask]
490
+ new_opacities = self._opacity[selected_pts_mask]
491
+ new_scaling = self._scaling[selected_pts_mask]
492
+ new_rotation = self._rotation[selected_pts_mask]
493
+
494
+ self.densification_postfix(new_xyz, new_features_dc, new_features_rest, new_opacities, new_scaling, new_rotation)
495
+
496
+ def densify_and_prune(self, max_grad, min_opacity, extent, max_screen_size):
497
+ grads = self.xyz_gradient_accum / self.denom
498
+ grads[grads.isnan()] = 0.0
499
+
500
+ # self.densify_and_clone(grads, max_grad, extent)
501
+ # self.densify_and_split(grads, max_grad, extent)
502
+
503
+ prune_mask = (self.get_opacity < min_opacity).squeeze()
504
+ if max_screen_size:
505
+ big_points_vs = self.max_radii2D > max_screen_size
506
+ big_points_ws = self.get_scaling.max(dim=1).values > 0.1 * extent
507
+ prune_mask = torch.logical_or(torch.logical_or(prune_mask, big_points_vs), big_points_ws)
508
+ self.prune_points(prune_mask)
509
+
510
+ torch.cuda.empty_cache()
511
+
512
+ def add_densification_stats(self, viewspace_point_tensor, update_filter):
513
+ self.xyz_gradient_accum[update_filter] += torch.norm(viewspace_point_tensor.grad[update_filter,:2], dim=-1, keepdim=True)
514
+ self.denom[update_filter] += 1
515
+
516
+
517
+ class Feat2GaussianModel(GaussianModel):
518
+
519
+ def __init__(self, sh_degree : int, feat_dim : int, gs_params_group : dict, noise_std=0):
520
+ super().__init__(sh_degree)
521
+ self.noise_std = noise_std
522
+ self.pc_feat = torch.empty(0)
523
+ self.param_init = {}
524
+ self.feat_dim = feat_dim
525
+ self.gs_params_group = gs_params_group
526
+ self.active_sh_degree = sh_degree
527
+ self.sh_coeffs = ((sh_degree + 1) ** 2) * 3-3
528
+ net_width = feat_dim
529
+ out_dim = {'xyz': 3, 'scaling': 3, 'rotation': 4, 'opacity': 1, 'f_dc': 3, 'f_rest': self.sh_coeffs}
530
+ for key in gs_params_group.get('head', []):
531
+ setattr(self, f'head_{key}', conditionalWarp(layers=[feat_dim, net_width, out_dim[key]], skip=[]).cuda())
532
+
533
+ self.param_key = {
534
+ 'xyz': '_xyz',
535
+ 'scaling': '_scaling',
536
+ 'rotation': '_rotation',
537
+ 'opacity': '_opacity',
538
+ 'f_dc': '_features_dc',
539
+ 'f_rest': '_features_rest',
540
+ 'pc_feat': 'pc_feat',
541
+ }
542
+
543
+ # ## FOR DEBUGGING
544
+ # self.head_xyz = conditionalWarp(layers=[self.feat_dim, net_width, 3], skip=[]).cuda()
545
+ # self.head_scaling = conditionalWarp(layers=[self.feat_dim, net_width, 3], skip=[]).cuda()
546
+ # self.head_rotation = conditionalWarp(layers=[self.feat_dim, net_width, 4], skip=[]).cuda()
547
+ # self.head_opacity = conditionalWarp(layers=[self.feat_dim, net_width, 1], skip=[]).cuda()
548
+ # self.head_f_dc = conditionalWarp(layers=[feat_dim, net_width, 3], skip=[]).cuda()
549
+ # self.head_f_rest = conditionalWarp(layers=[feat_dim, net_width, self.sh_coeffs], skip=[]).cuda()
550
+
551
+ def capture(self):
552
+ head_state_dicts = {f'head_{key}': getattr(self, f'head_{key}').state_dict() for key in self.gs_params_group.get('head', [])}
553
+ return (
554
+ self.active_sh_degree,
555
+ self._xyz,
556
+ self._features_dc,
557
+ self._features_rest,
558
+ self._scaling,
559
+ self._rotation,
560
+ self._opacity,
561
+ self.max_radii2D,
562
+ self.xyz_gradient_accum,
563
+ self.denom,
564
+ self.optimizer.state_dict(),
565
+ self.spatial_lr_scale,
566
+ self.P,
567
+ head_state_dicts
568
+ )
569
+
570
+ def restore(self, model_args, training_args):
571
+ (self.active_sh_degree,
572
+ self._xyz,
573
+ self._features_dc,
574
+ self._features_rest,
575
+ self._scaling,
576
+ self._rotation,
577
+ self._opacity,
578
+ self.max_radii2D,
579
+ xyz_gradient_accum,
580
+ denom,
581
+ opt_dict,
582
+ self.spatial_lr_scale,
583
+ self.P,
584
+ head_state_dicts
585
+ ) = model_args
586
+
587
+ self.training_setup(training_args)
588
+ self.xyz_gradient_accum = xyz_gradient_accum
589
+ self.denom = denom
590
+ self.optimizer.load_state_dict(opt_dict)
591
+
592
+ for key, state_dict in head_state_dicts.items():
593
+ getattr(self, key).load_state_dict(state_dict)
594
+
595
+ def inference(self):
596
+ feat_in = self.pc_feat
597
+ for key in self.gs_params_group.get('head', []):
598
+
599
+ if key == 'f_dc':
600
+ self._features_dc = getattr(self, f'head_{key}')(feat_in, self.param_init[key].view(-1, 3)).reshape(-1, 1, 3)
601
+ elif key == 'f_rest':
602
+ self._features_rest = getattr(self, f'head_{key}')(feat_in.detach(), self.param_init[key].view(-1, self.sh_coeffs)).reshape(-1, self.sh_coeffs // 3, 3)
603
+ else:
604
+ setattr(self, f'_{key}', getattr(self, f'head_{key}')(feat_in, self.param_init[key]))
605
+
606
+ # if key == 'f_dc':
607
+ # self._features_dc = getattr(self, f'head_{key}')(feat_in, self.param_init[key].view(-1, 3)).reshape(-1, 1, 3)
608
+ # self._features_dc += self.param_init[key].view(-1, 1, 3).mean(dim=0, keepdim=True)
609
+ # elif key == 'f_rest':
610
+ # self._features_rest = getattr(self, f'head_{key}')(feat_in.detach(), self.param_init[key].view(-1, self.sh_coeffs)).reshape(-1, self.sh_coeffs // 3, 3)
611
+ # self._features_rest += self.param_init[key].view(-1, self.sh_coeffs // 3, 3).mean(dim=0, keepdim=True)
612
+ # else:
613
+ # pred = getattr(self, f'head_{key}')(feat_in, self.param_init[key])
614
+ # setattr(self, f'_{key}', pred + self.param_init[key].mean(dim=0, keepdim=True))
615
+
616
+ # ## FOR DEBUGGING
617
+ # self._xyz = self.head_xyz(pred, self.param_init['xyz'])
618
+ # self._opacity = self.head_opacity(pred, self.param_init['opacity'])
619
+ # self._scaling = self.head_scaling(pred, self.param_init['scaling'])
620
+ # self._rotation = self.head_rotation(pred, self.param_init['rotation'])
621
+ # self._features_dc = self.head_f_dc(pred, self.param_init['f_dc'].view(-1,3)).reshape(-1, 1, 3)
622
+ # self._features_rest = self.head_f_rest(pred, self.param_init['f_rest'].view(-1,self.sh_coeffs)).reshape(-1, self.sh_coeffs//3, 3)
623
+
624
+ def create_from_pcd(self, pcd : BasicPointCloud, spatial_lr_scale : float):
625
+ self.spatial_lr_scale = spatial_lr_scale
626
+ fused_point_cloud = torch.tensor(np.asarray(pcd.points)).float().cuda()
627
+ fused_point_feat = torch.tensor(np.asarray(pcd.features)).float().cuda() # get features from .PLY file
628
+ assert fused_point_feat.shape[-1] == self.feat_dim, f"Expected feature dimension {self.feat_dim}, but got {fused_point_feat.shape[-1]}"
629
+ fused_color = RGB2SH(torch.tensor(np.asarray(pcd.colors)).float().cuda())
630
+ features = torch.zeros((fused_color.shape[0], 3, (self.max_sh_degree + 1) ** 2)).float().cuda()
631
+ features[:, :3, 0 ] = fused_color
632
+ features[:, 3:, 1:] = 0.0
633
+
634
+ print("Number of points at initialisation : ", fused_point_cloud.shape[0])
635
+
636
+ dist2 = torch.clamp_min(distCUDA2(torch.from_numpy(np.asarray(pcd.points)).float().cuda()), 0.0000001)
637
+ scales = torch.log(torch.sqrt(dist2))[...,None].repeat(1, 3)
638
+ rots = torch.zeros((fused_point_cloud.shape[0], 4), device="cuda")
639
+ rots[:, 0] = 1
640
+ opacities = inverse_sigmoid(0.1 * torch.ones((fused_point_cloud.shape[0], 1), dtype=torch.float, device="cuda"))
641
+ self.max_radii2D = torch.zeros((self.get_xyz.shape[0]), device="cuda")
642
+
643
+ self.pc_feat = fused_point_feat#.requires_grad_(True)
644
+
645
+ # fused_point_feat = torch.randn_like(fused_point_feat)
646
+ # self.pc_feat = fused_point_feat.requires_grad_(True)
647
+
648
+ self.gt_xyz = fused_point_cloud.clone()
649
+ if self.noise_std != 0:
650
+ self.noise_std /= 1000.0
651
+ torch.manual_seed(0)
652
+ torch.cuda.manual_seed(0)
653
+ noise = torch.randn_like(fused_point_cloud) * self.noise_std
654
+ fused_point_cloud += noise
655
+ # fused_point_cloud = noise + fused_point_cloud.mean(dim=0, keepdim=True)
656
+ # fused_point_cloud = torch.zeros_like(fused_point_cloud) + fused_point_cloud.mean(dim=0, keepdim=True)
657
+
658
+ param_init = {
659
+ 'xyz': fused_point_cloud,
660
+ 'scaling': scales,
661
+ 'rotation': rots,
662
+ 'opacity': opacities,
663
+ 'f_dc': features[:, :, 0:1].transpose(1, 2).contiguous(),
664
+ 'f_rest': features[:, :, 1:].transpose(1, 2).contiguous(),
665
+ 'pc_feat': fused_point_feat,
666
+ }
667
+
668
+ for key in self.gs_params_group.get('opt', []):
669
+ setattr(self, self.param_key[key], nn.Parameter(param_init[key].requires_grad_(True)))
670
+
671
+ self.param_init.update({key: value.detach().clone() for key, value in param_init.items()})
672
+
673
+ # ## FOR DEBUGGING
674
+ # self._xyz = nn.Parameter(fused_point_cloud.requires_grad_(True))
675
+ # self._scaling = nn.Parameter(scales.requires_grad_(True))
676
+ # self._rotation = nn.Parameter(rots.requires_grad_(True))
677
+ # self._opacity = nn.Parameter(opacities.requires_grad_(True))
678
+ # self._features_dc = nn.Parameter(features[:,:,0:1].transpose(1, 2).contiguous().requires_grad_(True))
679
+ # self._features_rest = nn.Parameter(features[:,:,1:].transpose(1, 2).contiguous().requires_grad_(True))
680
+
681
+ # self.param_init.update({
682
+ # 'xyz': fused_point_cloud.detach().clone(),
683
+ # 'f_dc': features[:,:,0:1].transpose(1, 2).contiguous().detach().clone(),
684
+ # 'f_rest': features[:,:,1:].transpose(1, 2).contiguous().detach().clone(),
685
+ # 'opacity': opacities.detach().clone(),
686
+ # 'scaling': scales.detach().clone(),
687
+ # 'rotation': rots.detach().clone(),
688
+ # 'pc_feat':fused_point_feat.detach().clone(),
689
+ # })
690
+
691
+ def training_setup(self, training_args):
692
+ self.percent_dense = training_args.percent_dense
693
+ self.xyz_gradient_accum = torch.zeros((self.get_xyz.shape[0], 1), device="cuda")
694
+ self.denom = torch.zeros((self.get_xyz.shape[0], 1), device="cuda")
695
+
696
+ self.param_lr = {
697
+ "xyz": training_args.position_lr_init * self.spatial_lr_scale,
698
+ "f_dc": training_args.feature_lr,
699
+ "f_rest": training_args.feature_sh_lr,
700
+ "opacity": training_args.opacity_lr,
701
+ "scaling": training_args.scaling_lr,
702
+ "rotation": training_args.rotation_lr
703
+ }
704
+
705
+ warm_start_lr = 0.01
706
+ l = []
707
+ for key in self.gs_params_group.get('head', []):
708
+ l.append({
709
+ 'params': getattr(self, f'head_{key}').parameters(),
710
+ 'lr': warm_start_lr,
711
+ 'name': key
712
+ })
713
+
714
+ for key in self.gs_params_group.get('opt', []):
715
+ l.append({
716
+ 'params': [getattr(self, self.param_key[key])],
717
+ 'lr': warm_start_lr,
718
+ 'name': key
719
+ })
720
+
721
+ # ## FOR DEBUGGING
722
+ # l += [
723
+ # {'params': self.head_f_dc.parameters(), 'lr': warm_start_lr, "name": "warm_start_f_dc"},
724
+ # {'params': self.head_f_rest.parameters(), 'lr': warm_start_lr, "name": "warm_start_f_rest"},
725
+ # ]
726
+
727
+ # l = [
728
+ # {'params': self.head_xyz.parameters(), 'lr': warm_start_lr, "name": "xyz"},
729
+ # # {'params': [self._xyz], 'lr': warm_start_lr, "name": "xyz"},
730
+ # {'params': self.head_scaling.parameters(), 'lr': warm_start_lr, "name": "scaling"},
731
+ # # {'params': [self._scaling], 'lr': warm_start_lr, "name": "scaling"},
732
+ # {'params': self.head_rotation.parameters(), 'lr': warm_start_lr, "name": "rotation"},
733
+ # # {'params': [self._rotation], 'lr': warm_start_lr, "name": "rotation"},
734
+ # {'params': self.head_opacity.parameters(), 'lr': warm_start_lr, "name": "opacity"},
735
+ # # {'params': [self._opacity], 'lr': warm_start_lr, "name": "opacity"},
736
+ # # {'params': self.head_f_dc.parameters(), 'lr': warm_start_lr, "name": "f_dc"},
737
+ # {'params': [self._features_dc], 'lr': warm_start_lr, "name": "f_dc"},
738
+ # # {'params': self.head_f_rest.parameters(), 'lr': warm_start_lr, "name": "f_rest"},
739
+ # {'params': [self._features_rest], 'lr': warm_start_lr, "name": "f_rest"},
740
+ # # {'params': [self.pc_feat], 'lr': warm_start_lr, "name": "feat"},
741
+ # ]
742
+
743
+ l_cam = [{'params': [self.P],'lr': training_args.pose_lr_init, "name": "pose"},]
744
+
745
+ l += l_cam
746
+
747
+ self.optimizer = torch.optim.Adam(l, lr=0.0, eps=1e-15)
748
+ self.xyz_scheduler_args = get_expon_lr_func(lr_init=training_args.position_lr_init * self.spatial_lr_scale,
749
+ lr_final=training_args.position_lr_final * self.spatial_lr_scale,
750
+ lr_delay_mult=training_args.position_lr_delay_mult,
751
+ max_steps=training_args.position_lr_max_steps)
752
+ self.cam_scheduler_args = get_expon_lr_func(lr_init=training_args.pose_lr_init,
753
+ lr_final=training_args.pose_lr_final,
754
+ lr_delay_mult=training_args.position_lr_delay_mult,
755
+ max_steps=1000)
756
+
757
+ self.warm_start_scheduler_args = get_expon_lr_func(lr_init=warm_start_lr,
758
+ lr_final=warm_start_lr*0.01,
759
+ max_steps=1000)
760
+
761
+ def setup_rendering_learning_rate(self, ):
762
+ ''' Setup learning rate scheduling'''
763
+ for param_group in self.optimizer.param_groups:
764
+ if param_group["name"] in self.param_lr:
765
+ param_group['lr'] = self.param_lr[param_group["name"]]
766
+ # elif param_group["name"] == "feat":
767
+ # param_group['lr'] = 1e-6
768
+
769
+ def update_warm_start_learning_rate(self, iteration):
770
+ ''' Warm start learning rate scheduling per step '''
771
+ for param_group in self.optimizer.param_groups:
772
+ lr = self.warm_start_scheduler_args(iteration)
773
+ param_group['lr'] = lr
774
+
775
+ def update_learning_rate(self, iteration):
776
+ ''' Learning rate scheduling per step '''
777
+ for param_group in self.optimizer.param_groups:
778
+ if param_group["name"] == "pose":
779
+ lr = self.cam_scheduler_args(iteration)
780
+ param_group['lr'] = lr
781
+ if param_group["name"] == "xyz":
782
+ lr = self.xyz_scheduler_args(iteration)
783
+ param_group['lr'] = lr
784
+ # return lr
785
+
786
+ class conditionalWarp(torch.nn.Module):
787
+ def __init__(self, layers, skip, skip_dim=None, res=[], freq=None, zero_init=False):
788
+ super().__init__()
789
+ self.skip = skip
790
+ self.res = res
791
+ self.freq = freq
792
+ self.mlp_warp = torch.nn.ModuleList()
793
+ L = self.get_layer_dims(layers)
794
+ for li,(k_in,k_out) in enumerate(L):
795
+ if li in self.skip: k_in += layers[-1] if skip_dim is None else skip_dim
796
+ linear = torch.nn.Linear(k_in,k_out)
797
+
798
+ # Init network output as 0
799
+ if zero_init:
800
+ if li == (len(L) - 1):
801
+ torch.nn.init.constant_(linear.weight, 0)
802
+ torch.nn.init.constant_(linear.bias, 0)
803
+
804
+ self.mlp_warp.append(linear)
805
+
806
+ def get_layer_dims(self, layers):
807
+ # return a list of tuples (k_in,k_out)
808
+ return list(zip(layers[:-1],layers[1:]))
809
+
810
+ def positional_encoding(self, input): # [B,...,N]
811
+ shape = input.shape
812
+ freq = 2**torch.arange(self.freq, dtype=torch.float32,device=input.device)*np.pi # [L]
813
+ spectrum = input[...,None]*freq # [B,...,N,L]
814
+ sin,cos = spectrum.sin(),spectrum.cos() # [B,...,N,L]
815
+ input_enc = torch.stack([sin,cos],dim=-2) # [B,...,N,2,L]
816
+ input_enc = input_enc.view(*shape[:-1],-1) # [B,...,2NL]
817
+ return input_enc
818
+
819
+ def forward(self, feat_in, color):
820
+ if self.freq != None:
821
+ feat_in = torch.cat([feat_in, self.positional_encoding(feat_in)], dim=-1)
822
+ feat = feat_in
823
+ for li,layer in enumerate(self.mlp_warp):
824
+ if li in self.skip: feat = torch.cat([feat, color],dim=-1)
825
+ if li in self.res: feat = feat + feat_in
826
+ feat = layer(feat)
827
+ if li!=len(self.mlp_warp)-1:
828
+ feat = nn.functional.relu(feat)
829
+ warp = feat
830
+ return warp
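Each head_* built in Feat2GaussianModel is a conditionalWarp MLP (net_width equal to feat_dim) that maps the N x feat_dim point-feature matrix to one Gaussian attribute; the attribute's initial value is passed as the second argument but is only concatenated into the MLP when skip layers are configured, and the heads above use skip=[]. A minimal shape-check sketch (illustrative only, not a file in this commit), assuming a CUDA build of the repo's dependencies and 1024-dim features as in the 'mast3r' entry of feat_default_dim:

import torch
from scene.gaussian_model import conditionalWarp

N, feat_dim = 4096, 1024                 # hypothetical point count, MASt3R feature width
feats = torch.randn(N, feat_dim).cuda()  # per-point features, normally read from the .ply
xyz_init = torch.randn(N, 3).cuda()      # initial positions from the point cloud

head_xyz = conditionalWarp(layers=[feat_dim, feat_dim, 3], skip=[]).cuda()
xyz_pred = head_xyz(feats, xyz_init)     # two Linear layers with a ReLU in between
print(xyz_pred.shape)                    # torch.Size([4096, 3])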
train_feat2gs.py ADDED
@@ -0,0 +1,243 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import os
13
+ import numpy as np
14
+ import torch
15
+ from random import randint
16
+ from utils.loss_utils import l1_loss, ssim
17
+ from gaussian_renderer import render_gsplat
18
+ import sys
19
+ from scene import Scene, Feat2GaussianModel
20
+ from argparse import ArgumentParser
21
+ from arguments import ModelParams, PipelineParams, OptimizationParams
22
+ from utils.pose_utils import get_camera_from_tensor
23
+ from tqdm import tqdm
24
+
25
+ from time import perf_counter
26
+
27
+ def save_pose(path, quat_pose, train_cams, llffhold=2):
28
+ output_poses=[]
29
+ index_colmap = [cam.colmap_id for cam in train_cams]
30
+ for quat_t in quat_pose:
31
+ w2c = get_camera_from_tensor(quat_t)
32
+ output_poses.append(w2c)
33
+ colmap_poses = []
34
+ for i in range(len(index_colmap)):
35
+ ind = index_colmap.index(i+1)
36
+ bb=output_poses[ind]
37
+ bb = bb#.inverse()
38
+ colmap_poses.append(bb)
39
+ colmap_poses = torch.stack(colmap_poses).detach().cpu().numpy()
40
+ np.save(path, colmap_poses)
41
+
42
+
43
+ def training(dataset, opt, pipe, testing_iterations, saving_iterations, checkpoint_iterations, checkpoint, debug_from, args):
44
+ first_iter = 0
45
+ # tb_writer = prepare_output_and_logger(dataset, opt.iterations)
46
+ feat_type = '-'.join(args.feat_type)
47
+ feat_dim = args.feat_dim if feat_type not in ['iuv', 'iuvrgb'] else dataset.feat_default_dim[feat_type]
48
+ gs_params_group = dataset.gs_params_group[args.model]
49
+ gaussians = Feat2GaussianModel(dataset.sh_degree, feat_dim, gs_params_group)
50
+ scene = Scene(dataset, gaussians, opt=args, shuffle=True)
51
+ gaussians.training_setup(opt)
52
+ # if checkpoint:
53
+ # (model_params, first_iter) = torch.load(checkpoint)
54
+ # gaussians.restore(model_params, opt)
55
+ train_cams_init = scene.getTrainCameras().copy()
56
+ os.makedirs(scene.model_path + 'pose', exist_ok=True)
57
+ # save_pose(scene.model_path + 'pose' + "/pose_org.npy", gaussians.P, train_cams_init)
58
+ bg_color = [1, 1, 1] if dataset.white_background else [0, 0, 0]
59
+ background = torch.tensor(bg_color, dtype=torch.float32, device="cuda")
60
+
61
+ iter_start = torch.cuda.Event(enable_timing = True)
62
+ iter_end = torch.cuda.Event(enable_timing = True)
63
+
64
+ viewpoint_stack = None
65
+ ema_loss_for_log = 0.0
66
+ progress_bar = tqdm(range(first_iter, opt.iterations), desc="Training progress")
67
+ first_iter += 1
68
+
69
+ warm_iter = 1000
70
+
71
+ start = perf_counter()
72
+ for iteration in range(first_iter, opt.iterations + 1):
73
+ # if network_gui.conn == None:
74
+ # network_gui.try_connect()
75
+ # while network_gui.conn != None:
76
+ # try:
77
+ # net_image_bytes = None
78
+ # custom_cam, do_training, pipe.convert_SHs_python, pipe.compute_cov3D_python, keep_alive, scaling_modifer = network_gui.receive()
79
+ # if custom_cam != None:
80
+ # net_image = render(custom_cam, gaussians, pipe, background, scaling_modifer)["render"]
81
+ # net_image_bytes = memoryview((torch.clamp(net_image, min=0, max=1.0) * 255).byte().permute(1, 2, 0).contiguous().cpu().numpy())
82
+ # network_gui.send(net_image_bytes, dataset.source_path)
83
+ # if do_training and ((iteration < int(opt.iterations)) or not keep_alive):
84
+ # break
85
+ # except Exception as e:
86
+ # network_gui.conn = None
87
+
88
+ iter_start.record()
89
+
90
+ if iteration > warm_iter:
91
+ if iteration == warm_iter+1:
92
+ gaussians.pc_feat.requires_grad_(False)
93
+ gaussians.setup_rendering_learning_rate()
94
+ gaussians.update_learning_rate(iteration - warm_iter)
95
+ else:
96
+ gaussians.update_warm_start_learning_rate(iteration)
97
+
98
+ if args.optim_pose==False:
99
+ gaussians.P.requires_grad_(False)
100
+
101
+ # (DISABLED) Every 1000 its we increase the levels of SH up to a maximum degree
102
+ # if iteration % 1000 == 0:
103
+ # gaussians.oneupSHdegree()
104
+
105
+ # Pick a random Camera
106
+ if not viewpoint_stack:
107
+ viewpoint_stack = scene.getTrainCameras().copy()
108
+ viewpoint_cam = viewpoint_stack.pop(randint(0, len(viewpoint_stack)-1))
109
+ pose = gaussians.get_RT(viewpoint_cam.uid)
110
+
111
+ # Render
112
+ if (iteration - 1) == debug_from:
113
+ pipe.debug = True
114
+
115
+ bg = torch.rand((3), device="cuda") if opt.random_background else background
116
+
117
+ gaussians.inference()
118
+
119
+ pretrained_loss_dict = {
120
+ 'xyz': l1_loss(gaussians._xyz, gaussians.param_init['xyz']),
121
+ # 'f_dc': l1_loss(gaussians._features_dc, gaussians.param_init['f_dc']),
122
+ # 'f_rest': l1_loss(gaussians._features_rest, gaussians.param_init['f_rest']),
123
+ 'opacity': l1_loss(gaussians._opacity, gaussians.param_init['opacity']),
124
+ 'scaling': l1_loss(gaussians._scaling, gaussians.param_init['scaling']),
125
+ 'rotation': l1_loss(gaussians._rotation, gaussians.param_init['rotation']),
126
+ # 'pose': l1_loss(gaussians.P, gaussians.param_init['pose']),
127
+ # 'focal': l1_loss(gaussians._focal_params, gaussians.param_init['focal']),
128
+ # 'pc_feat':l1_loss(gaussians.pc_feat, gaussians.param_init['pc_feat']),
129
+ }
130
+
131
+ if iteration <= warm_iter:
132
+ loss = sum(loss for key, loss in pretrained_loss_dict.items() if key in gs_params_group['head'])
133
+ Ll1 = torch.tensor(0)
134
+
135
+ if iteration > warm_iter:
136
+ render_pkg = render_gsplat(viewpoint_cam, gaussians, pipe, bg, camera_pose=pose)
137
+ image, viewspace_point_tensor, visibility_filter, radii = render_pkg["render"], render_pkg["viewspace_points"], render_pkg["visibility_filter"], render_pkg["radii"]
138
+
139
+ # Loss
140
+ gt_image = viewpoint_cam.original_image.cuda()
141
+ Ll1 = l1_loss(image, gt_image)
142
+ loss = (1.0 - opt.lambda_dssim) * Ll1 + opt.lambda_dssim * (1.0 - ssim(image, gt_image))
143
+
144
+ # if feat_type in ['iuv', 'iuvrgb']:
145
+ # # Add scaling regularization for 'iuv' and 'iuvrgb' features
146
+ # # Prevents their gaussians scale from becoming too large to cause CUDA out of memory
147
+ # loss += l1_loss(gaussians._scaling, gaussians.param_init['scaling']) * 0.1
148
+
149
+ loss.backward()
150
+ iter_end.record()
151
+
152
+ with torch.no_grad():
153
+
154
+ # Progress bar
155
+ ema_loss_for_log = 0.4 * loss.item() + 0.6 * ema_loss_for_log
156
+ if iteration % 10 == 0:
157
+ progress_bar.set_postfix({"Loss": f"{ema_loss_for_log:.{7}f}"})
158
+ progress_bar.update(10)
159
+ if iteration == opt.iterations:
160
+ progress_bar.close()
161
+
162
+ # Log and save
163
+ # training_report(tb_writer, iteration, Ll1, loss, l1_loss, iter_start.elapsed_time(iter_end), testing_iterations, scene, render_gsplat, (pipe, background), pretrained_loss_dict)
164
+ if (iteration in saving_iterations):
165
+ print("\n[ITER {}] Saving Gaussians".format(iteration))
166
+ scene.save(iteration)
167
+ save_pose(scene.model_path + 'pose' + f"/pose_{iteration}.npy", gaussians.P, train_cams_init)
168
+
169
+ # (DISABLED) Densification
170
+ # if iteration < opt.densify_until_iter:
171
+ # Keep track of max radii in image-space for pruning
172
+ # gaussians.max_radii2D[visibility_filter] = torch.max(gaussians.max_radii2D[visibility_filter], radii[visibility_filter])
173
+ # gaussians.add_densification_stats(viewspace_point_tensor, visibility_filter)
174
+
175
+ # if iteration > opt.densify_from_iter and iteration % opt.densification_interval == 0:
176
+ # size_threshold = 20 if iteration > opt.opacity_reset_interval else None
177
+ # gaussians.densify_and_prune(opt.densify_grad_threshold, 0.005, scene.cameras_extent, size_threshold)
178
+
179
+ # if iteration % opt.opacity_reset_interval == 0 or (dataset.white_background and iteration == opt.densify_from_iter):
180
+ # gaussians.reset_opacity()
181
+
182
+ # Optimizer step
183
+ if iteration < opt.iterations:
184
+ gaussians.optimizer.step()
185
+ gaussians.optimizer.zero_grad(set_to_none = True)
186
+
187
+ # if (iteration in checkpoint_iterations):
188
+ # print("\n[ITER {}] Saving Checkpoint".format(iteration))
189
+ # torch.save((gaussians.capture(), iteration), scene.model_path + "/chkpnt" + str(iteration) + ".pth")
190
+
191
+ end = perf_counter()
192
+ train_time = end - start
193
+
194
+ # Log & save operations above are commented out when measuring pure training time.
195
+ # train_time = np.array(train_time)
196
+ # print("total_test_time_epoch: ", 1)
197
+ # print("train_time_mean: ", train_time.mean())
198
+ # print("train_time_median: ", np.median(train_time))
199
+
200
+
201
+ if __name__ == "__main__":
202
+ # Set up command line argument parser
203
+ parser = ArgumentParser(description="Training script parameters")
204
+ lp = ModelParams(parser)
205
+ op = OptimizationParams(parser)
206
+ pp = PipelineParams(parser)
207
+ parser.add_argument('--ip', type=str, default="127.0.0.1")
208
+ parser.add_argument('--port', type=int, default=6009)
209
+ parser.add_argument('--debug_from', type=int, default=-1)
210
+ parser.add_argument('--detect_anomaly', action='store_true', default=False)
211
+ parser.add_argument("--test_iterations", nargs="+", type=int,
212
+ default=[500, 800, 1000, 1500, 2000, 3000, 4000, 5000, 6000, 7_000, \
213
+ 8_000, 9_000, 10_000, 11_000, 12_000, 13_000, 14_000, 30_000])
214
+ parser.add_argument("--save_iterations", nargs="+", type=int, default=[])
215
+ parser.add_argument("--quiet", action="store_true")
216
+ parser.add_argument("--checkpoint_iterations", nargs="+", type=int, default=[])
217
+ parser.add_argument("--start_checkpoint", type=str, default = None)
218
+ parser.add_argument("--scene", type=str, default=None)
219
+ parser.add_argument("--n_views", type=int, default=None)
220
+ parser.add_argument("--get_video", action="store_true")
221
+ parser.add_argument("--optim_pose", action="store_true")
222
+ parser.add_argument("--feat_type", type=str, nargs='*', default=None, help="Feature type(s). Multiple types can be specified for combination.")
223
+ parser.add_argument("--method", type=str, default='dust3r', help="Method of Initialization, e.g., 'dust3r' or 'mast3r'")
224
+ parser.add_argument("--feat_dim", type=int, default=None, help="Feature dimension after PCA. If None, PCA is not applied.")
225
+ parser.add_argument("--model", type=str, default='G', help="Feat2GS model, 'G'='geometry'/'T'='texture'/'A'='all'")
226
+
227
+ args = parser.parse_args(sys.argv[1:])
228
+ args.save_iterations.append(args.iterations)
229
+
230
+ os.makedirs(args.model_path, exist_ok=True)
231
+
232
+ print("Optimizing " + args.model_path)
233
+
234
+ # Initialize system state (RNG)
235
+ # safe_state(args.quiet)
236
+
237
+ # Start GUI server, configure and run training
238
+ # network_gui.init(args.ip, args.port)
239
+ torch.autograd.set_detect_anomaly(args.detect_anomaly)
240
+ training(lp.extract(args), op.extract(args), pp.extract(args), args.test_iterations, args.save_iterations, args.checkpoint_iterations, args.start_checkpoint, args.debug_from, args)
241
+
242
+ # All done
243
+ print("\nTraining complete.")
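Note on the schedule and outputs: for the first `warm_iter = 1000` iterations the loss is only the L1 distance between the predicted Gaussian parameters and their initialization (`pretrained_loss_dict`, restricted to the active parameter group); the photometric L1 + D-SSIM loss takes over afterwards. At every iteration listed in `--save_iterations`, `save_pose` writes the current training-view camera matrices to `<model_path>pose/pose_<iter>.npy`. A minimal sketch for reading such a file back (the path is hypothetical):

```python
import numpy as np

# save_pose() stacks one camera matrix per training view, ordered by COLMAP id,
# as produced by get_camera_from_tensor.
poses = np.load("output/scene/pose/pose_1000.npy")
print(poses.shape)  # one matrix per training view
```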
utils/camera_traj_config.py ADDED
@@ -0,0 +1,655 @@
1
+ trajectory_configs = {
2
+ 'Infer': {
3
+ 'cy': {
4
+ 'up': [-1, 1],
5
+ 'arc': {
6
+ 'degree': 180.0, # Default arc degree
7
+ },
8
+ 'spiral': {
9
+ 'zrate': 0.5, # Spiral rise rate
10
+ 'rots': 1, # Number of rotations
11
+ },
12
+ 'lemniscate': {
13
+ 'degree': 45.0, # Lemniscate curve angle
14
+ },
15
+ 'wander': {
16
+ 'max_disp': 48.0, # Maximum displacement
17
+ }
18
+ },
19
+ 'paper4': {
20
+ 'up': [-1, 1],
21
+ 'arc': {
22
+ 'degree': 200.0, # Default arc degree
23
+ },
24
+ 'spiral': {
25
+ 'zrate': 0.5, # Spiral rise rate
26
+ 'rots': 1, # Number of rotations
27
+ },
28
+ 'lemniscate': {
29
+ 'degree': 80.0, # Lemniscate curve angle
30
+ },
31
+ 'wander': {
32
+ 'max_disp': 100.0, # Maximum displacement
33
+ }
34
+ },
35
+ 'cy3': {
36
+ 'up': [-1, 1],
37
+ 'arc': {
38
+ 'degree': 180.0, # Default arc degree
39
+ },
40
+ 'spiral': {
41
+ 'zrate': 0.5, # Spiral rise rate
42
+ 'rots': 1, # Number of rotations
43
+ },
44
+ 'lemniscate': {
45
+ 'degree': 45.0, # Lemniscate curve angle
46
+ },
47
+ 'wander': {
48
+ 'max_disp': 48.0, # Maximum displacement
49
+ }
50
+ },
51
+ 'cy4': {
52
+ 'up': [-1, 1],
53
+ 'arc': {
54
+ 'degree': 180.0, # Default arc degree
55
+ },
56
+ 'spiral': {
57
+ 'zrate': 0.5, # Spiral rise rate
58
+ 'rots': 1, # Number of rotations
59
+ },
60
+ 'lemniscate': {
61
+ 'degree': 45.0, # Lemniscate curve angle
62
+ },
63
+ 'wander': {
64
+ 'max_disp': 48.0, # Maximum displacement
65
+ }
66
+ },
67
+ 'coffee': {
68
+ 'up': [-1, 0],
69
+ 'arc': {
70
+ 'degree': 180.0, # Default arc degree
71
+ },
72
+ 'spiral': {
73
+ 'zrate': 0.5, # Spiral rise rate
74
+ 'rots': 1, # Number of rotations
75
+ },
76
+ 'lemniscate': {
77
+ 'degree': 80.0, # Lemniscate curve angle
78
+ },
79
+ 'wander': {
80
+ 'max_disp': 48.0, # Maximum displacement
81
+ }
82
+ },
83
+
84
+ 'plant': {
85
+ 'up': [-1, 1],
86
+ 'arc': {
87
+ 'degree': 180.0, # Default arc degree
88
+ },
89
+ 'spiral': {
90
+ 'zrate': 0.5, # Spiral rise rate
91
+ 'rots': 1, # Number of rotations
92
+ },
93
+ 'lemniscate': {
94
+ 'degree': 80.0, # Lemniscate curve angle
95
+ },
96
+ 'wander': {
97
+ 'max_disp': 48.0, # Maximum displacement
98
+ }
99
+ },
100
+
101
+ 'desk': {
102
+ 'up': [-1, 1],
103
+ 'arc': {
104
+ 'degree': 180.0, # Default arc degree
105
+ },
106
+ },
107
+
108
+ 'bread': {
109
+ 'up': [-1, 0],
110
+ 'arc': {
111
+ 'degree': 180.0, # Default arc degree
112
+ },
113
+ 'spiral': {
114
+ 'zrate': 0.5, # Spiral rise rate
115
+ 'rots': 1, # Number of rotations
116
+ },
117
+ 'lemniscate': {
118
+ 'degree': 80.0, # Lemniscate curve angle
119
+ },
120
+ 'wander': {
121
+ 'max_disp': 48.0, # Maximum displacement
122
+ }
123
+ },
124
+
125
+ 'brunch': {
126
+ 'up': [1, 1],
127
+ 'arc': {
128
+ 'degree': 180.0, # Default arc degree
129
+ },
130
+ 'spiral': {
131
+ 'zrate': 0.5, # Spiral rise rate
132
+ 'rots': 1, # Number of rotations
133
+ },
134
+ 'lemniscate': {
135
+ 'degree': 80.0, # Lemniscate curve angle
136
+ },
137
+ 'wander': {
138
+ 'max_disp': 48.0, # Maximum displacement
139
+ }
140
+ },
141
+
142
+ 'stuff': {
143
+ 'up': [-1, 0],
144
+ 'arc': {
145
+ 'degree': 180.0, # Default arc degree
146
+ },
147
+ 'spiral': {
148
+ 'zrate': 0.5, # Spiral rise rate
149
+ 'rots': 1, # Number of rotations
150
+ },
151
+ 'lemniscate': {
152
+ 'degree': 80.0, # Lemniscate curve angle
153
+ },
154
+ 'wander': {
155
+ 'max_disp': 48.0, # Maximum displacement
156
+ }
157
+ },
158
+
159
+ 'xbox': {
160
+ 'up': [-1, 1],
161
+ 'arc': {
162
+ 'degree': 180.0, # Default arc degree
163
+ },
164
+ 'spiral': {
165
+ 'zrate': 0.5, # Spiral rise rate
166
+ 'rots': 1, # Number of rotations
167
+ },
168
+ 'lemniscate': {
169
+ 'degree': 80.0, # Lemniscate curve angle
170
+ },
171
+ 'wander': {
172
+ 'max_disp': 48.0, # Maximum displacement
173
+ }
174
+ },
175
+
176
+ 'plushies': {
177
+ 'up': [-1, 1],
178
+ 'arc': {
179
+ 'degree': 120.0, # Default arc degree
180
+ },
181
+ 'spiral': {
182
+ 'zrate': 0.5, # Spiral rise rate
183
+ 'rots': 1, # Number of rotations
184
+ },
185
+ 'lemniscate': {
186
+ 'degree': 60.0, # Lemniscate curve angle
187
+ },
188
+ 'wander': {
189
+ 'max_disp': 48.0, # Maximum displacement
190
+ }
191
+ },
192
+
193
+ 'erhai': {
194
+ 'up': [-1, 1],
195
+ 'arc': {
196
+ 'degree': 180.0, # Default arc degree
197
+ },
198
+ 'lemniscate': {
199
+ 'degree': 80.0, # Lemniscate curve angle
200
+ },
201
+ 'wander': {
202
+ 'max_disp': 200.0, # Maximum displacement
203
+ }
204
+ },
205
+ 'cy_crop1': {
206
+ 'up': [-1, 0],
207
+ 'lemniscate': {
208
+ 'degree': 80.0, # Lemniscate curve angle
209
+ },
210
+ 'wander': {
211
+ 'max_disp': 60.0, # Maximum displacement
212
+ }
213
+ },
214
+
215
+ 'cy_crop': {
216
+ 'up': [-1, 1],
217
+ 'lemniscate': {
218
+ 'degree': 60.0, # Lemniscate curve angle
219
+ },
220
+ 'wander': {
221
+ 'max_disp': 48.0, # Maximum displacement
222
+ }
223
+ },
224
+
225
+ 'paper': {
226
+ 'up': [-1, 1],
227
+ 'arc': {
228
+ 'degree': 240.0, # Default arc degree
229
+ },
230
+ 'spiral': {
231
+ 'zrate': 0.5, # Spiral rise rate
232
+ 'rots': 1, # Number of rotations
233
+ },
234
+ 'lemniscate': {
235
+ 'degree': 60.0, # Lemniscate curve angle
236
+ },
237
+ 'wander': {
238
+ 'max_disp': 48.0, # Maximum displacement
239
+ }
240
+ },
241
+
242
+ 'house': {
243
+ 'up': [-1, 1],
244
+ 'arc': {
245
+ 'degree': 240.0, # Default arc degree
246
+ },
247
+ 'spiral': {
248
+ 'zrate': 0.5, # Spiral rise rate
249
+ 'rots': 1, # Number of rotations
250
+ },
251
+ 'lemniscate': {
252
+ 'degree': 60.0, # Lemniscate curve angle
253
+ },
254
+ },
255
+
256
+ 'home': {
257
+ 'up': [1, 0],
258
+ 'arc': {
259
+ 'degree': 90.0, # Default arc degree
260
+ },
261
+ 'spiral': {
262
+ 'zrate': 0.5, # Spiral rise rate
263
+ 'rots': 1, # Number of rotations
264
+ },
265
+ 'lemniscate': {
266
+ 'degree': 80.0, # Lemniscate curve angle
267
+ }
268
+ },
269
+
270
+ 'paper3': {
271
+ 'up': [1, 0],
272
+ 'arc': {
273
+ 'degree': 180.0, # Default arc degree
274
+ },
275
+ 'spiral': {
276
+ 'zrate': 0.5, # Spiral rise rate
277
+ 'rots': 1, # Number of rotations
278
+ },
279
+ 'lemniscate': {
280
+ 'degree': 80.0, # Lemniscate curve angle
281
+ },
282
+ 'wander': {
283
+ 'max_disp': 48.0, # Maximum displacement
284
+ }
285
+ },
286
+
287
+ 'castle': {
288
+ 'up': [-1, 1],
289
+ 'spiral': {
290
+ 'zrate': 2, # Spiral rise rate
291
+ 'rots': 2, # Number of rotations
292
+ },
293
+ },
294
+
295
+ 'hogwarts': {
296
+ 'up': [-1, 1],
297
+ 'spiral': {
298
+ 'zrate': 0.5, # Spiral rise rate
299
+ 'rots': 1, # Number of rotations
300
+ },
301
+ 'wander': {
302
+ 'max_disp': 100.0, # Maximum displacement
303
+ }
304
+ },
305
+ },
306
+ 'Tanks': {
307
+ 'Auditorium': {
308
+ 'up': [-1, 1],
309
+ 'arc': {
310
+ 'degree': 180.0, # Default arc degree
311
+ },
312
+ 'spiral': {
313
+ 'zrate': 0.5, # Spiral rise rate
314
+ 'rots': 1, # Number of rotations
315
+ },
316
+ 'lemniscate': {
317
+ 'degree': 30.0, # Lemniscate curve angle
318
+ },
319
+ 'wander': {
320
+ 'max_disp': 80.0, # Maximum displacement
321
+ }
322
+ },
323
+ 'Caterpillar': {
324
+ 'up': [-1, 1],
325
+ 'arc': {
326
+ 'degree': 240.0, # Default arc degree
327
+ },
328
+ 'spiral': {
329
+ 'zrate': 0.5, # Spiral rise rate
330
+ 'rots': 1, # Number of rotations
331
+ },
332
+ 'lemniscate': {
333
+ 'degree': 60.0, # Lemniscate curve angle
334
+ },
335
+ 'wander': {
336
+ 'max_disp': 48.0, # Maximum displacement
337
+ }
338
+ },
339
+ 'Family': {
340
+ 'up': [-1, 1],
341
+ 'arc': {
342
+ 'degree': 180.0, # Default arc degree
343
+ },
344
+ 'spiral': {
345
+ 'zrate': 0.5, # Spiral rise rate
346
+ 'rots': 1, # Number of rotations
347
+ },
348
+ 'lemniscate': {
349
+ 'degree': 60.0, # Lemniscate curve angle
350
+ },
351
+ 'wander': {
352
+ 'max_disp': 48.0, # Maximum displacement
353
+ }
354
+ },
355
+ 'Ignatius': {
356
+ 'up': [-1, 1],
357
+ 'arc': {
358
+ 'degree': 330.0, # Default arc degree
359
+ },
360
+ 'spiral': {
361
+ 'zrate': 0.5, # Spiral rise rate
362
+ 'rots': 1, # Number of rotations
363
+ },
364
+ 'lemniscate': {
365
+ 'degree': 80.0, # Lemniscate curve angle
366
+ },
367
+ 'wander': {
368
+ 'max_disp': 48.0, # Maximum displacement
369
+ }
370
+ },
371
+ 'Train': {
372
+ 'up': [-1, 1],
373
+ 'arc': {
374
+ 'degree': 180.0, # Default arc degree
375
+ },
376
+ 'spiral': {
377
+ 'zrate': 0.5, # Spiral rise rate
378
+ 'rots': 1, # Number of rotations
379
+ },
380
+ 'lemniscate': {
381
+ 'degree': 60.0, # Lemniscate curve angle
382
+ },
383
+ 'wander': {
384
+ 'max_disp': 48.0, # Maximum displacement
385
+ }
386
+ },
387
+
388
+ },
389
+ 'DL3DV': {
390
+ 'Center': {
391
+ 'up': [-1, 1],
392
+ 'arc': {
393
+ 'degree': 180.0, # Default arc degree
394
+ },
395
+ 'spiral': {
396
+ 'zrate': 0.5, # Spiral rise rate
397
+ 'rots': 1, # Number of rotations
398
+ },
399
+ 'lemniscate': {
400
+ 'degree': 60.0, # Lemniscate curve angle
401
+ },
402
+ 'wander': {
403
+ 'max_disp': 48.0, # Maximum displacement
404
+ }
405
+ },
406
+ 'Electrical': {
407
+ 'up': [-1, 1],
408
+ 'arc': {
409
+ 'degree': 180.0, # Default arc degree
410
+ },
411
+ 'spiral': {
412
+ 'zrate': 0.5, # Spiral rise rate
413
+ 'rots': 1, # Number of rotations
414
+ },
415
+ 'lemniscate': {
416
+ 'degree': 60.0, # Lemniscate curve angle
417
+ },
418
+ 'wander': {
419
+ 'max_disp': 48.0, # Maximum displacement
420
+ }
421
+ },
422
+ 'Museum': {
423
+ 'up': [-1, 1],
424
+ 'arc': {
425
+ 'degree': 180.0, # Default arc degree
426
+ },
427
+ 'spiral': {
428
+ 'zrate': 0.5, # Spiral rise rate
429
+ 'rots': 1, # Number of rotations
430
+ },
431
+ 'lemniscate': {
432
+ 'degree': 60.0, # Lemniscate curve angle
433
+ },
434
+ 'wander': {
435
+ 'max_disp': 48.0, # Maximum displacement
436
+ }
437
+ },
438
+ 'Supermarket2': {
439
+ 'up': [-1, 1],
440
+ 'arc': {
441
+ 'degree': 180.0, # Default arc degree
442
+ },
443
+ 'spiral': {
444
+ 'zrate': 0.5, # Spiral rise rate
445
+ 'rots': 1, # Number of rotations
446
+ },
447
+ 'lemniscate': {
448
+ 'degree': 60.0, # Lemniscate curve angle
449
+ },
450
+ 'wander': {
451
+ 'max_disp': 48.0, # Maximum displacement
452
+ }
453
+ },
454
+ 'Temple': {
455
+ 'up': [-1, 1],
456
+ 'arc': {
457
+ 'degree': 180.0, # Default arc degree
458
+ },
459
+ 'spiral': {
460
+ 'zrate': 0.5, # Spiral rise rate
461
+ 'rots': 1, # Number of rotations
462
+ },
463
+ 'lemniscate': {
464
+ 'degree': 60.0, # Lemniscate curve angle
465
+ },
466
+ 'wander': {
467
+ 'max_disp': 48.0, # Maximum displacement
468
+ }
469
+ },
470
+
471
+ },
472
+ 'MipNeRF360': {
473
+ 'garden': {
474
+ 'up': [-1, 1],
475
+ 'arc': {
476
+ 'degree': 270.0, # Default arc degree
477
+ },
478
+ 'spiral': {
479
+ 'zrate': 0.5, # Spiral rise rate
480
+ 'rots': 1, # Number of rotations
481
+ },
482
+ 'lemniscate': {
483
+ 'degree': 80.0, # Lemniscate curve angle
484
+ },
485
+ 'wander': {
486
+ 'max_disp': 48.0, # Maximum displacement
487
+ }
488
+ },
489
+ 'kitchen': {
490
+ 'up': [-1, 1],
491
+ 'arc': {
492
+ 'degree': 180.0, # Default arc degree
493
+ },
494
+ 'spiral': {
495
+ 'zrate': 0.5, # Spiral rise rate
496
+ 'rots': 1, # Number of rotations
497
+ },
498
+ 'lemniscate': {
499
+ 'degree': 80.0, # Lemniscate curve angle
500
+ },
501
+ 'wander': {
502
+ 'max_disp': 48.0, # Maximum displacement
503
+ }
504
+ },
505
+ 'room': {
506
+ 'up': [-1, 1],
507
+ 'arc': {
508
+ 'degree': 180.0, # Default arc degree
509
+ },
510
+ 'spiral': {
511
+ 'zrate': 0.5, # Spiral rise rate
512
+ 'rots': 1, # Number of rotations
513
+ },
514
+ 'lemniscate': {
515
+ 'degree': 60.0, # Lemniscate curve angle
516
+ },
517
+ 'wander': {
518
+ 'max_disp': 48.0, # Maximum displacement
519
+ }
520
+ },
521
+ },
522
+ 'MVimgNet': {
523
+ 'bench': {
524
+ 'up': [-1, 1],
525
+ 'arc': {
526
+ 'degree': 180.0, # Default arc degree
527
+ },
528
+ 'spiral': {
529
+ 'zrate': 0.5, # Spiral rise rate
530
+ 'rots': 1, # Number of rotations
531
+ },
532
+ 'lemniscate': {
533
+ 'degree': 80.0, # Lemniscate curve angle
534
+ },
535
+ 'wander': {
536
+ 'max_disp': 48.0, # Maximum displacement
537
+ }
538
+ },
539
+ 'car': {
540
+ 'up': [-1, 1],
541
+ 'arc': {
542
+ 'degree': 180.0, # Default arc degree
543
+ },
544
+ 'spiral': {
545
+ 'zrate': 0.5, # Spiral rise rate
546
+ 'rots': 1, # Number of rotations
547
+ },
548
+ 'lemniscate': {
549
+ 'degree': 60.0, # Lemniscate curve angle
550
+ },
551
+ 'wander': {
552
+ 'max_disp': 48.0, # Maximum displacement
553
+ }
554
+ },
555
+ 'suv': {
556
+ 'up': [-1, 1],
557
+ 'arc': {
558
+ 'degree': 180.0, # Default arc degree
559
+ },
560
+ 'spiral': {
561
+ 'zrate': 0.5, # Spiral rise rate
562
+ 'rots': 1, # Number of rotations
563
+ },
564
+ 'lemniscate': {
565
+ 'degree': 60.0, # Lemniscate curve angle
566
+ },
567
+ 'wander': {
568
+ 'max_disp': 48.0, # Maximum displacement
569
+ }
570
+ },
571
+
572
+ },
573
+ 'LLFF': {
574
+ 'fortress': {
575
+ 'up': [-1, 1],
576
+ 'arc': {
577
+ 'degree': 180.0, # Default arc degree
578
+ },
579
+ 'spiral': {
580
+ 'zrate': 0.5, # Spiral rise rate
581
+ 'rots': 1, # Number of rotations
582
+ },
583
+ 'lemniscate': {
584
+ 'degree': 30.0, # Lemniscate curve angle
585
+ },
586
+ 'wander': {
587
+ 'max_disp': 48.0, # Maximum displacement
588
+ }
589
+ },
590
+ 'horns': {
591
+ 'up': [-1, 1],
592
+ 'arc': {
593
+ 'degree': 180.0, # Default arc degree
594
+ },
595
+ 'spiral': {
596
+ 'zrate': 0.5, # Spiral rise rate
597
+ 'rots': 1, # Number of rotations
598
+ },
599
+ 'lemniscate': {
600
+ 'degree': 60.0, # Lemniscate curve angle
601
+ },
602
+ 'wander': {
603
+ 'max_disp': 48.0, # Maximum displacement
604
+ }
605
+ },
606
+ 'orchids': {
607
+ 'up': [-1, 1],
608
+ 'arc': {
609
+ 'degree': 180.0, # Default arc degree
610
+ },
611
+ 'spiral': {
612
+ 'zrate': 0.5, # Spiral rise rate
613
+ 'rots': 1, # Number of rotations
614
+ },
615
+ 'lemniscate': {
616
+ 'degree': 30.0, # Lemniscate curve angle
617
+ },
618
+ 'wander': {
619
+ 'max_disp': 48.0, # Maximum displacement
620
+ }
621
+ },
622
+ 'room': {
623
+ 'up': [-1, 1],
624
+ 'arc': {
625
+ 'degree': 180.0, # Default arc degree
626
+ },
627
+ 'spiral': {
628
+ 'zrate': 0.5, # Spiral rise rate
629
+ 'rots': 1, # Number of rotations
630
+ },
631
+ 'lemniscate': {
632
+ 'degree': 30.0, # Lemniscate curve angle
633
+ },
634
+ 'wander': {
635
+ 'max_disp': 48.0, # Maximum displacement
636
+ }
637
+ },
638
+ 'trex': {
639
+ 'up': [-1, 1],
640
+ 'arc': {
641
+ 'degree': 180.0, # Default arc degree
642
+ },
643
+ 'spiral': {
644
+ 'zrate': 1, # Spiral rise rate
645
+ 'rots': 1, # Number of rotations
646
+ },
647
+ 'lemniscate': {
648
+ 'degree': 30.0, # Lemniscate curve angle
649
+ },
650
+ 'wander': {
651
+ 'max_disp': 48.0, # Maximum displacement
652
+ }
653
+ },
654
+ },
655
+ }
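The dictionary is keyed by dataset and then by scene; each scene entry picks the `up` setting and optional per-trajectory parameters (`arc`, `spiral`, `lemniscate`, `wander`). A minimal lookup sketch (the fallback values are illustrative assumptions, not defaults defined in this file):

```python
from utils.camera_traj_config import trajectory_configs

scene_cfg = trajectory_configs['Tanks']['Family']
up = scene_cfg['up']                                          # e.g. [-1, 1]
arc_degree = scene_cfg.get('arc', {}).get('degree', 180.0)    # assumed fallback
spiral_cfg = scene_cfg.get('spiral', {})                      # {'zrate': 0.5, 'rots': 1}
print(up, arc_degree, spiral_cfg)
```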
utils/camera_utils.py ADDED
@@ -0,0 +1,481 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ from scene.cameras import Camera
13
+ import numpy as np
14
+ from utils.general_utils import PILtoTorch
15
+ from utils.graphics_utils import fov2focal, getWorld2View2
16
+ import scipy
17
+ import matplotlib.pyplot as plt
18
+ from scipy.special import softmax
19
+ from typing import NamedTuple, List
20
+
21
+ WARNED = False
22
+
23
+ class CameraInfo(NamedTuple):
24
+ uid: int
25
+ R: np.array
26
+ T: np.array
27
+ FovY: np.array
28
+ FovX: np.array
29
+ image: np.array
30
+ image_path: str
31
+ image_name: str
32
+ width: int
33
+ height: int
34
+
35
+
36
+ def loadCam(args, id, cam_info, resolution_scale):
37
+ orig_w, orig_h = cam_info.image.size
38
+
39
+ if args.resolution in [1, 2, 4, 8]:
40
+ resolution = round(orig_w/(resolution_scale * args.resolution)), round(orig_h/(resolution_scale * args.resolution))
41
+ else: # should be a type that converts to float
42
+ if args.resolution == -1:
43
+ if orig_w > 1600:
44
+ global WARNED
45
+ if not WARNED:
46
+ print("[ INFO ] Encountered quite large input images (>1.6K pixels width), rescaling to 1.6K.\n "
47
+ "If this is not desired, please explicitly specify '--resolution/-r' as 1")
48
+ WARNED = True
49
+ global_down = orig_w / 1600
50
+ else:
51
+ global_down = 1
52
+ else:
53
+ global_down = orig_w / args.resolution
54
+
55
+ scale = float(global_down) * float(resolution_scale)
56
+ resolution = (int(orig_w / scale), int(orig_h / scale))
57
+
58
+ resized_image_rgb = PILtoTorch(cam_info.image, resolution)
59
+
60
+ gt_image = resized_image_rgb[:3, ...]
61
+ loaded_mask = None
62
+
63
+ if resized_image_rgb.shape[1] == 4:
64
+ loaded_mask = resized_image_rgb[3:4, ...]
65
+
66
+ return Camera(colmap_id=cam_info.uid, R=cam_info.R, T=cam_info.T,
67
+ FoVx=cam_info.FovX, FoVy=cam_info.FovY,
68
+ image=gt_image, gt_alpha_mask=loaded_mask,
69
+ image_name=cam_info.image_name, uid=id, data_device=args.data_device)
70
+
71
+
72
+ def cameraList_from_camInfos(cam_infos, resolution_scale, args):
73
+ camera_list = []
74
+
75
+ for id, c in enumerate(cam_infos):
76
+ camera_list.append(loadCam(args, id, c, resolution_scale))
77
+
78
+ return camera_list
79
+
80
+
81
+ def camera_to_JSON(id, camera : Camera):
82
+ Rt = np.zeros((4, 4))
83
+ Rt[:3, :3] = camera.R.transpose()
84
+ Rt[:3, 3] = camera.T
85
+ Rt[3, 3] = 1.0
86
+
87
+ W2C = np.linalg.inv(Rt)
88
+ pos = W2C[:3, 3]
89
+ rot = W2C[:3, :3]
90
+ serializable_array_2d = [x.tolist() for x in rot]
91
+ camera_entry = {
92
+ 'id' : id,
93
+ 'img_name' : camera.image_name,
94
+ 'width' : camera.width,
95
+ 'height' : camera.height,
96
+ 'position': pos.tolist(),
97
+ 'rotation': serializable_array_2d,
98
+ 'fy' : fov2focal(camera.FovY, camera.height),
99
+ 'fx' : fov2focal(camera.FovX, camera.width)
100
+ }
101
+ return camera_entry
102
+
103
+
104
+ def transform_poses_pca(poses):
105
+ """Transforms poses so principal components lie on XYZ axes.
106
+
107
+ Args:
108
+ poses: a (N, 3, 4) array containing the cameras' camera to world transforms.
109
+
110
+ Returns:
111
+ A tuple (poses, transform, scale_factor): the transformed poses, the applied
112
+ camera_to_world transform, and the normalization scale factor.
113
+ """
114
+ t = poses[:, :3, 3]
115
+ t_mean = t.mean(axis=0)
116
+ t = t - t_mean
117
+
118
+ eigval, eigvec = np.linalg.eig(t.T @ t)
119
+ # Sort eigenvectors in order of largest to smallest eigenvalue.
120
+ inds = np.argsort(eigval)[::-1]
121
+ eigvec = eigvec[:, inds]
122
+ rot = eigvec.T
123
+ if np.linalg.det(rot) < 0:
124
+ rot = np.diag(np.array([1, 1, -1])) @ rot
125
+
126
+ transform = np.concatenate([rot, rot @ -t_mean[:, None]], -1)
127
+ poses_recentered = unpad_poses(transform @ pad_poses(poses))
128
+ transform = np.concatenate([transform, np.eye(4)[3:]], axis=0)
129
+
130
+ # Flip coordinate system if z component of y-axis is negative
131
+ if poses_recentered.mean(axis=0)[2, 1] < 0:
132
+ poses_recentered = np.diag(np.array([1, -1, -1])) @ poses_recentered
133
+ transform = np.diag(np.array([1, -1, -1, 1])) @ transform
134
+
135
+ # Just make sure it sits in the [-1, 1]^3 cube
136
+ scale_factor = 1. / np.max(np.abs(poses_recentered[:, :3, 3]))
137
+ poses_recentered[:, :3, 3] *= scale_factor
138
+ # transform = np.diag(np.array([scale_factor] * 3 + [1])) @ transform
139
+
140
+ return poses_recentered, transform, scale_factor
141
+
142
+ def generate_interpolated_path(poses, n_interp, spline_degree=5,
143
+ smoothness=.03, rot_weight=.1):
144
+ """Creates a smooth spline path between input keyframe camera poses.
145
+
146
+ Spline is calculated with poses in format (position, lookat-point, up-point).
147
+
148
+ Args:
149
+ poses: (n, 3, 4) array of input pose keyframes.
150
+ n_interp: returned path will have n_interp * (n - 1) total poses.
151
+ spline_degree: polynomial degree of B-spline.
152
+ smoothness: parameter for spline smoothing, 0 forces exact interpolation.
153
+ rot_weight: relative weighting of rotation/translation in spline solve.
154
+
155
+ Returns:
156
+ Array of new camera poses with shape (n_interp * (n - 1), 3, 4).
157
+ """
158
+
159
+ def poses_to_points(poses, dist):
160
+ """Converts from pose matrices to (position, lookat, up) format."""
161
+ pos = poses[:, :3, -1]
162
+ lookat = poses[:, :3, -1] - dist * poses[:, :3, 2]
163
+ up = poses[:, :3, -1] + dist * poses[:, :3, 1]
164
+ return np.stack([pos, lookat, up], 1)
165
+
166
+ def points_to_poses(points):
167
+ """Converts from (position, lookat, up) format to pose matrices."""
168
+ return np.array([viewmatrix(p - l, u - p, p) for p, l, u in points])
169
+
170
+ def interp(points, n, k, s):
171
+ """Runs multidimensional B-spline interpolation on the input points."""
172
+ sh = points.shape
173
+ pts = np.reshape(points, (sh[0], -1))
174
+ k = min(k, sh[0] - 1)
175
+ tck, _ = scipy.interpolate.splprep(pts.T, k=k, s=s)
176
+ u = np.linspace(0, 1, n, endpoint=False)
177
+ new_points = np.array(scipy.interpolate.splev(u, tck))
178
+ new_points = np.reshape(new_points.T, (n, sh[1], sh[2]))
179
+ return new_points
180
+
181
+ ### Additional operation
182
+ # inter_poses = []
183
+ # for pose in poses:
184
+ # tmp_pose = np.eye(4)
185
+ # tmp_pose[:3] = np.concatenate([pose.R.T, pose.T[:, None]], 1)
186
+ # tmp_pose = np.linalg.inv(tmp_pose)
187
+ # tmp_pose[:, 1:3] *= -1
188
+ # inter_poses.append(tmp_pose)
189
+ # inter_poses = np.stack(inter_poses, 0)
190
+ # poses, transform = transform_poses_pca(inter_poses)
191
+
192
+ points = poses_to_points(poses, dist=rot_weight)
193
+ new_points = interp(points,
194
+ n_interp * (points.shape[0] - 1),
195
+ k=spline_degree,
196
+ s=smoothness)
197
+ return points_to_poses(new_points)
198
+
199
+ def viewmatrix(lookdir, up, position):
200
+ """Construct lookat view matrix."""
201
+ vec2 = normalize(lookdir)
202
+ vec0 = normalize(np.cross(up, vec2))
203
+ vec1 = normalize(np.cross(vec2, vec0))
204
+ m = np.stack([vec0, vec1, vec2, position], axis=1)
205
+ return m
206
+
207
+ def normalize(x):
208
+ """Normalization helper function."""
209
+ return x / np.linalg.norm(x)
210
+
211
+ def pad_poses(p):
212
+ """Pad [..., 3, 4] pose matrices with a homogeneous bottom row [0,0,0,1]."""
213
+ bottom = np.broadcast_to([0, 0, 0, 1.], p[..., :1, :4].shape)
214
+ return np.concatenate([p[..., :3, :4], bottom], axis=-2)
215
+
216
+
217
+ def unpad_poses(p):
218
+ """Remove the homogeneous bottom row from [..., 4, 4] pose matrices."""
219
+ return p[..., :3, :4]
220
+
221
+ def invert_transform_poses_pca(poses_recentered, transform, scale_factor):
222
+ poses_recentered[:, :3, 3] /= scale_factor
223
+ transform_inv = np.linalg.inv(transform)
224
+ poses_original = unpad_poses(transform_inv @ pad_poses(poses_recentered))
225
+ return poses_original
226
+
227
+ def visualizer(camera_poses, colors, save_path="/mnt/data/1.png"):
228
+ fig = plt.figure()
229
+ ax = fig.add_subplot(111, projection="3d")
230
+
231
+ for pose, color in zip(camera_poses, colors):
232
+ rotation = pose[:3, :3]
233
+ translation = pose[:3, 3] # Corrected to use 3D translation component
234
+ camera_positions = np.einsum(
235
+ "...ij,...j->...i", np.linalg.inv(rotation), -translation
236
+ )
237
+
238
+ ax.scatter(
239
+ camera_positions[0],
240
+ camera_positions[1],
241
+ camera_positions[2],
242
+ c=color,
243
+ marker="o",
244
+ )
245
+
246
+ ax.set_xlabel("X")
247
+ ax.set_ylabel("Y")
248
+ ax.set_zlabel("Z")
249
+ ax.set_title("Camera Poses")
250
+
251
+ plt.savefig(save_path)
252
+ plt.close()
253
+
254
+ return save_path
255
+
256
+
257
+ def focus_point_fn(poses: np.ndarray) -> np.ndarray:
258
+ """Calculate nearest point to all focal axes in poses."""
259
+ directions, origins = poses[:, :3, 2:3], poses[:, :3, 3:4]
260
+ m = np.eye(3) - directions * np.transpose(directions, [0, 2, 1])
261
+ mt_m = np.transpose(m, [0, 2, 1]) @ m
262
+ focus_pt = np.linalg.inv(mt_m.mean(0)) @ (mt_m @ origins).mean(0)[:, 0]
263
+ return focus_pt
264
+
265
+ def interp(x, xp, fp):
266
+ # Flatten the input arrays
267
+ x_flat = x.reshape(-1, x.shape[-1])
268
+ xp_flat = xp.reshape(-1, xp.shape[-1])
269
+ fp_flat = fp.reshape(-1, fp.shape[-1])
270
+
271
+ # Perform interpolation for each set of flattened arrays
272
+ ret_flat = np.array([np.interp(xf, xpf, fpf) for xf, xpf, fpf in zip(x_flat, xp_flat, fp_flat)])
273
+
274
+ # Reshape the result to match the input shape
275
+ ret = ret_flat.reshape(x.shape)
276
+ return ret
277
+
278
+ def sorted_interp(x, xp, fp):
279
+ # Identify the location in `xp` that corresponds to each `x`.
280
+ # The final `True` index in `mask` is the start of the matching interval.
281
+ mask = x[..., None, :] >= xp[..., :, None]
282
+
283
+ def find_interval(x):
284
+ # Grab the value where `mask` switches from True to False, and vice versa.
285
+ # This approach takes advantage of the fact that `x` is sorted.
286
+ x0 = np.max(np.where(mask, x[..., None], x[..., :1, None]), -2)
287
+ x1 = np.min(np.where(~mask, x[..., None], x[..., -1:, None]), -2)
288
+ return x0, x1
289
+
290
+ fp0, fp1 = find_interval(fp)
291
+ xp0, xp1 = find_interval(xp)
292
+ with np.errstate(divide='ignore', invalid='ignore'):
293
+ offset = np.clip(np.nan_to_num((x - xp0) / (xp1 - xp0), nan=0.0), 0, 1)
294
+ ret = fp0 + offset * (fp1 - fp0)
295
+ return ret
296
+
297
+ def integrate_weights(w):
298
+ """Compute the cumulative sum of w, assuming all weight vectors sum to 1.
299
+
300
+ The output's size on the last dimension is one greater than that of the input,
301
+ because we're computing the integral corresponding to the endpoints of a step
302
+ function, not the integral of the interior/bin values.
303
+
304
+ Args:
305
+ w: Tensor, which will be integrated along the last axis. This is assumed to
306
+ sum to 1 along the last axis, and this function will (silently) break if
307
+ that is not the case.
308
+
309
+ Returns:
310
+ cw0: Tensor, the integral of w, where cw0[..., 0] = 0 and cw0[..., -1] = 1
311
+ """
312
+ cw = np.minimum(1, np.cumsum(w[..., :-1], axis=-1))
313
+ shape = cw.shape[:-1] + (1,)
314
+ # Ensure that the CDF starts with exactly 0 and ends with exactly 1.
315
+ cw0 = np.concatenate([np.zeros(shape), cw, np.ones(shape)], axis=-1)
316
+ return cw0
317
+
318
+ def invert_cdf(u, t, w_logits, use_gpu_resampling=False):
319
+ """Invert the CDF defined by (t, w) at the points specified by u in [0, 1)."""
320
+ # Compute the PDF and CDF for each weight vector.
321
+ w = softmax(w_logits, axis=-1)
322
+ cw = integrate_weights(w)
323
+
324
+ # Interpolate into the inverse CDF.
325
+ interp_fn = interp if use_gpu_resampling else sorted_interp # Assuming these are defined using NumPy
326
+ t_new = interp_fn(u, cw, t)
327
+ return t_new
328
+
329
+ def sample(rng,
330
+ t,
331
+ w_logits,
332
+ num_samples,
333
+ single_jitter=False,
334
+ deterministic_center=False,
335
+ use_gpu_resampling=False):
336
+ """Piecewise-Constant PDF sampling from a step function.
337
+
338
+ Args:
339
+ rng: random number generator (or None for `linspace` sampling).
340
+ t: [..., num_bins + 1], bin endpoint coordinates (must be sorted)
341
+ w_logits: [..., num_bins], logits corresponding to bin weights
342
+ num_samples: int, the number of samples.
343
+ single_jitter: bool, if True, jitter every sample along each ray by the same
344
+ amount in the inverse CDF. Otherwise, jitter each sample independently.
345
+ deterministic_center: bool, if False, when `rng` is None return samples that
346
+ linspace the entire PDF. If True, skip the front and back of the linspace
347
+ so that the centers of each PDF interval are returned.
348
+ use_gpu_resampling: bool, If True this resamples the rays based on a
349
+ "gather" instruction, which is fast on GPUs but slow on TPUs. If False,
350
+ this resamples the rays based on brute-force searches, which is fast on
351
+ TPUs, but slow on GPUs.
352
+
353
+ Returns:
354
+ t_samples: np.ndarray(float32), [batch_size, num_samples].
355
+ """
356
+ eps = np.finfo(np.float32).eps
357
+
358
+ # Draw uniform samples.
359
+ if rng is None:
360
+ # Match the behavior of jax.random.uniform() by spanning [0, 1-eps].
361
+ if deterministic_center:
362
+ pad = 1 / (2 * num_samples)
363
+ u = np.linspace(pad, 1. - pad - eps, num_samples)
364
+ else:
365
+ u = np.linspace(0, 1. - eps, num_samples)
366
+ u = np.broadcast_to(u, t.shape[:-1] + (num_samples,))
367
+ else:
368
+ # `u` is in [0, 1) --- it can be zero, but it can never be 1.
369
+ u_max = eps + (1 - eps) / num_samples
370
+ max_jitter = (1 - u_max) / (num_samples - 1) - eps
371
+ d = 1 if single_jitter else num_samples
372
+ u = (
373
+ np.linspace(0, 1 - u_max, num_samples) +
374
+ rng.uniform(size=t.shape[:-1] + (d,), high=max_jitter))
375
+
376
+ return invert_cdf(u, t, w_logits, use_gpu_resampling=use_gpu_resampling)
377
+
378
+
379
+ def generate_ellipse_path_from_poses(poses: np.ndarray,
380
+ n_frames: int = 120,
381
+ const_speed: bool = True,
382
+ z_variation: float = 0.,
383
+ z_phase: float = 0.) -> np.ndarray:
384
+ """Generate an elliptical render path based on the given poses."""
385
+ # Calculate the focal point for the path (cameras point toward this).
386
+ center = focus_point_fn(poses)
387
+ # Path height sits at z=0 (in middle of zero-mean capture pattern).
388
+ offset = np.array([center[0], center[1], 0])
389
+
390
+ # Calculate scaling for ellipse axes based on input camera positions.
391
+ sc = np.percentile(np.abs(poses[:, :3, 3] - offset), 100, axis=0)
392
+ # Use ellipse that is symmetric about the focal point in xy.
393
+ low = -sc + offset
394
+ high = sc + offset
395
+ # Optional height variation need not be symmetric
396
+ z_low = np.percentile((poses[:, :3, 3]), 0, axis=0)
397
+ z_high = np.percentile((poses[:, :3, 3]), 100, axis=0)
398
+
399
+ def get_positions(theta):
400
+ # Interpolate between bounds with trig functions to get ellipse in x-y.
401
+ # Optionally also interpolate in z to change camera height along path.
402
+ return np.stack([
403
+ low[0] + (high - low)[0] * (np.cos(theta) * .5 + .5),
404
+ low[1] + (high - low)[1] * (np.sin(theta) * .5 + .5),
405
+ z_variation * (z_low[2] + (z_high - z_low)[2] *
406
+ (np.cos(theta + 2 * np.pi * z_phase) * .5 + .5)),
407
+ ], -1)
408
+
409
+ theta = np.linspace(0, 2. * np.pi, n_frames + 1, endpoint=True)
410
+ positions = get_positions(theta)
411
+
412
+ if const_speed:
413
+ # Resample theta angles so that the velocity is closer to constant.
414
+ lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1)
415
+ theta = sample(None, theta, np.log(lengths), n_frames + 1)
416
+ positions = get_positions(theta)
417
+
418
+ # Throw away duplicated last position.
419
+ positions = positions[:-1]
420
+
421
+ # Set path's up vector to axis closest to average of input pose up vectors.
422
+ avg_up = poses[:, :3, 1].mean(0)
423
+ avg_up = avg_up / np.linalg.norm(avg_up)
424
+ ind_up = np.argmax(np.abs(avg_up))
425
+ up = np.eye(3)[ind_up] * np.sign(avg_up[ind_up])
426
+
427
+ return np.stack([viewmatrix(p - center, up, p) for p in positions])
428
+
429
+ def generate_ellipse_path_from_camera_infos(
430
+ cam_infos,
431
+ n_frames,
432
+ const_speed=False,
433
+ z_variation=0.,
434
+ z_phase=0.
435
+ ):
436
+ print(f'Generating ellipse path from {len(cam_infos)} camera infos ...')
437
+ poses = np.array([np.linalg.inv(getWorld2View2(cam_info.R, cam_info.T))[:3, :4] for cam_info in cam_infos])
438
+ poses[:, :, 1:3] *= -1
439
+ poses, transform, scale_factor = transform_poses_pca(poses)
440
+ render_poses = generate_ellipse_path_from_poses(poses, n_frames, const_speed, z_variation, z_phase)
441
+ render_poses = invert_transform_poses_pca(render_poses, transform, scale_factor)
442
+ render_poses[:, :, 1:3] *= -1
443
+ ret_cam_infos = []
444
+ for uid, pose in enumerate(render_poses):
445
+ R = pose[:3, :3]
446
+ c2w = np.eye(4)
447
+ c2w[:3, :4] = pose
448
+ T = np.linalg.inv(c2w)[:3, 3]
449
+ cam_info = CameraInfo(
450
+ uid = uid,
451
+ R = R,
452
+ T = T,
453
+ FovY = cam_infos[0].FovY,
454
+ FovX = cam_infos[0].FovX,
455
+ # image = np.zeros_like(cam_infos[0].image),
456
+ image = cam_infos[0].image,
457
+ image_path = '',
458
+ image_name = f'{uid:05d}.png',
459
+ width = cam_infos[0].width,
460
+ height = cam_infos[0].height
461
+ )
462
+ ret_cam_infos.append(cam_info)
463
+ return ret_cam_infos
464
+
465
+ def generate_ellipse_path(
466
+ org_pose,
467
+ n_interp,
468
+ const_speed=False,
469
+ z_variation=0.,
470
+ z_phase=0.
471
+ ):
472
+ print(f'Generating ellipse path from {len(org_pose)} camera infos ...')
473
+ poses = np.array([np.linalg.inv(p)[:3, :4] for p in org_pose]) # w2c >>> c2w
474
+ poses[:, :, 1:3] *= -1
475
+ poses, transform, scale_factor = transform_poses_pca(poses)
476
+ render_poses = generate_ellipse_path_from_poses(poses, n_interp, const_speed, z_variation, z_phase)
477
+ render_poses = invert_transform_poses_pca(render_poses, transform, scale_factor)
478
+ render_poses[:, :, 1:3] *= -1 # c2w
479
+ return render_poses
480
+
481
+
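As its docstring above states, `generate_interpolated_path` fits a B-spline through keyframe camera-to-world poses and returns `n_interp * (n - 1)` interpolated poses of shape (3, 4). A minimal sketch with synthetic keyframes (the `look_at` helper and the ring of cameras are illustrative assumptions, not part of this module):

```python
import numpy as np
from utils.camera_utils import generate_interpolated_path

def look_at(position, target=np.zeros(3), world_up=np.array([0., 0., 1.])):
    # Illustrative camera-to-world builder: columns are the x, y, z axes and the position.
    z = target - position; z = z / np.linalg.norm(z)
    x = np.cross(world_up, z); x = x / np.linalg.norm(x)
    y = np.cross(z, x)
    return np.concatenate([np.stack([x, y, z], axis=1), position[:, None]], axis=1)

angles = np.linspace(0, np.pi, 5)
keyframes = np.stack([look_at(np.array([3 * np.cos(a), 3 * np.sin(a), 1.0])) for a in angles])
path = generate_interpolated_path(keyframes, n_interp=20, spline_degree=3)
print(path.shape)  # (20 * (len(keyframes) - 1), 3, 4)
```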
utils/dust3r_utils.py ADDED
@@ -0,0 +1,432 @@
1
+ import os
2
+ import torch
3
+ import cv2
4
+ import numpy as np
5
+ import PIL.Image
6
+ from PIL.ImageOps import exif_transpose
7
+ from plyfile import PlyData, PlyElement
8
+ import torchvision.transforms as tvf
9
+ import roma
10
+ import dust3r.cloud_opt.init_im_poses as init_fun
11
+ from dust3r.cloud_opt.base_opt import global_alignment_loop
12
+ from dust3r.utils.geometry import geotrf, inv, depthmap_to_absolute_camera_coordinates
13
+ from dust3r.cloud_opt.commons import edge_str
14
+ from dust3r.utils.image import _resize_pil_image, imread_cv2
15
+ import dust3r.datasets.utils.cropping as cropping
16
+ import torch.nn.functional as F
17
+
18
+ def get_known_poses(scene):
19
+ if scene.has_im_poses:
20
+ known_poses_msk = torch.tensor([not (p.requires_grad) for p in scene.im_poses])
21
+ known_poses = scene.get_im_poses()
22
+ return known_poses_msk.sum(), known_poses_msk, known_poses
23
+ else:
24
+ return 0, None, None
25
+
26
+ def init_from_pts3d(scene, pts3d, im_focals, im_poses):
27
+ # init poses
28
+ nkp, known_poses_msk, known_poses = get_known_poses(scene)
29
+ if nkp == 1:
30
+ raise NotImplementedError("Would be simpler to just align everything afterwards on the single known pose")
31
+ elif nkp > 1:
32
+ # global rigid SE3 alignment
33
+ s, R, T = init_fun.align_multiple_poses(im_poses[known_poses_msk], known_poses[known_poses_msk])
34
+ trf = init_fun.sRT_to_4x4(s, R, T, device=known_poses.device)
35
+
36
+ # rotate everything
37
+ im_poses = trf @ im_poses
38
+ im_poses[:, :3, :3] /= s # undo scaling on the rotation part
39
+ for img_pts3d in pts3d:
40
+ img_pts3d[:] = geotrf(trf, img_pts3d)
41
+
42
+ # set all pairwise poses
43
+ for e, (i, j) in enumerate(scene.edges):
44
+ i_j = edge_str(i, j)
45
+ # compute transform that goes from cam to world
46
+ s, R, T = init_fun.rigid_points_registration(scene.pred_i[i_j], pts3d[i], conf=scene.conf_i[i_j])
47
+ scene._set_pose(scene.pw_poses, e, R, T, scale=s)
48
+
49
+ # take into account the scale normalization
50
+ s_factor = scene.get_pw_norm_scale_factor()
51
+ im_poses[:, :3, 3] *= s_factor # apply downscaling factor
52
+ for img_pts3d in pts3d:
53
+ img_pts3d *= s_factor
54
+
55
+ # init all image poses
56
+ if scene.has_im_poses:
57
+ for i in range(scene.n_imgs):
58
+ cam2world = im_poses[i]
59
+ depth = geotrf(inv(cam2world), pts3d[i])[..., 2]
60
+ scene._set_depthmap(i, depth)
61
+ scene._set_pose(scene.im_poses, i, cam2world)
62
+ if im_focals[i] is not None:
63
+ scene._set_focal(i, im_focals[i])
64
+
65
+ if scene.verbose:
66
+ print(' init loss =', float(scene()))
67
+
68
+ @torch.no_grad()
69
+ def init_minimum_spanning_tree(scene, focal_avg=False, known_focal=None, **kw):
70
+ """ Init all camera poses (image-wise and pairwise poses) given
71
+ an initial set of pairwise estimations.
72
+ """
73
+ device = scene.device
74
+ pts3d, _, im_focals, im_poses = init_fun.minimum_spanning_tree(scene.imshapes, scene.edges,
75
+ scene.pred_i, scene.pred_j, scene.conf_i, scene.conf_j, scene.im_conf, scene.min_conf_thr,
76
+ device, has_im_poses=scene.has_im_poses, verbose=scene.verbose,
77
+ **kw)
78
+
79
+ if known_focal is not None:
80
+ repeat_focal = np.repeat(known_focal, len(im_focals))
81
+ for i in range(len(im_focals)):
82
+ im_focals[i] = known_focal
83
+ scene.preset_focal(known_focals=repeat_focal)
84
+ elif focal_avg:
85
+ im_focals_avg = np.array(im_focals).mean()
86
+ for i in range(len(im_focals)):
87
+ im_focals[i] = im_focals_avg
88
+ repeat_focal = np.array(im_focals)#.cpu().numpy()
89
+ scene.preset_focal(known_focals=repeat_focal)
90
+
91
+ return init_from_pts3d(scene, pts3d, im_focals, im_poses)
92
+
93
+ @torch.cuda.amp.autocast(enabled=False)
94
+ def compute_global_alignment(scene, init=None, niter_PnP=10, focal_avg=False, known_focal=None, **kw):
95
+ if init is None:
96
+ pass
97
+ elif init == 'msp' or init == 'mst':
98
+ init_minimum_spanning_tree(scene, niter_PnP=niter_PnP, focal_avg=focal_avg, known_focal=known_focal)
99
+ elif init == 'known_poses':
100
+ init_fun.init_from_known_poses(scene, min_conf_thr=scene.min_conf_thr,
101
+ niter_PnP=niter_PnP)
102
+ else:
103
+ raise ValueError(f'bad value for {init=}')
104
+
105
+ return global_alignment_loop(scene, **kw)
106
+
107
+
108
+
109
+ def load_images(folder_or_list, size, square_ok=False):
110
+ """ open and convert all images in a list or folder to proper input format for DUSt3R
111
+ """
112
+ if isinstance(folder_or_list, str):
113
+ print(f'>> Loading images from {folder_or_list}')
114
+ root, folder_content = folder_or_list, sorted(os.listdir(folder_or_list))
115
+
116
+ elif isinstance(folder_or_list, list):
117
+ print(f'>> Loading a list of {len(folder_or_list)} images')
118
+ root, folder_content = '', folder_or_list
119
+
120
+ else:
121
+ raise ValueError(f'bad {folder_or_list=} ({type(folder_or_list)})')
122
+
123
+ imgs = []
124
+ for path in folder_content:
125
+ if not path.endswith(('.jpg', '.jpeg', '.png', '.JPG')):
126
+ continue
127
+ img = exif_transpose(PIL.Image.open(os.path.join(root, path))).convert('RGB')
128
+ W1, H1 = img.size
129
+ if size == 224:
130
+ # resize short side to 224 (then crop)
131
+ img = _resize_pil_image(img, round(size * max(W1/H1, H1/W1)))
132
+ else:
133
+ # resize long side to 512
134
+ img = _resize_pil_image(img, size)
135
+ W, H = img.size
136
+ W2 = W//16*16
137
+ H2 = H//16*16
138
+ img = np.array(img)
139
+ img = cv2.resize(img, (W2,H2), interpolation=cv2.INTER_LINEAR)
140
+ img = PIL.Image.fromarray(img)
141
+
142
+ print(f' - adding {path} with resolution {W1}x{H1} --> {W2}x{H2}')
143
+ ImgNorm = tvf.Compose([tvf.ToTensor(), tvf.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
144
+ imgs.append(dict(img=ImgNorm(img)[None], true_shape=np.int32(
145
+ [img.size[::-1]]), idx=len(imgs), instance=str(len(imgs))))
146
+
147
+ assert imgs, 'no images found at '+root
148
+ print(f' (Found {len(imgs)} images)')
149
+ return imgs, (W1,H1)
150
+
151
+
152
+ def load_cam_mvsnet(file, interval_scale=1):
153
+ """ read camera txt file """
154
+ cam = np.zeros((2, 4, 4))
155
+ words = file.read().split()
156
+ # read extrinsic
157
+ for i in range(0, 4):
158
+ for j in range(0, 4):
159
+ extrinsic_index = 4 * i + j + 1
160
+ cam[0][i][j] = words[extrinsic_index]
161
+
162
+ # read intrinsic
163
+ for i in range(0, 3):
164
+ for j in range(0, 3):
165
+ intrinsic_index = 3 * i + j + 18
166
+ cam[1][i][j] = words[intrinsic_index]
167
+
168
+ if len(words) == 29:
169
+ cam[1][3][0] = words[27]
170
+ cam[1][3][1] = float(words[28]) * interval_scale
171
+ cam[1][3][2] = 192
172
+ cam[1][3][3] = cam[1][3][0] + cam[1][3][1] * cam[1][3][2]
173
+ elif len(words) == 30:
174
+ cam[1][3][0] = words[27]
175
+ cam[1][3][1] = float(words[28]) * interval_scale
176
+ cam[1][3][2] = words[29]
177
+ cam[1][3][3] = cam[1][3][0] + cam[1][3][1] * cam[1][3][2]
178
+ elif len(words) == 31:
179
+ cam[1][3][0] = words[27]
180
+ cam[1][3][1] = float(words[28]) * interval_scale
181
+ cam[1][3][2] = words[29]
182
+ cam[1][3][3] = words[30]
183
+ else:
184
+ cam[1][3][0] = 0
185
+ cam[1][3][1] = 0
186
+ cam[1][3][2] = 0
187
+ cam[1][3][3] = 0
188
+
189
+
190
+ extrinsic = cam[0].astype(np.float32)
191
+ intrinsic = cam[1].astype(np.float32)
192
+
193
+ return intrinsic, extrinsic
194
+
195
+
196
+ def _crop_resize_if_necessary(image, depthmap, intrinsics, resolution, rng=None, info=None):
197
+ """ This function:
198
+ - first downsizes the image with LANCZOS interpolation,
199
+ which is better than bilinear interpolation in terms of aliasing
200
+ """
201
+ if not isinstance(image, PIL.Image.Image):
202
+ image = PIL.Image.fromarray(image)
203
+
204
+ # downscale with lanczos interpolation so that image.size == resolution
205
+ # cropping centered on the principal point
206
+ W, H = image.size
207
+ cx, cy = intrinsics[:2, 2].round().astype(int)
208
+
209
+ # calculate min distance to margin
210
+ min_margin_x = min(cx, W-cx)
211
+ min_margin_y = min(cy, H-cy)
212
+ assert min_margin_x > W/5, f'Bad principal point in view={info}'
213
+ assert min_margin_y > H/5, f'Bad principal point in view={info}'
214
+
215
+ ## Center crop
216
+ # Crop on the principal point, make it always centered
217
+ # the new window will be a rectangle of size (2*min_margin_x, 2*min_margin_y) centered on (cx,cy)
218
+ l, t = cx - min_margin_x, cy - min_margin_y
219
+ r, b = cx + min_margin_x, cy + min_margin_y
220
+ crop_bbox = (l, t, r, b)
221
+ image, depthmap, intrinsics = cropping.crop_image_depthmap(image, depthmap, intrinsics, crop_bbox)
222
+
223
+ # transpose the resolution if necessary
224
+ W, H = image.size # new size
225
+ assert resolution[0] >= resolution[1]
226
+ if H > 1.1*W:
227
+ # image is portrait mode
228
+ resolution = resolution[::-1]
229
+ elif 0.9 < H/W < 1.1 and resolution[0] != resolution[1]:
230
+ # image is square, so we choose (portrait, landscape) randomly
231
+ if rng.integers(2):
232
+ resolution = resolution[::-1]
233
+
234
+ # high-quality Lanczos down-scaling
235
+ target_resolution = np.array(resolution)
236
+
237
+ ## Rescale with max factor, so one of width or height might be larger than target_resolution
238
+ image, depthmap, intrinsics = cropping.rescale_image_depthmap(image, depthmap, intrinsics, target_resolution)
239
+
240
+ # actual cropping (if necessary) with bilinear interpolation
241
+ intrinsics2 = cropping.camera_matrix_of_crop(intrinsics, image.size, resolution, offset_factor=0.5)
242
+ crop_bbox = cropping.bbox_from_intrinsics_in_out(intrinsics, intrinsics2, resolution)
243
+ image, depthmap, intrinsics2 = cropping.crop_image_depthmap(image, depthmap, intrinsics, crop_bbox)
244
+
245
+ return image, depthmap, intrinsics2
246
+
247
+
248
+ def load_images_dtu(folder_or_list, size, scene_folder):
249
+ """
250
+ Preprocessing DTU requires depth maps, camera parameters and masks.
251
+ We follow Splatt3R to compute valid_mask.
252
+ """
253
+ if isinstance(folder_or_list, str):
254
+ print(f'>> Loading images from {folder_or_list}')
255
+ root, folder_content = folder_or_list, sorted(os.listdir(folder_or_list))
256
+
257
+ elif isinstance(folder_or_list, list):
258
+ print(f'>> Loading a list of {len(folder_or_list)} images')
259
+ root = os.path.dirname(folder_or_list[0]) if folder_or_list else ''
260
+ folder_content = [os.path.basename(p) for p in folder_or_list]
261
+
262
+ else:
263
+ raise ValueError(f'bad {folder_or_list=} ({type(folder_or_list)})')
264
+
265
+ depth_root = os.path.join(scene_folder, 'depths')
266
+ mask_root = os.path.join(scene_folder, 'binary_masks')
267
+ cam_root = os.path.join(scene_folder, 'cams')
268
+
269
+ imgs = []
270
+ for path in folder_content:
271
+ if not path.endswith(('.jpg', '.jpeg', '.png', '.JPG')):
272
+ continue
273
+
274
+ impath = os.path.join(root, path)
275
+ depthpath = os.path.join(depth_root, path.replace('.jpg', '.npy'))
276
+ campath = os.path.join(cam_root, path.replace('.jpg', '_cam.txt'))
277
+ maskpath = os.path.join(mask_root, path.replace('.jpg', '.png'))
278
+
279
+ rgb_image = imread_cv2(impath)
280
+ H1, W1 = rgb_image.shape[:2]
281
+ depthmap = np.load(depthpath)
282
+ depthmap = np.nan_to_num(depthmap.astype(np.float32), nan=0.0)
283
+
284
+ mask = imread_cv2(maskpath, cv2.IMREAD_UNCHANGED)/255.0
285
+ mask = mask.astype(np.float32)
286
+
287
+ mask[mask>0.5] = 1.0
288
+ mask[mask<0.5] = 0.0
289
+
290
+ mask = cv2.resize(mask, (depthmap.shape[1], depthmap.shape[0]), interpolation=cv2.INTER_NEAREST)
291
+ kernel = np.ones((10, 10), np.uint8) # Define the erosion kernel
292
+ mask = cv2.erode(mask, kernel, iterations=1)
293
+ depthmap = depthmap * mask
294
+
295
+ cur_intrinsics, camera_pose = load_cam_mvsnet(open(campath, 'r'))
296
+ intrinsics = cur_intrinsics[:3, :3]
297
+ camera_pose = np.linalg.inv(camera_pose)
298
+
299
+ new_size = tuple(int(round(x*size/max(W1, H1))) for x in (W1, H1))
300
+ W, H = new_size
301
+ W2 = W//16*16
302
+ H2 = H//16*16
303
+
304
+ rgb_image, depthmap, intrinsics = _crop_resize_if_necessary(
305
+ rgb_image, depthmap, intrinsics, (W2, H2), info=impath)
306
+
307
+ print(f' - adding {path} with resolution {W1}x{H1} --> {W2}x{H2}')
308
+ ImgNorm = tvf.Compose([tvf.ToTensor(), tvf.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
309
+
310
+ img = dict(
311
+ img=ImgNorm(rgb_image)[None],
312
+ true_shape=np.int32([rgb_image.size[::-1]]),
313
+ idx=len(imgs),
314
+ instance=str(len(imgs)),
315
+ depthmap=depthmap,
316
+ camera_pose=camera_pose,
317
+ camera_intrinsics=intrinsics
318
+ )
319
+
320
+ pts3d, valid_mask = depthmap_to_absolute_camera_coordinates(**img)
321
+ img['pts3d'] = pts3d
322
+ img['valid_mask'] = valid_mask & np.isfinite(pts3d).all(axis=-1)
323
+
324
+ imgs.append(img)
325
+
326
+
327
+ assert imgs, 'no images found at ' + root
328
+ print(f' (Found {len(imgs)} images)')
329
+ return imgs, (W1,H1)
330
+
331
+
332
+ def storePly(path, xyz, rgb, feat=None):
333
+ # Define the dtype for the structured array
334
+ dtype = [('x', 'f4'), ('y', 'f4'), ('z', 'f4'),
335
+ ('nx', 'f4'), ('ny', 'f4'), ('nz', 'f4'),
336
+ ('red', 'u1'), ('green', 'u1'), ('blue', 'u1')]
337
+
338
+ if feat is not None:
339
+ for i in range(feat.shape[1]):
340
+ dtype.append((f'feat_{i}', 'f4'))
341
+
342
+ normals = np.zeros_like(xyz)
343
+
344
+ elements = np.empty(xyz.shape[0], dtype=dtype)
345
+ attributes = np.concatenate((xyz, normals, rgb), axis=1)
346
+
347
+ if feat is not None:
348
+ attributes = np.concatenate((attributes, feat), axis=1)
349
+
350
+ elements[:] = list(map(tuple, attributes))
351
+
352
+ # Create the PlyData object and write to file
353
+ vertex_element = PlyElement.describe(elements, 'vertex')
354
+ ply_data = PlyData([vertex_element])
355
+ ply_data.write(path)
356
+
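A minimal, illustrative sketch of calling storePly on synthetic data (assumes the plyfile package; the file name and feature dimension are placeholders):

```python
# Illustrative usage of storePly with a synthetic point cloud.
import numpy as np
from plyfile import PlyData  # for reading the result back

xyz = np.random.rand(100, 3).astype(np.float32)              # 100 random points
rgb = np.random.randint(0, 256, (100, 3)).astype(np.uint8)   # per-point colors
feat = np.random.rand(100, 16).astype(np.float32)            # optional per-point features

storePly("points3d.ply", xyz, rgb, feat)

# Confirm the vertex layout: x/y/z, normals, colors, feat_0 ... feat_15.
ply = PlyData.read("points3d.ply")
print(ply["vertex"].properties)
```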
357
+ def R_to_quaternion(R):
358
+ """
359
+ Convert a rotation matrix to a quaternion.
360
+
361
+ Parameters:
362
+ - R: A 3x3 numpy array representing a rotation matrix.
363
+
364
+ Returns:
365
+ - A numpy array representing the quaternion [w, x, y, z].
366
+ """
367
+ m00, m01, m02 = R[0, 0], R[0, 1], R[0, 2]
368
+ m10, m11, m12 = R[1, 0], R[1, 1], R[1, 2]
369
+ m20, m21, m22 = R[2, 0], R[2, 1], R[2, 2]
370
+ trace = m00 + m11 + m22
371
+
372
+ if trace > 0:
373
+ s = 0.5 / np.sqrt(trace + 1.0)
374
+ w = 0.25 / s
375
+ x = (m21 - m12) * s
376
+ y = (m02 - m20) * s
377
+ z = (m10 - m01) * s
378
+ elif (m00 > m11) and (m00 > m22):
379
+ s = np.sqrt(1.0 + m00 - m11 - m22) * 2
380
+ w = (m21 - m12) / s
381
+ x = 0.25 * s
382
+ y = (m01 + m10) / s
383
+ z = (m02 + m20) / s
384
+ elif m11 > m22:
385
+ s = np.sqrt(1.0 + m11 - m00 - m22) * 2
386
+ w = (m02 - m20) / s
387
+ x = (m01 + m10) / s
388
+ y = 0.25 * s
389
+ z = (m12 + m21) / s
390
+ else:
391
+ s = np.sqrt(1.0 + m22 - m00 - m11) * 2
392
+ w = (m10 - m01) / s
393
+ x = (m02 + m20) / s
394
+ y = (m12 + m21) / s
395
+ z = 0.25 * s
396
+
397
+ return np.array([w, x, y, z])
398
+
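A quick sanity check of the conversion (illustrative only): a 90-degree rotation about the z-axis should map to roughly [0.707, 0, 0, 0.707] in [w, x, y, z] order.

```python
# Sanity check for R_to_quaternion: 90-degree rotation about the z-axis.
import numpy as np

R_z90 = np.array([[0.0, -1.0, 0.0],
                  [1.0,  0.0, 0.0],
                  [0.0,  0.0, 1.0]])
q = R_to_quaternion(R_z90)
print(q)  # ~[0.7071, 0, 0, 0.7071] -> cos(45 deg), sin(45 deg) about z
```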
399
+ def save_colmap_cameras(ori_size, intrinsics, camera_file):
400
+ with open(camera_file, 'w') as f:
401
+ for i, K in enumerate(intrinsics, 1): # Starting index at 1
402
+ width, height = ori_size
403
+ scale_factor_x = width/2 / K[0, 2]
404
+ scale_factor_y = height/2 / K[1, 2]
405
+ # assert scale_factor_x==scale_factor_y, "scale factor is not same for x and y"
406
+ # print(f'scale factor is not same for x {scale_factor_x} and y {scale_factor_y}')
407
+ f.write(f"{i} PINHOLE {width} {height} {K[0, 0]*scale_factor_x} {K[1, 1]*scale_factor_x} {width/2} {height/2}\n") # scale focal
408
+ # f.write(f"{i} PINHOLE {width} {height} {K[0, 0]} {K[1, 1]} {K[0, 2]} {K[1, 2]}\n")
409
+
410
+ def save_colmap_images(poses, images_file, train_img_list):
411
+ with open(images_file, 'w') as f:
412
+ for i, pose in enumerate(poses, 1): # Starting index at 1
413
+ # breakpoint()
414
+ pose = np.linalg.inv(pose)
415
+ R = pose[:3, :3]
416
+ t = pose[:3, 3]
417
+ q = R_to_quaternion(R) # Convert rotation matrix to quaternion
418
+ f.write(f"{i} {q[0]} {q[1]} {q[2]} {q[3]} {t[0]} {t[1]} {t[2]} {i} {os.path.basename(train_img_list[i-1])}\n")
419
+ f.write(f"\n")
420
+
421
+
422
+ def round_python3(number):
423
+ rounded = round(number)
424
+ if abs(number - rounded) == 0.5:
425
+ return 2.0 * round(number / 2.0)
426
+ return rounded
427
+
428
+
429
+ def rigid_points_registration(pts1, pts2, conf=None):
430
+ R, T, s = roma.rigid_points_registration(
431
+ pts1.reshape(-1, 3), pts2.reshape(-1, 3), weights=conf, compute_scaling=True)
432
+ return s, R, T # return the scale together with the un-scaled (R, T)
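A small illustrative check of this wrapper (assuming the roma package, which the code above already depends on): build a point set by applying a known similarity transform and recover it.

```python
# Illustrative check for rigid_points_registration.
import torch
import roma

pts1 = torch.randn(1000, 3)
R_gt = roma.random_rotmat()               # random ground-truth rotation
t_gt = torch.tensor([0.5, -1.0, 2.0])
s_gt = 1.7
pts2 = s_gt * pts1 @ R_gt.T + t_gt        # apply scale, rotation, translation

s, R, T = rigid_points_registration(pts1, pts2)
print(float(s), s_gt)                     # recovered scale vs. ground truth (~1.7)
print(torch.dist(R, R_gt))                # should be close to 0
```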
utils/feat_utils.py ADDED
@@ -0,0 +1,827 @@
1
+ import os
2
+ import torch
3
+ import torchvision.transforms as tvf
4
+ import torch.nn.functional as F
5
+ import numpy as np
6
+
7
+ from dust3r.utils.device import to_numpy
8
+
9
+ from dust3r.inference import inference
10
+ from dust3r.model import AsymmetricCroCo3DStereo
11
+ from dust3r.cloud_opt import global_aligner, GlobalAlignerMode
12
+ from utils.dust3r_utils import compute_global_alignment
13
+
14
+ from mast3r.model import AsymmetricMASt3R
15
+ from mast3r.cloud_opt.sparse_ga import sparse_global_alignment
16
+ from mast3r.cloud_opt.tsdf_optimizer import TSDFPostProcess
17
+
18
+ from hydra.utils import instantiate
19
+ from omegaconf import OmegaConf
20
+
21
+
22
+ class TorchPCA(object):
23
+
24
+ def __init__(self, n_components):
25
+ self.n_components = n_components
26
+
27
+ def fit(self, X):
28
+ self.mean_ = X.mean(dim=0)
29
+ unbiased = X - self.mean_.unsqueeze(0)
30
+ U, S, V = torch.pca_lowrank(unbiased, q=self.n_components, center=False, niter=50)
31
+ self.components_ = V.T
32
+ self.singular_values_ = S
33
+ return self
34
+
35
+ def transform(self, X):
36
+ t0 = X - self.mean_.unsqueeze(0)
37
+ projected = t0 @ self.components_.T
38
+ return projected
39
+
40
+ def pca(stacked_feat, dim):
41
+ flattened_feats = []
42
+ for feat in stacked_feat:
43
+ H, W, C = feat.shape
44
+ feat = feat.reshape(H * W, C).detach()
45
+ flattened_feats.append(feat)
46
+ x = torch.cat(flattened_feats, dim=0)
47
+ fit_pca = TorchPCA(n_components=dim).fit(x)
48
+
49
+ projected_feats = []
50
+ for feat in stacked_feat:
51
+ H, W, C = feat.shape
52
+ feat = feat.reshape(H * W, C).detach()
53
+ x_red = fit_pca.transform(feat)
54
+ projected_feats.append(x_red.reshape(H, W, dim))
55
+ projected_feats = torch.stack(projected_feats)
56
+ return projected_feats
57
+
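A brief sketch of the PCA reduction above on synthetic per-pixel features (shapes are illustrative):

```python
# Illustrative use of pca(): reduce a stack of [H, W, C] feature maps to 9 channels.
import torch

stacked_feat = torch.randn(4, 32, 48, 768)   # e.g. 4 views, 32x48 patches, 768-dim features
reduced = pca(stacked_feat, dim=9)
print(reduced.shape)                          # torch.Size([4, 32, 48, 9])
```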
58
+
59
+ def upsampler(feature, upsampled_height, upsampled_width, max_chunk=None):
60
+ """
61
+ Upsample the feature tensor to the specified height and width.
62
+
63
+ Args:
64
+ - feature (torch.Tensor): The input tensor with size [B, H, W, C].
65
+ - upsampled_height (int): The target height after upsampling.
66
+ - upsampled_width (int): The target width after upsampling.
67
+
68
+ Returns:
69
+ - upsampled_feature (torch.Tensor): The upsampled tensor with size [B, upsampled_height, upsampled_width, C].
70
+ """
71
+ # Permute the tensor to [B, C, H, W] for interpolation
72
+ feature = feature.permute(0, 3, 1, 2)
73
+
74
+ # Perform the upsampling
75
+ if max_chunk:
76
+ upsampled_chunks = []
77
+
78
+ for i in range(0, len(feature), max_chunk):
79
+ chunk = feature[i:i+max_chunk]
80
+
81
+ upsampled_chunk = F.interpolate(chunk, size=(upsampled_height, upsampled_width), mode='bilinear', align_corners=False)
82
+ upsampled_chunks.append(upsampled_chunk)
83
+
84
+ upsampled_feature = torch.cat(upsampled_chunks, dim=0)
85
+ else:
86
+ upsampled_feature = F.interpolate(feature, size=(upsampled_height, upsampled_width), mode='bilinear', align_corners=False)
87
+
88
+ # Permute back to [B, H, W, C]
89
+ upsampled_feature = upsampled_feature.permute(0, 2, 3, 1)
90
+
91
+ return upsampled_feature
92
+
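And a matching sketch for upsampler(), which brings patch-level features back to image resolution (target size is illustrative):

```python
# Illustrative use of upsampler(): bilinearly resize [B, H, W, C] features to image size.
import torch

feats = torch.randn(4, 32, 48, 9)        # patch-level features
full = upsampler(feats, 512, 768)        # target image height and width
print(full.shape)                         # torch.Size([4, 512, 768, 9])
```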
93
+ def visualizer(features, images, save_dir, dim=9, feat_type=None, file_name=None):
94
+ """
95
+ Visualize features and corresponding images, and save the result.
96
+
97
+ Args:
98
+ features (torch.Tensor): Feature tensor with shape [B, H, W, C].
99
+ images (list): List of dictionaries containing images with keys 'img'. Each image tensor has shape [1, 3, H, W]
100
+ and values in the range [-1, 1].
101
+ save_dir (str): Directory to save the resulting visualization.
102
+ feat_type (list): List of feature types.
103
+ file_name (str): Name of the file to save.
104
+ """
105
+ import matplotlib
106
+ matplotlib.use('Agg')
107
+ from matplotlib import pyplot as plt
108
+ import torchvision.utils as vutils
109
+
110
+ assert features.dim() == 4, "Input tensor must have 4 dimensions (B, H, W, C)"
111
+
112
+ B, H, W, C = features.size()
113
+
114
+ features = features[..., dim-9:]
115
+ # Normalize the 3-dimensional feature to range [0, 1]
116
+ features_min = features.min(dim=0, keepdim=True).values.min(dim=1, keepdim=True).values.min(dim=2, keepdim=True).values
117
+ features_max = features.max(dim=0, keepdim=True).values.max(dim=1, keepdim=True).values.max(dim=2, keepdim=True).values
118
+ features = (features - features_min) / (features_max - features_min)
119
+
120
+ ##### Save individual feature maps
121
+ # # Create subdirectory for feature visualizations
122
+ # feat_dir = os.path.join(save_dir, 'feature_maps')
123
+ # if feat_type:
124
+ # feat_dir = os.path.join(feat_dir, '-'.join(feat_type))
125
+ # os.makedirs(feat_dir, exist_ok=True)
126
+
127
+ # for i in range(B):
128
+ # # Extract and save the feature map (channels 3-6)
129
+ # feat_map = features[i, :, :, 3:6].permute(2, 0, 1) # [3, H, W]
130
+ # save_path = os.path.join(feat_dir, f'{i}_feat.png')
131
+ # vutils.save_image(feat_map, save_path, normalize=False)
132
+
133
+ # return feat_dir
134
+
135
+ ##### Save feature maps in a single image
136
+ # Set the size of the plot
137
+ fig, axes = plt.subplots(B, 4, figsize=(W*4*0.01, H*B*0.01))
138
+
139
+ for i in range(B):
140
+ # Get the original image
141
+ image_tensor = images[i]['img']
142
+ assert image_tensor.dim() == 4 and image_tensor.size(0) == 1 and image_tensor.size(1) == 3, "Image tensor must have shape [1, 3, H, W]"
143
+ image = image_tensor.squeeze(0).permute(1, 2, 0).numpy() # Convert to (H, W, 3)
144
+
145
+ # Scale image values from [-1, 1] to [0, 1]
146
+ image = (image + 1) / 2
147
+
148
+ ax = axes[i, 0] if B > 1 else axes[0]
149
+ ax.imshow(image)
150
+ ax.axis('off')
151
+
152
+ # Visualize each 3-dimensional feature
153
+ for j in range(3):
154
+ ax = axes[i, j+1] if B > 1 else axes[j+1]
155
+ if j * 3 < min(C, dim): # Check if the feature channels are available
156
+ feature_to_plot = features[i, :, :, j*3:(j+1)*3].cpu().numpy()
157
+ ax.imshow(feature_to_plot)
158
+ else: # Plot white image if features are not available
159
+ ax.imshow(torch.ones(H, W, 3).numpy())
160
+ ax.axis('off')
161
+
162
+ # Reduce margins and spaces between images
163
+ plt.subplots_adjust(wspace=0.005, hspace=0.005, left=0.01, right=0.99, top=0.99, bottom=0.01)
164
+
165
+ # Save the entire plot
166
+ if file_name is None:
167
+ file_name = f'feat_dim{dim-9}-{dim}'
168
+ if feat_type:
169
+ feat_type_str = '-'.join(feat_type)
170
+ file_name = file_name + f'_{feat_type_str}'
171
+ save_path = os.path.join(save_dir, file_name + '.png')
172
+ plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
173
+ plt.close()
174
+
175
+ return save_path
176
+
177
+
178
+ #### Uncomment this block to visualize feature maps as in Feat2GS's teaser
179
+ # import matplotlib.colors as mcolors
180
+ # from PIL import Image
181
+
182
+ # morandi_colors = [
183
+ # '#8AA2A9', '#C98474', '#F2D0A9', '#8D9F87', '#A7A7A7', '#D98E73', '#B24C33', '#5E7460', '#4A6B8A', '#B2CBC2',
184
+ # '#BBC990', '#6B859E', '#B45342', '#4E0000', '#3D0000', '#2C0000', '#1B0000', '#0A0000', '#DCAC99', '#6F936B',
185
+ # '#EBA062', '#FED273', '#9A8EB4', '#706052', '#E9E5E5', '#C4D8D2', '#F2CBBD', '#F6F9F1', '#C5CABC', '#A3968B',
186
+ # '#5C6974', '#BE7B6E', '#C67752', '#C18830', '#8C956C', '#CAC691', '#819992', '#4D797F', '#95AEB2', '#B6C4CF',
187
+ # '#84291C', '#B9551F', '#A96400', '#374B6C', '#C8B493', '#677D5D', '#9882A2', '#2D5F53', '#D2A0AC', '#658D9A',
188
+ # '#9A7265', '#EFE1D2', '#DDD8D1', '#D2C6BC', '#E3C9BC', '#B8AB9F', '#D8BEA4', '#E0D4C5', '#B8B8B6', '#D0CAC3',
189
+ # '#9AA8B5', '#BBC9B9', '#E3E8D8', '#ADB3A4', '#C5C9BB', '#A3968B', '#C2A995', '#EDE1D1', '#EDE8E1', '#EDEBE1',
190
+ # '#CFCFCC', '#AABAC6', '#DCDEE0', '#EAE5E7', '#B7AB9F', '#F7EFE3', '#DED8CF', '#ABCA99', '#C5CD8F', '#959491',
191
+ # '#FFE481', '#C18E99', '#B07C86', '#9F6A73', '#8E5860', '#DEAD44', '#CD9B31', '#BC891E', '#AB770B', '#9A6500',
192
+ # '#778144', '#666F31', '#555D1E', '#444B0B', '#333900', '#67587B', '#564668', '#684563', '#573350', '#684550',
193
+ # '#57333D', '#46212A', '#350F17', '#240004',
194
+ # ]
195
+
196
+ # def rgb_to_hsv(rgb):
197
+ # rgb = rgb.clamp(0, 1)
198
+
199
+ # cmax, cmax_idx = rgb.max(dim=-1)
200
+ # cmin = rgb.min(dim=-1).values
201
+
202
+ # diff = cmax - cmin
203
+
204
+ # h = torch.zeros_like(cmax)
205
+ # h[cmax_idx == 0] = (((rgb[..., 1] - rgb[..., 2]) / diff) % 6)[cmax_idx == 0]
206
+ # h[cmax_idx == 1] = (((rgb[..., 2] - rgb[..., 0]) / diff) + 2)[cmax_idx == 1]
207
+ # h[cmax_idx == 2] = (((rgb[..., 0] - rgb[..., 1]) / diff) + 4)[cmax_idx == 2]
208
+ # h[diff == 0] = 0 # If cmax == cmin
209
+ # h = h / 6
210
+
211
+ # s = torch.zeros_like(cmax)
212
+ # s[cmax != 0] = (diff / cmax)[cmax != 0]
213
+
214
+ # v = cmax
215
+
216
+ # return torch.stack([h, s, v], dim=-1)
217
+
218
+ # def hsv_to_rgb(hsv):
219
+ # h, s, v = hsv[..., 0], hsv[..., 1], hsv[..., 2]
220
+
221
+ # c = v * s
222
+ # x = c * (1 - torch.abs((h * 6) % 2 - 1))
223
+ # m = v - c
224
+
225
+ # rgb = torch.zeros_like(hsv)
226
+ # mask = (h < 1/6)
227
+ # rgb[mask] = torch.stack([c[mask], x[mask], torch.zeros_like(x[mask])], dim=-1)
228
+ # mask = (1/6 <= h) & (h < 2/6)
229
+ # rgb[mask] = torch.stack([x[mask], c[mask], torch.zeros_like(x[mask])], dim=-1)
230
+ # mask = (2/6 <= h) & (h < 3/6)
231
+ # rgb[mask] = torch.stack([torch.zeros_like(x[mask]), c[mask], x[mask]], dim=-1)
232
+ # mask = (3/6 <= h) & (h < 4/6)
233
+ # rgb[mask] = torch.stack([torch.zeros_like(x[mask]), x[mask], c[mask]], dim=-1)
234
+ # mask = (4/6 <= h) & (h < 5/6)
235
+ # rgb[mask] = torch.stack([x[mask], torch.zeros_like(x[mask]), c[mask]], dim=-1)
236
+ # mask = (5/6 <= h)
237
+ # rgb[mask] = torch.stack([c[mask], torch.zeros_like(x[mask]), x[mask]], dim=-1)
238
+
239
+ # return rgb + m.unsqueeze(-1)
240
+
241
+ # def interpolate_colors(colors, n_colors):
242
+ # # Convert colors to RGB tensor
243
+ # rgb_colors = torch.tensor([mcolors.to_rgb(color) for color in colors])
244
+
245
+ # # Convert RGB to HSV
246
+ # hsv_colors = rgb_to_hsv(rgb_colors)
247
+
248
+ # # Sort by hue
249
+ # sorted_indices = torch.argsort(hsv_colors[:, 0])
250
+ # sorted_hsv_colors = hsv_colors[sorted_indices]
251
+
252
+ # # Create interpolation indices
253
+ # indices = torch.linspace(0, len(sorted_hsv_colors) - 1, n_colors)
254
+
255
+ # # Perform interpolation
256
+ # interpolated_hsv = torch.stack([
257
+ # torch.lerp(sorted_hsv_colors[int(i)],
258
+ # sorted_hsv_colors[min(int(i) + 1, len(sorted_hsv_colors) - 1)],
259
+ # i - int(i))
260
+ # for i in indices
261
+ # ])
262
+
263
+ # # Convert interpolated result back to RGB
264
+ # interpolated_rgb = hsv_to_rgb(interpolated_hsv)
265
+
266
+ # return interpolated_rgb
267
+
268
+
269
+ # def project_to_morandi(features, morandi_colors):
270
+ # features_flat = features.reshape(-1, 3)
271
+ # distances = torch.cdist(features_flat, morandi_colors)
272
+
273
+ # # Get the indices of the closest colors
274
+ # closest_color_indices = torch.argmin(distances, dim=1)
275
+
276
+ # # Use the closest Morandi colors directly
277
+ # features_morandi = morandi_colors[closest_color_indices]
278
+
279
+ # features_morandi = features_morandi.reshape(features.shape)
280
+ # return features_morandi
281
+
282
+
283
+ # def smooth_color_transform(features, morandi_colors, smoothness=0.1):
284
+ # features_flat = features.reshape(-1, 3)
285
+ # distances = torch.cdist(features_flat, morandi_colors)
286
+
287
+ # # Calculate weights
288
+ # weights = torch.exp(-distances / smoothness)
289
+ # weights = weights / weights.sum(dim=1, keepdim=True)
290
+
291
+ # # Weighted average
292
+ # features_morandi = torch.matmul(weights, morandi_colors)
293
+
294
+ # features_morandi = features_morandi.reshape(features.shape)
295
+ # return features_morandi
296
+
297
+ # def histogram_matching(source, template):
298
+ # """
299
+ # Match the histogram of the source tensor to that of the template tensor.
300
+
301
+ # :param source: Source tensor with shape [B, H, W, 3]
302
+ # :param template: Template tensor with shape [N, 3], where N is the number of colors
303
+ # :return: Source tensor after histogram matching
304
+ # """
305
+ # def match_cumulative_cdf(source, template):
306
+ # src_values, src_indices, src_counts = torch.unique(source, return_inverse=True, return_counts=True)
307
+ # tmpl_values, tmpl_counts = torch.unique(template, return_counts=True)
308
+
309
+ # src_quantiles = torch.cumsum(src_counts.float(), 0) / source.numel()
310
+ # tmpl_quantiles = torch.cumsum(tmpl_counts.float(), 0) / template.numel()
311
+
312
+ # idx = torch.searchsorted(tmpl_quantiles, src_quantiles)
313
+ # idx = torch.clamp(idx, 1, len(tmpl_quantiles)-1)
314
+
315
+ # slope = (tmpl_values[idx] - tmpl_values[idx-1]) / (tmpl_quantiles[idx] - tmpl_quantiles[idx-1])
316
+ # interp_a_values = torch.lerp(tmpl_values[idx-1], tmpl_values[idx],
317
+ # (src_quantiles - tmpl_quantiles[idx-1]) * slope)
318
+
319
+ # return interp_a_values[src_indices].reshape(source.shape)
320
+
321
+ # matched = torch.stack([match_cumulative_cdf(source[..., i], template[:, i]) for i in range(3)], dim=-1)
322
+ # return matched
323
+
324
+ # def process_features(features):
325
+ # device = features.device
326
+
327
+ # n_colors = 1024
328
+ # morandi_colors_tensor = interpolate_colors(morandi_colors, n_colors).to(device)
329
+ # # morandi_colors_tensor = torch.tensor([mcolors.to_rgb(color) for color in morandi_colors]).to(device)
330
+
331
+ # # features_morandi = project_to_morandi(features, morandi_colors_tensor)
332
+ # # features_morandi = histogram_matching(features, morandi_colors_tensor)
333
+ # features_morandi = smooth_color_transform(features, morandi_colors_tensor, smoothness=0.05)
334
+
335
+ # return features_morandi.cpu().numpy()
336
+
337
+ # def visualizer(features, images, save_dir, dim=9, feat_type=None, file_name=None):
338
+ # import matplotlib
339
+ # matplotlib.use('Agg')
340
+
341
+ # import matplotlib.pyplot as plt
342
+ # import numpy as np
343
+ # import os
344
+
345
+ # assert features.dim() == 4, "Input tensor must have 4 dimensions (B, H, W, C)"
346
+
347
+ # B, H, W, C = features.size()
348
+
349
+ # # Ensure features have at least 3 channels for RGB visualization
350
+ # assert C >= 3, "Features must have at least 3 channels for RGB visualization"
351
+ # features = features[..., :3]
352
+
353
+ # # Normalize features to [0, 1] range
354
+ # features_min = features.min(dim=0, keepdim=True).values.min(dim=1, keepdim=True).values.min(dim=2, keepdim=True).values
355
+ # features_max = features.max(dim=0, keepdim=True).values.max(dim=1, keepdim=True).values.max(dim=2, keepdim=True).values
356
+ # features = (features - features_min) / (features_max - features_min)
357
+
358
+ # features_processed = process_features(features)
359
+
360
+ # # Create the directory structure
361
+ # vis_dir = os.path.join(save_dir, 'vis')
362
+
363
+ # if feat_type:
364
+ # feat_type_str = '-'.join(feat_type)
365
+ # vis_dir = os.path.join(vis_dir, feat_type_str)
366
+ # os.makedirs(vis_dir, exist_ok=True)
367
+
368
+ # # Save individual images for each feature map
369
+ # for i in range(B):
370
+ # if file_name is None:
371
+ # file_name = 'feat_morandi'
372
+ # save_path = os.path.join(vis_dir, f'{file_name}_{i}.png')
373
+
374
+ # # Convert to uint8 and save directly
375
+ # img = Image.fromarray((features_processed[i] * 255).astype(np.uint8))
376
+ # img.save(save_path)
377
+
378
+ # print(f"Feature maps have been saved in the directory: {vis_dir}")
379
+ # return vis_dir
380
+
381
+ def mv_visualizer(features, images, save_dir, dim=9, feat_type=None, file_name=None):
382
+ """
383
+ Visualize features and corresponding images, and save the result. (For MASt3R decoder or head features)
384
+ """
385
+ import matplotlib
386
+ matplotlib.use('Agg')
387
+ from matplotlib import pyplot as plt
388
+ import os
389
+
390
+ B, H, W, _ = features.size()
391
+ features = features[..., dim-9:]
392
+
393
+ # Normalize the 3-dimensional feature to range [0, 1]
394
+ features_min = features.min(dim=0, keepdim=True).values.min(dim=1, keepdim=True).values.min(dim=2, keepdim=True).values
395
+ features_max = features.max(dim=0, keepdim=True).values.max(dim=1, keepdim=True).values.max(dim=2, keepdim=True).values
396
+ features = (features - features_min) / (features_max - features_min)
397
+
398
+ rows = (B + 1) // 2 # Adjust rows for odd B
399
+ fig, axes = plt.subplots(rows, 8, figsize=(W*8*0.01, H*rows*0.01))
400
+
401
+ for i in range(B//2):
402
+ # Odd row: image and features
403
+ image = (images[2*i]['img'].squeeze(0).permute(1, 2, 0).numpy() + 1) / 2
404
+ axes[i, 0].imshow(image)
405
+ axes[i, 0].axis('off')
406
+ for j in range(3):
407
+ axes[i, j+1].imshow(features[2*i, :, :, j*3:(j+1)*3].cpu().numpy())
408
+ axes[i, j+1].axis('off')
409
+
410
+ # Even row: image and features
411
+ if 2*i + 1 < B:
412
+ image = (images[2*i + 1]['img'].squeeze(0).permute(1, 2, 0).numpy() + 1) / 2
413
+ axes[i, 4].imshow(image)
414
+ axes[i, 4].axis('off')
415
+ for j in range(3):
416
+ axes[i, j+5].imshow(features[2*i + 1, :, :, j*3:(j+1)*3].cpu().numpy())
417
+ axes[i, j+5].axis('off')
418
+
419
+ # Handle last row if B is odd
420
+ if B % 2 != 0:
421
+ image = (images[-1]['img'].squeeze(0).permute(1, 2, 0).numpy() + 1) / 2
422
+ axes[-1, 0].imshow(image)
423
+ axes[-1, 0].axis('off')
424
+ for j in range(3):
425
+ axes[-1, j+1].imshow(features[-1, :, :, j*3:(j+1)*3].cpu().numpy())
426
+ axes[-1, j+1].axis('off')
427
+
428
+ # Hide unused columns in last row
429
+ for j in range(4, 8):
430
+ axes[-1, j].axis('off')
431
+
432
+ plt.subplots_adjust(wspace=0.005, hspace=0.005, left=0.01, right=0.99, top=0.99, bottom=0.01)
433
+
434
+ # Save the plot
435
+ if file_name is None:
436
+ file_name = f'feat_dim{dim-9}-{dim}'
437
+ if feat_type:
438
+ feat_type_str = '-'.join(feat_type)
439
+ file_name = file_name + f'_{feat_type_str}'
440
+ save_path = os.path.join(save_dir, file_name + '.png')
441
+ plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
442
+ plt.close()
443
+
444
+ return save_path
445
+
446
+
447
+ def adjust_norm(image: torch.Tensor) -> torch.Tensor:
448
+
449
+ inv_normalize = tvf.Normalize(
450
+ mean=[-1, -1, -1],
451
+ std=[1/0.5, 1/0.5, 1/0.5]
452
+ )
453
+
454
+ correct_normalize = tvf.Normalize(
455
+ mean=[0.485, 0.456, 0.406],
456
+ std=[0.229, 0.224, 0.225]
457
+ )
458
+ image = inv_normalize(image)
459
+ image = correct_normalize(image)
460
+
461
+ return image
462
+
463
+ def adjust_midas_norm(image: torch.Tensor) -> torch.Tensor:
464
+
465
+ inv_normalize = tvf.Normalize(
466
+ mean=[-1, -1, -1],
467
+ std=[1/0.5, 1/0.5, 1/0.5]
468
+ )
469
+
470
+ correct_normalize = tvf.Normalize(
471
+ mean=[0.5, 0.5, 0.5],
472
+ std=[0.5, 0.5, 0.5]
473
+ )
474
+
475
+ image = inv_normalize(image)
476
+ image = correct_normalize(image)
477
+
478
+ return image
479
+
480
+ def adjust_clip_norm(image: torch.Tensor) -> torch.Tensor:
481
+
482
+ inv_normalize = tvf.Normalize(
483
+ mean=[-1, -1, -1],
484
+ std=[1/0.5, 1/0.5, 1/0.5]
485
+ )
486
+
487
+ correct_normalize = tvf.Normalize(
488
+ mean=[0.48145466, 0.4578275, 0.40821073],
489
+ std=[0.26862954, 0.26130258, 0.27577711]
490
+ )
491
+
492
+ image = inv_normalize(image)
493
+ image = correct_normalize(image)
494
+
495
+ return image
496
+
497
+ class UnNormalize(object):
498
+ def __init__(self, mean, std):
499
+ self.mean = mean
500
+ self.std = std
501
+
502
+ def __call__(self, image):
503
+ image2 = torch.clone(image)
504
+ if len(image2.shape) == 4:
505
+ image2 = image2.permute(1, 0, 2, 3)
506
+ for t, m, s in zip(image2, self.mean, self.std):
507
+ t.mul_(s).add_(m)
508
+ return image2.permute(1, 0, 2, 3)
509
+
510
+
511
+ norm = tvf.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
512
+ unnorm = UnNormalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
513
+
514
+ midas_norm = tvf.Normalize([0.5] * 3, [0.5] * 3)
515
+ midas_unnorm = UnNormalize([0.5] * 3, [0.5] * 3)
516
+
517
+
518
+ def generate_iuv(B, H, W):
519
+ i_coords = torch.arange(B).view(B, 1, 1, 1).expand(B, H, W, 1).float() / (B - 1)
520
+ u_coords = torch.linspace(0, 1, W).view(1, 1, W, 1).expand(B, H, W, 1)
521
+ v_coords = torch.linspace(0, 1, H).view(1, H, 1, 1).expand(B, H, W, 1)
522
+ iuv_coords = torch.cat([i_coords, u_coords, v_coords], dim=-1)
523
+ return iuv_coords
524
+
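For reference, the IUV coordinates produced above encode the view index and the normalized pixel position, e.g.:

```python
# Illustrative check for generate_iuv(): per-pixel (view index, u, v) in [0, 1].
import torch

iuv = generate_iuv(B=2, H=4, W=6)
print(iuv.shape)        # torch.Size([2, 4, 6, 3])
print(iuv[0, 0, 0])     # tensor([0., 0., 0.]) -> first view, top-left pixel
print(iuv[1, -1, -1])   # tensor([1., 1., 1.]) -> last view, bottom-right pixel
```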
525
+ class FeatureExtractor:
526
+ """
527
+ Extracts and processes features from images using VFMs for per point(per pixel).
528
+ Supports multiple VFM features, dimensionality reduction, feature upsampling, and visualization.
529
+
530
+ Parameters:
531
+ images (list): List of image info.
532
+ method (str): Pointmap initialization method, chosen from ["dust3r", "mast3r"].
533
+ device (str): 'cuda'.
534
+ feat_type (list): VFM(s), chosen from ["dust3r", "mast3r", "dift", "dino_b16", "dinov2_b14", "radio", "clip_b16", "mae_b16", "midas_l16", "sam_base", "iuvrgb"].
535
+ feat_dim (int): PCA dimensions.
536
+ img_base_path (str): Training view data directory path.
537
+ model_path (str): Model path, './submodules/mast3r/checkpoints/'.
538
+ vis_feat (bool): Visualize and save feature maps.
539
+ vis_key (str): Feature type to visualize (MASt3R only), chosen from ["decfeat", "desc"].
540
+ focal_avg (bool): Use a shared (averaged) focal length.
541
+ """
542
+ def __init__(self, images, args, method):
543
+ self.images = images
544
+ self.method = method
545
+ self.device = args.device
546
+ self.feat_type = args.feat_type
547
+ self.feat_dim = args.feat_dim
548
+ self.img_base_path = args.img_base_path
549
+ # self.use_featup = args.use_featup
550
+ self.model_path = args.model_path
551
+ self.vis_feat = args.vis_feat
552
+ self.vis_key = args.vis_key
553
+ self.focal_avg = args.focal_avg
554
+
555
+ def get_dust3r_feat(self, **kw):
556
+ model_path = os.path.join(self.model_path, "DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth")
557
+ model = AsymmetricCroCo3DStereo.from_pretrained(model_path).to(self.device)
558
+ output = inference(kw['pairs'], model, self.device, batch_size=1)
559
+ scene = global_aligner(output, device=self.device, mode=GlobalAlignerMode.PointCloudOptimizer)
560
+ if self.vis_key:
561
+ assert self.vis_key == 'decfeat', f"Expected vis_key to be 'decfeat', but got {self.vis_key}"
562
+ self.vis_decfeat(kw['pairs'], output=output)
563
+
564
+ # del model, output
565
+ # torch.cuda.empty_cache()
566
+
567
+ return scene.stacked_feat
568
+
569
+ def get_mast3r_feat(self, **kw):
570
+ model_path = os.path.join(self.model_path, "MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth")
571
+ model = AsymmetricMASt3R.from_pretrained(model_path).to(self.device)
572
+ cache_dir = os.path.join(self.img_base_path, "cache")
573
+ if os.path.exists(cache_dir):
574
+ os.system(f'rm -rf {cache_dir}')
575
+ scene = sparse_global_alignment(kw['train_img_list'], kw['pairs'], cache_dir,
576
+ model, lr1=0.07, niter1=500, lr2=0.014, niter2=200, device=self.device,
577
+ opt_depth='depth' in 'refine', shared_intrinsics=self.focal_avg,
578
+ matching_conf_thr=5.)
579
+ if self.vis_key:
580
+ assert self.vis_key in ['decfeat', 'desc'], f"Expected vis_key to be 'decfeat' or 'desc', but got {self.vis_key}"
581
+ self.vis_decfeat(kw['pairs'], model=model)
582
+
583
+ # del model
584
+ # torch.cuda.empty_cache()
585
+
586
+ return scene.stacked_feat
587
+
588
+ def get_feat(self, feat_type):
589
+ """
590
+ Get features using Probe3D.
591
+ """
592
+ cfg = OmegaConf.load(f"configs/backbone/{feat_type}.yaml")
593
+ model = instantiate(cfg.model, output="dense", return_multilayer=False)
594
+ model = model.to(self.device)
595
+ if 'midas' in feat_type:
596
+ image_norm = adjust_midas_norm(torch.cat([i['img'] for i in self.images])).to(self.device)
597
+ # elif 'clip' in self.feat_type:
598
+ # image_norm = adjust_clip_norm(torch.cat([i['img'] for i in self.images])).to(self.device)
599
+ else:
600
+ image_norm = adjust_norm(torch.cat([i['img'] for i in self.images])).to(self.device)
601
+
602
+ with torch.no_grad():
603
+ feats = model(image_norm).permute(0, 2, 3, 1)
604
+
605
+ # del model
606
+ # torch.cuda.empty_cache()
607
+
608
+ return feats
609
+
610
+ # def get_feat(self, feat_type):
611
+ # """
612
+ # Get features using FeatUp.
613
+ # """
614
+ # original_feat_type = feat_type
615
+ # use_norm = False if 'maskclip' in feat_type else True
616
+ # if 'featup' in original_feat_type:
617
+ # feat_type = feat_type.split('_featup')[0]
618
+ # # feat_upsampler = torch.hub.load("mhamilton723/FeatUp", feat_type, use_norm=use_norm).to(device)
619
+ # feat_upsampler = torch.hub.load("/home/chenyue/.cache/torch/hub/mhamilton723_FeatUp_main/", feat_type, use_norm=use_norm, source='local').to(self.device) ## offline
620
+ # image_norm = adjust_norm(torch.cat([i['img'] for i in self.images])).to(self.device)
621
+ # image_norm = F.interpolate(image_norm, size=(224, 224), mode='bilinear', align_corners=False)
622
+ # if 'featup' in original_feat_type:
623
+ # feats = feat_upsampler(image_norm).permute(0, 2, 3, 1)
624
+ # else:
625
+ # feats = feat_upsampler.model(image_norm).permute(0, 2, 3, 1)
626
+ # return feats
627
+
628
+ def get_iuvrgb(self):
629
+ rgb = torch.cat([i['img'] for i in self.images]).permute(0, 2, 3, 1).to(self.device)
630
+ feats = torch.cat([generate_iuv(*rgb.shape[:-1]).to(self.device), rgb], dim=-1)
631
+ return feats
632
+
633
+ def get_iuv(self):
634
+ rgb = torch.cat([i['img'] for i in self.images]).permute(0, 2, 3, 1).to(self.device)
635
+ feats = generate_iuv(*rgb.shape[:-1]).to(self.device)
636
+ return feats
637
+
638
+ def preprocess(self, feature, feat_dim, vis_feat=False, is_upsample=True):
639
+ """
640
+ Preprocess features by applying PCA, upsampling, and optionally visualizing.
641
+ """
642
+ if feat_dim:
643
+ feature = pca(feature, feat_dim)
644
+ # else:
645
+ # feature_min = feature.min(dim=0, keepdim=True).values.min(dim=1, keepdim=True).values
646
+ # feature_max = feature.max(dim=0, keepdim=True).values.max(dim=1, keepdim=True).values
647
+ # feature = (feature - feature_min) / (feature_max - feature_min + 1e-6)
648
+ # feature = feature - feature.mean(dim=[0,1,2], keepdim=True)
649
+
650
+ torch.cuda.empty_cache()
651
+
652
+ if (feature[0].shape[0:-1] != self.images[0]['true_shape'][0]).all() and is_upsample:
653
+ feature = upsampler(feature, *[s for s in self.images[0]['true_shape'][0]])
654
+
655
+ print(f"Feature map size >>> height: {feature[0].shape[0]}, width: {feature[0].shape[1]}, channels: {feature[0].shape[2]}")
656
+ if vis_feat:
657
+ save_path = visualizer(feature, self.images, self.img_base_path, feat_type=self.feat_type)
658
+ print(f"The encoder feature visualization has been saved at >>>>> {save_path}")
659
+
660
+ return feature
661
+
662
+ def vis_decfeat(self, pairs, **kw):
663
+ """
664
+ Visualize decoder or head (MASt3R only) features.
665
+ """
666
+ if 'output' in kw:
667
+ output = kw['output']
668
+ else:
669
+ output = inference(pairs, kw['model'], self.device, batch_size=1, verbose=False)
670
+ decfeat1 = output['pred1'][self.vis_key].detach()
671
+ decfeat2 = output['pred2'][self.vis_key].detach()
672
+ # decfeat1 = pca(decfeat1, 9)
673
+ # decfeat2 = pca(decfeat2, 9)
674
+ decfeat = torch.stack([decfeat1, decfeat2], dim=1).view(-1, *decfeat1.shape[1:])
675
+ decfeat = torch.cat(torch.chunk(decfeat,2)[::-1], dim=0)
676
+ decfeat = pca(decfeat, 9)
677
+ if (decfeat.shape[1:-1] != self.images[0]['true_shape'][0]).all():
678
+ decfeat = upsampler(decfeat, *[s for s in self.images[0]['true_shape'][0]])
679
+ pair_images = [im for p in pairs[3:] + pairs[:3] for im in p]
680
+ save_path = mv_visualizer(decfeat, pair_images, self.img_base_path,
681
+ feat_type=self.feat_type, file_name=f'{self.vis_key}_pcaall_dim0-9')
682
+ print(f"The decoder feature visualization has been saved at >>>>> {save_path}")
683
+
684
+ def forward(self, **kw):
685
+ feat_dim = self.feat_dim
686
+ vis_feat = self.vis_feat and len(self.feat_type) == 1
687
+ is_upsample = len(self.feat_type) == 1
688
+
689
+ all_feats = {}
690
+ for feat_type in self.feat_type:
691
+ if feat_type == self.method:
692
+ feats = kw['scene'].stacked_feat
693
+ elif feat_type in ['dust3r', 'mast3r']:
694
+ feats = getattr(self, f"get_{feat_type}_feat")(**kw)
695
+ elif feat_type in ['iuv', 'iuvrgb']:
696
+ feats = getattr(self, f"get_{feat_type}")()
697
+ feat_dim = None
698
+ else:
699
+ feats = self.get_feat(feat_type)
700
+
701
+ # feats = to_numpy(self.preprocess(feats))
702
+ all_feats[feat_type] = self.preprocess(feats.detach().clone(), feat_dim, vis_feat, is_upsample)
703
+
704
+ if len(self.feat_type) > 1:
705
+ all_feats = {k: (v - v.min()) / (v.max() - v.min()) for k, v in all_feats.items()}
706
+
707
+ target_size = tuple(s // 16 for s in self.images[0]['true_shape'][0][:2])
708
+ tmp_feats = []
709
+ kickoff = []
710
+
711
+ for k, v in all_feats.items():
712
+ if k in ['iuv', 'iuvrgb']:
713
+ # self.feat_dim -= v.shape[-1]
714
+ kickoff.append(v)
715
+ else:
716
+ if v.shape[1:3] != target_size:
717
+ v = F.interpolate(v.permute(0, 3, 1, 2), size=target_size,
718
+ mode='bilinear', align_corners=False).permute(0, 2, 3, 1)
719
+ tmp_feats.append(v)
720
+
721
+ feats = self.preprocess(torch.cat(tmp_feats, dim=-1), self.feat_dim, self.vis_feat and not kickoff)
722
+
723
+ if kickoff:
724
+ feats = torch.cat([feats] + kickoff, dim=-1)
725
+ feats = self.preprocess(feats, self.feat_dim, self.vis_feat, is_upsample=False)
726
+
727
+ else:
728
+ feats = all_feats[self.feat_type[0]]
729
+
730
+ torch.cuda.empty_cache()
731
+ return to_numpy(feats)
732
+
733
+ def __call__(self, **kw):
734
+ return self.forward(**kw)
735
+
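A rough, illustrative sketch of driving FeatureExtractor; the images, pairs and scene objects are assumed to come from the DUSt3R/MASt3R loading and alignment code used elsewhere in this repo, and the argument values are placeholders rather than recommended settings:

```python
# Rough usage sketch for FeatureExtractor (placeholder arguments, not a reference setup).
from types import SimpleNamespace

args = SimpleNamespace(
    device="cuda",
    feat_type=["dino_b16"],       # one or more VFM names
    feat_dim=256,                 # PCA target dimension
    img_base_path="./data/scene",
    model_path="./submodules/mast3r/checkpoints/",
    vis_feat=True,
    vis_key=None,
    focal_avg=False,
)
extractor = FeatureExtractor(images, args, method="dust3r")
feats = extractor(scene=scene, pairs=pairs, train_img_list=None)  # numpy array of per-pixel features
```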
736
+
737
+ class InitMethod:
738
+ """
739
+ Initialize pointmap and camera param via DUSt3R or MASt3R.
740
+ """
741
+ def __init__(self, args):
742
+ self.method = args.method
743
+ self.n_views = args.n_views
744
+ self.device = args.device
745
+ self.img_base_path = args.img_base_path
746
+ self.focal_avg = args.focal_avg
747
+ self.tsdf_thresh = args.tsdf_thresh
748
+ self.min_conf_thr = args.min_conf_thr
749
+ if self.method == 'dust3r':
750
+ self.model_path = os.path.join(args.model_path, "DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth")
751
+ else:
752
+ self.model_path = os.path.join(args.model_path, "MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth")
753
+
754
+ def get_dust3r(self):
755
+ return AsymmetricCroCo3DStereo.from_pretrained(self.model_path).to(self.device)
756
+
757
+ def get_mast3r(self):
758
+ return AsymmetricMASt3R.from_pretrained(self.model_path).to(self.device)
759
+
760
+ def infer_dust3r(self, **kw):
761
+ output = inference(kw['pairs'], kw['model'], self.device, batch_size=1)
762
+ scene = global_aligner(output, device=self.device, mode=GlobalAlignerMode.PointCloudOptimizer)
763
+ loss = compute_global_alignment(scene=scene, init="mst", niter=300, schedule='linear', lr=0.01,
764
+ focal_avg=self.focal_avg, known_focal=kw.get('known_focal', None))
765
+ scene = scene.clean_pointcloud()
766
+ return scene
767
+
768
+ def infer_mast3r(self, **kw):
769
+ cache_dir = os.path.join(self.img_base_path, "cache")
770
+ if os.path.exists(cache_dir):
771
+ os.system(f'rm -rf {cache_dir}')
772
+
773
+ scene = sparse_global_alignment(kw['train_img_list'], kw['pairs'], cache_dir,
774
+ kw['model'], lr1=0.07, niter1=500, lr2=0.014, niter2=200, device=self.device,
775
+ opt_depth='depth' in 'refine', shared_intrinsics=self.focal_avg,
776
+ matching_conf_thr=5.)
777
+ return scene
778
+
779
+ def get_dust3r_info(self, scene):
780
+ imgs = to_numpy(scene.imgs)
781
+ focals = scene.get_focals()
782
+ poses = to_numpy(scene.get_im_poses())
783
+ pts3d = to_numpy(scene.get_pts3d())
784
+ # pts3d = to_numpy(scene.get_planes3d())
785
+ scene.min_conf_thr = float(scene.conf_trf(torch.tensor(1.0)))
786
+ confidence_masks = to_numpy(scene.get_masks())
787
+ intrinsics = to_numpy(scene.get_intrinsics())
788
+ return imgs, focals, poses, intrinsics, pts3d, confidence_masks
789
+
790
+ def get_mast3r_info(self, scene):
791
+ imgs = to_numpy(scene.imgs)
792
+ focals = scene.get_focals()[:,None]
793
+ poses = to_numpy(scene.get_im_poses())
794
+ intrinsics = to_numpy(scene.intrinsics)
795
+ tsdf = TSDFPostProcess(scene, TSDF_thresh=self.tsdf_thresh)
796
+ pts3d, _, confs = to_numpy(tsdf.get_dense_pts3d(clean_depth=True))
797
+ pts3d = [arr.reshape((*imgs[0].shape[:2], 3)) for arr in pts3d]
798
+ confidence_masks = np.array(to_numpy([c > self.min_conf_thr for c in confs]))
799
+ return imgs, focals, poses, intrinsics, pts3d, confidence_masks
800
+
801
+ def get_dust3r_depth(self, scene):
802
+ return to_numpy(scene.get_depthmaps())
803
+
804
+ def get_mast3r_depth(self, scene):
805
+ imgs = to_numpy(scene.imgs)
806
+ tsdf = TSDFPostProcess(scene, TSDF_thresh=self.tsdf_thresh)
807
+ _, depthmaps, _ = to_numpy(tsdf.get_dense_pts3d(clean_depth=True))
808
+ depthmaps = [arr.reshape((*imgs[0].shape[:2], 3)) for arr in depthmaps]
809
+ return depthmaps
810
+
811
+ def get_model(self):
812
+ return getattr(self, f"get_{self.method}")()
813
+
814
+ def infer(self, **kw):
815
+ return getattr(self, f"infer_{self.method}")(**kw)
816
+
817
+ def get_info(self, scene):
818
+ return getattr(self, f"get_{self.method}_info")(scene)
819
+
820
+ def get_depth(self, scene):
821
+ return getattr(self, f"get_{self.method}_depth")(scene)
822
+
823
+
824
+
825
+
826
+
827
+
utils/general_utils.py ADDED
@@ -0,0 +1,133 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import torch
13
+ import sys
14
+ from datetime import datetime
15
+ import numpy as np
16
+ import random
17
+
18
+ def inverse_sigmoid(x):
19
+ return torch.log(x/(1-x))
20
+
21
+ def PILtoTorch(pil_image, resolution):
22
+ resized_image_PIL = pil_image.resize(resolution)
23
+ resized_image = torch.from_numpy(np.array(resized_image_PIL)) / 255.0
24
+ if len(resized_image.shape) == 3:
25
+ return resized_image.permute(2, 0, 1)
26
+ else:
27
+ return resized_image.unsqueeze(dim=-1).permute(2, 0, 1)
28
+
29
+ def get_expon_lr_func(
30
+ lr_init, lr_final, lr_delay_steps=0, lr_delay_mult=1.0, max_steps=1000000
31
+ ):
32
+ """
33
+ Copied from Plenoxels
34
+
35
+ Continuous learning rate decay function. Adapted from JaxNeRF
36
+ The returned rate is lr_init when step=0 and lr_final when step=max_steps, and
37
+ is log-linearly interpolated elsewhere (equivalent to exponential decay).
38
+ If lr_delay_steps>0 then the learning rate will be scaled by some smooth
39
+ function of lr_delay_mult, such that the initial learning rate is
40
+ lr_init*lr_delay_mult at the beginning of optimization but will be eased back
41
+ to the normal learning rate when steps>lr_delay_steps.
42
+ :param conf: config subtree 'lr' or similar
43
+ :param max_steps: int, the number of steps during optimization.
44
+ :return HoF which takes step as input
45
+ """
46
+
47
+ def helper(step):
48
+ if step < 0 or (lr_init == 0.0 and lr_final == 0.0):
49
+ # Disable this parameter
50
+ return 0.0
51
+ if lr_delay_steps > 0:
52
+ # A kind of reverse cosine decay.
53
+ delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin(
54
+ 0.5 * np.pi * np.clip(step / lr_delay_steps, 0, 1)
55
+ )
56
+ else:
57
+ delay_rate = 1.0
58
+ t = np.clip(step / max_steps, 0, 1)
59
+ log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t)
60
+ return delay_rate * log_lerp
61
+
62
+ return helper
63
+
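A quick worked example of this schedule (log-linear interpolation between lr_init and lr_final):

```python
# Worked example: log-linear decay from 1e-3 to 1e-5 over 1000 steps.
lr_fn = get_expon_lr_func(lr_init=1e-3, lr_final=1e-5, max_steps=1000)
print(lr_fn(0))      # 1e-3
print(lr_fn(500))    # 1e-4 (geometric mean of the two endpoints)
print(lr_fn(1000))   # 1e-5
```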
64
+ def strip_lowerdiag(L):
65
+ uncertainty = torch.zeros((L.shape[0], 6), dtype=torch.float, device="cuda")
66
+
67
+ uncertainty[:, 0] = L[:, 0, 0]
68
+ uncertainty[:, 1] = L[:, 0, 1]
69
+ uncertainty[:, 2] = L[:, 0, 2]
70
+ uncertainty[:, 3] = L[:, 1, 1]
71
+ uncertainty[:, 4] = L[:, 1, 2]
72
+ uncertainty[:, 5] = L[:, 2, 2]
73
+ return uncertainty
74
+
75
+ def strip_symmetric(sym):
76
+ return strip_lowerdiag(sym)
77
+
78
+ def build_rotation(r):
79
+ norm = torch.sqrt(r[:,0]*r[:,0] + r[:,1]*r[:,1] + r[:,2]*r[:,2] + r[:,3]*r[:,3])
80
+
81
+ q = r / norm[:, None]
82
+
83
+ R = torch.zeros((q.size(0), 3, 3), device='cuda')
84
+
85
+ r = q[:, 0]
86
+ x = q[:, 1]
87
+ y = q[:, 2]
88
+ z = q[:, 3]
89
+
90
+ R[:, 0, 0] = 1 - 2 * (y*y + z*z)
91
+ R[:, 0, 1] = 2 * (x*y - r*z)
92
+ R[:, 0, 2] = 2 * (x*z + r*y)
93
+ R[:, 1, 0] = 2 * (x*y + r*z)
94
+ R[:, 1, 1] = 1 - 2 * (x*x + z*z)
95
+ R[:, 1, 2] = 2 * (y*z - r*x)
96
+ R[:, 2, 0] = 2 * (x*z - r*y)
97
+ R[:, 2, 1] = 2 * (y*z + r*x)
98
+ R[:, 2, 2] = 1 - 2 * (x*x + y*y)
99
+ return R
100
+
101
+ def build_scaling_rotation(s, r):
102
+ L = torch.zeros((s.shape[0], 3, 3), dtype=torch.float, device="cuda")
103
+ R = build_rotation(r)
104
+
105
+ L[:,0,0] = s[:,0]
106
+ L[:,1,1] = s[:,1]
107
+ L[:,2,2] = s[:,2]
108
+
109
+ L = R @ L
110
+ return L
111
+
112
+ def safe_state(silent):
113
+ old_f = sys.stdout
114
+ class F:
115
+ def __init__(self, silent):
116
+ self.silent = silent
117
+
118
+ def write(self, x):
119
+ if not self.silent:
120
+ if x.endswith("\n"):
121
+ old_f.write(x.replace("\n", " [{}]\n".format(str(datetime.now().strftime("%d/%m %H:%M:%S")))))
122
+ else:
123
+ old_f.write(x)
124
+
125
+ def flush(self):
126
+ old_f.flush()
127
+
128
+ sys.stdout = F(silent)
129
+
130
+ random.seed(0)
131
+ np.random.seed(0)
132
+ torch.manual_seed(0)
133
+ torch.cuda.set_device(torch.device("cuda:0"))
utils/graphics_utils.py ADDED
@@ -0,0 +1,210 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import torch
13
+ import math
14
+ import numpy as np
15
+ from typing import NamedTuple
16
+
17
+ import torch.nn.functional as F
18
+ from torch import Tensor
19
+
20
+ class BasicPointCloud(NamedTuple):
21
+ points : np.array
22
+ colors : np.array
23
+ normals : np.array
24
+ features: np.array
25
+
26
+ def geom_transform_points(points, transf_matrix):
27
+ P, _ = points.shape
28
+ ones = torch.ones(P, 1, dtype=points.dtype, device=points.device)
29
+ points_hom = torch.cat([points, ones], dim=1)
30
+ points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0))
31
+
32
+ denom = points_out[..., 3:] + 0.0000001
33
+ return (points_out[..., :3] / denom).squeeze(dim=0)
34
+
35
+ def getWorld2View(R, t):
36
+ Rt = np.zeros((4, 4))
37
+ Rt[:3, :3] = R.transpose()
38
+ Rt[:3, 3] = t
39
+ Rt[3, 3] = 1.0
40
+ return np.float32(Rt)
41
+
42
+ def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0):
43
+ Rt = np.zeros((4, 4))
44
+ Rt[:3, :3] = R.transpose()
45
+ Rt[:3, 3] = t
46
+ Rt[3, 3] = 1.0
47
+
48
+ C2W = np.linalg.inv(Rt)
49
+ cam_center = C2W[:3, 3]
50
+ cam_center = (cam_center + translate) * scale
51
+ C2W[:3, 3] = cam_center
52
+ Rt = np.linalg.inv(C2W)
53
+ return np.float32(Rt)
54
+
55
+ def getWorld2View2_torch(R, t, translate=torch.tensor([0.0, 0.0, 0.0]), scale=1.0):
56
+ translate = torch.tensor(translate, dtype=torch.float32)
57
+
58
+ # Initialize the transformation matrix
59
+ Rt = torch.zeros((4, 4), dtype=torch.float32)
60
+ Rt[:3, :3] = R.t() # Transpose of R
61
+ Rt[:3, 3] = t
62
+ Rt[3, 3] = 1.0
63
+
64
+ # Compute the inverse to get the camera-to-world transformation
65
+ C2W = torch.linalg.inv(Rt)
66
+ cam_center = C2W[:3, 3]
67
+ cam_center = (cam_center + translate) * scale
68
+ C2W[:3, 3] = cam_center
69
+
70
+ # Invert again to get the world-to-view transformation
71
+ Rt = torch.linalg.inv(C2W)
72
+
73
+ return Rt
74
+
75
+ def getProjectionMatrix(znear, zfar, fovX, fovY):
76
+ tanHalfFovY = math.tan((fovY / 2))
77
+ tanHalfFovX = math.tan((fovX / 2))
78
+
79
+ top = tanHalfFovY * znear
80
+ bottom = -top
81
+ right = tanHalfFovX * znear
82
+ left = -right
83
+
84
+ P = torch.zeros(4, 4)
85
+
86
+ z_sign = 1.0
87
+
88
+ P[0, 0] = 2.0 * znear / (right - left)
89
+ P[1, 1] = 2.0 * znear / (top - bottom)
90
+ P[0, 2] = (right + left) / (right - left)
91
+ P[1, 2] = (top + bottom) / (top - bottom)
92
+ P[3, 2] = z_sign
93
+ P[2, 2] = z_sign * zfar / (zfar - znear)
94
+ P[2, 3] = -(zfar * znear) / (zfar - znear)
95
+ return P
96
+
97
+ def fov2focal(fov, pixels):
98
+ return pixels / (2 * math.tan(fov / 2))
99
+
100
+ def focal2fov(focal, pixels):
101
+ return 2*math.atan(pixels/(2*focal))
102
+
103
+ def resize_render(view, size=None):
104
+ image_size = size if size is not None else max(view.image_width, view.image_height)
105
+ view.original_image = torch.zeros((3, image_size, image_size), device=view.original_image.device)
106
+ focal_length_x = fov2focal(view.FoVx, view.image_width)
107
+ focal_length_y = fov2focal(view.FoVy, view.image_height)
108
+ view.image_width = image_size
109
+ view.image_height = image_size
110
+ view.FoVx = focal2fov(focal_length_x, image_size)
111
+ view.FoVy = focal2fov(focal_length_y, image_size)
112
+ return view
113
+
114
+ def make_video_divisble(
115
+ video: torch.Tensor | np.ndarray, block_size=16
116
+ ) -> torch.Tensor | np.ndarray:
117
+ H, W = video.shape[1:3]
118
+ H_new = H - H % block_size
119
+ W_new = W - W % block_size
120
+ return video[:, :H_new, :W_new]
121
+
122
+
123
+ def depth_to_points(
124
+ depths: Tensor, camtoworlds: Tensor, Ks: Tensor, z_depth: bool = True
125
+ ) -> Tensor:
126
+ """Convert depth maps to 3D points
127
+
128
+ Args:
129
+ depths: Depth maps [..., H, W, 1]
130
+ camtoworlds: Camera-to-world transformation matrices [..., 4, 4]
131
+ Ks: Camera intrinsics [..., 3, 3]
132
+ z_depth: Whether the depth is in z-depth (True) or ray depth (False)
133
+
134
+ Returns:
135
+ points: 3D points in the world coordinate system [..., H, W, 3]
136
+ """
137
+ assert depths.shape[-1] == 1, f"Invalid depth shape: {depths.shape}"
138
+ assert camtoworlds.shape[-2:] == (
139
+ 4,
140
+ 4,
141
+ ), f"Invalid viewmats shape: {camtoworlds.shape}"
142
+ assert Ks.shape[-2:] == (3, 3), f"Invalid Ks shape: {Ks.shape}"
143
+ assert (
144
+ depths.shape[:-3] == camtoworlds.shape[:-2] == Ks.shape[:-2]
145
+ ), f"Shape mismatch! depths: {depths.shape}, viewmats: {camtoworlds.shape}, Ks: {Ks.shape}"
146
+
147
+ device = depths.device
148
+ height, width = depths.shape[-3:-1]
149
+
150
+ x, y = torch.meshgrid(
151
+ torch.arange(width, device=device),
152
+ torch.arange(height, device=device),
153
+ indexing="xy",
154
+ ) # [H, W]
155
+
156
+ fx = Ks[..., 0, 0] # [...]
157
+ fy = Ks[..., 1, 1] # [...]
158
+ cx = Ks[..., 0, 2] # [...]
159
+ cy = Ks[..., 1, 2] # [...]
160
+
161
+ # camera directions in camera coordinates
162
+ camera_dirs = F.pad(
163
+ torch.stack(
164
+ [
165
+ (x - cx[..., None, None] + 0.5) / fx[..., None, None],
166
+ (y - cy[..., None, None] + 0.5) / fy[..., None, None],
167
+ ],
168
+ dim=-1,
169
+ ),
170
+ (0, 1),
171
+ value=1.0,
172
+ ) # [..., H, W, 3]
173
+
174
+ # ray directions in world coordinates
175
+ directions = torch.einsum(
176
+ "...ij,...hwj->...hwi", camtoworlds[..., :3, :3], camera_dirs
177
+ ) # [..., H, W, 3]
178
+ origins = camtoworlds[..., :3, -1] # [..., 3]
179
+
180
+ if not z_depth:
181
+ directions = F.normalize(directions, dim=-1)
182
+
183
+ points = origins[..., None, None, :] + depths * directions
184
+ return points
185
+
186
+
187
+ def depth_to_normal(
188
+ depths: Tensor, camtoworlds: Tensor, Ks: Tensor, z_depth: bool = True
189
+ ) -> Tensor:
190
+ """Convert depth maps to surface normals
191
+
192
+ Args:
193
+ depths: Depth maps [..., H, W, 1]
194
+ camtoworlds: Camera-to-world transformation matrices [..., 4, 4]
195
+ Ks: Camera intrinsics [..., 3, 3]
196
+ z_depth: Whether the depth is in z-depth (True) or ray depth (False)
197
+
198
+ Returns:
199
+ normals: Surface normals in the world coordinate system [..., H, W, 3]
200
+ """
201
+ points = depth_to_points(depths, camtoworlds, Ks, z_depth=z_depth) # [..., H, W, 3]
202
+ dx = torch.cat(
203
+ [points[..., 2:, 1:-1, :] - points[..., :-2, 1:-1, :]], dim=-3
204
+ ) # [..., H-2, W-2, 3]
205
+ dy = torch.cat(
206
+ [points[..., 1:-1, 2:, :] - points[..., 1:-1, :-2, :]], dim=-2
207
+ ) # [..., H-2, W-2, 3]
208
+ normals = F.normalize(torch.cross(dx, dy, dim=-1), dim=-1) # [..., H-2, W-2, 3]
209
+ normals = F.pad(normals, (0, 0, 1, 1, 1, 1), value=0.0) # [..., H, W, 3]
210
+ return normals
utils/image_utils.py ADDED
@@ -0,0 +1,118 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import torch
13
+
14
+ def mse(img1, img2):
15
+ return (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True)
16
+
17
+ def psnr(img1, img2):
18
+ mse = (((img1 - img2)) ** 2).view(img1.shape[0], -1).mean(1, keepdim=True)
19
+ return 20 * torch.log10(1.0 / torch.sqrt(mse))
20
+
21
+ def masked_psnr(img1, img2, mask):
22
+ mse = ((((img1 - img2)) ** 2) * mask).sum() / (3. * mask.sum())
23
+ return 20 * torch.log10(1.0 / torch.sqrt(mse))
24
+
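For reference, a quick check of the PSNR helper (images in [0, 1], batched along dim 0):

```python
# Quick check of psnr(): identical images give +inf, a small perturbation gives ~40 dB.
import torch

img_gt = torch.rand(1, 3, 64, 64)
img_noisy = (img_gt + 0.01 * torch.randn_like(img_gt)).clamp(0, 1)
print(psnr(img_gt, img_gt))       # inf
print(psnr(img_gt, img_noisy))    # roughly 40 dB
```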
25
+
26
+ def accuracy_torch(gt_points, rec_points, gt_normals=None, rec_normals=None, batch_size=5000):
27
+ n_points = rec_points.shape[0]
28
+ all_distances = []
29
+ all_indices = []
30
+
31
+ for i in range(0, n_points, batch_size):
32
+ end_idx = min(i + batch_size, n_points)
33
+ batch_points = rec_points[i:end_idx]
34
+
35
+ distances = torch.cdist(batch_points, gt_points) # (batch_size, M)
36
+ batch_distances, batch_indices = torch.min(distances, dim=1) # (batch_size,)
37
+
38
+ all_distances.append(batch_distances)
39
+ all_indices.append(batch_indices)
40
+
41
+ distances = torch.cat(all_distances)
42
+ indices = torch.cat(all_indices)
43
+
44
+ acc = torch.mean(distances)
45
+ acc_median = torch.median(distances)
46
+
47
+ if gt_normals is not None and rec_normals is not None:
48
+ normal_dot = torch.sum(gt_normals[indices] * rec_normals, dim=-1)
49
+ normal_dot = torch.abs(normal_dot)
50
+ return acc, acc_median, torch.mean(normal_dot), torch.median(normal_dot)
51
+
52
+ return acc, acc_median
53
+
54
+ def completion_torch(gt_points, rec_points, gt_normals=None, rec_normals=None, batch_size=5000):
55
+
56
+ n_points = gt_points.shape[0]
57
+ all_distances = []
58
+ all_indices = []
59
+
60
+ for i in range(0, n_points, batch_size):
61
+ end_idx = min(i + batch_size, n_points)
62
+ batch_points = gt_points[i:end_idx]
63
+
64
+ distances = torch.cdist(batch_points, rec_points) # (batch_size, M)
65
+ batch_distances, batch_indices = torch.min(distances, dim=1) # (batch_size,)
66
+
67
+ all_distances.append(batch_distances)
68
+ all_indices.append(batch_indices)
69
+
70
+ distances = torch.cat(all_distances)
71
+ indices = torch.cat(all_indices)
72
+
73
+ comp = torch.mean(distances)
74
+ comp_median = torch.median(distances)
75
+
76
+ if gt_normals is not None and rec_normals is not None:
77
+ normal_dot = torch.sum(gt_normals * rec_normals[indices], dim=-1)
78
+ normal_dot = torch.abs(normal_dot)
79
+ return comp, comp_median, torch.mean(normal_dot), torch.median(normal_dot)
80
+
81
+ return comp, comp_median
82
+
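These two are the usual Chamfer-style metrics: accuracy measures reconstruction-to-GT distances and completion measures GT-to-reconstruction distances. A minimal sketch on synthetic point clouds:

```python
# Minimal sketch of the accuracy / completion metrics.
import torch

gt_points = torch.rand(2000, 3)
rec_points = gt_points + 0.01 * torch.randn_like(gt_points)        # noisy reconstruction

acc_mean, acc_median = accuracy_torch(gt_points, rec_points)       # rec -> gt distances
comp_mean, comp_median = completion_torch(gt_points, rec_points)   # gt -> rec distances
print(acc_mean.item(), comp_mean.item())   # both small, on the order of the added noise
```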
83
+ def accuracy_per_point(gt_points, rec_points, batch_size=5000):
84
+ n_points = rec_points.shape[0]
85
+ all_distances = []
86
+ all_indices = []
87
+
88
+ for i in range(0, n_points, batch_size):
89
+ end_idx = min(i + batch_size, n_points)
90
+ batch_points = rec_points[i:end_idx]
91
+
92
+ distances = torch.cdist(batch_points, gt_points) # (batch_size, M)
93
+ batch_distances, batch_indices = torch.min(distances, dim=1) # (batch_size,)
94
+
95
+ all_distances.append(batch_distances)
96
+ all_indices.append(batch_indices)
97
+
98
+ distances = torch.cat(all_distances)
99
+ return distances
100
+
101
+ def completion_per_point(gt_points, rec_points, batch_size=5000):
102
+
103
+ n_points = gt_points.shape[0]
104
+ all_distances = []
105
+ all_indices = []
106
+
107
+ for i in range(0, n_points, batch_size):
108
+ end_idx = min(i + batch_size, n_points)
109
+ batch_points = gt_points[i:end_idx]
110
+
111
+ distances = torch.cdist(batch_points, rec_points) # (batch_size, M)
112
+ batch_distances, batch_indices = torch.min(distances, dim=1) # (batch_size,)
113
+
114
+ all_distances.append(batch_distances)
115
+ all_indices.append(batch_indices)
116
+
117
+ distances = torch.cat(all_distances)
118
+ return distances
utils/loss_utils.py ADDED
@@ -0,0 +1,247 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ import torch
13
+ import torch.nn.functional as F
14
+ from torch.autograd import Variable
15
+ from math import exp
16
+ import einops
17
+
18
+ def l1_loss(network_output, gt):
19
+ return torch.abs((network_output - gt)).mean()
20
+
21
+ def l2_loss(network_output, gt):
22
+ return ((network_output - gt) ** 2).mean()
23
+
24
+ def gaussian(window_size, sigma):
25
+ gauss = torch.Tensor([exp(-(x - window_size // 2) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)])
26
+ return gauss / gauss.sum()
27
+
28
+ def create_window(window_size, channel):
29
+ _1D_window = gaussian(window_size, 1.5).unsqueeze(1)
30
+ _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
31
+ window = Variable(_2D_window.expand(channel, 1, window_size, window_size).contiguous())
32
+ return window
33
+
34
+ def masked_ssim(img1, img2, mask):
35
+ ssim_map = ssim(img1, img2, get_ssim_map=True)
36
+ return (ssim_map * mask).sum() / (3. * mask.sum())
37
+
38
+
39
+ def ssim(img1, img2, window_size=11, size_average=True, get_ssim_map=False):
40
+ channel = img1.size(-3)
41
+ window = create_window(window_size, channel)
42
+
43
+ if img1.is_cuda:
44
+ window = window.cuda(img1.get_device())
45
+ window = window.type_as(img1)
46
+
47
+ return _ssim(img1, img2, window, window_size, channel, size_average, get_ssim_map)
48
+
49
+ def _ssim(img1, img2, window, window_size, channel, size_average=True, get_ssim_map=False):
50
+ mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
51
+ mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
52
+
53
+ mu1_sq = mu1.pow(2)
54
+ mu2_sq = mu2.pow(2)
55
+ mu1_mu2 = mu1 * mu2
56
+
57
+ sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
58
+ sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
59
+ sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2
60
+
61
+ C1 = 0.01 ** 2
62
+ C2 = 0.03 ** 2
63
+
64
+ ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
65
+
66
+ if get_ssim_map:
67
+ return ssim_map
68
+ elif size_average:
69
+ return ssim_map.mean()
70
+ else:
71
+ return ssim_map.mean(1).mean(1).mean(1)
72
+
73
+
74
+ # --- Projections ---
75
+
76
+ def homogenize_points(points):
77
+ """Append a '1' along the final dimension of the tensor (i.e. convert xyz->xyz1)"""
78
+ return torch.cat([points, torch.ones_like(points[..., :1])], dim=-1)
79
+
80
+
81
+ def normalize_homogenous_points(points):
82
+ """Normalize the point vectors"""
83
+ return points / points[..., -1:]
84
+
85
+
86
+ def pixel_space_to_camera_space(pixel_space_points, depth, intrinsics):
87
+ """
88
+ Convert pixel space points to camera space points.
89
+
90
+ Args:
91
+ pixel_space_points (torch.Tensor): Pixel space points with shape (h, w, 2)
92
+ depth (torch.Tensor): Depth map with shape (b, v, h, w, 1)
93
+ intrinsics (torch.Tensor): Camera intrinsics with shape (b, v, 3, 3)
94
+
95
+ Returns:
96
+ torch.Tensor: Camera space points with shape (b, v, h, w, 3).
97
+ """
98
+ pixel_space_points = homogenize_points(pixel_space_points)
99
+ camera_space_points = torch.einsum('b v i j , h w j -> b v h w i', intrinsics.inverse(), pixel_space_points)
100
+ camera_space_points = camera_space_points * depth
101
+ return camera_space_points
102
+
103
+
104
+ def camera_space_to_world_space(camera_space_points, c2w):
105
+ """
106
+ Convert camera space points to world space points.
107
+
108
+ Args:
109
+ camera_space_points (torch.Tensor): Camera space points with shape (b, v, h, w, 3)
110
+ c2w (torch.Tensor): Camera to world extrinsics matrix with shape (b, v, 4, 4)
111
+
112
+ Returns:
113
+ torch.Tensor: World space points with shape (b, v, h, w, 3).
114
+ """
115
+ camera_space_points = homogenize_points(camera_space_points)
116
+ world_space_points = torch.einsum('b v i j , b v h w j -> b v h w i', c2w, camera_space_points)
117
+ return world_space_points[..., :3]
118
+
119
+
120
+ def camera_space_to_pixel_space(camera_space_points, intrinsics):
121
+ """
122
+ Convert camera space points to pixel space points.
123
+
124
+ Args:
125
+ camera_space_points (torch.Tensor): Camera space points with shape (b, v1, v2, h, w, 3)
126
+ intrinsics (torch.Tensor): Camera intrinsics with shape (b, v2, 3, 3)
127
+
128
+ Returns:
129
+ torch.Tensor: Pixel space points with shape (b, v1, v2, h, w, 2).
130
+ """
131
+ camera_space_points = normalize_homogenous_points(camera_space_points)
132
+ pixel_space_points = torch.einsum('b u i j , b v u h w j -> b v u h w i', intrinsics, camera_space_points)
133
+ return pixel_space_points[..., :2]
134
+
135
+
136
+ def world_space_to_camera_space(world_space_points, c2w):
137
+ """
138
+ Convert world space points to camera space points.
139
+
140
+ Args:
141
+ world_space_points (torch.Tensor): World space points with shape (b, v1, h, w, 3)
142
+ c2w (torch.Tensor): Camera to world extrinsics matrix with shape (b, v2, 4, 4)
143
+
144
+ Returns:
145
+ torch.Tensor: Camera space points with shape (b, v1, v2, h, w, 3).
146
+ """
147
+ world_space_points = homogenize_points(world_space_points)
148
+ camera_space_points = torch.einsum('b u i j , b v h w j -> b v u h w i', c2w.inverse(), world_space_points)
149
+ return camera_space_points[..., :3]
150
+
151
+
152
+ def unproject_depth(depth, intrinsics, c2w):
153
+ """
154
+ Turn the depth map into a 3D point cloud in world space
155
+
156
+ Args:
157
+ depth: (b, v, h, w, 1)
158
+ intrinsics: (b, v, 3, 3)
159
+ c2w: (b, v, 4, 4)
160
+
161
+ Returns:
162
+ torch.Tensor: World space points with shape (b, v, h, w, 3).
163
+ """
164
+
165
+ # Compute indices of pixels
166
+ h, w = depth.shape[-3], depth.shape[-2]
167
+ x_grid, y_grid = torch.meshgrid(
168
+ torch.arange(w, device=depth.device, dtype=torch.float32),
169
+ torch.arange(h, device=depth.device, dtype=torch.float32),
170
+ indexing='xy'
171
+ ) # (h, w), (h, w)
172
+
173
+ # Compute coordinates of pixels in camera space
174
+ pixel_space_points = torch.stack((x_grid, y_grid), dim=-1) # (..., h, w, 2)
175
+ camera_points = pixel_space_to_camera_space(pixel_space_points, depth, intrinsics) # (..., h, w, 3)
176
+
177
+ # Convert points to world space
178
+ world_points = camera_space_to_world_space(camera_points, c2w) # (..., h, w, 3)
179
+
180
+ return world_points
181
+
182
+
183
+ @torch.no_grad()
184
+ def calculate_in_frustum_mask(depth_1, intrinsics_1, c2w_1, depth_2, intrinsics_2, c2w_2, atol=1e-2):
185
+ """
186
+ A function that takes in the depth, intrinsics and c2w matrices of two sets
187
+ of views, and then works out which of the pixels in the first set of views
188
+ has a direct corresponding pixel in any of views in the second set
189
+
190
+ Args:
191
+ depth_1: (b, v1, h, w)
192
+ intrinsics_1: (b, v1, 3, 3)
193
+ c2w_1: (b, v1, 4, 4)
194
+ depth_2: (b, v2, h, w)
195
+ intrinsics_2: (b, v2, 3, 3)
196
+ c2w_2: (b, v2, 4, 4)
197
+
198
+ Returns:
199
+ torch.Tensor: Boolean mask with shape (b, v1, h, w).
200
+ """
201
+
202
+ _, v1, h, w = depth_1.shape
203
+ _, v2, _, _ = depth_2.shape
204
+
205
+ # Unproject the depth to get the 3D points in world space
206
+ points_3d = unproject_depth(depth_1[..., None], intrinsics_1, c2w_1) # (b, v1, h, w, 3)
207
+
208
+ # Project the 3D points into the pixel space of all the second views simultaneously
209
+ camera_points = world_space_to_camera_space(points_3d, c2w_2) # (b, v1, v2, h, w, 3)
210
+ points_2d = camera_space_to_pixel_space(camera_points, intrinsics_2) # (b, v1, v2, h, w, 2)
211
+
212
+ # Calculate the depth of each point
213
+ rendered_depth = camera_points[..., 2] # (b, v1, v2, h, w)
214
+
215
+ # We use three conditions to determine if a point should be masked
216
+
217
+ # Condition 1: Check if the points are in the frustum of any of the v2 views
218
+ in_frustum_mask = (
219
+ (points_2d[..., 0] > 0) &
220
+ (points_2d[..., 0] < w) &
221
+ (points_2d[..., 1] > 0) &
222
+ (points_2d[..., 1] < h)
223
+ ) # (b, v1, v2, h, w)
224
+ in_frustum_mask = in_frustum_mask.any(dim=-3) # (b, v1, h, w)
225
+
226
+ # Condition 2: Check if the points have non-zero (i.e. valid) depth in the input view
227
+ non_zero_depth = depth_1 > 1e-6
228
+
229
+ # Condition 3: Check if the points have matching depth to any of the v2
230
+ # views. torch.nn.functional.grid_sample expects the input coordinates to
231
+ # be normalized to the range [-1, 1], so we normalize first
232
+ points_2d[..., 0] /= w
233
+ points_2d[..., 1] /= h
234
+ points_2d = points_2d * 2 - 1
235
+ matching_depth = torch.ones_like(rendered_depth, dtype=torch.bool)
236
+ for b in range(depth_1.shape[0]):
237
+ for i in range(v1):
238
+ for j in range(v2):
239
+ depth = einops.rearrange(depth_2[b, j], 'h w -> 1 1 h w')
240
+ coords = einops.rearrange(points_2d[b, i, j], 'h w c -> 1 h w c')
241
+ sampled_depths = torch.nn.functional.grid_sample(depth, coords, align_corners=False)[0, 0]
242
+ matching_depth[b, i, j] = torch.isclose(rendered_depth[b, i, j], sampled_depths, atol=atol)
243
+
244
+ matching_depth = matching_depth.any(dim=-3) # (..., v1, h, w)
245
+
246
+ mask = in_frustum_mask & non_zero_depth & matching_depth
247
+ return mask
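A sketch of how the image terms above are typically combined into a photometric loss; the 0.2 SSIM weight is an assumption for illustration, not something this file fixes:

import torch

render = torch.rand(1, 3, 128, 128)  # hypothetical rendered image batch, values in [0, 1]
gt = torch.rand(1, 3, 128, 128)      # hypothetical ground-truth image batch
lambda_dssim = 0.2                    # assumed trade-off weight between the two terms
loss = (1.0 - lambda_dssim) * l1_loss(render, gt) + lambda_dssim * (1.0 - ssim(render, gt))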
utils/pose_utils.py ADDED
@@ -0,0 +1,570 @@
1
+ import math
2
+ import numpy as np
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from typing import Tuple
6
+ from utils.stepfun import sample_np, sample
7
+ import scipy
8
+
9
+
10
+ def quad2rotation(q):
11
+ """
12
+ Convert quaternions to rotation matrices in batch. All operations are in PyTorch, so gradients can propagate.
13
+
14
+ Args:
15
+ q (tensor, batch_size*4): quaternion.
16
+
17
+ Returns:
18
+ rot_mat (tensor, batch_size*3*3): rotation.
19
+ """
20
+ # bs = quad.shape[0]
21
+ # qr, qi, qj, qk = quad[:, 0], quad[:, 1], quad[:, 2], quad[:, 3]
22
+ # two_s = 2.0 / (quad * quad).sum(-1)
23
+ # rot_mat = torch.zeros(bs, 3, 3).to(quad.get_device())
24
+ # rot_mat[:, 0, 0] = 1 - two_s * (qj**2 + qk**2)
25
+ # rot_mat[:, 0, 1] = two_s * (qi * qj - qk * qr)
26
+ # rot_mat[:, 0, 2] = two_s * (qi * qk + qj * qr)
27
+ # rot_mat[:, 1, 0] = two_s * (qi * qj + qk * qr)
28
+ # rot_mat[:, 1, 1] = 1 - two_s * (qi**2 + qk**2)
29
+ # rot_mat[:, 1, 2] = two_s * (qj * qk - qi * qr)
30
+ # rot_mat[:, 2, 0] = two_s * (qi * qk - qj * qr)
31
+ # rot_mat[:, 2, 1] = two_s * (qj * qk + qi * qr)
32
+ # rot_mat[:, 2, 2] = 1 - two_s * (qi**2 + qj**2)
33
+ # return rot_mat
34
+ if not isinstance(q, torch.Tensor):
35
+ q = torch.tensor(q).cuda()
36
+
37
+ norm = torch.sqrt(
38
+ q[:, 0] * q[:, 0] + q[:, 1] * q[:, 1] + q[:, 2] * q[:, 2] + q[:, 3] * q[:, 3]
39
+ )
40
+ q = q / norm[:, None]
41
+ rot = torch.zeros((q.size(0), 3, 3)).to(q)
42
+ r = q[:, 0]
43
+ x = q[:, 1]
44
+ y = q[:, 2]
45
+ z = q[:, 3]
46
+ rot[:, 0, 0] = 1 - 2 * (y * y + z * z)
47
+ rot[:, 0, 1] = 2 * (x * y - r * z)
48
+ rot[:, 0, 2] = 2 * (x * z + r * y)
49
+ rot[:, 1, 0] = 2 * (x * y + r * z)
50
+ rot[:, 1, 1] = 1 - 2 * (x * x + z * z)
51
+ rot[:, 1, 2] = 2 * (y * z - r * x)
52
+ rot[:, 2, 0] = 2 * (x * z - r * y)
53
+ rot[:, 2, 1] = 2 * (y * z + r * x)
54
+ rot[:, 2, 2] = 1 - 2 * (x * x + y * y)
55
+ return rot
56
+
57
+ def get_camera_from_tensor(inputs):
58
+ """
59
+ Convert quaternion and translation to transformation matrix.
60
+
61
+ """
62
+ if not isinstance(inputs, torch.Tensor):
63
+ inputs = torch.tensor(inputs).cuda()
64
+
65
+ N = len(inputs.shape)
66
+ if N == 1:
67
+ inputs = inputs.unsqueeze(0)
68
+ # quad, T = inputs[:, :4], inputs[:, 4:]
69
+ # # normalize quad
70
+ # quad = F.normalize(quad)
71
+ # R = quad2rotation(quad)
72
+ # RT = torch.cat([R, T[:, :, None]], 2)
73
+ # # Add homogenous row
74
+ # homogenous_row = torch.tensor([0, 0, 0, 1]).cuda()
75
+ # RT = torch.cat([RT, homogenous_row[None, None, :].repeat(N, 1, 1)], 1)
76
+ # if N == 1:
77
+ # RT = RT[0]
78
+ # return RT
79
+
80
+ quad, T = inputs[:, :4], inputs[:, 4:]
81
+ w2c = torch.eye(4).to(inputs).float()
82
+ w2c[:3, :3] = quad2rotation(quad)
83
+ w2c[:3, 3] = T
84
+ return w2c
85
+
86
+ def quadmultiply(q1, q2):
87
+ """
88
+ Multiply two quaternions together using quaternion arithmetic
89
+ """
90
+ # Extract scalar and vector parts of the quaternions
91
+ w1, x1, y1, z1 = q1.unbind(dim=-1)
92
+ w2, x2, y2, z2 = q2.unbind(dim=-1)
93
+ # Calculate the quaternion product
94
+ result_quaternion = torch.stack(
95
+ [
96
+ w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2,
97
+ w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2,
98
+ w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2,
99
+ w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2,
100
+ ],
101
+ dim=-1,
102
+ )
103
+
104
+ return result_quaternion
105
+
106
+ def _sqrt_positive_part(x: torch.Tensor) -> torch.Tensor:
107
+ """
108
+ Returns torch.sqrt(torch.max(0, x))
109
+ but with a zero subgradient where x is 0.
110
+ Source: https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/transforms/rotation_conversions.html#matrix_to_quaternion
111
+ """
112
+ ret = torch.zeros_like(x)
113
+ positive_mask = x > 0
114
+ ret[positive_mask] = torch.sqrt(x[positive_mask])
115
+ return ret
116
+
117
+ def rotation2quad(matrix: torch.Tensor) -> torch.Tensor:
118
+ """
119
+ Convert rotations given as rotation matrices to quaternions.
120
+
121
+ Args:
122
+ matrix: Rotation matrices as tensor of shape (..., 3, 3).
123
+
124
+ Returns:
125
+ quaternions with real part first, as tensor of shape (..., 4).
126
+ Source: https://pytorch3d.readthedocs.io/en/latest/_modules/pytorch3d/transforms/rotation_conversions.html#matrix_to_quaternion
127
+ """
128
+ if matrix.size(-1) != 3 or matrix.size(-2) != 3:
129
+ raise ValueError(f"Invalid rotation matrix shape {matrix.shape}.")
130
+
131
+ if not isinstance(matrix, torch.Tensor):
132
+ matrix = torch.tensor(matrix).cuda()
133
+
134
+ batch_dim = matrix.shape[:-2]
135
+ m00, m01, m02, m10, m11, m12, m20, m21, m22 = torch.unbind(
136
+ matrix.reshape(batch_dim + (9,)), dim=-1
137
+ )
138
+
139
+ q_abs = _sqrt_positive_part(
140
+ torch.stack(
141
+ [
142
+ 1.0 + m00 + m11 + m22,
143
+ 1.0 + m00 - m11 - m22,
144
+ 1.0 - m00 + m11 - m22,
145
+ 1.0 - m00 - m11 + m22,
146
+ ],
147
+ dim=-1,
148
+ )
149
+ )
150
+
151
+ # we produce the desired quaternion multiplied by each of r, i, j, k
152
+ quat_by_rijk = torch.stack(
153
+ [
154
+ # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
155
+ # `int`.
156
+ torch.stack([q_abs[..., 0] ** 2, m21 - m12, m02 - m20, m10 - m01], dim=-1),
157
+ # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
158
+ # `int`.
159
+ torch.stack([m21 - m12, q_abs[..., 1] ** 2, m10 + m01, m02 + m20], dim=-1),
160
+ # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
161
+ # `int`.
162
+ torch.stack([m02 - m20, m10 + m01, q_abs[..., 2] ** 2, m12 + m21], dim=-1),
163
+ # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
164
+ # `int`.
165
+ torch.stack([m10 - m01, m20 + m02, m21 + m12, q_abs[..., 3] ** 2], dim=-1),
166
+ ],
167
+ dim=-2,
168
+ )
169
+
170
+ # We floor here at 0.1 but the exact level is not important; if q_abs is small,
171
+ # the candidate won't be picked.
172
+ flr = torch.tensor(0.1).to(dtype=q_abs.dtype, device=q_abs.device)
173
+ quat_candidates = quat_by_rijk / (2.0 * q_abs[..., None].max(flr))
174
+
175
+ # if not for numerical problems, quat_candidates[i] should be same (up to a sign),
176
+ # forall i; we pick the best-conditioned one (with the largest denominator)
177
+
178
+ return quat_candidates[
179
+ F.one_hot(q_abs.argmax(dim=-1), num_classes=4) > 0.5, :
180
+ ].reshape(batch_dim + (4,))
181
+
182
+
183
+ def get_tensor_from_camera(RT, Tquad=False):
184
+ """
185
+ Convert transformation matrix to quaternion and translation.
186
+
187
+ """
188
+ # gpu_id = -1
189
+ # if type(RT) == torch.Tensor:
190
+ # if RT.get_device() != -1:
191
+ # gpu_id = RT.get_device()
192
+ # RT = RT.detach().cpu()
193
+ # RT = RT.numpy()
194
+ # from mathutils import Matrix
195
+ #
196
+ # R, T = RT[:3, :3], RT[:3, 3]
197
+ # rot = Matrix(R)
198
+ # quad = rot.to_quaternion()
199
+ # if Tquad:
200
+ # tensor = np.concatenate([T, quad], 0)
201
+ # else:
202
+ # tensor = np.concatenate([quad, T], 0)
203
+ # tensor = torch.from_numpy(tensor).float()
204
+ # if gpu_id != -1:
205
+ # tensor = tensor.to(gpu_id)
206
+ # return tensor
207
+
208
+ if not isinstance(RT, torch.Tensor):
209
+ RT = torch.tensor(RT).cuda()
210
+
211
+ rot = RT[:3, :3].unsqueeze(0).detach()
212
+ quat = rotation2quad(rot).squeeze()
213
+ tran = RT[:3, 3].detach()
214
+
215
+ return torch.cat([quat, tran])
216
+
217
+ def normalize(x):
218
+ return x / np.linalg.norm(x)
219
+
220
+
221
+ def viewmatrix(lookdir, up, position, subtract_position=False):
222
+ """Construct lookat view matrix."""
223
+ vec2 = normalize((lookdir - position) if subtract_position else lookdir)
224
+ vec0 = normalize(np.cross(up, vec2))
225
+ vec1 = normalize(np.cross(vec2, vec0))
226
+ m = np.stack([vec0, vec1, vec2, position], axis=1)
227
+ return m
228
+
229
+
230
+ def poses_avg(poses):
231
+ """New pose using average position, z-axis, and up vector of input poses."""
232
+ position = poses[:, :3, 3].mean(0)
233
+ z_axis = poses[:, :3, 2].mean(0)
234
+ up = poses[:, :3, 1].mean(0)
235
+ cam2world = viewmatrix(z_axis, up, position)
236
+ return cam2world
237
+
238
+
239
+ def focus_point_fn(poses):
240
+ """Calculate nearest point to all focal axes in poses."""
241
+ directions, origins = poses[:, :3, 2:3], poses[:, :3, 3:4]
242
+ m = np.eye(3) - directions * np.transpose(directions, [0, 2, 1])
243
+ mt_m = np.transpose(m, [0, 2, 1]) @ m
244
+ focus_pt = np.linalg.inv(mt_m.mean(0)) @ (mt_m @ origins).mean(0)[:, 0]
245
+ return focus_pt
246
+
247
+
248
+ def pad_poses(p):
249
+ """Pad [..., 3, 4] pose matrices with a homogeneous bottom row [0,0,0,1]."""
250
+ bottom = np.broadcast_to([0, 0, 0, 1.], p[..., :1, :4].shape)
251
+ return np.concatenate([p[..., :3, :4], bottom], axis=-2)
252
+
253
+ def unpad_poses(p):
254
+ """Remove the homogeneous bottom row from [..., 4, 4] pose matrices."""
255
+ return p[..., :3, :4]
256
+
257
+ def transform_poses_pca(poses):
258
+ """Transforms poses so principal components lie on XYZ axes.
259
+
260
+ Args:
261
+ poses: a (N, 3, 4) array containing the cameras' camera to world transforms.
262
+
263
+ Returns:
264
+ A tuple (poses, transform), with the transformed poses and the applied
265
+ camera_to_world transforms.
266
+ """
267
+ t = poses[:, :3, 3]
268
+ t_mean = t.mean(axis=0)
269
+ t = t - t_mean
270
+
271
+ eigval, eigvec = np.linalg.eig(t.T @ t)
272
+ # Sort eigenvectors in order of largest to smallest eigenvalue.
273
+ inds = np.argsort(eigval)[::-1]
274
+ eigvec = eigvec[:, inds]
275
+ rot = eigvec.T
276
+ if np.linalg.det(rot) < 0:
277
+ rot = np.diag(np.array([1, 1, -1])) @ rot
278
+
279
+ transform = np.concatenate([rot, rot @ -t_mean[:, None]], -1)
280
+ poses_recentered = unpad_poses(transform @ pad_poses(poses))
281
+ transform = np.concatenate([transform, np.eye(4)[3:]], axis=0)
282
+
283
+ # Flip coordinate system if z component of y-axis is negative
284
+ if poses_recentered.mean(axis=0)[2, 1] < 0:
285
+ poses_recentered = np.diag(np.array([1, -1, -1])) @ poses_recentered
286
+ transform = np.diag(np.array([1, -1, -1, 1])) @ transform
287
+
288
+ # Just make sure it's in the [-1, 1]^3 cube
289
+ scale_factor = 1. / np.max(np.abs(poses_recentered[:, :3, 3]))
290
+ poses_recentered[:, :3, 3] *= scale_factor
291
+ transform = np.diag(np.array([scale_factor] * 3 + [1])) @ transform
292
+ return poses_recentered, transform
293
+
294
+
295
+ def recenter_poses(poses: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
296
+ """Recenter poses around the origin."""
297
+ cam2world = poses_avg(poses)
298
+ transform = np.linalg.inv(pad_poses(cam2world))
299
+ poses = transform @ pad_poses(poses)
300
+ return unpad_poses(poses), transform
301
+
302
+ def generate_ellipse_path(views, n_frames=600, const_speed=True, z_variation=0., z_phase=0.):
303
+ poses = []
304
+ for view in views:
305
+ tmp_view = np.eye(4)
306
+ tmp_view[:3] = np.concatenate([view.R.T, view.T[:, None]], 1)
307
+ tmp_view = np.linalg.inv(tmp_view)
308
+ tmp_view[:, 1:3] *= -1
309
+ poses.append(tmp_view)
310
+ poses = np.stack(poses, 0)
311
+ poses, transform = transform_poses_pca(poses)
312
+
313
+
314
+ # Calculate the focal point for the path (cameras point toward this).
315
+ center = focus_point_fn(poses)
316
+ # Path height sits at z=0 (in middle of zero-mean capture pattern).
317
+ offset = np.array([center[0] , center[1], 0 ])
318
+ # Calculate scaling for ellipse axes based on input camera positions.
319
+ sc = np.percentile(np.abs(poses[:, :3, 3] - offset), 90, axis=0)
320
+
321
+ # Use ellipse that is symmetric about the focal point in xy.
322
+ low = -sc + offset
323
+ high = sc + offset
324
+ # Optional height variation need not be symmetric
325
+ z_low = np.percentile((poses[:, :3, 3]), 10, axis=0)
326
+ z_high = np.percentile((poses[:, :3, 3]), 90, axis=0)
327
+
328
+
329
+ def get_positions(theta):
330
+ # Interpolate between bounds with trig functions to get ellipse in x-y.
331
+ # Optionally also interpolate in z to change camera height along path.
332
+ return np.stack([
333
+ (low[0] + (high - low)[0] * (np.cos(theta) * .5 + .5)),
334
+ (low[1] + (high - low)[1] * (np.sin(theta) * .5 + .5)),
335
+ z_variation * (z_low[2] + (z_high - z_low)[2] *
336
+ (np.cos(theta + 2 * np.pi * z_phase) * .5 + .5)),
337
+ ], -1)
338
+
339
+ theta = np.linspace(0, 2. * np.pi, n_frames + 1, endpoint=True)
340
+ positions = get_positions(theta)
341
+
342
+ if const_speed:
343
+ # Resample theta angles so that the velocity is closer to constant.
344
+ lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1)
345
+ theta = sample_np(None, theta, np.log(lengths), n_frames + 1)
346
+ positions = get_positions(theta)
347
+
348
+ # Throw away duplicated last position.
349
+ positions = positions[:-1]
350
+
351
+ # Set path's up vector to axis closest to average of input pose up vectors.
352
+ avg_up = poses[:, :3, 1].mean(0)
353
+ avg_up = avg_up / np.linalg.norm(avg_up)
354
+ ind_up = np.argmax(np.abs(avg_up))
355
+ up = np.eye(3)[ind_up] * np.sign(avg_up[ind_up])
356
+ # up = normalize(poses[:, :3, 1].sum(0))
357
+
358
+ render_poses = []
359
+ for p in positions:
360
+ render_pose = np.eye(4)
361
+ render_pose[:3] = viewmatrix(p - center, up, p)
362
+ render_pose = np.linalg.inv(transform) @ render_pose
363
+ render_pose[:3, 1:3] *= -1
364
+ render_poses.append(np.linalg.inv(render_pose))
365
+ return render_poses
366
+
367
+
368
+
369
+ def generate_spiral_path(poses_arr,
370
+ n_frames: int = 180,
371
+ n_rots: int = 2,
372
+ zrate: float = .5) -> np.ndarray:
373
+ """Calculates a forward facing spiral path for rendering."""
374
+ poses = poses_arr[:, :-2].reshape([-1, 3, 5])
375
+ bounds = poses_arr[:, -2:]
376
+ fix_rotation = np.array([
377
+ [0, -1, 0, 0],
378
+ [1, 0, 0, 0],
379
+ [0, 0, 1, 0],
380
+ [0, 0, 0, 1],
381
+ ], dtype=np.float32)
382
+ poses = poses[:, :3, :4] @ fix_rotation
383
+
384
+ scale = 1. / (bounds.min() * .75)
385
+ poses[:, :3, 3] *= scale
386
+ bounds *= scale
387
+ poses, transform = recenter_poses(poses)
388
+
389
+ close_depth, inf_depth = bounds.min() * .9, bounds.max() * 5.
390
+ dt = .75
391
+ focal = 1 / (((1 - dt) / close_depth + dt / inf_depth))
392
+
393
+ # Get radii for spiral path using 90th percentile of camera positions.
394
+ positions = poses[:, :3, 3]
395
+ radii = np.percentile(np.abs(positions), 90, 0)
396
+ radii = np.concatenate([radii, [1.]])
397
+
398
+ # Generate poses for spiral path.
399
+ render_poses = []
400
+ cam2world = poses_avg(poses)
401
+ up = poses[:, :3, 1].mean(0)
402
+ for theta in np.linspace(0., 2. * np.pi * n_rots, n_frames, endpoint=False):
403
+ t = radii * [np.cos(theta), -np.sin(theta), -np.sin(theta * zrate), 1.]
404
+ position = cam2world @ t
405
+ lookat = cam2world @ [0, 0, -focal, 1.]
406
+ z_axis = position - lookat
407
+ render_pose = np.eye(4)
408
+ render_pose[:3] = viewmatrix(z_axis, up, position)
409
+ render_pose = np.linalg.inv(transform) @ render_pose
410
+ render_pose[:3, 1:3] *= -1
411
+ render_pose[:3, 3] /= scale
412
+ render_poses.append(np.linalg.inv(render_pose))
413
+ render_poses = np.stack(render_poses, axis=0)
414
+ return render_poses
415
+
416
+
417
+
418
+ def generate_interpolated_path(
419
+ views,
420
+ n_interp,
421
+ spline_degree = 5,
422
+ smoothness = 0.03,
423
+ rot_weight = 0.1,
424
+ lock_up = False,
425
+ fixed_up_vector = None,
426
+ lookahead_i = None,
427
+ frames_per_colmap = None,
428
+ const_speed = False,
429
+ n_buffer = None,
430
+ periodic = False,
431
+ n_interp_as_total = False,
432
+ ):
433
+ """Creates a smooth spline path between input keyframe camera poses.
434
+
435
+ Spline is calculated with poses in format (position, lookat-point, up-point).
436
+ Args:
437
+ views: input keyframe camera views; converted internally to (n, 4, 4) pose matrices.
438
+ n_interp: returned path will have n_interp * (n - 1) total poses.
439
+ spline_degree: polynomial degree of B-spline.
440
+ smoothness: parameter for spline smoothing, 0 forces exact interpolation.
441
+ rot_weight: relative weighting of rotation/translation in spline solve.
442
+ lock_up: if True, force the given up vector to be used and allow the lookat point to vary.
443
+ fixed_up_vector: replace the interpolated `up` with a fixed vector.
444
+ lookahead_i: force the look direction to look at the pose `i` frames ahead.
445
+ frames_per_colmap: conversion factor for the desired average velocity.
446
+ const_speed: renormalize spline to have constant delta between each pose.
447
+ n_buffer: Number of buffer frames to insert at the start and end of the
448
+ path. Helps keep the ends of a spline path straight.
449
+ periodic: make the spline path periodic (perfect loop).
450
+ n_interp_as_total: use n_interp as total number of poses in path rather than
451
+ the number of poses to interpolate between each input.
452
+
453
+ Returns:
454
+ Array of new camera poses with shape (n_interp * (n - 1), 3, 4), or
455
+ (n_interp, 3, 4) if n_interp_as_total is set.
456
+ """
457
+ poses = []
458
+ for view in views:
459
+ tmp_view = np.eye(4)
460
+ tmp_view[:3] = np.concatenate([view.R.T, view.T[:, None]], 1)
461
+ tmp_view = np.linalg.inv(tmp_view)
462
+ tmp_view[:, 1:3] *= -1
463
+ poses.append(tmp_view)
464
+ poses = np.stack(poses, 0)
465
+
466
+ def poses_to_points(poses, dist):
467
+ """Converts from pose matrices to (position, lookat, up) format."""
468
+ pos = poses[:, :3, -1]
469
+ lookat = poses[:, :3, -1] - dist * poses[:, :3, 2]
470
+ up = poses[:, :3, -1] + dist * poses[:, :3, 1]
471
+ return np.stack([pos, lookat, up], 1)
472
+
473
+ def points_to_poses(points):
474
+ """Converts from (position, lookat, up) format to pose matrices."""
475
+ poses = []
476
+ for i in range(len(points)):
477
+ pos, lookat_point, up_point = points[i]
478
+ if lookahead_i is not None:
479
+ if i + lookahead_i < len(points):
480
+ lookat = pos - points[i + lookahead_i][0]
481
+ else:
482
+ lookat = pos - lookat_point
483
+ up = (up_point - pos) if fixed_up_vector is None else fixed_up_vector
484
+ poses.append(viewmatrix(lookat, up, pos))
485
+ return np.array(poses)
486
+
487
+ def insert_buffer_poses(poses, n_buffer):
488
+ """Insert extra poses at the start and end of the path."""
489
+
490
+ def average_distance(points):
491
+ distances = np.linalg.norm(points[1:] - points[0:-1], axis=-1)
492
+ return np.mean(distances)
493
+
494
+ def shift(pose, dz):
495
+ result = np.copy(pose)
496
+ z = result[:3, 2]
497
+ z /= np.linalg.norm(z)
498
+ # Move along forward-backward axis. -z is forward.
499
+ result[:3, 3] += z * dz
500
+ return result
501
+
502
+ dz = average_distance(poses[:, :3, 3])
503
+ prefix = np.stack([shift(poses[0], (i + 1) * dz) for i in range(n_buffer)])
504
+ prefix = prefix[::-1] # reverse order
505
+ suffix = np.stack(
506
+ [shift(poses[-1], -(i + 1) * dz) for i in range(n_buffer)]
507
+ )
508
+ result = np.concatenate([prefix, poses, suffix])
509
+ return result
510
+
511
+ def remove_buffer_poses(poses, u, n_frames, u_keyframes, n_buffer):
512
+ u_keyframes = u_keyframes[n_buffer:-n_buffer]
513
+ mask = (u >= u_keyframes[0]) & (u <= u_keyframes[-1])
514
+ poses = poses[mask]
515
+ u = u[mask]
516
+ n_frames = len(poses)
517
+ return poses, u, n_frames, u_keyframes
518
+
519
+ def interp(points, u, k, s):
520
+ """Runs multidimensional B-spline interpolation on the input points."""
521
+ sh = points.shape
522
+ pts = np.reshape(points, (sh[0], -1))
523
+ k = min(k, sh[0] - 1)
524
+ tck, u_keyframes = scipy.interpolate.splprep(pts.T, k=k, s=s, per=periodic)
525
+ new_points = np.array(scipy.interpolate.splev(u, tck))
526
+ new_points = np.reshape(new_points.T, (len(u), sh[1], sh[2]))
527
+ return new_points, u_keyframes
528
+
529
+
530
+ if n_buffer is not None:
531
+ poses = insert_buffer_poses(poses, n_buffer)
532
+ points = poses_to_points(poses, dist=rot_weight)
533
+ if n_interp_as_total:
534
+ n_frames = n_interp + 1 # Add extra since final pose is discarded.
535
+ else:
536
+ n_frames = n_interp * (points.shape[0] - 1)
537
+ u = np.linspace(0, 1, n_frames, endpoint=True)
538
+ new_points, u_keyframes = interp(points, u=u, k=spline_degree, s=smoothness)
539
+ poses = points_to_poses(new_points)
540
+ if n_buffer is not None:
541
+ poses, u, n_frames, u_keyframes = remove_buffer_poses(
542
+ poses, u, n_frames, u_keyframes, n_buffer
543
+ )
544
+ # poses, transform = transform_poses_pca(poses)
545
+ if frames_per_colmap is not None:
546
+ # Recalculate the number of frames to achieve desired average velocity.
547
+ positions = poses[:, :3, -1]
548
+ lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1)
549
+ total_length_colmap = lengths.sum()
550
+ print('old n_frames:', n_frames)
551
+ print('total_length_colmap:', total_length_colmap)
552
+ n_frames = int(total_length_colmap * frames_per_colmap)
553
+ print('new n_frames:', n_frames)
554
+ u = np.linspace(
555
+ np.min(u_keyframes), np.max(u_keyframes), n_frames, endpoint=True
556
+ )
557
+ new_points, _ = interp(points, u=u, k=spline_degree, s=smoothness)
558
+ poses = points_to_poses(new_points)
559
+
560
+ if const_speed:
561
+ # Resample timesteps so that the velocity is nearly constant.
562
+ positions = poses[:, :3, -1]
563
+ lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1)
564
+ u = sample(None, u, np.log(lengths), n_frames + 1)
565
+ new_points, _ = interp(points, u=u, k=spline_degree, s=smoothness)
566
+ poses = points_to_poses(new_points)
567
+
568
+ # return poses[:-1], u[:-1], u_keyframes
569
+ return poses[:-1]
570
+
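A small sketch of the quaternion/matrix round trip used when camera poses are optimized as 7D vectors; the example pose is made up:

import torch

# Hypothetical world-to-camera matrix: identity rotation plus a small translation.
w2c = torch.eye(4)
w2c[:3, 3] = torch.tensor([0.1, 0.0, 0.5])
# Pose -> 7D (quaternion + translation) vector, e.g. when optimizing poses directly ...
pose_vec = get_tensor_from_camera(w2c)
# ... and back to a 4x4 matrix; the round trip matches w2c up to numerical precision.
w2c_back = get_camera_from_tensor(pose_vec)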
utils/sh_utils.py ADDED
@@ -0,0 +1,118 @@
1
+ # Copyright 2021 The PlenOctree Authors.
2
+ # Redistribution and use in source and binary forms, with or without
3
+ # modification, are permitted provided that the following conditions are met:
4
+ #
5
+ # 1. Redistributions of source code must retain the above copyright notice,
6
+ # this list of conditions and the following disclaimer.
7
+ #
8
+ # 2. Redistributions in binary form must reproduce the above copyright notice,
9
+ # this list of conditions and the following disclaimer in the documentation
10
+ # and/or other materials provided with the distribution.
11
+ #
12
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
13
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
15
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
16
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
18
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
19
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
20
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
21
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
22
+ # POSSIBILITY OF SUCH DAMAGE.
23
+
24
+ import torch
25
+
26
+ C0 = 0.28209479177387814
27
+ C1 = 0.4886025119029199
28
+ C2 = [
29
+ 1.0925484305920792,
30
+ -1.0925484305920792,
31
+ 0.31539156525252005,
32
+ -1.0925484305920792,
33
+ 0.5462742152960396
34
+ ]
35
+ C3 = [
36
+ -0.5900435899266435,
37
+ 2.890611442640554,
38
+ -0.4570457994644658,
39
+ 0.3731763325901154,
40
+ -0.4570457994644658,
41
+ 1.445305721320277,
42
+ -0.5900435899266435
43
+ ]
44
+ C4 = [
45
+ 2.5033429417967046,
46
+ -1.7701307697799304,
47
+ 0.9461746957575601,
48
+ -0.6690465435572892,
49
+ 0.10578554691520431,
50
+ -0.6690465435572892,
51
+ 0.47308734787878004,
52
+ -1.7701307697799304,
53
+ 0.6258357354491761,
54
+ ]
55
+
56
+
57
+ def eval_sh(deg, sh, dirs):
58
+ """
59
+ Evaluate spherical harmonics at unit directions
60
+ using hardcoded SH polynomials.
61
+ Works with torch/np/jnp.
62
+ ... Can be 0 or more batch dimensions.
63
+ Args:
64
+ deg: int SH degree. Currently, 0-4 supported
65
+ sh: jnp.ndarray SH coeffs [..., C, (deg + 1) ** 2]
66
+ dirs: jnp.ndarray unit directions [..., 3]
67
+ Returns:
68
+ [..., C]
69
+ """
70
+ assert deg <= 4 and deg >= 0
71
+ coeff = (deg + 1) ** 2
72
+ assert sh.shape[-1] >= coeff
73
+
74
+ result = C0 * sh[..., 0]
75
+ if deg > 0:
76
+ x, y, z = dirs[..., 0:1], dirs[..., 1:2], dirs[..., 2:3]
77
+ result = (result -
78
+ C1 * y * sh[..., 1] +
79
+ C1 * z * sh[..., 2] -
80
+ C1 * x * sh[..., 3])
81
+
82
+ if deg > 1:
83
+ xx, yy, zz = x * x, y * y, z * z
84
+ xy, yz, xz = x * y, y * z, x * z
85
+ result = (result +
86
+ C2[0] * xy * sh[..., 4] +
87
+ C2[1] * yz * sh[..., 5] +
88
+ C2[2] * (2.0 * zz - xx - yy) * sh[..., 6] +
89
+ C2[3] * xz * sh[..., 7] +
90
+ C2[4] * (xx - yy) * sh[..., 8])
91
+
92
+ if deg > 2:
93
+ result = (result +
94
+ C3[0] * y * (3 * xx - yy) * sh[..., 9] +
95
+ C3[1] * xy * z * sh[..., 10] +
96
+ C3[2] * y * (4 * zz - xx - yy)* sh[..., 11] +
97
+ C3[3] * z * (2 * zz - 3 * xx - 3 * yy) * sh[..., 12] +
98
+ C3[4] * x * (4 * zz - xx - yy) * sh[..., 13] +
99
+ C3[5] * z * (xx - yy) * sh[..., 14] +
100
+ C3[6] * x * (xx - 3 * yy) * sh[..., 15])
101
+
102
+ if deg > 3:
103
+ result = (result + C4[0] * xy * (xx - yy) * sh[..., 16] +
104
+ C4[1] * yz * (3 * xx - yy) * sh[..., 17] +
105
+ C4[2] * xy * (7 * zz - 1) * sh[..., 18] +
106
+ C4[3] * yz * (7 * zz - 3) * sh[..., 19] +
107
+ C4[4] * (zz * (35 * zz - 30) + 3) * sh[..., 20] +
108
+ C4[5] * xz * (7 * zz - 3) * sh[..., 21] +
109
+ C4[6] * (xx - yy) * (7 * zz - 1) * sh[..., 22] +
110
+ C4[7] * xz * (xx - 3 * yy) * sh[..., 23] +
111
+ C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) * sh[..., 24])
112
+ return result
113
+
114
+ def RGB2SH(rgb):
115
+ return (rgb - 0.5) / C0
116
+
117
+ def SH2RGB(sh):
118
+ return sh * C0 + 0.5
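A brief sketch of evaluating a Gaussian's view-dependent colour from its SH coefficients; the degree-1 setup and values are chosen purely for illustration:

import torch

# One Gaussian, 3 colour channels, degree-1 SH -> (1 + 1) ** 2 = 4 coefficients per channel.
sh = torch.zeros(1, 3, 4)
sh[..., 0] = RGB2SH(torch.tensor([1.0, 0.5, 0.25]))  # DC term initialised from an RGB colour
dirs = torch.tensor([[0.0, 0.0, 1.0]])               # unit viewing direction per Gaussian
rgb = eval_sh(1, sh, dirs)                            # (1, 3) view-dependent colour
rgb = (rgb + 0.5).clamp(0.0, 1.0)                     # same 0.5 offset convention as SH2RGB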
utils/stepfun.py ADDED
@@ -0,0 +1,403 @@
1
+ # from internal import math
2
+ import numpy as np
3
+ import torch
4
+
5
+
6
+ def searchsorted(a, v):
7
+ """Find indices where v should be inserted into a to maintain order.
8
+
9
+ Args:
10
+ a: tensor, the sorted reference points that we are scanning to see where v
11
+ should lie.
12
+ v: tensor, the query points that we are pretending to insert into a. Does
13
+ not need to be sorted. All but the last dimensions should match or expand
14
+ to those of a, the last dimension can differ.
15
+
16
+ Returns:
17
+ (idx_lo, idx_hi), where a[idx_lo] <= v < a[idx_hi], unless v is out of the
18
+ range [a[0], a[-1]] in which case idx_lo and idx_hi are both the first or
19
+ last index of a.
20
+ """
21
+ i = torch.arange(a.shape[-1], device=a.device)
22
+ v_ge_a = v[..., None, :] >= a[..., :, None]
23
+ idx_lo = torch.max(torch.where(v_ge_a, i[..., :, None], i[..., :1, None]), -2).values
24
+ idx_hi = torch.min(torch.where(~v_ge_a, i[..., :, None], i[..., -1:, None]), -2).values
25
+ return idx_lo, idx_hi
26
+
27
+
28
+ def query(tq, t, y, outside_value=0):
29
+ """Look up the values of the step function (t, y) at locations tq."""
30
+ idx_lo, idx_hi = searchsorted(t, tq)
31
+ yq = torch.where(idx_lo == idx_hi, torch.full_like(idx_hi, outside_value),
32
+ torch.take_along_dim(y, idx_lo, dim=-1))
33
+ return yq
34
+
35
+
36
+ def inner_outer(t0, t1, y1):
37
+ """Construct inner and outer measures on (t1, y1) for t0."""
38
+ cy1 = torch.cat([torch.zeros_like(y1[..., :1]),
39
+ torch.cumsum(y1, dim=-1)],
40
+ dim=-1)
41
+ idx_lo, idx_hi = searchsorted(t1, t0)
42
+
43
+ cy1_lo = torch.take_along_dim(cy1, idx_lo, dim=-1)
44
+ cy1_hi = torch.take_along_dim(cy1, idx_hi, dim=-1)
45
+
46
+ y0_outer = cy1_hi[..., 1:] - cy1_lo[..., :-1]
47
+ y0_inner = torch.where(idx_hi[..., :-1] <= idx_lo[..., 1:],
48
+ cy1_lo[..., 1:] - cy1_hi[..., :-1], torch.zeros_like(idx_lo[..., 1:]))
49
+ return y0_inner, y0_outer
50
+
51
+
52
+ def lossfun_outer(t, w, t_env, w_env):
53
+ """The proposal weight should be an upper envelope on the nerf weight."""
54
+ eps = torch.finfo(t.dtype).eps
55
+ # eps = 1e-3
56
+
57
+ _, w_outer = inner_outer(t, t_env, w_env)
58
+ # We assume w_inner <= w <= w_outer. We don't penalize w_inner because it's
59
+ # more effective to pull w_outer up than it is to push w_inner down.
60
+ # Scaled half-quadratic loss that gives a constant gradient at w_outer = 0.
61
+ return (w - w_outer).clamp_min(0) ** 2 / (w + eps)
62
+
63
+
64
+ def weight_to_pdf(t, w):
65
+ """Turn a vector of weights that sums to 1 into a PDF that integrates to 1."""
66
+ eps = torch.finfo(t.dtype).eps
67
+ return w / (t[..., 1:] - t[..., :-1]).clamp_min(eps)
68
+
69
+
70
+ def pdf_to_weight(t, p):
71
+ """Turn a PDF that integrates to 1 into a vector of weights that sums to 1."""
72
+ return p * (t[..., 1:] - t[..., :-1])
73
+
74
+
75
+ def max_dilate(t, w, dilation, domain=(-torch.inf, torch.inf)):
76
+ """Dilate (via max-pooling) a non-negative step function."""
77
+ t0 = t[..., :-1] - dilation
78
+ t1 = t[..., 1:] + dilation
79
+ t_dilate, _ = torch.sort(torch.cat([t, t0, t1], dim=-1), dim=-1)
80
+ t_dilate = torch.clip(t_dilate, *domain)
81
+ w_dilate = torch.max(
82
+ torch.where(
83
+ (t0[..., None, :] <= t_dilate[..., None])
84
+ & (t1[..., None, :] > t_dilate[..., None]),
85
+ w[..., None, :],
86
+ torch.zeros_like(w[..., None, :]),
87
+ ), dim=-1).values[..., :-1]
88
+ return t_dilate, w_dilate
89
+
90
+
91
+ def max_dilate_weights(t,
92
+ w,
93
+ dilation,
94
+ domain=(-torch.inf, torch.inf),
95
+ renormalize=False):
96
+ """Dilate (via max-pooling) a set of weights."""
97
+ eps = torch.finfo(w.dtype).eps
98
+ # eps = 1e-3
99
+
100
+ p = weight_to_pdf(t, w)
101
+ t_dilate, p_dilate = max_dilate(t, p, dilation, domain=domain)
102
+ w_dilate = pdf_to_weight(t_dilate, p_dilate)
103
+ if renormalize:
104
+ w_dilate /= torch.sum(w_dilate, dim=-1, keepdim=True).clamp_min(eps)
105
+ return t_dilate, w_dilate
106
+
107
+
108
+ def integrate_weights(w):
109
+ """Compute the cumulative sum of w, assuming all weight vectors sum to 1.
110
+
111
+ The output's size on the last dimension is one greater than that of the input,
112
+ because we're computing the integral corresponding to the endpoints of a step
113
+ function, not the integral of the interior/bin values.
114
+
115
+ Args:
116
+ w: Tensor, which will be integrated along the last axis. This is assumed to
117
+ sum to 1 along the last axis, and this function will (silently) break if
118
+ that is not the case.
119
+
120
+ Returns:
121
+ cw0: Tensor, the integral of w, where cw0[..., 0] = 0 and cw0[..., -1] = 1
122
+ """
123
+ cw = torch.cumsum(w[..., :-1], dim=-1).clamp_max(1)
124
+ shape = cw.shape[:-1] + (1,)
125
+ # Ensure that the CDF starts with exactly 0 and ends with exactly 1.
126
+ cw0 = torch.cat([torch.zeros(shape, device=cw.device), cw,
127
+ torch.ones(shape, device=cw.device)], dim=-1)
128
+ return cw0
129
+
130
+
131
+ def integrate_weights_np(w):
132
+ """Compute the cumulative sum of w, assuming all weight vectors sum to 1.
133
+
134
+ The output's size on the last dimension is one greater than that of the input,
135
+ because we're computing the integral corresponding to the endpoints of a step
136
+ function, not the integral of the interior/bin values.
137
+
138
+ Args:
139
+ w: Tensor, which will be integrated along the last axis. This is assumed to
140
+ sum to 1 along the last axis, and this function will (silently) break if
141
+ that is not the case.
142
+
143
+ Returns:
144
+ cw0: Tensor, the integral of w, where cw0[..., 0] = 0 and cw0[..., -1] = 1
145
+ """
146
+ cw = np.minimum(1, np.cumsum(w[..., :-1], axis=-1))
147
+ shape = cw.shape[:-1] + (1,)
148
+ # Ensure that the CDF starts with exactly 0 and ends with exactly 1.
149
+ cw0 = np.concatenate([np.zeros(shape), cw,
150
+ np.ones(shape)], axis=-1)
151
+ return cw0
152
+
153
+
154
+ def invert_cdf(u, t, w_logits):
155
+ """Invert the CDF defined by (t, w) at the points specified by u in [0, 1)."""
156
+ # Compute the PDF and CDF for each weight vector.
157
+ w = torch.softmax(w_logits, dim=-1)
158
+ cw = integrate_weights(w)
159
+ # Interpolate into the inverse CDF.
160
+ t_new = math.sorted_interp(u, cw, t)
161
+ return t_new
162
+
163
+
164
+ def invert_cdf_np(u, t, w_logits):
165
+ """Invert the CDF defined by (t, w) at the points specified by u in [0, 1)."""
166
+ # Compute the PDF and CDF for each weight vector.
167
+ w = np.exp(w_logits) / np.exp(w_logits).sum(axis=-1, keepdims=True)
168
+ cw = integrate_weights_np(w)
169
+ # Interpolate into the inverse CDF.
170
+ interp_fn = np.interp
171
+ t_new = interp_fn(u, cw, t)
172
+ return t_new
173
+
174
+
175
+ def sample(rand,
176
+ t,
177
+ w_logits,
178
+ num_samples,
179
+ single_jitter=False,
180
+ deterministic_center=False):
181
+ """Piecewise-Constant PDF sampling from a step function.
182
+
183
+ Args:
184
+ rand: random number generator (or None for `linspace` sampling).
185
+ t: [..., num_bins + 1], bin endpoint coordinates (must be sorted)
186
+ w_logits: [..., num_bins], logits corresponding to bin weights
187
+ num_samples: int, the number of samples.
188
+ single_jitter: bool, if True, jitter every sample along each ray by the same
189
+ amount in the inverse CDF. Otherwise, jitter each sample independently.
190
+ deterministic_center: bool, if False, when `rand` is None return samples that
191
+ linspace the entire PDF. If True, skip the front and back of the linspace
192
+ so that the centers of each PDF interval are returned.
193
+
194
+ Returns:
195
+ t_samples: [batch_size, num_samples].
196
+ """
197
+ eps = torch.finfo(t.dtype).eps
198
+ # eps = 1e-3
199
+
200
+ device = t.device
201
+
202
+ # Draw uniform samples.
203
+ if not rand:
204
+ if deterministic_center:
205
+ pad = 1 / (2 * num_samples)
206
+ u = torch.linspace(pad, 1. - pad - eps, num_samples, device=device)
207
+ else:
208
+ u = torch.linspace(0, 1. - eps, num_samples, device=device)
209
+ u = torch.broadcast_to(u, t.shape[:-1] + (num_samples,))
210
+ else:
211
+ # `u` is in [0, 1) --- it can be zero, but it can never be 1.
212
+ u_max = eps + (1 - eps) / num_samples
213
+ max_jitter = (1 - u_max) / (num_samples - 1) - eps
214
+ d = 1 if single_jitter else num_samples
215
+ u = torch.linspace(0, 1 - u_max, num_samples, device=device) + \
216
+ torch.rand(t.shape[:-1] + (d,), device=device) * max_jitter
217
+
218
+ return invert_cdf(u, t, w_logits)
219
+
220
+
221
+ def sample_np(rand,
222
+ t,
223
+ w_logits,
224
+ num_samples,
225
+ single_jitter=False,
226
+ deterministic_center=False):
227
+ """
228
+ numpy version of sample()
229
+ """
230
+ eps = np.finfo(np.float32).eps
231
+
232
+ # Draw uniform samples.
233
+ if not rand:
234
+ if deterministic_center:
235
+ pad = 1 / (2 * num_samples)
236
+ u = np.linspace(pad, 1. - pad - eps, num_samples)
237
+ else:
238
+ u = np.linspace(0, 1. - eps, num_samples)
239
+ u = np.broadcast_to(u, t.shape[:-1] + (num_samples,))
240
+ else:
241
+ # `u` is in [0, 1) --- it can be zero, but it can never be 1.
242
+ u_max = eps + (1 - eps) / num_samples
243
+ max_jitter = (1 - u_max) / (num_samples - 1) - eps
244
+ d = 1 if single_jitter else num_samples
245
+ u = np.linspace(0, 1 - u_max, num_samples) + \
246
+ np.random.rand(*t.shape[:-1], d) * max_jitter
247
+
248
+ return invert_cdf_np(u, t, w_logits)
249
+
250
+
251
+ def sample_intervals(rand,
252
+ t,
253
+ w_logits,
254
+ num_samples,
255
+ single_jitter=False,
256
+ domain=(-torch.inf, torch.inf)):
257
+ """Sample *intervals* (rather than points) from a step function.
258
+
259
+ Args:
260
+ rand: random number generator (or None for `linspace` sampling).
261
+ t: [..., num_bins + 1], bin endpoint coordinates (must be sorted)
262
+ w_logits: [..., num_bins], logits corresponding to bin weights
263
+ num_samples: int, the number of intervals to sample.
264
+ single_jitter: bool, if True, jitter every sample along each ray by the same
265
+ amount in the inverse CDF. Otherwise, jitter each sample independently.
266
+ domain: (minval, maxval), the range of valid values for `t`.
267
+
268
+ Returns:
269
+ t_samples: [batch_size, num_samples].
270
+ """
271
+ if num_samples <= 1:
272
+ raise ValueError(f'num_samples must be > 1, is {num_samples}.')
273
+
274
+ # Sample a set of points from the step function.
275
+ centers = sample(
276
+ rand,
277
+ t,
278
+ w_logits,
279
+ num_samples,
280
+ single_jitter,
281
+ deterministic_center=True)
282
+
283
+ # The intervals we return will span the midpoints of each adjacent sample.
284
+ mid = (centers[..., 1:] + centers[..., :-1]) / 2
285
+
286
+ # Each first/last fencepost is the reflection of the first/last midpoint
287
+ # around the first/last sampled center. We clamp to the limits of the input
288
+ # domain, provided by the caller.
289
+ minval, maxval = domain
290
+ first = (2 * centers[..., :1] - mid[..., :1]).clamp_min(minval)
291
+ last = (2 * centers[..., -1:] - mid[..., -1:]).clamp_max(maxval)
292
+
293
+ t_samples = torch.cat([first, mid, last], dim=-1)
294
+ return t_samples
295
+
296
+
297
+ def lossfun_distortion(t, w):
298
+ """Compute iint w[i] w[j] |t[i] - t[j]| di dj."""
299
+ # The loss incurred between all pairs of intervals.
300
+ ut = (t[..., 1:] + t[..., :-1]) / 2
301
+ dut = torch.abs(ut[..., :, None] - ut[..., None, :])
302
+ loss_inter = torch.sum(w * torch.sum(w[..., None, :] * dut, dim=-1), dim=-1)
303
+
304
+ # The loss incurred within each individual interval with itself.
305
+ loss_intra = torch.sum(w ** 2 * (t[..., 1:] - t[..., :-1]), dim=-1) / 3
306
+
307
+ return loss_inter + loss_intra
308
+
309
+
310
+ def interval_distortion(t0_lo, t0_hi, t1_lo, t1_hi):
311
+ """Compute mean(abs(x-y); x in [t0_lo, t0_hi], y in [t1_lo, t1_hi])."""
312
+ # Distortion when the intervals do not overlap.
313
+ d_disjoint = torch.abs((t1_lo + t1_hi) / 2 - (t0_lo + t0_hi) / 2)
314
+
315
+ # Distortion when the intervals overlap.
316
+ d_overlap = (2 *
317
+ (torch.minimum(t0_hi, t1_hi) ** 3 - torch.maximum(t0_lo, t1_lo) ** 3) +
318
+ 3 * (t1_hi * t0_hi * torch.abs(t1_hi - t0_hi) +
319
+ t1_lo * t0_lo * torch.abs(t1_lo - t0_lo) + t1_hi * t0_lo *
320
+ (t0_lo - t1_hi) + t1_lo * t0_hi *
321
+ (t1_lo - t0_hi))) / (6 * (t0_hi - t0_lo) * (t1_hi - t1_lo))
322
+
323
+ # Are the two intervals not overlapping?
324
+ are_disjoint = (t0_lo > t1_hi) | (t1_lo > t0_hi)
325
+
326
+ return torch.where(are_disjoint, d_disjoint, d_overlap)
327
+
328
+
329
+ def weighted_percentile(t, w, ps):
330
+ """Compute the weighted percentiles of a step function. w's must sum to 1."""
331
+ cw = integrate_weights(w)
332
+ # We want to interpolate into the integrated weights according to `ps`.
333
+ fn = lambda cw_i, t_i: math.sorted_interp(torch.tensor(ps, device=t.device) / 100, cw_i, t_i)
334
+ # Vmap fn to an arbitrary number of leading dimensions.
335
+ cw_mat = cw.reshape([-1, cw.shape[-1]])
336
+ t_mat = t.reshape([-1, t.shape[-1]])
337
+ wprctile_mat = fn(cw_mat, t_mat) # TODO
338
+ wprctile = wprctile_mat.reshape(cw.shape[:-1] + (len(ps),))
339
+ return wprctile
340
+
341
+
342
+ def resample(t, tp, vp, use_avg=False):
343
+ """Resample a step function defined by (tp, vp) into intervals t.
344
+
345
+ Args:
346
+ t: tensor with shape (..., n+1), the endpoints to resample into.
347
+ tp: tensor with shape (..., m+1), the endpoints of the step function being
348
+ resampled.
349
+ vp: tensor with shape (..., m), the values of the step function being
350
+ resampled.
351
+ use_avg: bool, if False, return the sum of the step function for each
352
+ interval in `t`. If True, return the average, weighted by the width of
353
+ each interval in `t`.
354
+ eps: float, a small value to prevent division by zero when use_avg=True.
355
+
356
+ Returns:
357
+ v: tensor with shape (..., n), the values of the resampled step function.
358
+ """
359
+ eps = torch.finfo(t.dtype).eps
360
+ # eps = 1e-3
361
+
362
+ if use_avg:
363
+ wp = torch.diff(tp, dim=-1)
364
+ v_numer = resample(t, tp, vp * wp, use_avg=False)
365
+ v_denom = resample(t, tp, wp, use_avg=False)
366
+ v = v_numer / v_denom.clamp_min(eps)
367
+ return v
368
+
369
+ acc = torch.cumsum(vp, dim=-1)
370
+ acc0 = torch.cat([torch.zeros(acc.shape[:-1] + (1,), device=acc.device), acc], dim=-1)
371
+ acc0_resampled = math.sorted_interp(t, tp, acc0) # TODO
372
+ v = torch.diff(acc0_resampled, dim=-1)
373
+ return v
374
+
375
+
376
+ def resample_np(t, tp, vp, use_avg=False):
377
+ """
378
+ numpy version of resample
379
+ """
380
+ eps = np.finfo(t.dtype).eps
381
+ if use_avg:
382
+ wp = np.diff(tp, axis=-1)
383
+ v_numer = resample_np(t, tp, vp * wp, use_avg=False)
384
+ v_denom = resample_np(t, tp, wp, use_avg=False)
385
+ v = v_numer / np.maximum(eps, v_denom)
386
+ return v
387
+
388
+ acc = np.cumsum(vp, axis=-1)
389
+ acc0 = np.concatenate([np.zeros(acc.shape[:-1] + (1,)), acc], axis=-1)
390
+ acc0_resampled = np.vectorize(np.interp, signature='(n),(m),(m)->(n)')(t, tp, acc0)
391
+ v = np.diff(acc0_resampled, axis=-1)
392
+ return v
393
+
394
+
395
+ def blur_stepfun(x, y, r):
396
+ xr, xr_idx = torch.sort(torch.cat([x - r, x + r], dim=-1))
397
+ y1 = (torch.cat([y, torch.zeros_like(y[..., :1])], dim=-1) -
398
+ torch.cat([torch.zeros_like(y[..., :1]), y], dim=-1)) / (2 * r)
399
+ y2 = torch.cat([y1, -y1], dim=-1).take_along_dim(xr_idx[..., :-1], dim=-1)
400
+ yr = torch.cumsum((xr[..., 1:] - xr[..., :-1]) *
401
+ torch.cumsum(y2, dim=-1), dim=-1).clamp_min(0)
402
+ yr = torch.cat([torch.zeros_like(yr[..., :1]), yr], dim=-1)
403
+ return xr, yr
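pose_utils.py above feeds sample_np with log segment lengths to re-space a camera path so it is traversed at roughly constant speed; a toy version of that pattern, with made-up positions:

import numpy as np

t = np.linspace(0.0, 1.0, 11)  # 11 endpoints -> 10 path segments
# Hypothetical positions that bunch up near the start of the path.
positions = np.stack([t ** 2, np.zeros_like(t), np.zeros_like(t)], axis=-1)
lengths = np.linalg.norm(positions[1:] - positions[:-1], axis=-1)
# Long segments receive proportionally more of the redrawn samples, so stepping uniformly
# through the resampled parameter traverses the path at near-constant speed.
t_const_speed = sample_np(None, t, np.log(lengths), 11)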
utils/system_utils.py ADDED
@@ -0,0 +1,28 @@
1
+ #
2
+ # Copyright (C) 2023, Inria
3
+ # GRAPHDECO research group, https://team.inria.fr/graphdeco
4
+ # All rights reserved.
5
+ #
6
+ # This software is free for non-commercial, research and evaluation use
7
+ # under the terms of the LICENSE.md file.
8
+ #
9
+ # For inquiries contact [email protected]
10
+ #
11
+
12
+ from errno import EEXIST
13
+ from os import makedirs, path
14
+ import os
15
+
16
+ def mkdir_p(folder_path):
17
+ # Creates a directory; equivalent to using mkdir -p on the command line
18
+ try:
19
+ makedirs(folder_path)
20
+ except OSError as exc: # Python >2.5
21
+ if exc.errno == EEXIST and path.isdir(folder_path):
22
+ pass
23
+ else:
24
+ raise
25
+
26
+ def searchForMaxIteration(folder):
27
+ saved_iters = [int(fname.split("_")[-1]) for fname in os.listdir(folder)]
28
+ return max(saved_iters)
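A sketch of how searchForMaxIteration is typically used to pick up the newest checkpoint; the directory layout below is an assumption for illustration:

import os

ckpt_root = os.path.join("output", "scene", "point_cloud")  # assumed layout: iteration_7000, iteration_30000, ...
if os.path.isdir(ckpt_root):
    latest = searchForMaxIteration(ckpt_root)                # largest trailing "_<number>" among subfolders
    ply_path = os.path.join(ckpt_root, f"iteration_{latest}", "point_cloud.ply")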
utils/trajectories.py ADDED
@@ -0,0 +1,243 @@
1
+ import numpy as np
2
+ import roma
3
+ import torch
4
+ import torch.nn.functional as F
5
+
6
+
7
+ def rt_to_mat4(
8
+ R: torch.Tensor, t: torch.Tensor, s: torch.Tensor | None = None
9
+ ) -> torch.Tensor:
10
+ """
11
+ Args:
12
+ R (torch.Tensor): (..., 3, 3).
13
+ t (torch.Tensor): (..., 3).
14
+ s (torch.Tensor): (...,).
15
+
16
+ Returns:
17
+ torch.Tensor: (..., 4, 4)
18
+ """
19
+ mat34 = torch.cat([R, t[..., None]], dim=-1)
20
+ if s is None:
21
+ bottom = (
22
+ mat34.new_tensor([[0.0, 0.0, 0.0, 1.0]])
23
+ .reshape((1,) * (mat34.dim() - 2) + (1, 4))
24
+ .expand(mat34.shape[:-2] + (1, 4))
25
+ )
26
+ else:
27
+ bottom = F.pad(1.0 / s[..., None, None], (3, 0), value=0.0)
28
+ mat4 = torch.cat([mat34, bottom], dim=-2)
29
+ return mat4
30
+
31
+ def get_avg_w2c(w2cs: torch.Tensor):
32
+ c2ws = torch.linalg.inv(w2cs)
33
+ # 1. Compute the center
34
+ center = c2ws[:, :3, -1].mean(0)
35
+ # 2. Compute the z axis
36
+ z = F.normalize(c2ws[:, :3, 2].mean(0), dim=-1)
37
+ # 3. Compute axis y' (no need to normalize as it's not the final output)
38
+ y_ = c2ws[:, :3, 1].mean(0) # (3)
39
+ # 4. Compute the x axis
40
+ x = F.normalize(torch.cross(y_, z, dim=-1), dim=-1) # (3)
41
+ # 5. Compute the y axis (as z and x are normalized, y is already of norm 1)
42
+ y = torch.cross(z, x, dim=-1) # (3)
43
+ avg_c2w = rt_to_mat4(torch.stack([x, y, z], 1), center)
44
+ avg_w2c = torch.linalg.inv(avg_c2w)
45
+ return avg_w2c
46
+
47
+
48
+ # def get_lookat(origins: torch.Tensor, viewdirs: torch.Tensor) -> torch.Tensor:
49
+ # """Calculate the intersection point of multiple camera rays as the lookat point.
50
+
51
+ # Use the center of camera positions as a reference point for the lookat,
52
+ # then move forward along the average view direction by a certain distance.
53
+ # """
54
+ # # Calculate the center of camera positions
55
+ # center = origins.mean(dim=0)
56
+
57
+ # # Calculate average view direction
58
+ # mean_dir = F.normalize(viewdirs.mean(dim=0), dim=-1)
59
+
60
+ # # Calculate average distance to the center point
61
+ # avg_dist = torch.norm(origins - center, dim=-1).mean()
62
+
63
+ # # Move forward along the average view direction
64
+ # lookat = center + mean_dir * avg_dist
65
+
66
+ # return lookat
67
+
68
+ def get_lookat(origins: torch.Tensor, viewdirs: torch.Tensor) -> torch.Tensor:
69
+ """Triangulate a set of rays to find a single lookat point.
70
+
71
+ Args:
72
+ origins (torch.Tensor): A (N, 3) array of ray origins.
73
+ viewdirs (torch.Tensor): A (N, 3) array of ray view directions.
74
+
75
+ Returns:
76
+ torch.Tensor: A (3,) lookat point.
77
+ """
78
+
79
+ viewdirs = torch.nn.functional.normalize(viewdirs, dim=-1)
80
+ eye = torch.eye(3, device=origins.device, dtype=origins.dtype)[None]
81
+ # Calculate projection matrix I - rr^T
82
+ I_min_cov = eye - (viewdirs[..., None] * viewdirs[..., None, :])
83
+ # Compute sum of projections
84
+ sum_proj = I_min_cov.matmul(origins[..., None]).sum(dim=-3)
85
+ # Solve for the intersection point using least squares
86
+ lookat = torch.linalg.lstsq(I_min_cov.sum(dim=-3), sum_proj).solution[..., 0]
87
+ # Check NaNs.
88
+ assert not torch.any(torch.isnan(lookat))
89
+ return lookat
90
+
91
+
92
+ def get_lookat_w2cs(positions: torch.Tensor, lookat: torch.Tensor, up: torch.Tensor):
93
+ """
94
+ Args:
95
+ positions: (N, 3) tensor of camera positions
96
+ lookat: (3,) tensor of lookat point
97
+ up: (3,) tensor of up vector
98
+
99
+ Returns:
100
+ w2cs: (N, 3, 3) tensor of world to camera rotation matrices
101
+ """
102
+ forward_vectors = F.normalize(lookat - positions, dim=-1)
103
+ right_vectors = F.normalize(torch.cross(forward_vectors, up[None], dim=-1), dim=-1)
104
+ down_vectors = F.normalize(
105
+ torch.cross(forward_vectors, right_vectors, dim=-1), dim=-1
106
+ )
107
+ Rs = torch.stack([right_vectors, down_vectors, forward_vectors], dim=-1)
108
+ w2cs = torch.linalg.inv(rt_to_mat4(Rs, positions))
109
+ return w2cs
110
+
111
+
112
+ def get_arc_w2cs(
113
+ ref_w2c: torch.Tensor,
114
+ lookat: torch.Tensor,
115
+ up: torch.Tensor,
116
+ num_frames: int,
117
+ degree: float,
118
+ **_,
119
+ ) -> torch.Tensor:
120
+ ref_position = torch.linalg.inv(ref_w2c)[:3, 3]
121
+ thetas = (
122
+ torch.sin(
123
+ torch.linspace(0.0, torch.pi * 2.0, num_frames + 1, device=ref_w2c.device)[
124
+ :-1
125
+ ]
126
+ )
127
+ * (degree / 2.0)
128
+ / 180.0
129
+ * torch.pi
130
+ )
131
+ positions = torch.einsum(
132
+ "nij,j->ni",
133
+ roma.rotvec_to_rotmat(thetas[:, None] * up[None]),
134
+ ref_position - lookat,
135
+ )
136
+ return get_lookat_w2cs(positions, lookat, up)
137
+
138
+
139
+ def get_lemniscate_w2cs(
140
+ ref_w2c: torch.Tensor,
141
+ lookat: torch.Tensor,
142
+ up: torch.Tensor,
143
+ num_frames: int,
144
+ degree: float,
145
+ **_,
146
+ ) -> torch.Tensor:
147
+ ref_c2w = torch.linalg.inv(ref_w2c)
148
+ a = torch.linalg.norm(ref_c2w[:3, 3] - lookat) * np.tan(degree / 360 * np.pi)
149
+ # Lemniscate curve in camera space. Starting at the origin.
150
+ thetas = (
151
+ torch.linspace(0, 2 * torch.pi, num_frames + 1, device=ref_w2c.device)[:-1]
152
+ + torch.pi / 2
153
+ )
154
+
155
+ positions = torch.stack(
156
+ [
157
+ a * torch.cos(thetas) / (1 + torch.sin(thetas) ** 2),
158
+ a * torch.cos(thetas) * torch.sin(thetas) / (1 + torch.sin(thetas) ** 2),
159
+ torch.zeros(num_frames, device=ref_w2c.device),
160
+ ],
161
+ dim=-1,
162
+ )
163
+ # Transform to world space.
164
+ positions = torch.einsum(
165
+ "ij,nj->ni", ref_c2w[:3], F.pad(positions, (0, 1), value=1.0)
166
+ )
167
+ return get_lookat_w2cs(positions, lookat, up)
168
+
169
+
170
+ def get_spiral_w2cs(
171
+ ref_w2c: torch.Tensor,
172
+ lookat: torch.Tensor,
173
+ up: torch.Tensor,
174
+ num_frames: int,
175
+ rads: float | torch.Tensor,
176
+ zrate: float,
177
+ rots: int,
178
+ **_,
179
+ ) -> torch.Tensor:
180
+ ref_c2w = torch.linalg.inv(ref_w2c)
181
+ thetas = torch.linspace(
182
+ 0, 2 * torch.pi * rots, num_frames + 1, device=ref_w2c.device
183
+ )[:-1]
184
+ # Spiral curve in camera space. Starting at the origin.
185
+ if isinstance(rads, torch.Tensor):
186
+ rads = rads.reshape(-1, 3).to(ref_w2c.device)
187
+ positions = (
188
+ torch.stack(
189
+ [
190
+ torch.cos(thetas),
191
+ -torch.sin(thetas),
192
+ -torch.sin(thetas * zrate),
193
+ ],
194
+ dim=-1,
195
+ )
196
+ * rads
197
+ )
198
+ # Transform to world space.
199
+ positions = torch.einsum(
200
+ "ij,nj->ni", ref_c2w[:3], F.pad(positions, (0, 1), value=1.0)
201
+ )
202
+ return get_lookat_w2cs(positions, lookat, up)
203
+
204
+
205
+ def get_wander_w2cs(ref_w2c, focal_length, num_frames, max_disp, **_):
206
+ device = ref_w2c.device
207
+ c2w = np.linalg.inv(ref_w2c.detach().cpu().numpy())
208
+ max_disp = max_disp
209
+
210
+ max_trans = max_disp / focal_length
211
+ output_poses = []
212
+
213
+ for i in range(num_frames):
214
+ x_trans = max_trans * np.sin(2.0 * np.pi * float(i) / float(num_frames))
215
+ y_trans = 0.0
216
+ z_trans = max_trans * np.cos(2.0 * np.pi * float(i) / float(num_frames)) / 2.0
217
+
218
+ i_pose = np.concatenate(
219
+ [
220
+ np.concatenate(
221
+ [
222
+ np.eye(3),
223
+ np.array([x_trans, y_trans, z_trans])[:, np.newaxis],
224
+ ],
225
+ axis=1,
226
+ ),
227
+ np.array([0.0, 0.0, 0.0, 1.0])[np.newaxis, :],
228
+ ],
229
+ axis=0,
230
+ )
231
+
232
+ i_pose = np.linalg.inv(i_pose)
233
+
234
+ ref_pose = np.concatenate(
235
+ [c2w[:3, :4], np.array([0.0, 0.0, 0.0, 1.0])[np.newaxis, :]], axis=0
236
+ )
237
+
238
+ render_pose = np.dot(ref_pose, i_pose)
239
+ output_poses.append(render_pose)
240
+ output_poses = torch.from_numpy(np.array(output_poses, dtype=np.float32)).to(device)
241
+ w2cs = torch.linalg.inv(output_poses)
242
+
243
+ return w2cs
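A minimal usage sketch of the trajectory helpers above, assuming the module is importable as utils.trajectories and that torch and roma are installed; the four input cameras are hypothetical placeholders arranged on a circle looking at the origin.

import torch
from utils.trajectories import get_avg_w2c, get_arc_w2cs, get_lookat_w2cs

# Hypothetical input: four cameras on a circle, all looking at the origin.
angles = torch.linspace(0.0, torch.pi / 4, 4)
positions = 3.0 * torch.stack([torch.cos(angles), torch.zeros(4), torch.sin(angles)], dim=-1)
lookat = torch.zeros(3)
up = torch.tensor([0.0, 1.0, 0.0])
train_w2cs = get_lookat_w2cs(positions, lookat, up)   # (4, 4, 4) world-to-camera poses

# Average reference camera, then a 30-degree arc of 60 novel poses around the up axis.
ref_w2c = get_avg_w2c(train_w2cs)
arc_w2cs = get_arc_w2cs(ref_w2c, lookat, up, num_frames=60, degree=30.0)
print(arc_w2cs.shape)  # torch.Size([60, 4, 4])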
utils/utils_poses/ATE/align_trajectory.py ADDED
@@ -0,0 +1,80 @@
1
+ #!/usr/bin/env python2
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import numpy as np
5
+ import utils.utils_poses.ATE.transformations as tfs
6
+
7
+
8
+ def get_best_yaw(C):
9
+ '''
10
+ maximize trace(Rz(theta) * C)
11
+ '''
12
+ assert C.shape == (3, 3)
13
+
14
+ A = C[0, 1] - C[1, 0]
15
+ B = C[0, 0] + C[1, 1]
16
+ theta = np.pi / 2 - np.arctan2(B, A)
17
+
18
+ return theta
19
+
20
+
21
+ def rot_z(theta):
22
+ R = tfs.rotation_matrix(theta, [0, 0, 1])
23
+ R = R[0:3, 0:3]
24
+
25
+ return R
26
+
27
+
28
+ def align_umeyama(model, data, known_scale=False, yaw_only=False):
29
+ """Implementation of the paper: S. Umeyama, Least-Squares Estimation
30
+ of Transformation Parameters Between Two Point Patterns,
31
+ IEEE Trans. Pattern Anal. Mach. Intell., vol. 13, no. 4, 1991.
32
+
33
+ model = s * R * data + t
34
+
35
+ Input:
36
+ model -- first trajectory (nx3), numpy array type
37
+ data -- second trajectory (nx3), numpy array type
38
+
39
+ Output:
40
+ s -- scale factor (scalar)
41
+ R -- rotation matrix (3x3)
42
+ t -- translation vector (3x1)
43
+ t_error -- translational error per point (1xn)
44
+
45
+ """
46
+
47
+ # subtract mean
48
+ mu_M = model.mean(0)
49
+ mu_D = data.mean(0)
50
+ model_zerocentered = model - mu_M
51
+ data_zerocentered = data - mu_D
52
+ n = np.shape(model)[0]
53
+
54
+ # correlation
55
+ C = 1.0/n*np.dot(model_zerocentered.transpose(), data_zerocentered)
56
+ sigma2 = 1.0/n*np.multiply(data_zerocentered, data_zerocentered).sum()
57
+ U_svd, D_svd, V_svd = np.linalg.linalg.svd(C)
58
+
59
+ D_svd = np.diag(D_svd)
60
+ V_svd = np.transpose(V_svd)
61
+
62
+ S = np.eye(3)
63
+ if(np.linalg.det(U_svd)*np.linalg.det(V_svd) < 0):
64
+ S[2, 2] = -1
65
+
66
+ if yaw_only:
67
+ rot_C = np.dot(data_zerocentered.transpose(), model_zerocentered)
68
+ theta = get_best_yaw(rot_C)
69
+ R = rot_z(theta)
70
+ else:
71
+ R = np.dot(U_svd, np.dot(S, np.transpose(V_svd)))
72
+
73
+ if known_scale:
74
+ s = 1
75
+ else:
76
+ s = 1.0/sigma2*np.trace(np.dot(D_svd, S))
77
+
78
+ t = mu_M-s*np.dot(R, mu_D)
79
+
80
+ return s, R, t
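A quick sanity-check sketch for align_umeyama, assuming the file is importable as utils.utils_poses.ATE.align_trajectory; the trajectories are synthetic and the transform parameters are hypothetical.

import numpy as np
from utils.utils_poses.ATE.align_trajectory import align_umeyama

rng = np.random.default_rng(0)
data = rng.normal(size=(100, 3))                        # "estimated" trajectory
Q, _ = np.linalg.qr(rng.normal(size=(3, 3)))            # random orthogonal matrix
if np.linalg.det(Q) < 0:                                # make it a proper rotation
    Q[:, 0] *= -1.0
model = 2.0 * data @ Q.T + np.array([1.0, -2.0, 0.5])   # model = s * R * data + t

s, R, t = align_umeyama(model, data)
print(np.isclose(s, 2.0), np.allclose(R, Q), np.allclose(t, [1.0, -2.0, 0.5]))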
utils/utils_poses/ATE/align_utils.py ADDED
@@ -0,0 +1,144 @@
1
+ #!/usr/bin/env python2
2
+ # -*- coding: utf-8 -*-
3
+
4
+ import numpy as np
5
+
6
+ import utils.utils_poses.ATE.transformations as tfs
7
+ import utils.utils_poses.ATE.align_trajectory as align
8
+
9
+
10
+ def _getIndices(n_aligned, total_n):
11
+ if n_aligned == -1:
12
+ idxs = np.arange(0, total_n)
13
+ else:
14
+ assert n_aligned <= total_n and n_aligned >= 1
15
+ idxs = np.arange(0, n_aligned)
16
+ return idxs
17
+
18
+
19
+ def alignPositionYawSingle(p_es, p_gt, q_es, q_gt):
20
+ '''
21
+ calculate the 4DOF transformation: yaw R and translation t so that:
22
+ gt = R * est + t
23
+ '''
24
+
25
+ p_es_0, q_es_0 = p_es[0, :], q_es[0, :]
26
+ p_gt_0, q_gt_0 = p_gt[0, :], q_gt[0, :]
27
+ g_rot = tfs.quaternion_matrix(q_gt_0)
28
+ g_rot = g_rot[0:3, 0:3]
29
+ est_rot = tfs.quaternion_matrix(q_es_0)
30
+ est_rot = est_rot[0:3, 0:3]
31
+
32
+ C_R = np.dot(est_rot, g_rot.transpose())
33
+ theta = align.get_best_yaw(C_R)
34
+ R = align.rot_z(theta)
35
+ t = p_gt_0 - np.dot(R, p_es_0)
36
+
37
+ return R, t
38
+
39
+
40
+ def alignPositionYaw(p_es, p_gt, q_es, q_gt, n_aligned=1):
41
+ if n_aligned == 1:
42
+ R, t = alignPositionYawSingle(p_es, p_gt, q_es, q_gt)
43
+ return R, t
44
+ else:
45
+ idxs = _getIndices(n_aligned, p_es.shape[0])
46
+ est_pos = p_es[idxs, 0:3]
47
+ gt_pos = p_gt[idxs, 0:3]
48
+ _, R, t = align.align_umeyama(gt_pos, est_pos, known_scale=True,
49
+ yaw_only=True) # note the order
50
+ t = np.array(t)
51
+ t = t.reshape((3, ))
52
+ R = np.array(R)
53
+ return R, t
54
+
55
+
56
+ # align by a SE3 transformation
57
+ def alignSE3Single(p_es, p_gt, q_es, q_gt):
58
+ '''
59
+ Calculate SE3 transformation R and t so that:
60
+ gt = R * est + t
61
+ Using only the first poses of est and gt
62
+ '''
63
+
64
+ p_es_0, q_es_0 = p_es[0, :], q_es[0, :]
65
+ p_gt_0, q_gt_0 = p_gt[0, :], q_gt[0, :]
66
+
67
+ g_rot = tfs.quaternion_matrix(q_gt_0)
68
+ g_rot = g_rot[0:3, 0:3]
69
+ est_rot = tfs.quaternion_matrix(q_es_0)
70
+ est_rot = est_rot[0:3, 0:3]
71
+
72
+ R = np.dot(g_rot, np.transpose(est_rot))
73
+ t = p_gt_0 - np.dot(R, p_es_0)
74
+
75
+ return R, t
76
+
77
+
78
+ def alignSE3(p_es, p_gt, q_es, q_gt, n_aligned=-1):
79
+ '''
80
+ Calculate SE3 transformation R and t so that:
81
+ gt = R * est + t
82
+ '''
83
+ if n_aligned == 1:
84
+ R, t = alignSE3Single(p_es, p_gt, q_es, q_gt)
85
+ return R, t
86
+ else:
87
+ idxs = _getIndices(n_aligned, p_es.shape[0])
88
+ est_pos = p_es[idxs, 0:3]
89
+ gt_pos = p_gt[idxs, 0:3]
90
+ s, R, t = align.align_umeyama(gt_pos, est_pos,
91
+ known_scale=True) # note the order
92
+ t = np.array(t)
93
+ t = t.reshape((3, ))
94
+ R = np.array(R)
95
+ return R, t
96
+
97
+
98
+ # align by similarity transformation
99
+ def alignSIM3(p_es, p_gt, q_es, q_gt, n_aligned=-1):
100
+ '''
101
+ calculate s, R, t so that:
102
+ gt = R * s * est + t
103
+ '''
104
+ idxs = _getIndices(n_aligned, p_es.shape[0])
105
+ est_pos = p_es[idxs, 0:3]
106
+ gt_pos = p_gt[idxs, 0:3]
107
+ s, R, t = align.align_umeyama(gt_pos, est_pos) # note the order
108
+ return s, R, t
109
+
110
+
111
+ # a general interface
112
+ def alignTrajectory(p_es, p_gt, q_es, q_gt, method, n_aligned=-1):
113
+ '''
114
+ calculate s, R, t so that:
115
+ gt = R * s * est + t
116
+ method can be: sim3, se3, posyaw, none;
117
+ n_aligned: -1 means using all the frames
118
+ '''
119
+ assert p_es.shape[1] == 3
120
+ assert p_gt.shape[1] == 3
121
+ assert q_es.shape[1] == 4
122
+ assert q_gt.shape[1] == 4
123
+
124
+ s = 1
125
+ R = None
126
+ t = None
127
+ if method == 'sim3':
128
+ assert n_aligned >= 2 or n_aligned == -1, "sim3 uses at least 2 frames"
129
+ s, R, t = alignSIM3(p_es, p_gt, q_es, q_gt, n_aligned)
130
+ elif method == 'se3':
131
+ R, t = alignSE3(p_es, p_gt, q_es, q_gt, n_aligned)
132
+ elif method == 'posyaw':
133
+ R, t = alignPositionYaw(p_es, p_gt, q_es, q_gt, n_aligned)
134
+ elif method == 'none':
135
+ R = np.identity(3)
136
+ t = np.zeros((3, ))
137
+ else:
138
+ assert False, 'unknown alignment method'
139
+
140
+ return s, R, t
141
+
142
+
143
+ if __name__ == '__main__':
144
+ pass
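A short sketch of the general interface, assuming the file is importable as utils.utils_poses.ATE.align_utils; positions and quaternions here are synthetic, using the [x, y, z, w] convention of the bundled transformations module (the sim3 branch aligns positions only).

import numpy as np
from utils.utils_poses.ATE.align_utils import alignTrajectory

rng = np.random.default_rng(1)
p_es = rng.normal(size=(50, 3))                        # estimated positions
q_es = np.tile([0.0, 0.0, 0.0, 1.0], (50, 1))          # identity orientations
p_gt = 1.5 * p_es + np.array([0.2, 0.0, -0.3])         # ground truth = s * p_es + t
q_gt = q_es.copy()

s, R, t = alignTrajectory(p_es, p_gt, q_es, q_gt, method='sim3')
p_aligned = s * (p_es @ R.T) + t
print(np.allclose(p_aligned, p_gt))                    # True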
utils/utils_poses/ATE/compute_trajectory_errors.py ADDED
@@ -0,0 +1,89 @@
1
+ #!/usr/bin/env python2
2
+
3
+ import os
4
+ import numpy as np
5
+
6
+ import utils.utils_poses.ATE.trajectory_utils as tu
7
+ import utils.utils_poses.ATE.transformations as tf
8
+
9
+
10
+ def compute_relative_error(p_es, q_es, p_gt, q_gt, T_cm, dist, max_dist_diff,
11
+ accum_distances=[],
12
+ scale=1.0):
13
+
14
+ if len(accum_distances) == 0:
15
+ accum_distances = tu.get_distance_from_start(p_gt)
16
+ comparisons = tu.compute_comparison_indices_length(
17
+ accum_distances, dist, max_dist_diff)
18
+
19
+ n_samples = len(comparisons)
20
+ print('number of samples = {0} '.format(n_samples))
21
+ if n_samples < 2:
22
+ print("Too few samples! Will not compute.")
23
+ return np.array([]), np.array([]), np.array([]), np.array([]), np.array([]),\
24
+ np.array([]), np.array([])
25
+
26
+ T_mc = np.linalg.inv(T_cm)
27
+ errors = []
28
+ for idx, c in enumerate(comparisons):
29
+ if not c == -1:
30
+ T_c1 = tu.get_rigid_body_trafo(q_es[idx, :], p_es[idx, :])
31
+ T_c2 = tu.get_rigid_body_trafo(q_es[c, :], p_es[c, :])
32
+ T_c1_c2 = np.dot(np.linalg.inv(T_c1), T_c2)
33
+ T_c1_c2[:3, 3] *= scale
34
+
35
+ T_m1 = tu.get_rigid_body_trafo(q_gt[idx, :], p_gt[idx, :])
36
+ T_m2 = tu.get_rigid_body_trafo(q_gt[c, :], p_gt[c, :])
37
+ T_m1_m2 = np.dot(np.linalg.inv(T_m1), T_m2)
38
+
39
+ T_m1_m2_in_c1 = np.dot(T_cm, np.dot(T_m1_m2, T_mc))
40
+ T_error_in_c2 = np.dot(np.linalg.inv(T_m1_m2_in_c1), T_c1_c2)
41
+ T_c2_rot = np.eye(4)
42
+ T_c2_rot[0:3, 0:3] = T_c2[0:3, 0:3]
43
+ T_error_in_w = np.dot(T_c2_rot, np.dot(
44
+ T_error_in_c2, np.linalg.inv(T_c2_rot)))
45
+ errors.append(T_error_in_w)
46
+
47
+ error_trans_norm = []
48
+ error_trans_perc = []
49
+ error_yaw = []
50
+ error_gravity = []
51
+ e_rot = []
52
+ e_rot_deg_per_m = []
53
+ for e in errors:
54
+ tn = np.linalg.norm(e[0:3, 3])
55
+ error_trans_norm.append(tn)
56
+ error_trans_perc.append(tn / dist * 100)
57
+ ypr_angles = tf.euler_from_matrix(e, 'rzyx')
58
+ e_rot.append(tu.compute_angle(e))
59
+ error_yaw.append(abs(ypr_angles[0])*180.0/np.pi)
60
+ error_gravity.append(
61
+ np.sqrt(ypr_angles[1]**2+ypr_angles[2]**2)*180.0/np.pi)
62
+ e_rot_deg_per_m.append(e_rot[-1] / dist)
63
+ return errors, np.array(error_trans_norm), np.array(error_trans_perc),\
64
+ np.array(error_yaw), np.array(error_gravity), np.array(e_rot),\
65
+ np.array(e_rot_deg_per_m)
66
+
67
+
68
+ def compute_absolute_error(p_es_aligned, q_es_aligned, p_gt, q_gt):
69
+ e_trans_vec = (p_gt-p_es_aligned)
70
+ e_trans = np.sqrt(np.sum(e_trans_vec**2, 1))
71
+
72
+
73
+ # orientation error
74
+ e_rot = np.zeros((len(e_trans,)))
75
+ e_ypr = np.zeros(np.shape(p_es_aligned))
76
+ for i in range(np.shape(p_es_aligned)[0]):
77
+ R_we = tf.matrix_from_quaternion(q_es_aligned[i, :])
78
+ R_wg = tf.matrix_from_quaternion(q_gt[i, :])
79
+ e_R = np.dot(R_we, np.linalg.inv(R_wg))
80
+ e_ypr[i, :] = tf.euler_from_matrix(e_R, 'rzyx')
81
+ e_rot[i] = np.rad2deg(np.linalg.norm(tf.logmap_so3(e_R[:3, :3])))
82
+ # scale drift
83
+ motion_gt = np.diff(p_gt, axis=0)
84
+ motion_es = np.diff(p_es_aligned, axis=0)
85
+ dist_gt = np.sqrt(np.sum(np.multiply(motion_gt, motion_gt), 1))
86
+ dist_es = np.sqrt(np.sum(np.multiply(motion_es, motion_es), 1))
87
+ e_scale_perc = np.abs((np.divide(dist_es, dist_gt)-1.0) * 100)
88
+ # ate = np.sqrt(np.mean(np.asarray(e_trans) ** 2))
89
+ return e_trans, e_trans_vec, e_rot, e_ypr, e_scale_perc
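A hedged usage sketch for the absolute-error routine, assuming the quaternion helpers it references (matrix_from_quaternion, euler_from_matrix, logmap_so3) are provided by the bundled transformations module; the trajectories below are synthetic.

import numpy as np
from utils.utils_poses.ATE.compute_trajectory_errors import compute_absolute_error

rng = np.random.default_rng(2)
p_gt = rng.normal(size=(30, 3))                        # ground-truth positions
p_es = p_gt + 0.01 * rng.normal(size=(30, 3))          # aligned estimate with small noise
q_id = np.tile([0.0, 0.0, 0.0, 1.0], (30, 1))          # identity orientations, [x, y, z, w]

e_trans, e_trans_vec, e_rot, e_ypr, e_scale_perc = compute_absolute_error(
    p_es, q_id, p_gt, q_id)
print('ATE RMSE: %.4f' % np.sqrt(np.mean(e_trans ** 2)))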
utils/utils_poses/ATE/results_writer.py ADDED
@@ -0,0 +1,75 @@
1
+ #!/usr/bin/env python2
2
+ import os
3
+ # import yaml
4
+ import numpy as np
5
+
6
+
7
+ def compute_statistics(data_vec):
8
+ stats = dict()
9
+ if len(data_vec) > 0:
10
+ stats['rmse'] = float(
11
+ np.sqrt(np.dot(data_vec, data_vec) / len(data_vec)))
12
+ stats['mean'] = float(np.mean(data_vec))
13
+ stats['median'] = float(np.median(data_vec))
14
+ stats['std'] = float(np.std(data_vec))
15
+ stats['min'] = float(np.min(data_vec))
16
+ stats['max'] = float(np.max(data_vec))
17
+ stats['num_samples'] = int(len(data_vec))
18
+ else:
19
+ stats['rmse'] = 0
20
+ stats['mean'] = 0
21
+ stats['median'] = 0
22
+ stats['std'] = 0
23
+ stats['min'] = 0
24
+ stats['max'] = 0
25
+ stats['num_samples'] = 0
26
+
27
+ return stats
28
+
29
+
30
+ # def update_and_save_stats(new_stats, label, yaml_filename):
31
+ # stats = dict()
32
+ # if os.path.exists(yaml_filename):
33
+ # stats = yaml.load(open(yaml_filename, 'r'), Loader=yaml.FullLoader)
34
+ # stats[label] = new_stats
35
+ #
36
+ # with open(yaml_filename, 'w') as outfile:
37
+ # outfile.write(yaml.dump(stats, default_flow_style=False))
38
+ #
39
+ # return
40
+ #
41
+ #
42
+ # def compute_and_save_statistics(data_vec, label, yaml_filename):
43
+ # new_stats = compute_statistics(data_vec)
44
+ # update_and_save_stats(new_stats, label, yaml_filename)
45
+ #
46
+ # return new_stats
47
+ #
48
+ #
49
+ # def write_tex_table(list_values, rows, cols, outfn):
50
+ # '''
51
+ # write list_values[row_idx][col_idx] to a table that is ready to be pasted
52
+ # into latex source
53
+ #
54
+ # list_values is a list of row values
55
+ #
56
+ # The value should be string of desired format
57
+ # '''
58
+ #
59
+ # assert len(rows) >= 1
60
+ # assert len(cols) >= 1
61
+ #
62
+ # with open(outfn, 'w') as f:
63
+ # # write header
64
+ # f.write(' & ')
65
+ # for col_i in cols[:-1]:
66
+ # f.write(col_i + ' & ')
67
+ # f.write(' ' + cols[-1]+'\n')
68
+ #
69
+ # # write each row
70
+ # for row_idx, row_i in enumerate(list_values):
71
+ # f.write(rows[row_idx] + ' & ')
72
+ # row_values = list_values[row_idx]
73
+ # for col_idx in range(len(row_values) - 1):
74
+ # f.write(row_values[col_idx] + ' & ')
75
+ # f.write(' ' + row_values[-1]+' \n')
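A tiny example of the statistics helper, assuming the file is importable as utils.utils_poses.ATE.results_writer; the error values are hypothetical.

import numpy as np
from utils.utils_poses.ATE.results_writer import compute_statistics

errors = np.array([0.010, 0.020, 0.015, 0.030])        # per-frame translation errors (m)
stats = compute_statistics(errors)
print(stats['rmse'], stats['mean'], stats['num_samples'])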
utils/utils_poses/ATE/trajectory_utils.py ADDED
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env python2
2
+ """
3
+ @author: Christian Forster
4
+ """
5
+
6
+ import os
7
+ import numpy as np
8
+ import utils.utils_poses.ATE.transformations as tf
9
+
10
+
11
+ def get_rigid_body_trafo(quat, trans):
12
+ T = tf.quaternion_matrix(quat)
13
+ T[0:3, 3] = trans
14
+ return T
15
+
16
+
17
+ def get_distance_from_start(gt_translation):
18
+ distances = np.diff(gt_translation[:, 0:3], axis=0)
19
+ distances = np.sqrt(np.sum(np.multiply(distances, distances), 1))
20
+ distances = np.cumsum(distances)
21
+ distances = np.concatenate(([0], distances))
22
+ return distances
23
+
24
+
25
+ def compute_comparison_indices_length(distances, dist, max_dist_diff):
26
+ max_idx = len(distances)
27
+ comparisons = []
28
+ for idx, d in enumerate(distances):
29
+ best_idx = -1
30
+ error = max_dist_diff
31
+ for i in range(idx, max_idx):
32
+ if np.abs(distances[i]-(d+dist)) < error:
33
+ best_idx = i
34
+ error = np.abs(distances[i] - (d+dist))
35
+ if best_idx != -1:
36
+ comparisons.append(best_idx)
37
+ return comparisons
38
+
39
+
40
+ def compute_angle(transform):
41
+ """
42
+ Compute the rotation angle from a 4x4 homogeneous matrix.
43
+ """
44
+ # an invitation to 3-d vision, p 27
45
+ return np.arccos(
46
+ min(1, max(-1, (np.trace(transform[0:3, 0:3]) - 1)/2)))*180.0/np.pi
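Two quick checks of these helpers, assuming trajectory_utils and the bundled transformations module import as below; the inputs are toy values.

import numpy as np
import utils.utils_poses.ATE.transformations as tf
from utils.utils_poses.ATE.trajectory_utils import compute_angle, get_distance_from_start

T = tf.rotation_matrix(np.pi / 2, [0, 0, 1])           # 90-degree rotation about z
print(compute_angle(T))                                # ~90.0 degrees

path = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [1.0, 1.0, 0.0]])
print(get_distance_from_start(path))                   # [0. 1. 2.]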
utils/utils_poses/ATE/transformations.py ADDED
@@ -0,0 +1,1974 @@
1
+ # -*- coding: utf-8 -*-
2
+ # transformations.py
3
+
4
+ # Copyright (c) 2006, Christoph Gohlke
5
+ # Copyright (c) 2006-2009, The Regents of the University of California
6
+ # All rights reserved.
7
+ #
8
+ # Redistribution and use in source and binary forms, with or without
9
+ # modification, are permitted provided that the following conditions are met:
10
+ #
11
+ # * Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ # * Redistributions in binary form must reproduce the above copyright
14
+ # notice, this list of conditions and the following disclaimer in the
15
+ # documentation and/or other materials provided with the distribution.
16
+ # * Neither the name of the copyright holders nor the names of any
17
+ # contributors may be used to endorse or promote products derived
18
+ # from this software without specific prior written permission.
19
+ #
20
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23
+ # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24
+ # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25
+ # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26
+ # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27
+ # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29
+ # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30
+ # POSSIBILITY OF SUCH DAMAGE.
31
+
32
+ """Homogeneous Transformation Matrices and Quaternions.
33
+
34
+ A library for calculating 4x4 matrices for translating, rotating, reflecting,
35
+ scaling, shearing, projecting, orthogonalizing, and superimposing arrays of
36
+ 3D homogeneous coordinates as well as for converting between rotation matrices,
37
+ Euler angles, and quaternions. Also includes an Arcball control object and
38
+ functions to decompose transformation matrices.
39
+
40
+ :Authors:
41
+ `Christoph Gohlke <http://www.lfd.uci.edu/~gohlke/>`__,
42
+ Laboratory for Fluorescence Dynamics, University of California, Irvine
43
+
44
+ :Version: 20090418
45
+
46
+ Requirements
47
+ ------------
48
+
49
+ * `Python 2.6 <http://www.python.org>`__
50
+ * `Numpy 1.3 <http://numpy.scipy.org>`__
51
+ * `transformations.c 20090418 <http://www.lfd.uci.edu/~gohlke/>`__
52
+ (optional implementation of some functions in C)
53
+
54
+ Notes
55
+ -----
56
+
57
+ Matrices (M) can be inverted using numpy.linalg.inv(M), concatenated using
58
+ numpy.dot(M0, M1), or used to transform homogeneous coordinates (v) using
59
+ numpy.dot(M, v) for shape (4, \*) "point of arrays", respectively
60
+ numpy.dot(v, M.T) for shape (\*, 4) "array of points".
61
+
62
+ Calculations are carried out with numpy.float64 precision.
63
+
64
+ This Python implementation is not optimized for speed.
65
+
66
+ Vector, point, quaternion, and matrix function arguments are expected to be
67
+ "array like", i.e. tuple, list, or numpy arrays.
68
+
69
+ Return types are numpy arrays unless specified otherwise.
70
+
71
+ Angles are in radians unless specified otherwise.
72
+
73
+ Quaternions ix+jy+kz+w are represented as [x, y, z, w].
74
+
75
+ Use the transpose of transformation matrices for OpenGL glMultMatrixd().
76
+
77
+ A triple of Euler angles can be applied/interpreted in 24 ways, which can
78
+ be specified using a 4 character string or encoded 4-tuple:
79
+
80
+ *Axes 4-string*: e.g. 'sxyz' or 'ryxy'
81
+
82
+ - first character : rotations are applied to 's'tatic or 'r'otating frame
83
+ - remaining characters : successive rotation axis 'x', 'y', or 'z'
84
+
85
+ *Axes 4-tuple*: e.g. (0, 0, 0, 0) or (1, 1, 1, 1)
86
+
87
+ - inner axis: code of axis ('x':0, 'y':1, 'z':2) of rightmost matrix.
88
+ - parity : even (0) if inner axis 'x' is followed by 'y', 'y' is followed
89
+ by 'z', or 'z' is followed by 'x'. Otherwise odd (1).
90
+ - repetition : first and last axis are same (1) or different (0).
91
+ - frame : rotations are applied to static (0) or rotating (1) frame.
92
+
93
+ References
94
+ ----------
95
+
96
+ (1) Matrices and transformations. Ronald Goldman.
97
+ In "Graphics Gems I", pp 472-475. Morgan Kaufmann, 1990.
98
+ (2) More matrices and transformations: shear and pseudo-perspective.
99
+ Ronald Goldman. In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991.
100
+ (3) Decomposing a matrix into simple transformations. Spencer Thomas.
101
+ In "Graphics Gems II", pp 320-323. Morgan Kaufmann, 1991.
102
+ (4) Recovering the data from the transformation matrix. Ronald Goldman.
103
+ In "Graphics Gems II", pp 324-331. Morgan Kaufmann, 1991.
104
+ (5) Euler angle conversion. Ken Shoemake.
105
+ In "Graphics Gems IV", pp 222-229. Morgan Kaufmann, 1994.
106
+ (6) Arcball rotation control. Ken Shoemake.
107
+ In "Graphics Gems IV", pp 175-192. Morgan Kaufmann, 1994.
108
+ (7) Representing attitude: Euler angles, unit quaternions, and rotation
109
+ vectors. James Diebel. 2006.
110
+ (8) A discussion of the solution for the best rotation to relate two sets
111
+ of vectors. W Kabsch. Acta Cryst. 1978. A34, 827-828.
112
+ (9) Closed-form solution of absolute orientation using unit quaternions.
113
+ BKP Horn. J Opt Soc Am A. 1987. 4(4), 629-642.
114
+ (10) Quaternions. Ken Shoemake.
115
+ http://www.sfu.ca/~jwa3/cmpt461/files/quatut.pdf
116
+ (11) From quaternion to matrix and back. JMP van Waveren. 2005.
117
+ http://www.intel.com/cd/ids/developer/asmo-na/eng/293748.htm
118
+ (12) Uniform random rotations. Ken Shoemake.
119
+ In "Graphics Gems III", pp 124-132. Morgan Kaufmann, 1992.
120
+
121
+
122
+ Examples
123
+ --------
124
+
125
+ >>> alpha, beta, gamma = 0.123, -1.234, 2.345
126
+ >>> origin, xaxis, yaxis, zaxis = (0, 0, 0), (1, 0, 0), (0, 1, 0), (0, 0, 1)
127
+ >>> I = identity_matrix()
128
+ >>> Rx = rotation_matrix(alpha, xaxis)
129
+ >>> Ry = rotation_matrix(beta, yaxis)
130
+ >>> Rz = rotation_matrix(gamma, zaxis)
131
+ >>> R = concatenate_matrices(Rx, Ry, Rz)
132
+ >>> euler = euler_from_matrix(R, 'rxyz')
133
+ >>> numpy.allclose([alpha, beta, gamma], euler)
134
+ True
135
+ >>> Re = euler_matrix(alpha, beta, gamma, 'rxyz')
136
+ >>> is_same_transform(R, Re)
137
+ True
138
+ >>> al, be, ga = euler_from_matrix(Re, 'rxyz')
139
+ >>> is_same_transform(Re, euler_matrix(al, be, ga, 'rxyz'))
140
+ True
141
+ >>> qx = quaternion_about_axis(alpha, xaxis)
142
+ >>> qy = quaternion_about_axis(beta, yaxis)
143
+ >>> qz = quaternion_about_axis(gamma, zaxis)
144
+ >>> q = quaternion_multiply(qx, qy)
145
+ >>> q = quaternion_multiply(q, qz)
146
+ >>> Rq = quaternion_matrix(q)
147
+ >>> is_same_transform(R, Rq)
148
+ True
149
+ >>> S = scale_matrix(1.23, origin)
150
+ >>> T = translation_matrix((1, 2, 3))
151
+ >>> Z = shear_matrix(beta, xaxis, origin, zaxis)
152
+ >>> R = random_rotation_matrix(numpy.random.rand(3))
153
+ >>> M = concatenate_matrices(T, R, Z, S)
154
+ >>> scale, shear, angles, trans, persp = decompose_matrix(M)
155
+ >>> numpy.allclose(scale, 1.23)
156
+ True
157
+ >>> numpy.allclose(trans, (1, 2, 3))
158
+ True
159
+ >>> numpy.allclose(shear, (0, math.tan(beta), 0))
160
+ True
161
+ >>> is_same_transform(R, euler_matrix(axes='sxyz', *angles))
162
+ True
163
+ >>> M1 = compose_matrix(scale, shear, angles, trans, persp)
164
+ >>> is_same_transform(M, M1)
165
+ True
166
+
167
+ """
168
+
169
+ from __future__ import division
170
+
171
+ import warnings
172
+ import math
173
+
174
+ import numpy
175
+
176
+ # Documentation in HTML format can be generated with Epydoc
177
+ __docformat__ = "restructuredtext en"
178
+
179
+
180
+ def skew(v):
181
+ """Returns the skew-symmetric matrix of a vector
182
+ cfo, 2015/08/13
183
+
184
+ """
185
+ return numpy.array([[0, -v[2], v[1]],
186
+ [v[2], 0, -v[0]],
187
+ [-v[1], v[0], 0]], dtype=numpy.float64)
188
+
189
+
190
+ def unskew(R):
191
+ """Returns the coordinates of a skew-symmetric matrix
192
+ cfo, 2015/08/13
193
+
194
+ """
195
+ return numpy.array([R[2, 1], R[0, 2], R[1, 0]], dtype=numpy.float64)
196
+
197
+
198
+ def first_order_rotation(rotvec):
199
+ """First order approximation of a rotation: I + skew(rotvec)
200
+ cfo, 2015/08/13
201
+
202
+ """
203
+ R = numpy.zeros((3, 3), dtype=numpy.float64)
204
+ R[0, 0] = 1.0
205
+ R[1, 0] = rotvec[2]
206
+ R[2, 0] = -rotvec[1]
207
+ R[0, 1] = -rotvec[2]
208
+ R[1, 1] = 1.0
209
+ R[2, 1] = rotvec[0]
210
+ R[0, 2] = rotvec[1]
211
+ R[1, 2] = -rotvec[0]
212
+ R[2, 2] = 1.0
213
+ return R
214
+
215
+
216
+ def axis_angle(axis, theta):
217
+ """Compute a rotation matrix from an axis and an angle.
218
+ Returns 3x3 Matrix.
219
+ Is the same as transformations.rotation_matrix(theta, axis).
220
+ cfo, 2015/08/13
221
+
222
+ """
223
+ if theta*theta > _EPS:
224
+ wx = axis[0]
225
+ wy = axis[1]
226
+ wz = axis[2]
227
+ costheta = numpy.cos(theta)
228
+ sintheta = numpy.sin(theta)
229
+ c_1 = 1.0 - costheta
230
+ wx_sintheta = wx * sintheta
231
+ wy_sintheta = wy * sintheta
232
+ wz_sintheta = wz * sintheta
233
+ C00 = c_1 * wx * wx
234
+ C01 = c_1 * wx * wy
235
+ C02 = c_1 * wx * wz
236
+ C11 = c_1 * wy * wy
237
+ C12 = c_1 * wy * wz
238
+ C22 = c_1 * wz * wz
239
+ R = numpy.zeros((3, 3), dtype=numpy.float64)
240
+ R[0, 0] = costheta + C00
241
+ R[1, 0] = wz_sintheta + C01
242
+ R[2, 0] = -wy_sintheta + C02
243
+ R[0, 1] = -wz_sintheta + C01
244
+ R[1, 1] = costheta + C11
245
+ R[2, 1] = wx_sintheta + C12
246
+ R[0, 2] = wy_sintheta + C02
247
+ R[1, 2] = -wx_sintheta + C12
248
+ R[2, 2] = costheta + C22
249
+ return R
250
+ else:
251
+ return first_order_rotation(axis*theta)
252
+
253
+
254
+ def expmap_so3(rotvec):
255
+ """Exponential map at identity.
256
+ Create a rotation from canonical coordinates using Rodrigues' formula.
257
+ cfo, 2015/08/13
258
+
259
+ """
260
+ theta = numpy.linalg.norm(rotvec)
261
+ axis = rotvec/theta
262
+ return axis_angle(axis, theta)
263
+
264
+
265
+ def logmap_so3(R):
266
+ """Logmap at the identity.
267
+ Returns canonical coordinates of rotation.
268
+ cfo, 2015/08/13
269
+
270
+ """
271
+ R11 = R[0, 0]
272
+ R12 = R[0, 1]
273
+ R13 = R[0, 2]
274
+ R21 = R[1, 0]
275
+ R22 = R[1, 1]
276
+ R23 = R[1, 2]
277
+ R31 = R[2, 0]
278
+ R32 = R[2, 1]
279
+ R33 = R[2, 2]
280
+ tr = numpy.trace(R)
281
+ omega = numpy.empty((3,), dtype=numpy.float64)
282
+
283
+ # when trace == -1, i.e., when theta = +-pi, +-3pi, +-5pi, we do something
284
+ # special
285
+ if(numpy.abs(tr + 1.0) < 1e-10):
286
+ if(numpy.abs(R33 + 1.0) > 1e-10):
287
+ omega = (numpy.pi / numpy.sqrt(2.0 + 2.0 * R33)) * \
288
+ numpy.array([R13, R23, 1.0+R33])
289
+ elif(numpy.abs(R22 + 1.0) > 1e-10):
290
+ omega = (numpy.pi / numpy.sqrt(2.0 + 2.0 * R22)) * \
291
+ numpy.array([R12, 1.0+R22, R32])
292
+ else:
293
+ omega = (numpy.pi / numpy.sqrt(2.0 + 2.0 * R11)) * \
294
+ numpy.array([1.0+R11, R21, R31])
295
+ else:
296
+ magnitude = 1.0
297
+ tr_3 = tr - 3.0
298
+ if tr_3 < -1e-7:
299
+ theta = numpy.arccos((tr - 1.0) / 2.0)
300
+ magnitude = theta / (2.0 * numpy.sin(theta))
301
+ else:
302
+ # when theta near 0, +-2pi, +-4pi, etc. (trace near 3.0)
303
+ # use Taylor expansion: theta \approx 1/2-(t-3)/12 + O((t-3)^2)
304
+ magnitude = 0.5 - tr_3 / 12.0
305
+
306
+ omega = magnitude * numpy.array([R32 - R23, R13 - R31, R21 - R12])
307
+
308
+ return omega
309
+
310
+
311
+
312
+ def right_jacobian_so3(rotvec):
313
+ """Right Jacobian for Exponential map in SO(3)
314
+ Equation (10.86) and following equations in G.S. Chirikjian, "Stochastic
315
+ Models, Information Theory, and Lie Groups", Volume 2, 2008.
316
+
317
+ > expmap_so3(thetahat + omega) \approx expmap_so3(thetahat) * expmap_so3(Jr * omega)
318
+ where Jr = right_jacobian_so3(thetahat);
319
+ This maps a perturbation in the tangent space (omega) to a perturbation
320
+ on the manifold (expmap_so3(Jr * omega))
321
+ cfo, 2015/08/13
322
+
323
+ """
324
+
325
+ theta2 = numpy.dot(rotvec, rotvec)
326
+ if theta2 <= _EPS:
327
+ return numpy.identity(3, dtype=numpy.float64)
328
+ else:
329
+ theta = numpy.sqrt(theta2)
330
+ Y = skew(rotvec) / theta
331
+ I_3x3 = numpy.identity(3, dtype=numpy.float64)
332
+ J_r = I_3x3 - ((1.0 - numpy.cos(theta)) / theta) * Y + \
333
+ (1.0 - numpy.sin(theta) / theta) * numpy.dot(Y, Y)
334
+ return J_r
335
+
336
+
337
+ def S_inv_eulerZYX_body(euler_coordinates):
338
+ """ Relates angular rates w to changes in eulerZYX coordinates.
339
+ dot(euler) = S^-1(euler_coordinates) * omega
340
+ Also called: rotation-rate matrix. (E in Lupton paper)
341
+ cfo, 2015/08/13
342
+
343
+ """
344
+ y = euler_coordinates[1]
345
+ z = euler_coordinates[2]
346
+ E = numpy.zeros((3, 3))
347
+ E[0, 1] = numpy.sin(z)/numpy.cos(y)
348
+ E[0, 2] = numpy.cos(z)/numpy.cos(y)
349
+ E[1, 1] = numpy.cos(z)
350
+ E[1, 2] = -numpy.sin(z)
351
+ E[2, 0] = 1.0
352
+ E[2, 1] = numpy.sin(z)*numpy.sin(y)/numpy.cos(y)
353
+ E[2, 2] = numpy.cos(z)*numpy.sin(y)/numpy.cos(y)
354
+ return E
355
+
356
+
357
+ def S_inv_eulerZYX_body_deriv(euler_coordinates, omega):
358
+ """ Compute dE(euler_coordinates)*omega/deuler_coordinates
359
+ cfo, 2015/08/13
360
+
361
+ """
362
+
363
+ y = euler_coordinates[1]
364
+ z = euler_coordinates[2]
365
+
366
+ """
367
+ w1 = omega[0]; w2 = omega[1]; w3 = omega[2]
368
+ J = numpy.zeros((3,3))
369
+ J[0,0] = 0
370
+ J[0,1] = math.tan(y) / math.cos(y) * (math.sin(z) * w2 + math.cos(z) * w3)
371
+ J[0,2] = w2/math.cos(y)*math.cos(z) - w3/math.cos(y)*math.sin(z)
372
+ J[1,0] = 0
373
+ J[1,1] = 0
374
+ J[1,2] = -w2*math.sin(z) - w3*math.cos(z)
375
+ J[2,0] = w1
376
+ J[2,1] = 1.0/math.cos(y)**2 * (w2 * math.sin(z) + w3 * math.cos(z))
377
+ J[2,2] = w2*math.tan(y)*math.cos(z) - w3*math.tan(y)*math.sin(z)
378
+
379
+ """
380
+
381
+ # second version, x = psi, y = theta, z = phi
382
+ # J_x = numpy.zeros((3,3))
383
+ J_y = numpy.zeros((3, 3))
384
+ J_z = numpy.zeros((3, 3))
385
+
386
+ # dE^-1/dtheta
387
+ J_y[0, 1] = math.tan(y)/math.cos(y)*math.sin(z)
388
+ J_y[0, 2] = math.tan(y)/math.cos(y)*math.cos(z)
389
+ J_y[2, 1] = math.sin(z)/(math.cos(y))**2
390
+ J_y[2, 2] = math.cos(z)/(math.cos(y))**2
391
+
392
+ # dE^-1/dphi
393
+ J_z[0, 1] = math.cos(z)/math.cos(y)
394
+ J_z[0, 2] = -math.sin(z)/math.cos(y)
395
+ J_z[1, 1] = -math.sin(z)
396
+ J_z[1, 2] = -math.cos(z)
397
+ J_z[2, 1] = math.cos(z)*math.tan(y)
398
+ J_z[2, 2] = -math.sin(z)*math.tan(y)
399
+
400
+ J = numpy.zeros((3, 3))
401
+ J[:, 1] = numpy.dot(J_y, omega)
402
+ J[:, 2] = numpy.dot(J_z, omega)
403
+
404
+ return J
405
+
406
+
407
+ def identity_matrix():
408
+ """Return 4x4 identity/unit matrix.
409
+
410
+ >>> I = identity_matrix()
411
+ >>> numpy.allclose(I, numpy.dot(I, I))
412
+ True
413
+ >>> numpy.sum(I), numpy.trace(I)
414
+ (4.0, 4.0)
415
+ >>> numpy.allclose(I, numpy.identity(4, dtype=numpy.float64))
416
+ True
417
+
418
+ """
419
+ return numpy.identity(4, dtype=numpy.float64)
420
+
421
+
422
+ def translation_matrix(direction):
423
+ """Return matrix to translate by direction vector.
424
+
425
+ >>> v = numpy.random.random(3) - 0.5
426
+ >>> numpy.allclose(v, translation_matrix(v)[:3, 3])
427
+ True
428
+
429
+ """
430
+ M = numpy.identity(4)
431
+ M[:3, 3] = direction[:3]
432
+ return M
433
+
434
+
435
+ def translation_from_matrix(matrix):
436
+ """Return translation vector from translation matrix.
437
+
438
+ >>> v0 = numpy.random.random(3) - 0.5
439
+ >>> v1 = translation_from_matrix(translation_matrix(v0))
440
+ >>> numpy.allclose(v0, v1)
441
+ True
442
+
443
+ """
444
+ return numpy.array(matrix, copy=False)[:3, 3].copy()
445
+
446
+
447
+ def convert_3x3_to_4x4(matrix_3x3):
448
+ M = numpy.identity(4)
449
+ M[:3, :3] = matrix_3x3
450
+ return M
451
+
452
+
453
+ def reflection_matrix(point, normal):
454
+ """Return matrix to mirror at plane defined by point and normal vector.
455
+
456
+ >>> v0 = numpy.random.random(4) - 0.5
457
+ >>> v0[3] = 1.0
458
+ >>> v1 = numpy.random.random(3) - 0.5
459
+ >>> R = reflection_matrix(v0, v1)
460
+ >>> numpy.allclose(2., numpy.trace(R))
461
+ True
462
+ >>> numpy.allclose(v0, numpy.dot(R, v0))
463
+ True
464
+ >>> v2 = v0.copy()
465
+ >>> v2[:3] += v1
466
+ >>> v3 = v0.copy()
467
+ >>> v2[:3] -= v1
468
+ >>> numpy.allclose(v2, numpy.dot(R, v3))
469
+ True
470
+
471
+ """
472
+ normal = unit_vector(normal[:3])
473
+ M = numpy.identity(4)
474
+ M[:3, :3] -= 2.0 * numpy.outer(normal, normal)
475
+ M[:3, 3] = (2.0 * numpy.dot(point[:3], normal)) * normal
476
+ return M
477
+
478
+
479
+ def reflection_from_matrix(matrix):
480
+ """Return mirror plane point and normal vector from reflection matrix.
481
+
482
+ >>> v0 = numpy.random.random(3) - 0.5
483
+ >>> v1 = numpy.random.random(3) - 0.5
484
+ >>> M0 = reflection_matrix(v0, v1)
485
+ >>> point, normal = reflection_from_matrix(M0)
486
+ >>> M1 = reflection_matrix(point, normal)
487
+ >>> is_same_transform(M0, M1)
488
+ True
489
+
490
+ """
491
+ M = numpy.array(matrix, dtype=numpy.float64, copy=False)
492
+ # normal: unit eigenvector corresponding to eigenvalue -1
493
+ l, V = numpy.linalg.eig(M[:3, :3])
494
+ i = numpy.where(abs(numpy.real(l) + 1.0) < 1e-8)[0]
495
+ if not len(i):
496
+ raise ValueError("no unit eigenvector corresponding to eigenvalue -1")
497
+ normal = numpy.real(V[:, i[0]]).squeeze()
498
+ # point: any unit eigenvector corresponding to eigenvalue 1
499
+ l, V = numpy.linalg.eig(M)
500
+ i = numpy.where(abs(numpy.real(l) - 1.0) < 1e-8)[0]
501
+ if not len(i):
502
+ raise ValueError("no unit eigenvector corresponding to eigenvalue 1")
503
+ point = numpy.real(V[:, i[-1]]).squeeze()
504
+ point /= point[3]
505
+ return point, normal
506
+
507
+
508
+ def rotation_matrix(angle, direction, point=None):
509
+ """Return matrix to rotate about axis defined by point and direction.
510
+
511
+ >>> angle = (random.random() - 0.5) * (2*math.pi)
512
+ >>> direc = numpy.random.random(3) - 0.5
513
+ >>> point = numpy.random.random(3) - 0.5
514
+ >>> R0 = rotation_matrix(angle, direc, point)
515
+ >>> R1 = rotation_matrix(angle-2*math.pi, direc, point)
516
+ >>> is_same_transform(R0, R1)
517
+ True
518
+ >>> R0 = rotation_matrix(angle, direc, point)
519
+ >>> R1 = rotation_matrix(-angle, -direc, point)
520
+ >>> is_same_transform(R0, R1)
521
+ True
522
+ >>> I = numpy.identity(4, numpy.float64)
523
+ >>> numpy.allclose(I, rotation_matrix(math.pi*2, direc))
524
+ True
525
+ >>> numpy.allclose(2., numpy.trace(rotation_matrix(math.pi/2,
526
+ ... direc, point)))
527
+ True
528
+
529
+ """
530
+ sina = math.sin(angle)
531
+ cosa = math.cos(angle)
532
+ direction = unit_vector(direction[:3])
533
+ # rotation matrix around unit vector
534
+ R = numpy.array(((cosa, 0.0, 0.0),
535
+ (0.0, cosa, 0.0),
536
+ (0.0, 0.0, cosa)), dtype=numpy.float64)
537
+ R += numpy.outer(direction, direction) * (1.0 - cosa)
538
+ direction *= sina
539
+ R += numpy.array(((0.0, -direction[2], direction[1]),
540
+ (direction[2], 0.0, -direction[0]),
541
+ (-direction[1], direction[0], 0.0)),
542
+ dtype=numpy.float64)
543
+ M = numpy.identity(4)
544
+ M[:3, :3] = R
545
+ if point is not None:
546
+ # rotation not around origin
547
+ point = numpy.array(point[:3], dtype=numpy.float64, copy=False)
548
+ M[:3, 3] = point - numpy.dot(R, point)
549
+ return M
550
+
551
+
552
+ def rotation_from_matrix(matrix):
553
+ """Return rotation angle and axis from rotation matrix.
554
+
555
+ >>> angle = (random.random() - 0.5) * (2*math.pi)
556
+ >>> direc = numpy.random.random(3) - 0.5
557
+ >>> point = numpy.random.random(3) - 0.5
558
+ >>> R0 = rotation_matrix(angle, direc, point)
559
+ >>> angle, direc, point = rotation_from_matrix(R0)
560
+ >>> R1 = rotation_matrix(angle, direc, point)
561
+ >>> is_same_transform(R0, R1)
562
+ True
563
+
564
+ """
565
+ R = numpy.array(matrix, dtype=numpy.float64, copy=False)
566
+ R33 = R[:3, :3]
567
+ # direction: unit eigenvector of R33 corresponding to eigenvalue of 1
568
+ l, W = numpy.linalg.eig(R33.T)
569
+ i = numpy.where(abs(numpy.real(l) - 1.0) < 1e-8)[0]
570
+ if not len(i):
571
+ raise ValueError("no unit eigenvector corresponding to eigenvalue 1")
572
+ direction = numpy.real(W[:, i[-1]]).squeeze()
573
+ # point: unit eigenvector of R33 corresponding to eigenvalue of 1
574
+ l, Q = numpy.linalg.eig(R)
575
+ i = numpy.where(abs(numpy.real(l) - 1.0) < 1e-8)[0]
576
+ if not len(i):
577
+ raise ValueError("no unit eigenvector corresponding to eigenvalue 1")
578
+ point = numpy.real(Q[:, i[-1]]).squeeze()
579
+ point /= point[3]
580
+ # rotation angle depending on direction
581
+ cosa = (numpy.trace(R33) - 1.0) / 2.0
582
+ if abs(direction[2]) > 1e-8:
583
+ sina = (R[1, 0] + (cosa-1.0)*direction[0]*direction[1]) / direction[2]
584
+ elif abs(direction[1]) > 1e-8:
585
+ sina = (R[0, 2] + (cosa-1.0)*direction[0]*direction[2]) / direction[1]
586
+ else:
587
+ sina = (R[2, 1] + (cosa-1.0)*direction[1]*direction[2]) / direction[0]
588
+ angle = math.atan2(sina, cosa)
589
+ return angle, direction, point
590
+
591
+
592
+ def scale_matrix(factor, origin=None, direction=None):
593
+ """Return matrix to scale by factor around origin in direction.
594
+
595
+ Use factor -1 for point symmetry.
596
+
597
+ >>> v = (numpy.random.rand(4, 5) - 0.5) * 20.0
598
+ >>> v[3] = 1.0
599
+ >>> S = scale_matrix(-1.234)
600
+ >>> numpy.allclose(numpy.dot(S, v)[:3], -1.234*v[:3])
601
+ True
602
+ >>> factor = random.random() * 10 - 5
603
+ >>> origin = numpy.random.random(3) - 0.5
604
+ >>> direct = numpy.random.random(3) - 0.5
605
+ >>> S = scale_matrix(factor, origin)
606
+ >>> S = scale_matrix(factor, origin, direct)
607
+
608
+ """
609
+ if direction is None:
610
+ # uniform scaling
611
+ M = numpy.array(((factor, 0.0, 0.0, 0.0),
612
+ (0.0, factor, 0.0, 0.0),
613
+ (0.0, 0.0, factor, 0.0),
614
+ (0.0, 0.0, 0.0, 1.0)), dtype=numpy.float64)
615
+ if origin is not None:
616
+ M[:3, 3] = origin[:3]
617
+ M[:3, 3] *= 1.0 - factor
618
+ else:
619
+ # nonuniform scaling
620
+ direction = unit_vector(direction[:3])
621
+ factor = 1.0 - factor
622
+ M = numpy.identity(4)
623
+ M[:3, :3] -= factor * numpy.outer(direction, direction)
624
+ if origin is not None:
625
+ M[:3, 3] = (factor * numpy.dot(origin[:3], direction)) * direction
626
+ return M
627
+
628
+
629
+ def scale_from_matrix(matrix):
630
+ """Return scaling factor, origin and direction from scaling matrix.
631
+
632
+ >>> factor = random.random() * 10 - 5
633
+ >>> origin = numpy.random.random(3) - 0.5
634
+ >>> direct = numpy.random.random(3) - 0.5
635
+ >>> S0 = scale_matrix(factor, origin)
636
+ >>> factor, origin, direction = scale_from_matrix(S0)
637
+ >>> S1 = scale_matrix(factor, origin, direction)
638
+ >>> is_same_transform(S0, S1)
639
+ True
640
+ >>> S0 = scale_matrix(factor, origin, direct)
641
+ >>> factor, origin, direction = scale_from_matrix(S0)
642
+ >>> S1 = scale_matrix(factor, origin, direction)
643
+ >>> is_same_transform(S0, S1)
644
+ True
645
+
646
+ """
647
+ M = numpy.array(matrix, dtype=numpy.float64, copy=False)
648
+ M33 = M[:3, :3]
649
+ factor = numpy.trace(M33) - 2.0
650
+ try:
651
+ # direction: unit eigenvector corresponding to eigenvalue factor
652
+ l, V = numpy.linalg.eig(M33)
653
+ i = numpy.where(abs(numpy.real(l) - factor) < 1e-8)[0][0]
654
+ direction = numpy.real(V[:, i]).squeeze()
655
+ direction /= vector_norm(direction)
656
+ except IndexError:
657
+ # uniform scaling
658
+ factor = (factor + 2.0) / 3.0
659
+ direction = None
660
+ # origin: any eigenvector corresponding to eigenvalue 1
661
+ l, V = numpy.linalg.eig(M)
662
+ i = numpy.where(abs(numpy.real(l) - 1.0) < 1e-8)[0]
663
+ if not len(i):
664
+ raise ValueError("no eigenvector corresponding to eigenvalue 1")
665
+ origin = numpy.real(V[:, i[-1]]).squeeze()
666
+ origin /= origin[3]
667
+ return factor, origin, direction
668
+
669
+
670
+ def projection_matrix(point, normal, direction=None,
671
+ perspective=None, pseudo=False):
672
+ """Return matrix to project onto plane defined by point and normal.
673
+
674
+ Using either perspective point, projection direction, or none of both.
675
+
676
+ If pseudo is True, perspective projections will preserve relative depth
677
+ such that Perspective = dot(Orthogonal, PseudoPerspective).
678
+
679
+ >>> P = projection_matrix((0, 0, 0), (1, 0, 0))
680
+ >>> numpy.allclose(P[1:, 1:], numpy.identity(4)[1:, 1:])
681
+ True
682
+ >>> point = numpy.random.random(3) - 0.5
683
+ >>> normal = numpy.random.random(3) - 0.5
684
+ >>> direct = numpy.random.random(3) - 0.5
685
+ >>> persp = numpy.random.random(3) - 0.5
686
+ >>> P0 = projection_matrix(point, normal)
687
+ >>> P1 = projection_matrix(point, normal, direction=direct)
688
+ >>> P2 = projection_matrix(point, normal, perspective=persp)
689
+ >>> P3 = projection_matrix(point, normal, perspective=persp, pseudo=True)
690
+ >>> is_same_transform(P2, numpy.dot(P0, P3))
691
+ True
692
+ >>> P = projection_matrix((3, 0, 0), (1, 1, 0), (1, 0, 0))
693
+ >>> v0 = (numpy.random.rand(4, 5) - 0.5) * 20.0
694
+ >>> v0[3] = 1.0
695
+ >>> v1 = numpy.dot(P, v0)
696
+ >>> numpy.allclose(v1[1], v0[1])
697
+ True
698
+ >>> numpy.allclose(v1[0], 3.0-v1[1])
699
+ True
700
+
701
+ """
702
+ M = numpy.identity(4)
703
+ point = numpy.array(point[:3], dtype=numpy.float64, copy=False)
704
+ normal = unit_vector(normal[:3])
705
+ if perspective is not None:
706
+ # perspective projection
707
+ perspective = numpy.array(perspective[:3], dtype=numpy.float64,
708
+ copy=False)
709
+ M[0, 0] = M[1, 1] = M[2, 2] = numpy.dot(perspective-point, normal)
710
+ M[:3, :3] -= numpy.outer(perspective, normal)
711
+ if pseudo:
712
+ # preserve relative depth
713
+ M[:3, :3] -= numpy.outer(normal, normal)
714
+ M[:3, 3] = numpy.dot(point, normal) * (perspective+normal)
715
+ else:
716
+ M[:3, 3] = numpy.dot(point, normal) * perspective
717
+ M[3, :3] = -normal
718
+ M[3, 3] = numpy.dot(perspective, normal)
719
+ elif direction is not None:
720
+ # parallel projection
721
+ direction = numpy.array(direction[:3], dtype=numpy.float64, copy=False)
722
+ scale = numpy.dot(direction, normal)
723
+ M[:3, :3] -= numpy.outer(direction, normal) / scale
724
+ M[:3, 3] = direction * (numpy.dot(point, normal) / scale)
725
+ else:
726
+ # orthogonal projection
727
+ M[:3, :3] -= numpy.outer(normal, normal)
728
+ M[:3, 3] = numpy.dot(point, normal) * normal
729
+ return M
730
+
731
+
732
+ def projection_from_matrix(matrix, pseudo=False):
733
+ """Return projection plane and perspective point from projection matrix.
734
+
735
+ Return values are same as arguments for projection_matrix function:
736
+ point, normal, direction, perspective, and pseudo.
737
+
738
+ >>> point = numpy.random.random(3) - 0.5
739
+ >>> normal = numpy.random.random(3) - 0.5
740
+ >>> direct = numpy.random.random(3) - 0.5
741
+ >>> persp = numpy.random.random(3) - 0.5
742
+ >>> P0 = projection_matrix(point, normal)
743
+ >>> result = projection_from_matrix(P0)
744
+ >>> P1 = projection_matrix(*result)
745
+ >>> is_same_transform(P0, P1)
746
+ True
747
+ >>> P0 = projection_matrix(point, normal, direct)
748
+ >>> result = projection_from_matrix(P0)
749
+ >>> P1 = projection_matrix(*result)
750
+ >>> is_same_transform(P0, P1)
751
+ True
752
+ >>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=False)
753
+ >>> result = projection_from_matrix(P0, pseudo=False)
754
+ >>> P1 = projection_matrix(*result)
755
+ >>> is_same_transform(P0, P1)
756
+ True
757
+ >>> P0 = projection_matrix(point, normal, perspective=persp, pseudo=True)
758
+ >>> result = projection_from_matrix(P0, pseudo=True)
759
+ >>> P1 = projection_matrix(*result)
760
+ >>> is_same_transform(P0, P1)
761
+ True
762
+
763
+ """
764
+ M = numpy.array(matrix, dtype=numpy.float64, copy=False)
765
+ M33 = M[:3, :3]
766
+ l, V = numpy.linalg.eig(M)
767
+ i = numpy.where(abs(numpy.real(l) - 1.0) < 1e-8)[0]
768
+ if not pseudo and len(i):
769
+ # point: any eigenvector corresponding to eigenvalue 1
770
+ point = numpy.real(V[:, i[-1]]).squeeze()
771
+ point /= point[3]
772
+ # direction: unit eigenvector corresponding to eigenvalue 0
773
+ l, V = numpy.linalg.eig(M33)
774
+ i = numpy.where(abs(numpy.real(l)) < 1e-8)[0]
775
+ if not len(i):
776
+ raise ValueError("no eigenvector corresponding to eigenvalue 0")
777
+ direction = numpy.real(V[:, i[0]]).squeeze()
778
+ direction /= vector_norm(direction)
779
+ # normal: unit eigenvector of M33.T corresponding to eigenvalue 0
780
+ l, V = numpy.linalg.eig(M33.T)
781
+ i = numpy.where(abs(numpy.real(l)) < 1e-8)[0]
782
+ if len(i):
783
+ # parallel projection
784
+ normal = numpy.real(V[:, i[0]]).squeeze()
785
+ normal /= vector_norm(normal)
786
+ return point, normal, direction, None, False
787
+ else:
788
+ # orthogonal projection, where normal equals direction vector
789
+ return point, direction, None, None, False
790
+ else:
791
+ # perspective projection
792
+ i = numpy.where(abs(numpy.real(l)) > 1e-8)[0]
793
+ if not len(i):
794
+ raise ValueError(
795
+ "no eigenvector not corresponding to eigenvalue 0")
796
+ point = numpy.real(V[:, i[-1]]).squeeze()
797
+ point /= point[3]
798
+ normal = - M[3, :3]
799
+ perspective = M[:3, 3] / numpy.dot(point[:3], normal)
800
+ if pseudo:
801
+ perspective -= normal
802
+ return point, normal, None, perspective, pseudo
803
+
804
+
805
+ def clip_matrix(left, right, bottom, top, near, far, perspective=False):
806
+ """Return matrix to obtain normalized device coordinates from frustrum.
807
+
808
+ The frustrum bounds are axis-aligned along x (left, right),
809
+ y (bottom, top) and z (near, far).
810
+
811
+ Normalized device coordinates are in range [-1, 1] if coordinates are
812
+ inside the frustrum.
813
+
814
+ If perspective is True the frustrum is a truncated pyramid with the
815
+ perspective point at origin and direction along z axis, otherwise an
816
+ orthographic canonical view volume (a box).
817
+
818
+ Homogeneous coordinates transformed by the perspective clip matrix
819
+ need to be dehomogenized (divided by w coordinate).
820
+
821
+ >>> frustrum = numpy.random.rand(6)
822
+ >>> frustrum[1] += frustrum[0]
823
+ >>> frustrum[3] += frustrum[2]
824
+ >>> frustrum[5] += frustrum[4]
825
+ >>> M = clip_matrix(*frustrum, perspective=False)
826
+ >>> numpy.dot(M, [frustrum[0], frustrum[2], frustrum[4], 1.0])
827
+ array([-1., -1., -1., 1.])
828
+ >>> numpy.dot(M, [frustrum[1], frustrum[3], frustrum[5], 1.0])
829
+ array([ 1., 1., 1., 1.])
830
+ >>> M = clip_matrix(*frustrum, perspective=True)
831
+ >>> v = numpy.dot(M, [frustrum[0], frustrum[2], frustrum[4], 1.0])
832
+ >>> v / v[3]
833
+ array([-1., -1., -1., 1.])
834
+ >>> v = numpy.dot(M, [frustrum[1], frustrum[3], frustrum[4], 1.0])
835
+ >>> v / v[3]
836
+ array([ 1., 1., -1., 1.])
837
+
838
+ """
839
+ if left >= right or bottom >= top or near >= far:
840
+ raise ValueError("invalid frustrum")
841
+ if perspective:
842
+ if near <= _EPS:
843
+ raise ValueError("invalid frustrum: near <= 0")
844
+ t = 2.0 * near
845
+ M = ((-t/(right-left), 0.0, (right+left)/(right-left), 0.0),
846
+ (0.0, -t/(top-bottom), (top+bottom)/(top-bottom), 0.0),
847
+ (0.0, 0.0, -(far+near)/(far-near), t*far/(far-near)),
848
+ (0.0, 0.0, -1.0, 0.0))
849
+ else:
850
+ M = ((2.0/(right-left), 0.0, 0.0, (right+left)/(left-right)),
851
+ (0.0, 2.0/(top-bottom), 0.0, (top+bottom)/(bottom-top)),
852
+ (0.0, 0.0, 2.0/(far-near), (far+near)/(near-far)),
853
+ (0.0, 0.0, 0.0, 1.0))
854
+ return numpy.array(M, dtype=numpy.float64)
855
+
856
+
857
+ def shear_matrix(angle, direction, point, normal):
858
+ """Return matrix to shear by angle along direction vector on shear plane.
859
+
860
+ The shear plane is defined by a point and normal vector. The direction
861
+ vector must be orthogonal to the plane's normal vector.
862
+
863
+ A point P is transformed by the shear matrix into P" such that
864
+ the vector P-P" is parallel to the direction vector and its extent is
865
+ given by the angle of P-P'-P", where P' is the orthogonal projection
866
+ of P onto the shear plane.
867
+
868
+ >>> angle = (random.random() - 0.5) * 4*math.pi
869
+ >>> direct = numpy.random.random(3) - 0.5
870
+ >>> point = numpy.random.random(3) - 0.5
871
+ >>> normal = numpy.cross(direct, numpy.random.random(3))
872
+ >>> S = shear_matrix(angle, direct, point, normal)
873
+ >>> numpy.allclose(1.0, numpy.linalg.det(S))
874
+ True
875
+
876
+ """
877
+ normal = unit_vector(normal[:3])
878
+ direction = unit_vector(direction[:3])
879
+ if abs(numpy.dot(normal, direction)) > 1e-6:
880
+ raise ValueError("direction and normal vectors are not orthogonal")
881
+ angle = math.tan(angle)
882
+ M = numpy.identity(4)
883
+ M[:3, :3] += angle * numpy.outer(direction, normal)
884
+ M[:3, 3] = -angle * numpy.dot(point[:3], normal) * direction
885
+ return M
886
+
887
+
888
+ def shear_from_matrix(matrix):
889
+ """Return shear angle, direction and plane from shear matrix.
890
+
891
+ >>> angle = (random.random() - 0.5) * 4*math.pi
892
+ >>> direct = numpy.random.random(3) - 0.5
893
+ >>> point = numpy.random.random(3) - 0.5
894
+ >>> normal = numpy.cross(direct, numpy.random.random(3))
895
+ >>> S0 = shear_matrix(angle, direct, point, normal)
896
+ >>> angle, direct, point, normal = shear_from_matrix(S0)
897
+ >>> S1 = shear_matrix(angle, direct, point, normal)
898
+ >>> is_same_transform(S0, S1)
899
+ True
900
+
901
+ """
902
+ M = numpy.array(matrix, dtype=numpy.float64, copy=False)
903
+ M33 = M[:3, :3]
904
+ # normal: cross independent eigenvectors corresponding to the eigenvalue 1
905
+ l, V = numpy.linalg.eig(M33)
906
+ i = numpy.where(abs(numpy.real(l) - 1.0) < 1e-4)[0]
907
+ if len(i) < 2:
908
+ raise ValueError("No two linear independent eigenvectors found %s" % l)
909
+ V = numpy.real(V[:, i]).squeeze().T
910
+ lenorm = -1.0
911
+ for i0, i1 in ((0, 1), (0, 2), (1, 2)):
912
+ n = numpy.cross(V[i0], V[i1])
913
+ l = vector_norm(n)
914
+ if l > lenorm:
915
+ lenorm = l
916
+ normal = n
917
+ normal /= lenorm
918
+ # direction and angle
919
+ direction = numpy.dot(M33 - numpy.identity(3), normal)
920
+ angle = vector_norm(direction)
921
+ direction /= angle
922
+ angle = math.atan(angle)
923
+ # point: eigenvector corresponding to eigenvalue 1
924
+ l, V = numpy.linalg.eig(M)
925
+ i = numpy.where(abs(numpy.real(l) - 1.0) < 1e-8)[0]
926
+ if not len(i):
927
+ raise ValueError("no eigenvector corresponding to eigenvalue 1")
928
+ point = numpy.real(V[:, i[-1]]).squeeze()
929
+ point /= point[3]
930
+ return angle, direction, point, normal
931
+
932
+
933
+ def decompose_matrix(matrix):
934
+ """Return sequence of transformations from transformation matrix.
935
+
936
+ matrix : array_like
937
+ Non-degenerate homogeneous transformation matrix
938
+
939
+ Return tuple of:
940
+ scale : vector of 3 scaling factors
941
+ shear : list of shear factors for x-y, x-z, y-z axes
942
+ angles : list of Euler angles about static x, y, z axes
943
+ translate : translation vector along x, y, z axes
944
+ perspective : perspective partition of matrix
945
+
946
+ Raise ValueError if matrix is of wrong type or degenerate.
947
+
948
+ >>> T0 = translation_matrix((1, 2, 3))
949
+ >>> scale, shear, angles, trans, persp = decompose_matrix(T0)
950
+ >>> T1 = translation_matrix(trans)
951
+ >>> numpy.allclose(T0, T1)
952
+ True
953
+ >>> S = scale_matrix(0.123)
954
+ >>> scale, shear, angles, trans, persp = decompose_matrix(S)
955
+ >>> scale[0]
956
+ 0.123
957
+ >>> R0 = euler_matrix(1, 2, 3)
958
+ >>> scale, shear, angles, trans, persp = decompose_matrix(R0)
959
+ >>> R1 = euler_matrix(*angles)
960
+ >>> numpy.allclose(R0, R1)
961
+ True
962
+
963
+ """
964
+ M = numpy.array(matrix, dtype=numpy.float64, copy=True).T
965
+ if abs(M[3, 3]) < _EPS:
966
+ raise ValueError("M[3, 3] is zero")
967
+ M /= M[3, 3]
968
+ P = M.copy()
969
+ P[:, 3] = 0, 0, 0, 1
970
+ if not numpy.linalg.det(P):
971
+ raise ValueError("Matrix is singular")
972
+
973
+ scale = numpy.zeros((3, ), dtype=numpy.float64)
974
+ shear = [0, 0, 0]
975
+ angles = [0, 0, 0]
976
+
977
+ if any(abs(M[:3, 3]) > _EPS):
978
+ perspective = numpy.dot(M[:, 3], numpy.linalg.inv(P.T))
979
+ M[:, 3] = 0, 0, 0, 1
980
+ else:
981
+ perspective = numpy.array((0, 0, 0, 1), dtype=numpy.float64)
982
+
983
+ translate = M[3, :3].copy()
984
+ M[3, :3] = 0
985
+
986
+ row = M[:3, :3].copy()
987
+ scale[0] = vector_norm(row[0])
988
+ row[0] /= scale[0]
989
+ shear[0] = numpy.dot(row[0], row[1])
990
+ row[1] -= row[0] * shear[0]
991
+ scale[1] = vector_norm(row[1])
992
+ row[1] /= scale[1]
993
+ shear[0] /= scale[1]
994
+ shear[1] = numpy.dot(row[0], row[2])
995
+ row[2] -= row[0] * shear[1]
996
+ shear[2] = numpy.dot(row[1], row[2])
997
+ row[2] -= row[1] * shear[2]
998
+ scale[2] = vector_norm(row[2])
999
+ row[2] /= scale[2]
1000
+ shear[1] /= scale[2]
+ shear[2] /= scale[2]
1001
+
1002
+ if numpy.dot(row[0], numpy.cross(row[1], row[2])) < 0:
1003
+ scale *= -1
1004
+ row *= -1
1005
+
1006
+ angles[1] = math.asin(-row[0, 2])
1007
+ if math.cos(angles[1]):
1008
+ angles[0] = math.atan2(row[1, 2], row[2, 2])
1009
+ angles[2] = math.atan2(row[0, 1], row[0, 0])
1010
+ else:
1011
+ #angles[0] = math.atan2(row[1, 0], row[1, 1])
1012
+ angles[0] = math.atan2(-row[2, 1], row[1, 1])
1013
+ angles[2] = 0.0
1014
+
1015
+ return scale, shear, angles, translate, perspective
1016
+
1017
+
1018
+ def compose_matrix(scale=None, shear=None, angles=None, translate=None,
1019
+ perspective=None):
1020
+ """Return transformation matrix from sequence of transformations.
1021
+
1022
+ This is the inverse of the decompose_matrix function.
1023
+
1024
+ Sequence of transformations:
1025
+ scale : vector of 3 scaling factors
1026
+ shear : list of shear factors for x-y, x-z, y-z axes
1027
+ angles : list of Euler angles about static x, y, z axes
1028
+ translate : translation vector along x, y, z axes
1029
+ perspective : perspective partition of matrix
1030
+
1031
+ >>> scale = numpy.random.random(3) - 0.5
1032
+ >>> shear = numpy.random.random(3) - 0.5
1033
+ >>> angles = (numpy.random.random(3) - 0.5) * (2*math.pi)
1034
+ >>> trans = numpy.random.random(3) - 0.5
1035
+ >>> persp = numpy.random.random(4) - 0.5
1036
+ >>> M0 = compose_matrix(scale, shear, angles, trans, persp)
1037
+ >>> result = decompose_matrix(M0)
1038
+ >>> M1 = compose_matrix(*result)
1039
+ >>> is_same_transform(M0, M1)
1040
+ True
1041
+
1042
+ """
1043
+ M = numpy.identity(4)
1044
+ if perspective is not None:
1045
+ P = numpy.identity(4)
1046
+ P[3, :] = perspective[:4]
1047
+ M = numpy.dot(M, P)
1048
+ if translate is not None:
1049
+ T = numpy.identity(4)
1050
+ T[:3, 3] = translate[:3]
1051
+ M = numpy.dot(M, T)
1052
+ if angles is not None:
1053
+ R = euler_matrix(angles[0], angles[1], angles[2], 'sxyz')
1054
+ M = numpy.dot(M, R)
1055
+ if shear is not None:
1056
+ Z = numpy.identity(4)
1057
+ Z[1, 2] = shear[2]
1058
+ Z[0, 2] = shear[1]
1059
+ Z[0, 1] = shear[0]
1060
+ M = numpy.dot(M, Z)
1061
+ if scale is not None:
1062
+ S = numpy.identity(4)
1063
+ S[0, 0] = scale[0]
1064
+ S[1, 1] = scale[1]
1065
+ S[2, 2] = scale[2]
1066
+ M = numpy.dot(M, S)
1067
+ M /= M[3, 3]
1068
+ return M
1069
+
1070
+
1071
+ def orthogonalization_matrix(lengths, angles):
1072
+ """Return orthogonalization matrix for crystallographic cell coordinates.
1073
+
1074
+ Angles are expected in degrees.
1075
+
1076
+ The de-orthogonalization matrix is the inverse.
1077
+
1078
+ >>> O = orthogonalization_matrix((10., 10., 10.), (90., 90., 90.))
1079
+ >>> numpy.allclose(O[:3, :3], numpy.identity(3, float) * 10)
1080
+ True
1081
+ >>> O = orthogonalization_matrix([9.8, 12.0, 15.5], [87.2, 80.7, 69.7])
1082
+ >>> numpy.allclose(numpy.sum(O), 43.063229)
1083
+ True
1084
+
1085
+ """
1086
+ a, b, c = lengths
1087
+ angles = numpy.radians(angles)
1088
+ sina, sinb, _ = numpy.sin(angles)
1089
+ cosa, cosb, cosg = numpy.cos(angles)
1090
+ co = (cosa * cosb - cosg) / (sina * sinb)
1091
+ return numpy.array((
1092
+ (a*sinb*math.sqrt(1.0-co*co), 0.0, 0.0, 0.0),
1093
+ (-a*sinb*co, b*sina, 0.0, 0.0),
1094
+ (a*cosb, b*cosa, c, 0.0),
1095
+ (0.0, 0.0, 0.0, 1.0)),
1096
+ dtype=numpy.float64)
1097
+
1098
+
1099
+ def superimposition_matrix(v0, v1, scaling=False, usesvd=True):
1100
+ """Return matrix to transform given vector set into second vector set.
1101
+
1102
+ v0 and v1 are shape (3, \*) or (4, \*) arrays of at least 3 vectors.
1103
+
1104
+ If usesvd is True, the weighted sum of squared deviations (RMSD) is
1105
+ minimized according to the algorithm by W. Kabsch [8]. Otherwise the
1106
+ quaternion based algorithm by B. Horn [9] is used (slower when using
1107
+ this Python implementation).
1108
+
1109
+ The returned matrix performs rotation, translation and uniform scaling
1110
+ (if specified).
1111
+
1112
+ >>> v0 = numpy.random.rand(3, 10)
1113
+ >>> M = superimposition_matrix(v0, v0)
1114
+ >>> numpy.allclose(M, numpy.identity(4))
1115
+ True
1116
+ >>> R = random_rotation_matrix(numpy.random.random(3))
1117
+ >>> v0 = ((1,0,0), (0,1,0), (0,0,1), (1,1,1))
1118
+ >>> v1 = numpy.dot(R, v0)
1119
+ >>> M = superimposition_matrix(v0, v1)
1120
+ >>> numpy.allclose(v1, numpy.dot(M, v0))
1121
+ True
1122
+ >>> v0 = (numpy.random.rand(4, 100) - 0.5) * 20.0
1123
+ >>> v0[3] = 1.0
1124
+ >>> v1 = numpy.dot(R, v0)
1125
+ >>> M = superimposition_matrix(v0, v1)
1126
+ >>> numpy.allclose(v1, numpy.dot(M, v0))
1127
+ True
1128
+ >>> S = scale_matrix(random.random())
1129
+ >>> T = translation_matrix(numpy.random.random(3)-0.5)
1130
+ >>> M = concatenate_matrices(T, R, S)
1131
+ >>> v1 = numpy.dot(M, v0)
1132
+ >>> v0[:3] += numpy.random.normal(0.0, 1e-9, 300).reshape(3, -1)
1133
+ >>> M = superimposition_matrix(v0, v1, scaling=True)
1134
+ >>> numpy.allclose(v1, numpy.dot(M, v0))
1135
+ True
1136
+ >>> M = superimposition_matrix(v0, v1, scaling=True, usesvd=False)
1137
+ >>> numpy.allclose(v1, numpy.dot(M, v0))
1138
+ True
1139
+ >>> v = numpy.empty((4, 100, 3), dtype=numpy.float64)
1140
+ >>> v[:, :, 0] = v0
1141
+ >>> M = superimposition_matrix(v0, v1, scaling=True, usesvd=False)
1142
+ >>> numpy.allclose(v1, numpy.dot(M, v[:, :, 0]))
1143
+ True
1144
+
1145
+ """
1146
+ v0 = numpy.array(v0, dtype=numpy.float64, copy=False)[:3]
1147
+ v1 = numpy.array(v1, dtype=numpy.float64, copy=False)[:3]
1148
+
1149
+ if v0.shape != v1.shape or v0.shape[1] < 3:
1150
+ raise ValueError("Vector sets are of wrong shape or type.")
1151
+
1152
+ # move centroids to origin
1153
+ t0 = numpy.mean(v0, axis=1)
1154
+ t1 = numpy.mean(v1, axis=1)
1155
+ v0 = v0 - t0.reshape(3, 1)
1156
+ v1 = v1 - t1.reshape(3, 1)
1157
+
1158
+ if usesvd:
1159
+ # Singular Value Decomposition of covariance matrix
1160
+ u, s, vh = numpy.linalg.svd(numpy.dot(v1, v0.T))
1161
+ # rotation matrix from SVD orthonormal bases
1162
+ R = numpy.dot(u, vh)
1163
+ if numpy.linalg.det(R) < 0.0:
1164
+ # R does not constitute right handed system
1165
+ R -= numpy.outer(u[:, 2], vh[2, :]*2.0)
1166
+ s[-1] *= -1.0
1167
+ # homogeneous transformation matrix
1168
+ M = numpy.identity(4)
1169
+ M[:3, :3] = R
1170
+ else:
1171
+ # compute symmetric matrix N
1172
+ xx, yy, zz = numpy.sum(v0 * v1, axis=1)
1173
+ xy, yz, zx = numpy.sum(v0 * numpy.roll(v1, -1, axis=0), axis=1)
1174
+ xz, yx, zy = numpy.sum(v0 * numpy.roll(v1, -2, axis=0), axis=1)
1175
+ N = ((xx+yy+zz, yz-zy, zx-xz, xy-yx),
1176
+ (yz-zy, xx-yy-zz, xy+yx, zx+xz),
1177
+ (zx-xz, xy+yx, -xx+yy-zz, yz+zy),
1178
+ (xy-yx, zx+xz, yz+zy, -xx-yy+zz))
1179
+ # quaternion: eigenvector corresponding to most positive eigenvalue
1180
+ l, V = numpy.linalg.eig(N)
1181
+ q = V[:, numpy.argmax(l)]
1182
+ q /= vector_norm(q) # unit quaternion
1183
+ q = numpy.roll(q, -1) # move w component to end
1184
+ # homogeneous transformation matrix
1185
+ M = quaternion_matrix(q)
1186
+
1187
+ # scale: ratio of rms deviations from centroid
1188
+ if scaling:
1189
+ v0 *= v0
1190
+ v1 *= v1
1191
+ M[:3, :3] *= math.sqrt(numpy.sum(v1) / numpy.sum(v0))
1192
+
1193
+ # translation
1194
+ M[:3, 3] = t1
1195
+ T = numpy.identity(4)
1196
+ T[:3, 3] = -t0
1197
+ M = numpy.dot(M, T)
1198
+ return M
1199
+
1200
+
1201
+ def euler_matrix(ai, aj, ak, axes='sxyz'):
1202
+ """Return homogeneous rotation matrix from Euler angles and axis sequence.
1203
+
1204
+ ai, aj, ak : Euler's roll, pitch and yaw angles
1205
+ axes : One of 24 axis sequences as string or encoded tuple
1206
+
1207
+ >>> R = euler_matrix(1, 2, 3, 'syxz')
1208
+ >>> numpy.allclose(numpy.sum(R[0]), -1.34786452)
1209
+ True
1210
+ >>> R = euler_matrix(1, 2, 3, (0, 1, 0, 1))
1211
+ >>> numpy.allclose(numpy.sum(R[0]), -0.383436184)
1212
+ True
1213
+ >>> ai, aj, ak = (4.0*math.pi) * (numpy.random.random(3) - 0.5)
1214
+ >>> for axes in _AXES2TUPLE.keys():
1215
+ ... R = euler_matrix(ai, aj, ak, axes)
1216
+ >>> for axes in _TUPLE2AXES.keys():
1217
+ ... R = euler_matrix(ai, aj, ak, axes)
1218
+
1219
+ """
1220
+ try:
1221
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes]
1222
+ except (AttributeError, KeyError):
1223
+ _ = _TUPLE2AXES[axes]
1224
+ firstaxis, parity, repetition, frame = axes
1225
+
1226
+ i = firstaxis
1227
+ j = _NEXT_AXIS[i+parity]
1228
+ k = _NEXT_AXIS[i-parity+1]
1229
+
1230
+ if frame:
1231
+ ai, ak = ak, ai
1232
+ if parity:
1233
+ ai, aj, ak = -ai, -aj, -ak
1234
+
1235
+ si, sj, sk = math.sin(ai), math.sin(aj), math.sin(ak)
1236
+ ci, cj, ck = math.cos(ai), math.cos(aj), math.cos(ak)
1237
+ cc, cs = ci*ck, ci*sk
1238
+ sc, ss = si*ck, si*sk
1239
+
1240
+ M = numpy.identity(4)
1241
+ if repetition:
1242
+ M[i, i] = cj
1243
+ M[i, j] = sj*si
1244
+ M[i, k] = sj*ci
1245
+ M[j, i] = sj*sk
1246
+ M[j, j] = -cj*ss+cc
1247
+ M[j, k] = -cj*cs-sc
1248
+ M[k, i] = -sj*ck
1249
+ M[k, j] = cj*sc+cs
1250
+ M[k, k] = cj*cc-ss
1251
+ else:
1252
+ M[i, i] = cj*ck
1253
+ M[i, j] = sj*sc-cs
1254
+ M[i, k] = sj*cc+ss
1255
+ M[j, i] = cj*sk
1256
+ M[j, j] = sj*ss+cc
1257
+ M[j, k] = sj*cs-sc
1258
+ M[k, i] = -sj
1259
+ M[k, j] = cj*si
1260
+ M[k, k] = cj*ci
1261
+ return M
1262
+
1263
+
1264
+ def euler_from_matrix(matrix, axes='sxyz'):
1265
+ """Return Euler angles from rotation matrix for specified axis sequence.
1266
+
1267
+ axes : One of 24 axis sequences as string or encoded tuple
1268
+
1269
+ Note that many Euler angle triplets can describe one matrix.
1270
+
1271
+ >>> R0 = euler_matrix(1, 2, 3, 'syxz')
1272
+ >>> al, be, ga = euler_from_matrix(R0, 'syxz')
1273
+ >>> R1 = euler_matrix(al, be, ga, 'syxz')
1274
+ >>> numpy.allclose(R0, R1)
1275
+ True
1276
+ >>> angles = (4.0*math.pi) * (numpy.random.random(3) - 0.5)
1277
+ >>> for axes in _AXES2TUPLE.keys():
1278
+ ... R0 = euler_matrix(axes=axes, *angles)
1279
+ ... R1 = euler_matrix(axes=axes, *euler_from_matrix(R0, axes))
1280
+ ... if not numpy.allclose(R0, R1): print(axes, "failed")
1281
+
1282
+ """
1283
+ try:
1284
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
1285
+ except (AttributeError, KeyError):
1286
+ _ = _TUPLE2AXES[axes]
1287
+ firstaxis, parity, repetition, frame = axes
1288
+
1289
+ i = firstaxis
1290
+ j = _NEXT_AXIS[i+parity]
1291
+ k = _NEXT_AXIS[i-parity+1]
1292
+
1293
+ M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:3, :3]
1294
+ if repetition:
1295
+ sy = math.sqrt(M[i, j]*M[i, j] + M[i, k]*M[i, k])
1296
+ if sy > _EPS:
1297
+ ax = math.atan2(M[i, j], M[i, k])
1298
+ ay = math.atan2(sy, M[i, i])
1299
+ az = math.atan2(M[j, i], -M[k, i])
1300
+ else:
1301
+ ax = math.atan2(-M[j, k], M[j, j])
1302
+ ay = math.atan2(sy, M[i, i])
1303
+ az = 0.0
1304
+ else:
1305
+ cy = math.sqrt(M[i, i]*M[i, i] + M[j, i]*M[j, i])
1306
+ if cy > _EPS:
1307
+ ax = math.atan2(M[k, j], M[k, k])
1308
+ ay = math.atan2(-M[k, i], cy)
1309
+ az = math.atan2(M[j, i], M[i, i])
1310
+ else:
1311
+ ax = math.atan2(-M[j, k], M[j, j])
1312
+ ay = math.atan2(-M[k, i], cy)
1313
+ az = 0.0
1314
+
1315
+ if parity:
1316
+ ax, ay, az = -ax, -ay, -az
1317
+ if frame:
1318
+ ax, az = az, ax
1319
+ return ax, ay, az
1320
+
1321
+
1322
+ def euler_from_quaternion(quaternion, axes='sxyz'):
1323
+ """Return Euler angles from quaternion for specified axis sequence.
1324
+
1325
+ >>> angles = euler_from_quaternion([0.06146124, 0, 0, 0.99810947])
1326
+ >>> numpy.allclose(angles, [0.123, 0, 0])
1327
+ True
1328
+
1329
+ """
1330
+ return euler_from_matrix(quaternion_matrix(quaternion), axes)
1331
+
1332
+
1333
+ def quaternion_from_euler(ai, aj, ak, axes='sxyz'):
1334
+ """Return quaternion from Euler angles and axis sequence.
1335
+
1336
+ ai, aj, ak : Euler's roll, pitch and yaw angles
1337
+ axes : One of 24 axis sequences as string or encoded tuple
1338
+
1339
+ >>> q = quaternion_from_euler(1, 2, 3, 'ryxz')
1340
+ >>> numpy.allclose(q, [0.310622, -0.718287, 0.444435, 0.435953])
1341
+ True
1342
+
1343
+ """
1344
+ try:
1345
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
1346
+ except (AttributeError, KeyError):
1347
+ _ = _TUPLE2AXES[axes]
1348
+ firstaxis, parity, repetition, frame = axes
1349
+
1350
+ i = firstaxis
1351
+ j = _NEXT_AXIS[i+parity]
1352
+ k = _NEXT_AXIS[i-parity+1]
1353
+
1354
+ if frame:
1355
+ ai, ak = ak, ai
1356
+ if parity:
1357
+ aj = -aj
1358
+
1359
+ ai /= 2.0
1360
+ aj /= 2.0
1361
+ ak /= 2.0
1362
+ ci = math.cos(ai)
1363
+ si = math.sin(ai)
1364
+ cj = math.cos(aj)
1365
+ sj = math.sin(aj)
1366
+ ck = math.cos(ak)
1367
+ sk = math.sin(ak)
1368
+ cc = ci*ck
1369
+ cs = ci*sk
1370
+ sc = si*ck
1371
+ ss = si*sk
1372
+
1373
+ quaternion = numpy.empty((4, ), dtype=numpy.float64)
1374
+ if repetition:
1375
+ quaternion[i] = cj*(cs + sc)
1376
+ quaternion[j] = sj*(cc + ss)
1377
+ quaternion[k] = sj*(cs - sc)
1378
+ quaternion[3] = cj*(cc - ss)
1379
+ else:
1380
+ quaternion[i] = cj*sc - sj*cs
1381
+ quaternion[j] = cj*ss + sj*cc
1382
+ quaternion[k] = cj*cs - sj*sc
1383
+ quaternion[3] = cj*cc + sj*ss
1384
+ if parity:
1385
+ quaternion[j] *= -1
1386
+
1387
+ return quaternion
1388
+
1389
+
1390
+ def quaternion_about_axis(angle, axis):
1391
+ """Return quaternion for rotation about axis.
1392
+
1393
+ >>> q = quaternion_about_axis(0.123, (1, 0, 0))
1394
+ >>> numpy.allclose(q, [0.06146124, 0, 0, 0.99810947])
1395
+ True
1396
+
1397
+ """
1398
+ quaternion = numpy.zeros((4, ), dtype=numpy.float64)
1399
+ quaternion[:3] = axis[:3]
1400
+ qlen = vector_norm(quaternion)
1401
+ if qlen > _EPS:
1402
+ quaternion *= math.sin(angle/2.0) / qlen
1403
+ quaternion[3] = math.cos(angle/2.0)
1404
+ return quaternion
1405
+
1406
+
1407
+ def matrix_from_quaternion(quaternion):
1408
+ return quaternion_matrix(quaternion)
1409
+
1410
+
1411
+ def quaternion_matrix(quaternion):
1412
+ """Return homogeneous rotation matrix from quaternion.
1413
+
1414
+ >>> R = quaternion_matrix([0.06146124, 0, 0, 0.99810947])
1415
+ >>> numpy.allclose(R, rotation_matrix(0.123, (1, 0, 0)))
1416
+ True
1417
+
1418
+ """
1419
+ q = numpy.array(quaternion[:4], dtype=numpy.float64, copy=True)
1420
+ nq = numpy.dot(q, q)
1421
+ if nq < _EPS:
1422
+ return numpy.identity(4)
1423
+ q *= math.sqrt(2.0 / nq)
1424
+ q = numpy.outer(q, q)
1425
+ return numpy.array((
1426
+ (1.0-q[1, 1]-q[2, 2], q[0, 1]-q[2, 3], q[0, 2]+q[1, 3], 0.0),
1427
+ (q[0, 1]+q[2, 3], 1.0-q[0, 0]-q[2, 2], q[1, 2]-q[0, 3], 0.0),
1428
+ (q[0, 2]-q[1, 3], q[1, 2]+q[0, 3], 1.0-q[0, 0]-q[1, 1], 0.0),
1429
+ (0.0, 0.0, 0.0, 1.0)
1430
+ ), dtype=numpy.float64)
1431
+
1432
+
1433
+ def quaternionJPL_matrix(quaternion):
1434
+ """Return homogeneous rotation matrix from quaternion in JPL notation.
1435
+ quaternion = [x y z w]
1436
+ """
1437
+ q0 = quaternion[0]
1438
+ q1 = quaternion[1]
1439
+ q2 = quaternion[2]
1440
+ q3 = quaternion[3]
1441
+ return numpy.array([
1442
+ [q0**2 - q1**2 - q2**2 + q3**2, 2.0*q0*q1 +
1443
+ 2.0*q2*q3, 2.0*q0*q2 - 2.0*q1*q3, 0],
1444
+ [2.0*q0*q1 - 2.0*q2*q3, - q0**2 + q1**2 -
1445
+ q2**2 + q3**2, 2.0*q0*q3 + 2.0*q1*q2, 0],
1446
+ [2.0*q0*q2 + 2.0*q1*q3, 2.0*q1*q2 - 2.0*q0 *
1447
+ q3, - q0**2 - q1**2 + q2**2 + q3**2, 0],
1448
+ [0, 0, 0, 1.0]], dtype=numpy.float64)
1449
+
1450
+
1451
+ def quaternion_from_matrix(matrix):
1452
+ """Return quaternion from rotation matrix.
1453
+
1454
+ >>> R = rotation_matrix(0.123, (1, 2, 3))
1455
+ >>> q = quaternion_from_matrix(R)
1456
+ >>> numpy.allclose(q, [0.0164262, 0.0328524, 0.0492786, 0.9981095])
1457
+ True
1458
+
1459
+ """
1460
+ q = numpy.empty((4, ), dtype=numpy.float64)
1461
+ M = numpy.array(matrix, dtype=numpy.float64, copy=False)[:4, :4]
1462
+ t = numpy.trace(M)
1463
+ if t > M[3, 3]:
1464
+ q[3] = t
1465
+ q[2] = M[1, 0] - M[0, 1]
1466
+ q[1] = M[0, 2] - M[2, 0]
1467
+ q[0] = M[2, 1] - M[1, 2]
1468
+ else:
1469
+ i, j, k = 0, 1, 2
1470
+ if M[1, 1] > M[0, 0]:
1471
+ i, j, k = 1, 2, 0
1472
+ if M[2, 2] > M[i, i]:
1473
+ i, j, k = 2, 0, 1
1474
+ t = M[i, i] - (M[j, j] + M[k, k]) + M[3, 3]
1475
+ q[i] = t
1476
+ q[j] = M[i, j] + M[j, i]
1477
+ q[k] = M[k, i] + M[i, k]
1478
+ q[3] = M[k, j] - M[j, k]
1479
+ q *= 0.5 / math.sqrt(t * M[3, 3])
1480
+ return q
1481
+
1482
+
1483
+ def quaternion_multiply(quaternion1, quaternion0):
1484
+ """Return multiplication of two quaternions.
1485
+
1486
+ >>> q = quaternion_multiply([1, -2, 3, 4], [-5, 6, 7, 8])
1487
+ >>> numpy.allclose(q, [-44, -14, 48, 28])
1488
+ True
1489
+
1490
+ """
1491
+ x0, y0, z0, w0 = quaternion0
1492
+ x1, y1, z1, w1 = quaternion1
1493
+ return numpy.array((
1494
+ x1*w0 + y1*z0 - z1*y0 + w1*x0,
1495
+ -x1*z0 + y1*w0 + z1*x0 + w1*y0,
1496
+ x1*y0 - y1*x0 + z1*w0 + w1*z0,
1497
+ -x1*x0 - y1*y0 - z1*z0 + w1*w0), dtype=numpy.float64)
1498
+
1499
+
1500
+ def quaternion_conjugate(quaternion):
1501
+ """Return conjugate of quaternion.
1502
+
1503
+ >>> q0 = random_quaternion()
1504
+ >>> q1 = quaternion_conjugate(q0)
1505
+ >>> q1[3] == q0[3] and all(q1[:3] == -q0[:3])
1506
+ True
1507
+
1508
+ """
1509
+ return numpy.array((-quaternion[0], -quaternion[1],
1510
+ -quaternion[2], quaternion[3]), dtype=numpy.float64)
1511
+
1512
+
1513
+ def quaternion_inverse(quaternion):
1514
+ """Return inverse of quaternion.
1515
+
1516
+ >>> q0 = random_quaternion()
1517
+ >>> q1 = quaternion_inverse(q0)
1518
+ >>> numpy.allclose(quaternion_multiply(q0, q1), [0, 0, 0, 1])
1519
+ True
1520
+
1521
+ """
1522
+ return quaternion_conjugate(quaternion) / numpy.dot(quaternion, quaternion)
1523
+
1524
+
1525
+ def quaternion_slerp(quat0, quat1, fraction, spin=0, shortestpath=True):
1526
+ """Return spherical linear interpolation between two quaternions.
1527
+
1528
+ >>> q0 = random_quaternion()
1529
+ >>> q1 = random_quaternion()
1530
+ >>> q = quaternion_slerp(q0, q1, 0.0)
1531
+ >>> numpy.allclose(q, q0)
1532
+ True
1533
+ >>> q = quaternion_slerp(q0, q1, 1.0, 1)
1534
+ >>> numpy.allclose(q, q1)
1535
+ True
1536
+ >>> q = quaternion_slerp(q0, q1, 0.5)
1537
+ >>> angle = math.acos(numpy.dot(q0, q))
1538
+ >>> numpy.allclose(2.0, math.acos(numpy.dot(q0, q1)) / angle) or \
1539
+ numpy.allclose(2.0, math.acos(-numpy.dot(q0, q1)) / angle)
1540
+ True
1541
+
1542
+ """
1543
+ q0 = unit_vector(quat0[:4])
1544
+ q1 = unit_vector(quat1[:4])
1545
+ if fraction == 0.0:
1546
+ return q0
1547
+ elif fraction == 1.0:
1548
+ return q1
1549
+ d = numpy.dot(q0, q1)
1550
+ if abs(abs(d) - 1.0) < _EPS:
1551
+ return q0
1552
+ if shortestpath and d < 0.0:
1553
+ # invert rotation
1554
+ d = -d
1555
+ q1 *= -1.0
1556
+ angle = math.acos(d) + spin * math.pi
1557
+ if abs(angle) < _EPS:
1558
+ return q0
1559
+ isin = 1.0 / math.sin(angle)
1560
+ q0 *= math.sin((1.0 - fraction) * angle) * isin
1561
+ q1 *= math.sin(fraction * angle) * isin
1562
+ q0 += q1
1563
+ return q0
1564
+
1565
+
1566
+ def random_quaternion(rand=None):
1567
+ """Return uniform random unit quaternion.
1568
+
1569
+ rand: array like or None
1570
+ Three independent random variables that are uniformly distributed
1571
+ between 0 and 1.
1572
+
1573
+ >>> q = random_quaternion()
1574
+ >>> numpy.allclose(1.0, vector_norm(q))
1575
+ True
1576
+ >>> q = random_quaternion(numpy.random.random(3))
1577
+ >>> q.shape
1578
+ (4,)
1579
+
1580
+ """
1581
+ if rand is None:
1582
+ rand = numpy.random.rand(3)
1583
+ else:
1584
+ assert len(rand) == 3
1585
+ r1 = numpy.sqrt(1.0 - rand[0])
1586
+ r2 = numpy.sqrt(rand[0])
1587
+ pi2 = math.pi * 2.0
1588
+ t1 = pi2 * rand[1]
1589
+ t2 = pi2 * rand[2]
1590
+ return numpy.array((numpy.sin(t1)*r1,
1591
+ numpy.cos(t1)*r1,
1592
+ numpy.sin(t2)*r2,
1593
+ numpy.cos(t2)*r2), dtype=numpy.float64)
1594
+
1595
+
1596
+ def random_rotation_matrix(rand=None):
1597
+ """Return uniform random rotation matrix.
1598
+
1599
+ rand: array like or None
1600
+ Three independent random variables that are uniformly distributed
1601
+ between 0 and 1 for each returned quaternion.
1602
+
1603
+ >>> R = random_rotation_matrix()
1604
+ >>> numpy.allclose(numpy.dot(R.T, R), numpy.identity(4))
1605
+ True
1606
+
1607
+ """
1608
+ return quaternion_matrix(random_quaternion(rand))
1609
+
1610
+
1611
+ def random_direction_3d():
1612
+ """ equal-area projection according to:
1613
+ https://math.stackexchange.com/questions/44689/how-to-find-a-random-axis-or-unit-vector-in-3d
1614
+ cfo, 2015/10/16
1615
+ """
1616
+ z = numpy.random.rand() * 2.0 - 1.0
1617
+ t = numpy.random.rand() * 2.0 * numpy.pi
1618
+ r = numpy.sqrt(1.0 - z*z)
1619
+ x = r * numpy.cos(t)
1620
+ y = r * numpy.sin(t)
1621
+ return numpy.array([x, y, z], dtype=numpy.float64)
1622
+
1623
+
1624
+ class Arcball(object):
1625
+ """Virtual Trackball Control.
1626
+
1627
+ >>> ball = Arcball()
1628
+ >>> ball = Arcball(initial=numpy.identity(4))
1629
+ >>> ball.place([320, 320], 320)
1630
+ >>> ball.down([500, 250])
1631
+ >>> ball.drag([475, 275])
1632
+ >>> R = ball.matrix()
1633
+ >>> numpy.allclose(numpy.sum(R), 3.90583455)
1634
+ True
1635
+ >>> ball = Arcball(initial=[0, 0, 0, 1])
1636
+ >>> ball.place([320, 320], 320)
1637
+ >>> ball.setaxes([1,1,0], [-1, 1, 0])
1638
+ >>> ball.setconstrain(True)
1639
+ >>> ball.down([400, 200])
1640
+ >>> ball.drag([200, 400])
1641
+ >>> R = ball.matrix()
1642
+ >>> numpy.allclose(numpy.sum(R), 0.2055924)
1643
+ True
1644
+ >>> ball.next()
1645
+
1646
+ """
1647
+
1648
+ def __init__(self, initial=None):
1649
+ """Initialize virtual trackball control.
1650
+
1651
+ initial : quaternion or rotation matrix
1652
+
1653
+ """
1654
+ self._axis = None
1655
+ self._axes = None
1656
+ self._radius = 1.0
1657
+ self._center = [0.0, 0.0]
1658
+ self._vdown = numpy.array([0, 0, 1], dtype=numpy.float64)
1659
+ self._constrain = False
1660
+
1661
+ if initial is None:
1662
+ self._qdown = numpy.array([0, 0, 0, 1], dtype=numpy.float64)
1663
+ else:
1664
+ initial = numpy.array(initial, dtype=numpy.float64)
1665
+ if initial.shape == (4, 4):
1666
+ self._qdown = quaternion_from_matrix(initial)
1667
+ elif initial.shape == (4, ):
1668
+ initial /= vector_norm(initial)
1669
+ self._qdown = initial
1670
+ else:
1671
+ raise ValueError("initial not a quaternion or matrix.")
1672
+
1673
+ self._qnow = self._qpre = self._qdown
1674
+
1675
+ def place(self, center, radius):
1676
+ """Place Arcball, e.g. when window size changes.
1677
+
1678
+ center : sequence[2]
1679
+ Window coordinates of trackball center.
1680
+ radius : float
1681
+ Radius of trackball in window coordinates.
1682
+
1683
+ """
1684
+ self._radius = float(radius)
1685
+ self._center[0] = center[0]
1686
+ self._center[1] = center[1]
1687
+
1688
+ def setaxes(self, *axes):
1689
+ """Set axes to constrain rotations."""
1690
+ if axes is None:
1691
+ self._axes = None
1692
+ else:
1693
+ self._axes = [unit_vector(axis) for axis in axes]
1694
+
1695
+ def setconstrain(self, constrain):
1696
+ """Set state of constrain to axis mode."""
1697
+ self._constrain = constrain == True
1698
+
1699
+ def getconstrain(self):
1700
+ """Return state of constrain to axis mode."""
1701
+ return self._constrain
1702
+
1703
+ def down(self, point):
1704
+ """Set initial cursor window coordinates and pick constrain-axis."""
1705
+ self._vdown = arcball_map_to_sphere(point, self._center, self._radius)
1706
+ self._qdown = self._qpre = self._qnow
1707
+
1708
+ if self._constrain and self._axes is not None:
1709
+ self._axis = arcball_nearest_axis(self._vdown, self._axes)
1710
+ self._vdown = arcball_constrain_to_axis(self._vdown, self._axis)
1711
+ else:
1712
+ self._axis = None
1713
+
1714
+ def drag(self, point):
1715
+ """Update current cursor window coordinates."""
1716
+ vnow = arcball_map_to_sphere(point, self._center, self._radius)
1717
+
1718
+ if self._axis is not None:
1719
+ vnow = arcball_constrain_to_axis(vnow, self._axis)
1720
+
1721
+ self._qpre = self._qnow
1722
+
1723
+ t = numpy.cross(self._vdown, vnow)
1724
+ if numpy.dot(t, t) < _EPS:
1725
+ self._qnow = self._qdown
1726
+ else:
1727
+ q = [t[0], t[1], t[2], numpy.dot(self._vdown, vnow)]
1728
+ self._qnow = quaternion_multiply(q, self._qdown)
1729
+
1730
+ def next(self, acceleration=0.0):
1731
+ """Continue rotation in direction of last drag."""
1732
+ q = quaternion_slerp(self._qpre, self._qnow, 2.0+acceleration, False)
1733
+ self._qpre, self._qnow = self._qnow, q
1734
+
1735
+ def matrix(self):
1736
+ """Return homogeneous rotation matrix."""
1737
+ return quaternion_matrix(self._qnow)
1738
+
1739
+
1740
+ def arcball_map_to_sphere(point, center, radius):
1741
+ """Return unit sphere coordinates from window coordinates."""
1742
+ v = numpy.array(((point[0] - center[0]) / radius,
1743
+ (center[1] - point[1]) / radius,
1744
+ 0.0), dtype=numpy.float64)
1745
+ n = v[0]*v[0] + v[1]*v[1]
1746
+ if n > 1.0:
1747
+ v /= math.sqrt(n) # position outside of sphere
1748
+ else:
1749
+ v[2] = math.sqrt(1.0 - n)
1750
+ return v
1751
+
1752
+
1753
+ def arcball_constrain_to_axis(point, axis):
1754
+ """Return sphere point perpendicular to axis."""
1755
+ v = numpy.array(point, dtype=numpy.float64, copy=True)
1756
+ a = numpy.array(axis, dtype=numpy.float64, copy=True)
1757
+ v -= a * numpy.dot(a, v) # on plane
1758
+ n = vector_norm(v)
1759
+ if n > _EPS:
1760
+ if v[2] < 0.0:
1761
+ v *= -1.0
1762
+ v /= n
1763
+ return v
1764
+ if a[2] == 1.0:
1765
+ return numpy.array([1, 0, 0], dtype=numpy.float64)
1766
+ return unit_vector([-a[1], a[0], 0])
1767
+
1768
+
1769
+ def arcball_nearest_axis(point, axes):
1770
+ """Return axis, which arc is nearest to point."""
1771
+ point = numpy.array(point, dtype=numpy.float64, copy=False)
1772
+ nearest = None
1773
+ mx = -1.0
1774
+ for axis in axes:
1775
+ t = numpy.dot(arcball_constrain_to_axis(point, axis), point)
1776
+ if t > mx:
1777
+ nearest = axis
1778
+ mx = t
1779
+ return nearest
1780
+
1781
+
1782
+ # epsilon for testing whether a number is close to zero
1783
+ _EPS = numpy.finfo(float).eps * 4.0
1784
+
1785
+ # axis sequences for Euler angles
1786
+ _NEXT_AXIS = [1, 2, 0, 1]
1787
+
1788
+ # map axes strings to/from tuples of inner axis, parity, repetition, frame
1789
+ _AXES2TUPLE = {
1790
+ 'sxyz': (0, 0, 0, 0), 'sxyx': (0, 0, 1, 0), 'sxzy': (0, 1, 0, 0),
1791
+ 'sxzx': (0, 1, 1, 0), 'syzx': (1, 0, 0, 0), 'syzy': (1, 0, 1, 0),
1792
+ 'syxz': (1, 1, 0, 0), 'syxy': (1, 1, 1, 0), 'szxy': (2, 0, 0, 0),
1793
+ 'szxz': (2, 0, 1, 0), 'szyx': (2, 1, 0, 0), 'szyz': (2, 1, 1, 0),
1794
+ 'rzyx': (0, 0, 0, 1), 'rxyx': (0, 0, 1, 1), 'ryzx': (0, 1, 0, 1),
1795
+ 'rxzx': (0, 1, 1, 1), 'rxzy': (1, 0, 0, 1), 'ryzy': (1, 0, 1, 1),
1796
+ 'rzxy': (1, 1, 0, 1), 'ryxy': (1, 1, 1, 1), 'ryxz': (2, 0, 0, 1),
1797
+ 'rzxz': (2, 0, 1, 1), 'rxyz': (2, 1, 0, 1), 'rzyz': (2, 1, 1, 1)}
1798
+
1799
+ _TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items())
1800
+
1801
+ # helper functions
1802
+
1803
+
1804
+ def vector_norm(data, axis=None, out=None):
1805
+ """Return length, i.e. eucledian norm, of ndarray along axis.
1806
+
1807
+ >>> v = numpy.random.random(3)
1808
+ >>> n = vector_norm(v)
1809
+ >>> numpy.allclose(n, numpy.linalg.norm(v))
1810
+ True
1811
+ >>> v = numpy.random.rand(6, 5, 3)
1812
+ >>> n = vector_norm(v, axis=-1)
1813
+ >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=2)))
1814
+ True
1815
+ >>> n = vector_norm(v, axis=1)
1816
+ >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=1)))
1817
+ True
1818
+ >>> v = numpy.random.rand(5, 4, 3)
1819
+ >>> n = numpy.empty((5, 3), dtype=numpy.float64)
1820
+ >>> vector_norm(v, axis=1, out=n)
1821
+ >>> numpy.allclose(n, numpy.sqrt(numpy.sum(v*v, axis=1)))
1822
+ True
1823
+ >>> vector_norm([])
1824
+ 0.0
1825
+ >>> vector_norm([1.0])
1826
+ 1.0
1827
+
1828
+ """
1829
+ data = numpy.array(data, dtype=numpy.float64, copy=True)
1830
+ if out is None:
1831
+ if data.ndim == 1:
1832
+ return math.sqrt(numpy.dot(data, data))
1833
+ data *= data
1834
+ out = numpy.atleast_1d(numpy.sum(data, axis=axis))
1835
+ numpy.sqrt(out, out)
1836
+ return out
1837
+ else:
1838
+ data *= data
1839
+ numpy.sum(data, axis=axis, out=out)
1840
+ numpy.sqrt(out, out)
1841
+
1842
+
1843
+ def unit_vector(data, axis=None, out=None):
1844
+ """Return ndarray normalized by length, i.e. eucledian norm, along axis.
1845
+
1846
+ >>> v0 = numpy.random.random(3)
1847
+ >>> v1 = unit_vector(v0)
1848
+ >>> numpy.allclose(v1, v0 / numpy.linalg.norm(v0))
1849
+ True
1850
+ >>> v0 = numpy.random.rand(5, 4, 3)
1851
+ >>> v1 = unit_vector(v0, axis=-1)
1852
+ >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=2)), 2)
1853
+ >>> numpy.allclose(v1, v2)
1854
+ True
1855
+ >>> v1 = unit_vector(v0, axis=1)
1856
+ >>> v2 = v0 / numpy.expand_dims(numpy.sqrt(numpy.sum(v0*v0, axis=1)), 1)
1857
+ >>> numpy.allclose(v1, v2)
1858
+ True
1859
+ >>> v1 = numpy.empty((5, 4, 3), dtype=numpy.float64)
1860
+ >>> unit_vector(v0, axis=1, out=v1)
1861
+ >>> numpy.allclose(v1, v2)
1862
+ True
1863
+ >>> list(unit_vector([]))
1864
+ []
1865
+ >>> list(unit_vector([1.0]))
1866
+ [1.0]
1867
+
1868
+ """
1869
+ if out is None:
1870
+ data = numpy.array(data, dtype=numpy.float64, copy=True)
1871
+ if data.ndim == 1:
1872
+ data /= math.sqrt(numpy.dot(data, data))
1873
+ return data
1874
+ else:
1875
+ if out is not data:
1876
+ out[:] = numpy.array(data, copy=False)
1877
+ data = out
1878
+ length = numpy.atleast_1d(numpy.sum(data*data, axis))
1879
+ numpy.sqrt(length, length)
1880
+ if axis is not None:
1881
+ length = numpy.expand_dims(length, axis)
1882
+ data /= length
1883
+ if out is None:
1884
+ return data
1885
+
1886
+
1887
+ def random_vector(size):
1888
+ """Return array of random doubles in the half-open interval [0.0, 1.0).
1889
+
1890
+ >>> v = random_vector(10000)
1891
+ >>> numpy.all(v >= 0.0) and numpy.all(v < 1.0)
1892
+ True
1893
+ >>> v0 = random_vector(10)
1894
+ >>> v1 = random_vector(10)
1895
+ >>> numpy.any(v0 == v1)
1896
+ False
1897
+
1898
+ """
1899
+ return numpy.random.random(size)
1900
+
1901
+
1902
+ def inverse_matrix(matrix):
1903
+ """Return inverse of square transformation matrix.
1904
+
1905
+ >>> M0 = random_rotation_matrix()
1906
+ >>> M1 = inverse_matrix(M0.T)
1907
+ >>> numpy.allclose(M1, numpy.linalg.inv(M0.T))
1908
+ True
1909
+ >>> for size in range(1, 7):
1910
+ ... M0 = numpy.random.rand(size, size)
1911
+ ... M1 = inverse_matrix(M0)
1912
+ ... if not numpy.allclose(M1, numpy.linalg.inv(M0)): print(size)
1913
+
1914
+ """
1915
+ return numpy.linalg.inv(matrix)
1916
+
1917
+
1918
+ def concatenate_matrices(*matrices):
1919
+ """Return concatenation of series of transformation matrices.
1920
+
1921
+ >>> M = numpy.random.rand(16).reshape((4, 4)) - 0.5
1922
+ >>> numpy.allclose(M, concatenate_matrices(M))
1923
+ True
1924
+ >>> numpy.allclose(numpy.dot(M, M.T), concatenate_matrices(M, M.T))
1925
+ True
1926
+
1927
+ """
1928
+ M = numpy.identity(4)
1929
+ for i in matrices:
1930
+ M = numpy.dot(M, i)
1931
+ return M
1932
+
1933
+
1934
+ def is_same_transform(matrix0, matrix1):
1935
+ """Return True if two matrices perform same transformation.
1936
+
1937
+ >>> is_same_transform(numpy.identity(4), numpy.identity(4))
1938
+ True
1939
+ >>> is_same_transform(numpy.identity(4), random_rotation_matrix())
1940
+ False
1941
+
1942
+ """
1943
+ matrix0 = numpy.array(matrix0, dtype=numpy.float64, copy=True)
1944
+ matrix0 /= matrix0[3, 3]
1945
+ matrix1 = numpy.array(matrix1, dtype=numpy.float64, copy=True)
1946
+ matrix1 /= matrix1[3, 3]
1947
+ return numpy.allclose(matrix0, matrix1)
1948
+
1949
+
1950
+ def _import_module(module_name, warn=True, prefix='_py_', ignore='_'):
1951
+ """Try import all public attributes from module into global namespace.
1952
+
1953
+ Existing attributes with name clashes are renamed with prefix.
1954
+ Attributes starting with underscore are ignored by default.
1955
+
1956
+ Return True on successful import.
1957
+
1958
+ """
1959
+ try:
1960
+ module = __import__(module_name)
1961
+ except ImportError:
1962
+ if warn:
1963
+ warnings.warn("Failed to import module " + module_name)
1964
+ else:
1965
+ for attr in dir(module):
1966
+ if ignore and attr.startswith(ignore):
1967
+ continue
1968
+ if prefix:
1969
+ if attr in globals():
1970
+ globals()[prefix + attr] = globals()[attr]
1971
+ elif warn:
1972
+ warnings.warn("No Python implementation of " + attr)
1973
+ globals()[attr] = getattr(module, attr)
1974
+ return True
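
A minimal round-trip sketch for the Euler/quaternion helpers defined above. The import path assumes the layout used elsewhere in this commit (utils/utils_poses/ATE/transformations.py); adjust it to your checkout.

import numpy
import utils.utils_poses.ATE.transformations as tf

# Build a rotation from static-xyz Euler angles, then recover them.
R0 = tf.euler_matrix(0.1, -0.2, 0.3, 'sxyz')
ai, aj, ak = tf.euler_from_matrix(R0, 'sxyz')
assert numpy.allclose((ai, aj, ak), (0.1, -0.2, 0.3))

# The same rotation via a quaternion in [x, y, z, w] order.
q = tf.quaternion_from_euler(0.1, -0.2, 0.3, 'sxyz')
R1 = tf.quaternion_matrix(q)
assert numpy.allclose(R0, R1)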
utils/utils_poses/align_traj.py ADDED
@@ -0,0 +1,97 @@
1
+ import numpy as np
2
+ import torch
3
+
4
+ from utils.utils_poses.ATE.align_utils import alignTrajectory
5
+ from utils.utils_poses.lie_group_helper import SO3_to_quat, convert3x4_4x4
6
+
7
+
8
+ def pts_dist_max(pts):
9
+ """
10
+ :param pts: (N, 3) torch or np
11
+ :return: scalar
12
+ """
13
+ if torch.is_tensor(pts):
14
+ dist = pts.unsqueeze(0) - pts.unsqueeze(1) # (1, N, 3) - (N, 1, 3) -> (N, N, 3)
15
+ dist = dist[0] # (N, 3)
16
+ dist = dist.norm(dim=1) # (N, )
17
+ max_dist = dist.max()
18
+ else:
19
+ dist = pts[None, :, :] - pts[:, None, :] # (1, N, 3) - (N, 1, 3) -> (N, N, 3)
20
+ dist = dist[0] # (N, 3)
21
+ dist = np.linalg.norm(dist, axis=1) # (N, )
22
+ max_dist = dist.max()
23
+ return max_dist
24
+
25
+
26
+ def align_ate_c2b_use_a2b(traj_a, traj_b, traj_c=None, method='sim3'):
27
+ """Align c to b using the sim3 from a to b.
28
+ :param traj_a: (N0, 3/4, 4) torch tensor
29
+ :param traj_b: (N0, 3/4, 4) torch tensor
30
+ :param traj_c: None or (N1, 3/4, 4) torch tensor
31
+ :return: (N1, 4, 4) torch tensor
32
+ """
33
+ device = traj_a.device
34
+ if traj_c is None:
35
+ traj_c = traj_a.clone()
36
+
37
+ traj_a = traj_a.float().cpu().numpy()
38
+ traj_b = traj_b.float().cpu().numpy()
39
+ traj_c = traj_c.float().cpu().numpy()
40
+
41
+ R_a = traj_a[:, :3, :3] # (N0, 3, 3)
42
+ t_a = traj_a[:, :3, 3] # (N0, 3)
43
+ quat_a = SO3_to_quat(R_a) # (N0, 4)
44
+
45
+ R_b = traj_b[:, :3, :3] # (N0, 3, 3)
46
+ t_b = traj_b[:, :3, 3] # (N0, 3)
47
+ quat_b = SO3_to_quat(R_b) # (N0, 4)
48
+
49
+ # This function works in quaternion.
50
+ # scalar, (3, 3), (3, ) gt = R * s * est + t.
51
+ s, R, t = alignTrajectory(t_a, t_b, quat_a, quat_b, method=method)
52
+
53
+ # reshape tensors
54
+ R = R[None, :, :].astype(np.float32) # (1, 3, 3)
55
+ t = t[None, :, None].astype(np.float32) # (1, 3, 1)
56
+ s = float(s)
57
+
58
+ R_c = traj_c[:, :3, :3] # (N1, 3, 3)
59
+ t_c = traj_c[:, :3, 3:4] # (N1, 3, 1)
60
+
61
+ R_c_aligned = R @ R_c # (N1, 3, 3)
62
+ t_c_aligned = s * (R @ t_c) + t # (N1, 3, 1)
63
+ traj_c_aligned = np.concatenate([R_c_aligned, t_c_aligned], axis=2) # (N1, 3, 4)
64
+
65
+ # append the last row
66
+ traj_c_aligned = convert3x4_4x4(traj_c_aligned) # (N1, 4, 4)
67
+
68
+ traj_c_aligned = torch.from_numpy(traj_c_aligned).to(device)
69
+ return traj_c_aligned # (N1, 4, 4)
70
+
71
+
72
+
73
+ def align_scale_c2b_use_a2b(traj_a, traj_b, traj_c=None):
74
+ '''Scale c to b using the scale from a to b.
75
+ :param traj_a: (N0, 3/4, 4) torch tensor
76
+ :param traj_b: (N0, 3/4, 4) torch tensor
77
+ :param traj_c: None or (N1, 3/4, 4) torch tensor
78
+ :return:
79
+ scaled_traj_c (N1, 4, 4) torch tensor
80
+ scale scalar
81
+ '''
82
+ if traj_c is None:
83
+ traj_c = traj_a.clone()
84
+
85
+ t_a = traj_a[:, :3, 3] # (N, 3)
86
+ t_b = traj_b[:, :3, 3] # (N, 3)
87
+
88
+ # scale estimated poses to colmap scale
89
+ # s_a2b: a*s ~ b
90
+ scale_a2b = pts_dist_max(t_b) / pts_dist_max(t_a)
91
+
92
+ traj_c[:, :3, 3] *= scale_a2b
93
+
94
+ if traj_c.shape[1] == 3:
95
+ traj_c = convert3x4_4x4(traj_c) # (N, 4, 4)
96
+
97
+ return traj_c, scale_a2b # (N, 4, 4)
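
A minimal usage sketch for the Sim(3) alignment above, on synthetic poses (the pose values are placeholders, not data from this repository):

import torch
from utils.utils_poses.align_traj import align_ate_c2b_use_a2b

# Ground-truth and estimated camera-to-world poses, (N, 4, 4).
traj_gt = torch.eye(4).unsqueeze(0).repeat(10, 1, 1)
traj_gt[:, :3, 3] = torch.rand(10, 3)                # spread the camera centres out
traj_est = traj_gt.clone()
traj_est[:, :3, 3] = 2.0 * traj_est[:, :3, 3] + 0.1  # estimate differs by scale and offset

# Fit a sim3 from the estimate to the ground truth and apply it to the estimate.
traj_est_aligned = align_ate_c2b_use_a2b(traj_est, traj_gt, method='sim3')  # (10, 4, 4)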
utils/utils_poses/comp_ate.py ADDED
@@ -0,0 +1,74 @@
1
+
2
+ import numpy as np
3
+
4
+ import utils.utils_poses.ATE.trajectory_utils as tu
5
+ import utils.utils_poses.ATE.transformations as tf
6
+ def rotation_error(pose_error):
7
+ """Compute rotation error
8
+ Args:
9
+ pose_error (4x4 array): relative pose error
10
+ Returns:
11
+ rot_error (float): rotation error
12
+ """
13
+ a = pose_error[0, 0]
14
+ b = pose_error[1, 1]
15
+ c = pose_error[2, 2]
16
+ d = 0.5*(a+b+c-1.0)
17
+ rot_error = np.arccos(max(min(d, 1.0), -1.0))
18
+ return rot_error
19
+
20
+ def translation_error(pose_error):
21
+ """Compute translation error
22
+ Args:
23
+ pose_error (4x4 array): relative pose error
24
+ Returns:
25
+ trans_error (float): translation error
26
+ """
27
+ dx = pose_error[0, 3]
28
+ dy = pose_error[1, 3]
29
+ dz = pose_error[2, 3]
30
+ trans_error = np.sqrt(dx**2+dy**2+dz**2)
31
+ return trans_error
32
+
33
+ def compute_rpe(gt, pred):
34
+ trans_errors = []
35
+ rot_errors = []
36
+ for i in range(len(gt)-1):
37
+ gt1 = gt[i]
38
+ gt2 = gt[i+1]
39
+ gt_rel = np.linalg.inv(gt1) @ gt2
40
+
41
+ pred1 = pred[i]
42
+ pred2 = pred[i+1]
43
+ pred_rel = np.linalg.inv(pred1) @ pred2
44
+ rel_err = np.linalg.inv(gt_rel) @ pred_rel
45
+
46
+ trans_errors.append(translation_error(rel_err))
47
+ rot_errors.append(rotation_error(rel_err))
48
+ rpe_trans = np.mean(np.asarray(trans_errors))
49
+ rpe_rot = np.mean(np.asarray(rot_errors))
50
+ return rpe_trans, rpe_rot
51
+
52
+ def compute_ATE(gt, pred):
53
+ """Compute RMSE of ATE
54
+ Args:
55
+ gt: ground-truth poses
56
+ pred: predicted poses
57
+ """
58
+ errors = []
59
+
60
+ for i in range(len(pred)):
61
+ # cur_gt = np.linalg.inv(gt_0) @ gt[i]
62
+ cur_gt = gt[i]
63
+ gt_xyz = cur_gt[:3, 3]
64
+
65
+ # cur_pred = np.linalg.inv(pred_0) @ pred[i]
66
+ cur_pred = pred[i]
67
+ pred_xyz = cur_pred[:3, 3]
68
+
69
+ align_err = gt_xyz - pred_xyz
70
+
71
+ errors.append(np.sqrt(np.sum(align_err ** 2)))
72
+ ate = np.sqrt(np.mean(np.asarray(errors) ** 2))
73
+ return ate
74
+
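
A minimal sketch of how these metrics are meant to be called, on synthetic 4x4 poses:

import numpy as np
from utils.utils_poses.comp_ate import compute_ATE, compute_rpe

gt = [np.eye(4) for _ in range(5)]
pred = []
for i, g in enumerate(gt):
    p = g.copy()
    p[:3, 3] += 0.01 * i                    # small, growing translation error
    pred.append(p)

ate_rmse = compute_ATE(gt, pred)            # RMSE of per-frame position error
rpe_trans, rpe_rot = compute_rpe(gt, pred)  # mean relative translation / rotation error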
utils/utils_poses/lie_group_helper.py ADDED
@@ -0,0 +1,81 @@
1
+ import numpy as np
2
+ import torch
3
+ from scipy.spatial.transform import Rotation as RotLib
4
+
5
+
6
+ def SO3_to_quat(R):
7
+ """
8
+ :param R: (N, 3, 3) or (3, 3) np
9
+ :return: (N, 4, ) or (4, ) np
10
+ """
11
+ x = RotLib.from_matrix(R)
12
+ quat = x.as_quat()
13
+ return quat
14
+
15
+
16
+ def quat_to_SO3(quat):
17
+ """
18
+ :param quat: (N, 4, ) or (4, ) np
19
+ :return: (N, 3, 3) or (3, 3) np
20
+ """
21
+ x = RotLib.from_quat(quat)
22
+ R = x.as_matrix()
23
+ return R
24
+
25
+
26
+ def convert3x4_4x4(input):
27
+ """
28
+ :param input: (N, 3, 4) or (3, 4) torch or np
29
+ :return: (N, 4, 4) or (4, 4) torch or np
30
+ """
31
+ if torch.is_tensor(input):
32
+ if len(input.shape) == 3:
33
+ output = torch.cat([input, torch.zeros_like(input[:, 0:1])], dim=1) # (N, 4, 4)
34
+ output[:, 3, 3] = 1.0
35
+ else:
36
+ output = torch.cat([input, torch.tensor([[0,0,0,1]], dtype=input.dtype, device=input.device)], dim=0) # (4, 4)
37
+ else:
38
+ if len(input.shape) == 3:
39
+ output = np.concatenate([input, np.zeros_like(input[:, 0:1])], axis=1) # (N, 4, 4)
40
+ output[:, 3, 3] = 1.0
41
+ else:
42
+ output = np.concatenate([input, np.array([[0,0,0,1]], dtype=input.dtype)], axis=0) # (4, 4)
43
+ output[3, 3] = 1.0
44
+ return output
45
+
46
+
47
+ def vec2skew(v):
48
+ """
49
+ :param v: (3, ) torch tensor
50
+ :return: (3, 3)
51
+ """
52
+ zero = torch.zeros(1, dtype=torch.float32, device=v.device)
53
+ skew_v0 = torch.cat([ zero, -v[2:3], v[1:2]]) # (3, 1)
54
+ skew_v1 = torch.cat([ v[2:3], zero, -v[0:1]])
55
+ skew_v2 = torch.cat([-v[1:2], v[0:1], zero])
56
+ skew_v = torch.stack([skew_v0, skew_v1, skew_v2], dim=0) # (3, 3)
57
+ return skew_v # (3, 3)
58
+
59
+
60
+ def Exp(r):
61
+ """so(3) vector to SO(3) matrix
62
+ :param r: (3, ) axis-angle, torch tensor
63
+ :return: (3, 3)
64
+ """
65
+ skew_r = vec2skew(r) # (3, 3)
66
+ norm_r = r.norm() + 1e-15
67
+ eye = torch.eye(3, dtype=torch.float32, device=r.device)
68
+ R = eye + (torch.sin(norm_r) / norm_r) * skew_r + ((1 - torch.cos(norm_r)) / norm_r**2) * (skew_r @ skew_r)
69
+ return R
70
+
71
+
72
+ def make_c2w(r, t):
73
+ """
74
+ :param r: (3, ) axis-angle torch tensor
75
+ :param t: (3, ) translation vector torch tensor
76
+ :return: (4, 4)
77
+ """
78
+ R = Exp(r) # (3, 3)
79
+ c2w = torch.cat([R, t.unsqueeze(1)], dim=1) # (3, 4)
80
+ c2w = convert3x4_4x4(c2w) # (4, 4)
81
+ return c2w
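
A minimal sketch using the exponential map and pose constructor above; the input values are arbitrary:

import torch
from utils.utils_poses.lie_group_helper import Exp, make_c2w

r = torch.tensor([0.0, 0.0, 0.3])   # axis-angle: 0.3 rad about the z axis
t = torch.tensor([1.0, 2.0, 3.0])
c2w = make_c2w(r, t)                # (4, 4) camera-to-world matrix
R = Exp(r)                          # (3, 3) rotation, should be orthonormal
assert torch.allclose(R @ R.T, torch.eye(3), atol=1e-6)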
utils/utils_poses/relative_pose.py ADDED
@@ -0,0 +1,20 @@
 
1
+ import torch
2
+ import numpy as np
3
+
4
+
5
+ def compute_relative_world_to_camera(R1, t1, R2, t2):
6
+ zero_row = torch.tensor([[0, 0, 0, 1]], dtype=torch.float32, device="cuda") #, requires_grad=True
7
+ E1_inv = torch.cat([torch.transpose(R1, 0, 1), -torch.transpose(R1, 0, 1) @ t1.reshape(-1, 1)], dim=1)
8
+ E1_inv = torch.cat([E1_inv, zero_row], dim=0)
9
+ E2 = torch.cat([R2, -R2 @ t2.reshape(-1, 1)], dim=1)
10
+ E2 = torch.cat([E2, zero_row], dim=0)
11
+
12
+ # Compute relative transformation
13
+ E_rel = E2 @ E1_inv
14
+
15
+ # # Extract rotation and translation
16
+ # R_rel = E_rel[:3, :3]
17
+ # t_rel = E_rel[:3, 3]
18
+ # E_rel = torch.cat([E_rel, zero_row], dim=0)
19
+
20
+ return E_rel
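
A minimal calling sketch; note the helper allocates its constant row on "cuda", so the inputs must be CUDA tensors:

import torch
from utils.utils_poses.relative_pose import compute_relative_world_to_camera

if torch.cuda.is_available():
    R1 = torch.eye(3, device="cuda")
    t1 = torch.zeros(3, device="cuda")
    R2 = torch.eye(3, device="cuda")
    t2 = torch.tensor([0.0, 0.0, 1.0], device="cuda")
    E_rel = compute_relative_world_to_camera(R1, t1, R2, t2)  # (4, 4) relative extrinsic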
utils/utils_poses/vis_cam_traj.py ADDED
@@ -0,0 +1,138 @@
1
+ # This file is modified from NeRF++: https://github.com/Kai-46/nerfplusplus
2
+
3
+ import numpy as np
4
+
5
+ try:
6
+ import open3d as o3d
7
+ except ImportError:
8
+ pass
9
+
10
+
11
+ def frustums2lineset(frustums):
12
+ N = len(frustums)
13
+ merged_points = np.zeros((N*5, 3)) # 5 vertices per frustum
14
+ merged_lines = np.zeros((N*8, 2)) # 8 lines per frustum
15
+ merged_colors = np.zeros((N*8, 3)) # each line gets a color
16
+
17
+ for i, (frustum_points, frustum_lines, frustum_colors) in enumerate(frustums):
18
+ merged_points[i*5:(i+1)*5, :] = frustum_points
19
+ merged_lines[i*8:(i+1)*8, :] = frustum_lines + i*5
20
+ merged_colors[i*8:(i+1)*8, :] = frustum_colors
21
+
22
+ lineset = o3d.geometry.LineSet()
23
+ lineset.points = o3d.utility.Vector3dVector(merged_points)
24
+ lineset.lines = o3d.utility.Vector2iVector(merged_lines)
25
+ lineset.colors = o3d.utility.Vector3dVector(merged_colors)
26
+
27
+ return lineset
28
+
29
+
30
+ def get_camera_frustum_opengl_coord(H, W, fx, fy, W2C, frustum_length=0.5, color=np.array([0., 1., 0.])):
31
+ '''X right, Y up, Z backward to the observer.
32
+ :param H, W:
33
+ :param fx, fy:
34
+ :param W2C: (4, 4) matrix
35
+ :param frustum_length: scalar: scale the frustum
36
+ :param color: (3,) list, frustum line color
37
+ :return:
38
+ frustum_points: (5, 3) frustum points in world coordinate
39
+ frustum_lines: (8, 2) 8 lines connect 5 frustum points, specified in line start/end index.
40
+ frustum_colors: (8, 3) colors for 8 lines.
41
+ '''
42
+ hfov = np.rad2deg(np.arctan(W / 2. / fx) * 2.)
43
+ vfov = np.rad2deg(np.arctan(H / 2. / fy) * 2.)
44
+ half_w = frustum_length * np.tan(np.deg2rad(hfov / 2.))
45
+ half_h = frustum_length * np.tan(np.deg2rad(vfov / 2.))
46
+
47
+ # build view frustum in camera space in homogeneous coordinates (5, 4)
48
+ frustum_points = np.array([[0., 0., 0., 1.0], # frustum origin
49
+ [-half_w, half_h, -frustum_length, 1.0], # top-left image corner
50
+ [half_w, half_h, -frustum_length, 1.0], # top-right image corner
51
+ [half_w, -half_h, -frustum_length, 1.0], # bottom-right image corner
52
+ [-half_w, -half_h, -frustum_length, 1.0]]) # bottom-left image corner
53
+ frustum_lines = np.array([[0, i] for i in range(1, 5)] + [[i, (i+1)] for i in range(1, 4)] + [[4, 1]]) # (8, 2)
54
+ frustum_colors = np.tile(color.reshape((1, 3)), (frustum_lines.shape[0], 1)) # (8, 3)
55
+
56
+ # transform view frustum from camera space to world space
57
+ C2W = np.linalg.inv(W2C)
58
+ frustum_points = np.matmul(C2W, frustum_points.T).T # (5, 4)
59
+ frustum_points = frustum_points[:, :3] / frustum_points[:, 3:4] # (5, 3) remove homogeneous coordinate
60
+ return frustum_points, frustum_lines, frustum_colors
61
+
62
+ def get_camera_frustum_opencv_coord(H, W, fx, fy, W2C, frustum_length=0.5, color=np.array([0., 1., 0.])):
63
+ '''X right, Y down, Z forward (OpenCV camera convention).
64
+ :param H, W:
65
+ :param fx, fy:
66
+ :param W2C: (4, 4) matrix
67
+ :param frustum_length: scalar: scale the frustum
68
+ :param color: (3,) list, frustum line color
69
+ :return:
70
+ frustum_points: (5, 3) frustum points in world coordinate
71
+ frustum_lines: (8, 2) 8 lines connect 5 frustum points, specified in line start/end index.
72
+ frustum_colors: (8, 3) colors for 8 lines.
73
+ '''
74
+ hfov = np.rad2deg(np.arctan(W / 2. / fx) * 2.)
75
+ vfov = np.rad2deg(np.arctan(H / 2. / fy) * 2.)
76
+ half_w = frustum_length * np.tan(np.deg2rad(hfov / 2.))
77
+ half_h = frustum_length * np.tan(np.deg2rad(vfov / 2.))
78
+
79
+ # build view frustum in camera space in homogeneous coordinates (5, 4)
80
+ frustum_points = np.array([[0., 0., 0., 1.0], # frustum origin
81
+ [-half_w, -half_h, frustum_length, 1.0], # top-left image corner
82
+ [ half_w, -half_h, frustum_length, 1.0], # top-right image corner
83
+ [ half_w, half_h, frustum_length, 1.0], # bottom-right image corner
84
+ [-half_w, +half_h, frustum_length, 1.0]]) # bottom-left image corner
85
+ frustum_lines = np.array([[0, i] for i in range(1, 5)] + [[i, (i+1)] for i in range(1, 4)] + [[4, 1]]) # (8, 2)
86
+ frustum_colors = np.tile(color.reshape((1, 3)), (frustum_lines.shape[0], 1)) # (8, 3)
87
+
88
+ # transform view frustum from camera space to world space
89
+ C2W = np.linalg.inv(W2C)
90
+ frustum_points = np.matmul(C2W, frustum_points.T).T # (5, 4)
91
+ frustum_points = frustum_points[:, :3] / frustum_points[:, 3:4] # (5, 3) remove homogeneous coordinate
92
+ return frustum_points, frustum_lines, frustum_colors
93
+
94
+
95
+
96
+ def draw_camera_frustum_geometry(c2ws, H, W, fx=600.0, fy=600.0, frustum_length=0.5,
97
+ color=np.array([29.0, 53.0, 87.0])/255.0, draw_now=False, coord='opengl'):
98
+ '''
99
+ :param c2ws: (N, 4, 4) np.array
100
+ :param H: scalar
101
+ :param W: scalar
102
+ :param fx: scalar
103
+ :param fy: scalar
104
+ :param frustum_length: scalar
105
+ :param color: None or (N, 3) or (3, ) or (1, 3) or (3, 1) np array
106
+ :param draw_now: True/False call o3d vis now
107
+ :return:
108
+ '''
109
+ N = c2ws.shape[0]
110
+
111
+ num_ele = color.flatten().shape[0]
112
+ if num_ele == 3:
113
+ color = color.reshape(1, 3)
114
+ color = np.tile(color, (N, 1))
115
+
116
+ frustum_list = []
117
+ if coord == 'opengl':
118
+ for i in range(N):
119
+ frustum_list.append(get_camera_frustum_opengl_coord(H, W, fx, fy,
120
+ W2C=np.linalg.inv(c2ws[i]),
121
+ frustum_length=frustum_length,
122
+ color=color[i]))
123
+ elif coord == 'opencv':
124
+ for i in range(N):
125
+ frustum_list.append(get_camera_frustum_opencv_coord(H, W, fx, fy,
126
+ W2C=np.linalg.inv(c2ws[i]),
127
+ frustum_length=frustum_length,
128
+ color=color[i]))
129
+ else:
130
+ print('Undefined coordinate system. Exit')
131
+ exit()
132
+
133
+ frustums_geometry = frustums2lineset(frustum_list)
134
+
135
+ if draw_now:
136
+ o3d.visualization.draw_geometries([frustums_geometry])
137
+
138
+ return frustums_geometry # this is an o3d geometry object.
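
A minimal sketch that turns a few camera-to-world poses into an Open3D frustum line set (requires open3d; pose values are arbitrary):

import numpy as np
from utils.utils_poses.vis_cam_traj import draw_camera_frustum_geometry

c2ws = np.tile(np.eye(4), (4, 1, 1))
c2ws[:, 0, 3] = np.arange(4) * 0.5   # spread the cameras along x
geom = draw_camera_frustum_geometry(c2ws, H=480, W=640, fx=500.0, fy=500.0,
                                    frustum_length=0.2, coord='opengl')
# o3d.visualization.draw_geometries([geom])  # uncomment to view interactively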
utils/utils_poses/vis_pose_utils.py ADDED
@@ -0,0 +1,270 @@
1
+ import os
2
+ import matplotlib
3
+ matplotlib.use('Agg')
4
+
5
+ from matplotlib import pyplot as plt
6
+ plt.ioff()
7
+
8
+ import copy
9
+ from evo.core.trajectory import PosePath3D, PoseTrajectory3D
10
+ from evo.main_ape import ape
11
+ from evo.tools import plot
12
+ from evo.core import sync
13
+ from evo.tools import file_interface
14
+ from evo.core import metrics
15
+ import evo
16
+ import torch
17
+ import numpy as np
18
+ from scipy.spatial.transform import Slerp
19
+ from scipy.spatial.transform import Rotation as R
20
+ import scipy.interpolate as si
21
+
22
+
23
+ def interp_poses(c2ws, N_views):
24
+ N_inputs = c2ws.shape[0]
25
+ trans = c2ws[:, :3, 3:].permute(2, 1, 0)
26
+ rots = c2ws[:, :3, :3]
27
+ render_poses = []
28
+ rots = R.from_matrix(rots)
29
+ slerp = Slerp(np.linspace(0, 1, N_inputs), rots)
30
+ interp_rots = torch.tensor(
31
+ slerp(np.linspace(0, 1, N_views)).as_matrix().astype(np.float32))
32
+ interp_trans = torch.nn.functional.interpolate(
33
+ trans, size=N_views, mode='linear').permute(2, 1, 0)
34
+ render_poses = torch.cat([interp_rots, interp_trans], dim=2)
35
+ render_poses = convert3x4_4x4(render_poses)
36
+ return render_poses
37
+
38
+
39
+ def interp_poses_bspline(c2ws, N_novel_imgs, input_times, degree):
40
+ target_trans = torch.tensor(scipy_bspline(
41
+ c2ws[:, :3, 3], n=N_novel_imgs, degree=degree, periodic=False).astype(np.float32)).unsqueeze(2)
42
+ rots = R.from_matrix(c2ws[:, :3, :3])
43
+ slerp = Slerp(input_times, rots)
44
+ target_times = np.linspace(input_times[0], input_times[-1], N_novel_imgs)
45
+ target_rots = torch.tensor(
46
+ slerp(target_times).as_matrix().astype(np.float32))
47
+ target_poses = torch.cat([target_rots, target_trans], dim=2)
48
+ target_poses = convert3x4_4x4(target_poses)
49
+ return target_poses
50
+
51
+
52
+ def poses_avg(poses):
53
+
54
+ hwf = poses[0, :3, -1:]
55
+
56
+ center = poses[:, :3, 3].mean(0)
57
+ vec2 = normalize(poses[:, :3, 2].sum(0))
58
+ up = poses[:, :3, 1].sum(0)
59
+ c2w = np.concatenate([viewmatrix(vec2, up, center), hwf], 1)
60
+
61
+ return c2w
62
+
63
+
64
+ def normalize(v):
65
+ """Normalize a vector."""
66
+ return v / np.linalg.norm(v)
67
+
68
+
69
+ def viewmatrix(z, up, pos):
70
+ vec2 = normalize(z)
71
+ vec1_avg = up
72
+ vec0 = normalize(np.cross(vec1_avg, vec2))
73
+ vec1 = normalize(np.cross(vec2, vec0))
74
+ m = np.stack([vec0, vec1, vec2, pos], 1)
75
+ return m
76
+
77
+
78
+ def render_path_spiral(c2w, up, rads, focal, zdelta, zrate, rots, N):
79
+ render_poses = []
80
+ rads = np.array(list(rads) + [1.])
81
+ hwf = c2w[:, 4:5]
82
+
83
+ for theta in np.linspace(0., 2. * np.pi * rots, N+1)[:-1]:
84
+ # c = np.dot(c2w[:3,:4], np.array([0.7*np.cos(theta) , -0.3*np.sin(theta) , -np.sin(theta*zrate) *0.1, 1.]) * rads)
85
+ # c = np.dot(c2w[:3,:4], np.array([0.3*np.cos(theta) , -0.3*np.sin(theta) , -np.sin(theta*zrate) *0.01, 1.]) * rads)
86
+ c = np.dot(c2w[:3, :4], np.array(
87
+ [0.2*np.cos(theta), -0.2*np.sin(theta), -np.sin(theta*zrate) * 0.1, 1.]) * rads)
88
+ z = normalize(c - np.dot(c2w[:3, :4], np.array([0, 0, -focal, 1.])))
89
+ render_poses.append(np.concatenate([viewmatrix(z, up, c), hwf], 1))
90
+ return render_poses
91
+
92
+
93
+ def scipy_bspline(cv, n=100, degree=3, periodic=False):
94
+ """ Calculate n samples on a bspline
95
+
96
+ cv : Array of control vertices
97
+ n : Number of samples to return
98
+ degree: Curve degree
99
+ periodic: True - Curve is closed
100
+ """
101
+ cv = np.asarray(cv)
102
+ count = cv.shape[0]
103
+
104
+ # Closed curve
105
+ if periodic:
106
+ kv = np.arange(-degree, count+degree+1)
107
+ factor, fraction = divmod(count+degree+1, count)
108
+ cv = np.roll(np.concatenate(
109
+ (cv,) * factor + (cv[:fraction],)), -1, axis=0)
110
+ degree = np.clip(degree, 1, degree)
111
+
112
+ # Opened curve
113
+ else:
114
+ degree = np.clip(degree, 1, count-1)
115
+ kv = np.clip(np.arange(count+degree+1)-degree, 0, count-degree)
116
+
117
+ # Return samples
118
+ max_param = count - (degree * (1-periodic))
119
+ spl = si.BSpline(kv, cv, degree)
120
+ return spl(np.linspace(0, max_param, n))
121
+
122
+
123
+ def generate_spiral_nerf(learned_poses, bds, N_novel_views, hwf):
+     learned_poses_ = np.concatenate(
+         (learned_poses[:, :3, :4].detach().cpu().numpy(), hwf[:len(learned_poses)]), axis=-1)
+     c2w = poses_avg(learned_poses_)
+     print('recentered', c2w.shape)
+
+     # Get spiral
+     # Get average up direction
+     up = normalize(learned_poses_[:, :3, 1].sum(0))
+
+     # Find a reasonable "focus depth" for this dataset
+     close_depth, inf_depth = bds.min()*.9, bds.max()*5.
+     dt = .75
+     mean_dz = 1./(((1.-dt)/close_depth + dt/inf_depth))
+     focal = mean_dz
+
+     # Get radii for spiral path
+     shrink_factor = .8  # unused here
+     zdelta = close_depth * .2
+     tt = learned_poses_[:, :3, 3]  # ptstocam(poses[:3,3,:].T, c2w).T
+     rads = np.percentile(np.abs(tt), 90, 0)
+     c2w_path = c2w
+     N_rots = 2
+     c2ws = render_path_spiral(
+         c2w_path, up, rads, focal, zdelta, zrate=.5, rots=N_rots, N=N_novel_views)
+     c2ws = torch.tensor(np.stack(c2ws).astype(np.float32))
+     c2ws = c2ws[:, :3, :4]
+     c2ws = convert3x4_4x4(c2ws)
+     return c2ws
+
+
+ def convert3x4_4x4(input):
+     """
+     :param input: (N, 3, 4) or (3, 4) torch or np
+     :return: (N, 4, 4) or (4, 4) torch or np
+     """
+     if torch.is_tensor(input):
+         if len(input.shape) == 3:
+             output = torch.cat([input, torch.zeros_like(
+                 input[:, 0:1])], dim=1)  # (N, 4, 4)
+             output[:, 3, 3] = 1.0
+         else:
+             output = torch.cat([input, torch.tensor(
+                 [[0, 0, 0, 1]], dtype=input.dtype, device=input.device)], dim=0)  # (4, 4)
+     else:
+         if len(input.shape) == 3:
+             output = np.concatenate(
+                 [input, np.zeros_like(input[:, 0:1])], axis=1)  # (N, 4, 4)
+             output[:, 3, 3] = 1.0
+         else:
+             output = np.concatenate(
+                 [input, np.array([[0, 0, 0, 1]], dtype=input.dtype)], axis=0)  # (4, 4)
+             output[3, 3] = 1.0
+     return output
+
+
+ plt.rc('legend', fontsize=20)  # enlarge legend text in the pose plots
+
+
+ def plot_pose(ref_poses, est_poses, output_path, args, vid=False):
+     ref_poses = [pose for pose in ref_poses]
+     if isinstance(est_poses, dict):
+         est_poses = [pose for k, pose in est_poses.items()]
+     else:
+         est_poses = [pose for pose in est_poses]
+     traj_ref = PosePath3D(poses_se3=ref_poses)
+     traj_est = PosePath3D(poses_se3=est_poses)
+     traj_est_aligned = copy.deepcopy(traj_est)
+     traj_est_aligned.align(traj_ref, correct_scale=True,
+                            correct_only_scale=False)
+     if vid:
+         for p_idx in range(len(ref_poses)):
+             fig = plt.figure()
+             current_est_aligned = traj_est_aligned.poses_se3[:p_idx+1]
+             current_ref = traj_ref.poses_se3[:p_idx+1]
+             current_est_aligned = PosePath3D(poses_se3=current_est_aligned)
+             current_ref = PosePath3D(poses_se3=current_ref)
+             traj_by_label = {
+                 # "estimate (not aligned)": traj_est,
+                 "Ours (aligned)": current_est_aligned,
+                 "Ground-truth": current_ref
+             }
+             plot_mode = plot.PlotMode.xyz
+             # ax = plot.prepare_axis(fig, plot_mode, 111)
+             ax = fig.add_subplot(111, projection="3d")
+             ax.xaxis.set_tick_params(labelbottom=False)
+             ax.yaxis.set_tick_params(labelleft=False)
+             ax.zaxis.set_tick_params(labelleft=False)
+             colors = ['r', 'b']
+             styles = ['-', '--']
+
+             for idx, (label, traj) in enumerate(traj_by_label.items()):
+                 plot.traj(ax, plot_mode, traj,
+                           styles[idx], colors[idx], label)
+             # plot.trajectories(fig, traj_by_label, plot.PlotMode.xyz)
+             ax.view_init(elev=10., azim=45)
+             plt.tight_layout()
+             os.makedirs(os.path.join(os.path.dirname(
+                 output_path), 'pose_vid'), exist_ok=True)
+             pose_vis_path = os.path.join(os.path.dirname(
+                 output_path), 'pose_vid', 'pose_vis_{:03d}.png'.format(p_idx))
+             print(pose_vis_path)
+             fig.savefig(pose_vis_path)
+
+     fig = plt.figure()
+     fig.patch.set_facecolor('white')  # set the figure background to pure white
+     traj_by_label = {
+         # "estimate (not aligned)": traj_est,
+         "Ours (aligned)": traj_est_aligned,
+         # "NoPe-NeRF (aligned)": traj_est_aligned,
+         # "CF-3DGS (aligned)": traj_est_aligned,
+         # "NeRFmm (aligned)": traj_est_aligned,
+         # args.method + " (aligned)": traj_est_aligned,
+         "COLMAP (GT)": traj_ref
+         # "Ground-truth": traj_ref
+     }
+     plot_mode = plot.PlotMode.xyz
+     # ax = plot.prepare_axis(fig, plot_mode, 111)
+     ax = fig.add_subplot(111, projection="3d")
+     ax.set_facecolor('white')  # set the subplot background to pure white
+     ax.xaxis.set_tick_params(labelbottom=True)
+     ax.yaxis.set_tick_params(labelleft=True)
+     ax.zaxis.set_tick_params(labelleft=True)
+     colors = ['#2c9e38', '#d12920']
+     # colors = ['#2c9e38', '#a72126']
+     # colors = ['r', 'b']
+     styles = ['-', '--']
+
+     for idx, (label, traj) in enumerate(traj_by_label.items()):
+         plot.traj(ax, plot_mode, traj,
+                   styles[idx], colors[idx], label)
+     # plot.trajectories(fig, traj_by_label, plot.PlotMode.xyz)
+     ax.view_init(elev=30., azim=45)
+     # ax.view_init(elev=10., azim=45)
+     plt.tight_layout()
+     pose_vis_path = output_path / 'pose_vis.png'
+     # pose_vis_path = os.path.join(os.path.dirname(output_path), f'pose_vis_{args.method}_{args.scene}.png')
+     fig.savefig(pose_vis_path)
+
+     # path_parts = args.pose_path.split('/')
+     # tmp_vis_path = '/'.join(path_parts[:-1]) + '/all_vis'
+     # tmp_vis_path2 = os.path.join(tmp_vis_path, f'pose_vis_{args.method}_{args.scene}.png')
+     # fig.savefig(tmp_vis_path2)
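+ # Hypothetical usage sketch (illustrative, not from this file): both pose
+ # lists hold 4x4 SE(3) camera-to-world matrices and output_path is a
+ # pathlib.Path directory, e.g.
+ #   plot_pose(gt_poses, pred_poses, Path(model_dir), args)   # writes pose_vis.png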