#!/usr/bin/env python3
# Copyright (C) 2024-present Naver Corporation. All rights reserved.
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
#
# --------------------------------------------------------
# Preprocessing code for the MegaDepth dataset
# dataset at https://www.cs.cornell.edu/projects/megadepth/
# --------------------------------------------------------
import collections
import os
import os.path as osp

import numpy as np
from tqdm import tqdm

os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"  # must be set before importing cv2
import cv2
import h5py

import path_to_root  # noqa
from dust3r.datasets.utils import cropping  # noqa
from dust3r.utils.parallel import parallel_threads


def get_parser():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--megadepth_dir", required=True)
    parser.add_argument("--precomputed_pairs", required=True)
    parser.add_argument("--output_dir", default="data/megadepth_processed")
    return parser


def main(db_root, pairs_path, output_dir):
    os.makedirs(output_dir, exist_ok=True)

    # load all pairs
    data = np.load(pairs_path, allow_pickle=True)
    scenes = data["scenes"]
    images = data["images"]
    pairs = data["pairs"]

    # enumerate all unique images
    todo = collections.defaultdict(set)
    for scene, im1, im2, score in pairs:
        todo[scene].add(im1)
        todo[scene].add(im2)

    # for each scene, load intrinsics and then rescale images in parallel
    for scene, im_idxs in tqdm(todo.items(), desc="Overall"):
        scene, subscene = scenes[scene].split()
        out_dir = osp.join(output_dir, scene, subscene)
        os.makedirs(out_dir, exist_ok=True)

        # load all camera params
        _, pose_w2cam, intrinsics = _load_kpts_and_poses(
            db_root, scene, subscene, intrinsics=True
        )

        in_dir = osp.join(db_root, scene, "dense" + subscene)
        args = [
            (in_dir, img, intrinsics[img], pose_w2cam[img], out_dir)
            for img in [images[im_id] for im_id in im_idxs]
        ]
        parallel_threads(
            resize_one_image,
            args,
            star_args=True,
            front_num=0,
            leave=False,
            desc=f"{scene}/{subscene}",
        )

    print("Done! prepared all pairs in", output_dir)
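

# Illustrative sketch (not part of the pipeline; all values hypothetical):
# the --precomputed_pairs archive read by main() is an .npz file whose three
# arrays match the data["scenes"] / data["images"] / data["pairs"] accesses
# above. A minimal file with that layout could be written like this:
#
#   np.savez(
#       "pairs_example.npz",
#       scenes=np.array(["0001 0"]),  # "<scene> <subscene>", split by main()
#       images=np.array(["im_a.jpg", "im_b.jpg"]),  # files under <scene>/dense<subscene>/imgs
#       pairs=np.array([(0, 0, 1, 0.9)], dtype=object),  # (scene_idx, im1_idx, im2_idx, score)
#   )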


def resize_one_image(root, tag, K_pre_rectif, pose_w2cam, out_dir):
    # skip images that have already been processed
    if osp.isfile(osp.join(out_dir, tag + ".npz")):
        return

    # load image (BGR -> RGB)
    img = cv2.cvtColor(
        cv2.imread(osp.join(root, "imgs", tag), cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB
    )

    # load depth
    with h5py.File(osp.join(root, "depths", osp.splitext(tag)[0] + ".h5"), "r") as hd5:
        depthmap = np.asarray(hd5["depth"])

    # rectify = undistort the intrinsics
    imsize_pre, K_pre, distortion = K_pre_rectif
    imsize_post = img.shape[1::-1]  # (width, height)
    K_post = cv2.getOptimalNewCameraMatrix(
        K_pre,
        distortion,
        imsize_pre,
        alpha=0,
        newImgSize=imsize_post,
        centerPrincipalPoint=True,
    )[0]

    # downscale
    img_out, depthmap_out, intrinsics_out, R_in2out = _downscale_image(
        K_post, img, depthmap, resolution_out=(800, 600)
    )

    # write everything: JPEG image, EXR depthmap, npz camera parameters
    img_out.save(osp.join(out_dir, tag + ".jpg"), quality=90)
    cv2.imwrite(osp.join(out_dir, tag + ".exr"), depthmap_out)

    camout2world = np.linalg.inv(pose_w2cam)
    camout2world[:3, :3] = camout2world[:3, :3] @ R_in2out.T
    np.savez(
        osp.join(out_dir, tag + ".npz"),
        intrinsics=intrinsics_out,
        cam2world=camout2world,
    )


def _downscale_image(camera_intrinsics, image, depthmap, resolution_out=(512, 384)):
    H, W = image.shape[:2]
    # orient the target resolution to match the image (portrait vs landscape)
    resolution_out = sorted(resolution_out)[:: +1 if W < H else -1]

    image, depthmap, intrinsics_out = cropping.rescale_image_depthmap(
        image, depthmap, camera_intrinsics, resolution_out, force=False
    )

    R_in2out = np.eye(3)  # rescaling does not rotate the camera
    return image, depthmap, intrinsics_out, R_in2out


def _load_kpts_and_poses(root, scene_id, subscene, z_only=False, intrinsics=False):
    if intrinsics:
        with open(
            os.path.join(
                root, scene_id, "sparse", "manhattan", subscene, "cameras.txt"
            ),
            "r",
        ) as f:
            raw = f.readlines()[3:]  # skip the header

        # each line: CAMERA_ID MODEL WIDTH HEIGHT FOCAL CX CY K0
        # (single focal length and one radial distortion coefficient)
        camera_intrinsics = {}
        for camera in raw:
            camera = camera.split(" ")
            width, height, focal, cx, cy, k0 = [float(elem) for elem in camera[2:]]
            K = np.eye(3)
            K[0, 0] = focal
            K[1, 1] = focal
            K[0, 2] = cx
            K[1, 2] = cy
            camera_intrinsics[int(camera[0])] = (
                (int(width), int(height)),
                K,
                (k0, 0, 0, 0),
            )

    with open(
        os.path.join(root, scene_id, "sparse", "manhattan", subscene, "images.txt"), "r"
    ) as f:
        raw = f.read().splitlines()[4:]  # skip the header

    extract_pose = (
        colmap_raw_pose_to_principal_axis if z_only else colmap_raw_pose_to_RT
    )

    poses = {}
    points3D_idxs = {}
    camera = []

    # images.txt alternates one pose line and one 2D-points line per image
    for image, points in zip(raw[::2], raw[1::2]):
        image = image.split(" ")
        points = points.split(" ")

        image_id = image[-1]
        camera.append(int(image[-2]))

        # extract the pose (or just the principal axis if z_only)
        raw_pose = [float(elem) for elem in image[1:-2]]
        poses[image_id] = extract_pose(raw_pose)

        current_points3D_idxs = {int(i) for i in points[2::3] if i != "-1"}
        assert -1 not in current_points3D_idxs
        points3D_idxs[image_id] = current_points3D_idxs

    if intrinsics:
        image_intrinsics = {
            im_id: camera_intrinsics[cam] for im_id, cam in zip(poses, camera)
        }
        return points3D_idxs, poses, image_intrinsics
    else:
        return points3D_idxs, poses


def colmap_raw_pose_to_principal_axis(image_pose):
    qvec = image_pose[:4]
    qvec = qvec / np.linalg.norm(qvec)
    w, x, y, z = qvec
    # third row of the rotation matrix = viewing direction (principal axis)
    z_axis = np.float32(
        [2 * x * z - 2 * y * w, 2 * y * z + 2 * x * w, 1 - 2 * x * x - 2 * y * y]
    )
    return z_axis


def colmap_raw_pose_to_RT(image_pose):
    qvec = image_pose[:4]
    qvec = qvec / np.linalg.norm(qvec)
    w, x, y, z = qvec
    R = np.array(
        [
            [1 - 2 * y * y - 2 * z * z, 2 * x * y - 2 * z * w, 2 * x * z + 2 * y * w],
            [2 * x * y + 2 * z * w, 1 - 2 * x * x - 2 * z * z, 2 * y * z - 2 * x * w],
            [2 * x * z - 2 * y * w, 2 * y * z + 2 * x * w, 1 - 2 * x * x - 2 * y * y],
        ]
    )
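    # R is the standard expansion of COLMAP's scalar-first quaternion
    # (qw, qx, qy, qz) into a rotation matrix; its last row is exactly the
    # z_axis returned by colmap_raw_pose_to_principal_axis above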
    t = image_pose[4:7]
    # world-to-camera pose
    current_pose = np.eye(4)
    current_pose[:3, :3] = R
    current_pose[:3, 3] = t
    return current_pose


if __name__ == "__main__":
    parser = get_parser()
    args = parser.parse_args()
    main(args.megadepth_dir, args.precomputed_pairs, args.output_dir)
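

# Example invocation (script name and paths hypothetical; flags as defined
# in get_parser above):
#
#   python3 preprocess_megadepth.py \
#       --megadepth_dir data/megadepth \
#       --precomputed_pairs data/megadepth_pairs.npz \
#       --output_dir data/megadepth_processed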