EDGS

Running on Zero

File size: 40,037 Bytes

5f9d349

from matplotlib import pyplot as plt
import numpy as np
import torch

import numpy as np
from typing import List
import sys
sys.path.append('./submodules/gaussian-splatting/')
from scene.cameras import Camera
from PIL import Image
import imageio
from scipy.interpolate import splprep, splev

import cv2
import numpy as np
import plotly.graph_objects as go
import numpy as np
from scipy.spatial.transform import Rotation as R, Slerp
from scipy.spatial import distance_matrix
from sklearn.decomposition import PCA
from scipy.interpolate import splprep, splev
from typing import List
from sklearn.mixture import GaussianMixture

def render_gaussians_rgb(generator3DGS, viewpoint_cam, visualize=False):
    """
    Render the gaussians of generator3DGS as seen from viewpoint_cam and return the RGB image.
    Args:
        generator3DGS : callable (instance of the Generator3DGS class from the networks.py file);
            calling it with a camera must return a mapping that has a "render" entry
        viewpoint_cam : camera instance forwarded unchanged to generator3DGS
        visualize : boolean flag. If True, the image is also shown with pyplot
    Returns:
        uint8 numpy array with shape (H, W, 3) representing the image
    """
    with torch.no_grad():
        rendered = generator3DGS(viewpoint_cam)["render"]
        # Move the channel axis last and copy the data to host memory.
        channels_last = rendered.clone().detach().cpu().numpy().transpose(1, 2, 0)

        # Scale to 0..255, clamp out-of-range values, then truncate to bytes.
        image_np = np.clip(channels_last * 255, 0, 255).astype(np.uint8)
        if not visualize:
            return image_np

        plt.figure(figsize=(12, 8))
        plt.imshow(image_np)
        plt.show()
        return image_np

def render_gaussians_D_scores(generator3DGS, viewpoint_cam, mask=None, mask_channel=0, visualize=False):
    """
    Render the D_scores of the gaussians from generator3DGS as colors, seen from viewpoint_cam.

    The color features of the gaussians are temporarily replaced by a strongly attenuated copy
    (scaled by 1e-4) plus the D_scores broadcast to three channels. The original
    `_features_dc` / `_features_rest` objects are put back afterwards, even when rendering
    raises, so the model is left exactly as it was (same objects, same values).

    Args:
        generator3DGS : instance of the Generator3DGS class from the networks.py file
        viewpoint_cam : camera instance
        mask : optional mask to highlight specific gaussians. Must be of shape (N) where N is the number
            of gaussians in generator3DGS.gaussians. Must be a torch tensor of floats, please scale according
            to how much color you want to have. Recommended mask value is 10.
        mask_channel : to which color channel should we add mask
        visualize : boolean flag. If True, will call pyplot function and render image inplace
    Returns:
        uint8 numpy array with shape (H, W, 3) representing the generator3DGS.gaussians.D_scores rendered as colors
    """
    gaussians = generator3DGS.gaussians
    # Keep the original objects: restoring by reference is exact, whereas undoing the
    # 1e-4 scaling with a 1e4 multiplication loses precision and replaces the stored
    # tensors (e.g. nn.Parameter instances) with new ones.
    original_dc = gaussians._features_dc
    original_rest = gaussians._features_rest

    with torch.no_grad():
        try:
            d_scores_rgb = torch.stack([gaussians.D_scores] * 3, dim=-1)
            features_dc = original_dc * 1e-4 + d_scores_rgb
            if mask is not None:
                features_dc[..., mask_channel] += mask.unsqueeze(-1)

            gaussians._features_dc = features_dc
            gaussians._features_rest = original_rest * 1e-4

            render_pkg = generator3DGS(viewpoint_cam)
        finally:
            # Always undo the temporary override, also on rendering errors.
            gaussians._features_dc = original_dc
            gaussians._features_rest = original_rest

        image = render_pkg["render"]
        image_np = image.clone().detach().cpu().numpy().transpose(1, 2, 0)

        # Scale to 0..255 and clamp before converting to bytes.
        image_np = np.clip(image_np * 255, 0, 255).astype(np.uint8)
        if visualize:
            plt.figure(figsize=(12, 8))
            plt.imshow(image_np)
            plt.show()

        return image_np
    


def normalize(v):
    """
    Scale a vector so that its Euclidean norm becomes 1.

    Parameters:
        v (np.ndarray): Input vector.

    Returns:
        np.ndarray: `v` divided by its norm. A zero-length input is not special-cased,
        so the division then yields non-finite values.
    """
    length = np.linalg.norm(v)
    return v / length

def look_at_rotation(camera_position: np.ndarray, target: np.ndarray, world_up=np.array([0, 1, 0])):
    """
    Compute a rotation matrix for a camera looking at a target point.

    Parameters:
        camera_position (np.ndarray): The 3D position of the camera.
        target (np.ndarray): The point the camera should look at.
        world_up (np.ndarray): A vector that defines the global 'up' direction.

    Returns:
        np.ndarray: A 3x3 rotation matrix (camera-to-world) with columns [right, up, forward].

    Raises:
        ValueError: If `camera_position` and `target` coincide, because the viewing
            direction is then undefined.

    Notes:
        When the viewing direction is (numerically) parallel to `world_up`, the cross
        product that defines the right axis vanishes. In that case the coordinate axis
        least aligned with the viewing direction is used as the up hint instead, so the
        result is always a finite, orthonormal, right-handed matrix.
    """
    forward = np.asarray(target, dtype=float) - np.asarray(camera_position, dtype=float)
    forward_norm = np.linalg.norm(forward)
    if forward_norm == 0.0:
        raise ValueError("camera_position and target coincide; the viewing direction is undefined")
    z_axis = forward / forward_norm                      # Forward direction

    up_hint = np.asarray(world_up, dtype=float)
    right = np.cross(up_hint, z_axis)
    right_norm = np.linalg.norm(right)
    # z_axis is a unit vector, so right_norm / |up_hint| is the sine of the angle between them.
    if right_norm <= 1e-8 * np.linalg.norm(up_hint):
        up_hint = np.eye(3)[np.argmin(np.abs(z_axis))]
        right = np.cross(up_hint, z_axis)
        right_norm = np.linalg.norm(right)
    x_axis = right / right_norm                          # Right direction

    y_axis = np.cross(z_axis, x_axis)                    # Recomputed up
    return np.stack([x_axis, y_axis, z_axis], axis=1)

    
def generate_circular_camera_path(existing_cameras: List[Camera], N: int = 12, radius_scale: float = 1.0, d: float = 2.0) -> List[Camera]:
    """
    Build a ring of N new cameras around the centroid of an existing camera group.

    Each camera's `T` is used as its position and the third column of its `R` as its
    viewing direction. The ring lies in the plane spanned by the 'right' and 'up'
    vectors derived from the mean viewing direction and a fixed (0, 1, 0) up guess.

    Parameters:
        existing_cameras (List[Camera]): Cameras used to estimate the ring centre, radius and look-at point.
        N (int): Number of new cameras to generate along the circular path.
        radius_scale (float): Factor applied to the mean camera-to-centre distance to obtain the ring radius.
        d (float): Distance ahead of each existing camera at which its look-at point is placed.

    Returns:
        List[Camera]: The generated cameras. They look at the mean look-at point, unless
        `d` or `radius_scale` is below 1e-5, in which case every camera copies the
        orientation of the first existing camera.
    """
    cam_positions = [cam.T for cam in existing_cameras]
    cam_forwards = [cam.R[:, 2] for cam in existing_cameras]

    # Ring centre: mean of the camera positions.
    center = np.mean(cam_positions, axis=0)

    # Shared look-at point: mean of the points d units ahead of every camera.
    center_of_view = np.mean(
        [origin + forward * d for origin, forward in zip(cam_positions, cam_forwards)],
        axis=0,
    )

    # Orthonormal basis of the ring plane, built from the mean forward direction.
    avg_forward = normalize(np.mean(cam_forwards, axis=0))
    right = normalize(np.cross(avg_forward, np.array([0, 1, 0])))
    up = normalize(np.cross(right, avg_forward))

    # Ring radius: scaled mean distance of the cameras from the centre.
    avg_radius = np.mean([np.linalg.norm(origin - center) for origin in cam_positions]) * radius_scale

    reference_cam = existing_cameras[0]
    # The choice of orientation source does not depend on the angle, so decide it once.
    keep_reference_orientation = d < 1e-5 or radius_scale < 1e-5

    new_cameras = []
    for i, a in enumerate(np.linspace(0, 2 * np.pi, N, endpoint=False)):
        position = center + avg_radius * (np.cos(a) * right + np.sin(a) * up)

        if keep_reference_orientation:
            orientation = reference_cam.R.copy()
        else:
            orientation = look_at_rotation(position, center_of_view)

        # Placeholder black image with the reference camera's size.
        blank_image = Image.fromarray(
            np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)
        )
        camera = Camera(
            R=orientation,
            T=position,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=(reference_cam.image_width, reference_cam.image_height),
            colmap_id=-1,
            depth_params=None,
            image=blank_image,
            invdepthmap=None,
            image_name=f"circular_a={a:.3f}",
            uid=i
        )
        new_cameras.append(camera)

    return new_cameras


def save_numpy_frames_as_gif(frames, output_path="animation.gif", duration=100):
    """
    Write a sequence of RGB NumPy frames to disk as an endlessly looping GIF.

    Parameters:
        frames (List[np.ndarray]): List of RGB images as uint8 NumPy arrays (shape HxWx3).
        output_path (str): Path to save the output GIF.
        duration (int): Duration per frame in milliseconds.

    Returns:
        None
    """
    pil_frames = list(map(Image.fromarray, frames))
    first_frame = pil_frames[0]
    following_frames = pil_frames[1:]

    # loop=0 asks for infinite repetition; duration is the per-frame time in ms.
    save_options = dict(
        save_all=True,
        append_images=following_frames,
        duration=duration,
        loop=0,
    )
    first_frame.save(output_path, **save_options)
    print(f"GIF saved to: {output_path}")

def center_crop_frame(frame: np.ndarray, crop_fraction: float) -> np.ndarray:
    """
    Crop the central region of the frame by the given fraction.

    Parameters:
        frame (np.ndarray): Input image of shape (H, W, C) or (H, W). Any axes after the
            first two are kept untouched.
        crop_fraction (float): Fraction of the original size to retain (e.g., 0.8 keeps 80%).
            Values >= 1.0 return the frame unchanged.

    Returns:
        np.ndarray: Cropped image (a view on `frame`); `frame` itself when no cropping is requested.
            The crop is never smaller than 1x1 pixel.

    Raises:
        ValueError: If `crop_fraction` is not greater than 0 (including NaN).
    """
    if crop_fraction >= 1.0:
        return frame
    if not crop_fraction > 0:
        raise ValueError(f"crop_fraction must be in (0, 1], got {crop_fraction}")

    # Only the two leading axes are spatial; this also accepts grayscale (H, W) frames.
    h, w = frame.shape[:2]
    new_h = max(1, int(h * crop_fraction))
    new_w = max(1, int(w * crop_fraction))
    start_y = (h - new_h) // 2
    start_x = (w - new_w) // 2
    return frame[start_y:start_y + new_h, start_x:start_x + new_w]



def generate_smooth_closed_camera_path(existing_cameras: List[Camera], N: int = 120, d: float = 2.0, s=.25) -> List[Camera]:
    """
    Fit a periodic spline to the positions of the existing cameras and place N cameras on it.

    Parameters:
        existing_cameras (List[Camera]): List of existing cameras.
        N (int): Number of points (cameras) to sample along the smooth path.
        d (float): Distance ahead of each existing camera used when estimating the common center of view.
        s (float): Smoothing factor forwarded to `scipy.interpolate.splprep`.

    Returns:
        List[Camera]: Cameras along the closed loop, all oriented toward the center of view.
        The spline parameter is sampled on [0, 1] inclusive, i.e. at both ends of the
        periodic curve.
    """
    # Common look-at point: mean of the points d units ahead of every camera.
    center_of_view = np.mean([cam.T + cam.R[:, 2] * d for cam in existing_cameras], axis=0)

    # Control points of the loop: camera positions with the first one repeated at the end.
    control_points = np.array([cam.T for cam in existing_cameras])
    closed_loop = np.vstack([control_points, control_points[0]])

    # Periodic spline through the control points, one coordinate per row.
    tck, _ = splprep(closed_loop.T, s=s, per=True)

    # Evaluate the spline at N evenly spaced parameter values.
    sample_params = np.linspace(0, 1, N)
    smooth_path = np.stack(splev(sample_params, tck), axis=-1)

    reference_cam = existing_cameras[0]
    image_size = (reference_cam.image_width, reference_cam.image_height)

    new_cameras = []
    for i, pos in enumerate(smooth_path):
        orientation = look_at_rotation(pos, center_of_view)
        # Placeholder black image with the reference camera's size.
        blank_image = Image.fromarray(
            np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)
        )
        camera = Camera(
            R=orientation,
            T=pos,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=image_size,
            colmap_id=-1,
            depth_params=None,
            image=blank_image,
            invdepthmap=None,
            image_name=f"smooth_path_i={i}",
            uid=i
        )
        new_cameras.append(camera)

    return new_cameras


def save_numpy_frames_as_mp4(frames, output_path="animation.mp4", fps=10, center_crop: float = 1.0):
    """
    Encode a sequence of RGB NumPy frames as an MP4 file, optionally center-cropping each frame.

    Parameters:
        frames (List[np.ndarray]): List of RGB images as uint8 NumPy arrays (shape HxWx3).
        output_path (str): Path to save the output MP4.
        fps (int): Frames per second for playback speed.
        center_crop (float): Fraction (0 < center_crop <= 1.0) of central region to retain.
                             Use 1.0 for no cropping; 0.8 to crop to 80% center region.

    Returns:
        None
    """
    writer = imageio.get_writer(output_path, fps=fps, codec='libx264', quality=8)
    # The context manager closes the writer once all frames were appended (or on error).
    with writer:
        for frame in frames:
            writer.append_data(center_crop_frame(frame, center_crop))
    print(f"MP4 saved to: {output_path}")


    
def put_text_on_image(img: np.ndarray, text: str) -> np.ndarray:
    """
    Draws multiline white text on a copy of the input image, positioned near the bottom
    and around 80% of the image width. Handles '\n' characters to split text into multiple lines.

    Args:
        img (np.ndarray): Input image as a (H, W, 3) uint8 numpy array.
        text (str): Text string to draw on the image. Newlines '\n' are treated as line breaks.

    Returns:
        np.ndarray: The output image with the text drawn on it. The input image is not modified.

    Notes:
        - The block of text ends at roughly 90% of the image height and is shifted left
          so that the widest line keeps a 30 pixel margin to the right border.
        - The start position is clamped so that it never lies left of the image or above
          its top edge; text that is larger than the image is still clipped on the
          right/bottom.
        - Text is drawn in white with font scale 1.0 and thickness 2.
    """
    img = img.copy()
    height, width, _ = img.shape

    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 1.
    color = (255, 255, 255)
    thickness = 2
    line_spacing = 5  # extra pixels between lines
    right_margin = 30

    lines = text.split('\n')

    # Height of one text line; used as the constant vertical step between baselines.
    line_height = cv2.getTextSize('A', font, font_scale, thickness)[0][1]
    line_step = line_height + line_spacing

    # Widest line decides how far the block has to be shifted to the left.
    max_text_width = max(cv2.getTextSize(line, font, font_scale, thickness)[0][0] for line in lines)

    x = min(int(0.8 * width), width - max_text_width - right_margin)
    x = max(x, 0)  # never start left of the image

    # Start near the bottom (90% of the height), moved up by the height of the text block.
    y_start = int(height * 0.9) - len(lines) * line_step
    # cv2.putText takes the bottom-left corner of the text as origin, so the first
    # baseline must be at least one line height below the top edge.
    y_start = max(y_start, line_height)

    for i, line in enumerate(lines):
        y = y_start + i * line_step
        cv2.putText(img, line, (x, y), font, font_scale, color, thickness, cv2.LINE_AA)

    return img




def catmull_rom_spline(P0, P1, P2, P3, n_points=20):
    """
    Evaluate one uniform Catmull-Rom segment, running from P1 (t=0) to P2 (t=1).

    P0 and P3 are the neighbouring control points that shape the tangents.
    Returns an array with `n_points` rows, sampled at evenly spaced t in [0, 1]
    (both ends included).
    """
    # Catmull-Rom basis matrix for the monomial vector [t^3, t^2, t, 1].
    basis = 0.5 * np.array([
        [-1,  3, -3,  1],
        [ 2, -5,  4, -1],
        [-1,  0,  1,  0],
        [ 0,  2,  0,  0],
    ])

    params = np.linspace(0, 1, n_points)
    monomials = np.column_stack((params**3, params**2, params, np.ones_like(params)))
    control_points = np.stack([P0, P1, P2, P3], axis=0)

    # Blend weights per sample, then the weighted sum of the four control points.
    weights = monomials @ basis
    return weights @ control_points

def sort_cameras_pca(existing_cameras: List[Camera]):
    """
    Order cameras by the projection of their `T` vectors onto the first principal axis.

    Returns the array of indices into `existing_cameras` that sorts them in
    ascending order of that projection.
    """
    camera_positions = np.array([cam.T for cam in existing_cameras])
    # One-component PCA: a single coordinate per camera along the main axis.
    projections = PCA(n_components=1).fit_transform(camera_positions)
    return np.argsort(projections[:, 0])

def generate_fully_smooth_cameras(existing_cameras: List[Camera], 
                                  n_selected: int = 30, 
                                  n_points_per_segment: int = 20, 
                                  d: float = 2.0,
                                  closed: bool = False) -> List[Camera]:
    """
    Generate a fully smooth camera path using PCA ordering, global Catmull-Rom spline for positions, and global SLERP for orientations.

    The selected cameras act as key frames. Positions are interpolated with one
    Catmull-Rom segment between each pair of consecutive key frames and rotations with
    SLERP between the same key frames, so that position and orientation reach every key
    frame at the same point of the path.

    Args:
        existing_cameras (List[Camera]): List of input cameras.
        n_selected (int): Number of cameras to select after sorting (at least 2).
        n_points_per_segment (int): Number of interpolated points per spline segment
            (both segment ends included).
        d (float): Currently unused; kept for interface compatibility.
        closed (bool): Whether to close the path. If True, an additional segment leads
            from the last key frame back to the first one (position and rotation).

    Returns:
        List[Camera]: List of smoothly moving Camera objects. The path consists of
        `n_selected - 1` segments (open) or `n_selected` segments (closed), each with
        `n_points_per_segment` cameras.

    Raises:
        ValueError: If `n_selected` is smaller than 2.
    """
    if n_selected < 2:
        raise ValueError("n_selected must be at least 2 to interpolate a camera path")

    # 1. Sort cameras along PCA axis
    sorted_indices = sort_cameras_pca(existing_cameras)
    sorted_cameras = [existing_cameras[i] for i in sorted_indices]
    positions = np.array([cam.T for cam in sorted_cameras])

    # 2. Subsample uniformly
    idx = np.linspace(0, len(positions) - 1, n_selected).astype(int)
    sampled_positions = positions[idx]
    sampled_cameras = [sorted_cameras[i] for i in idx]

    # 3. Pad the control points so that every segment has a neighbour on both sides.
    #    Open path: repeat the end points once (n_selected - 1 segments).
    #    Closed path: wrap around (n_selected segments, the last one returns to the start).
    if closed:
        control_points = np.vstack([sampled_positions[-1], sampled_positions, sampled_positions[0], sampled_positions[1]])
        key_cameras = sampled_cameras + [sampled_cameras[0]]
    else:
        control_points = np.vstack([sampled_positions[0], sampled_positions, sampled_positions[-1]])
        key_cameras = sampled_cameras
    n_segments = len(control_points) - 3

    # 4. Generate smooth path positions, one Catmull-Rom segment per pair of key frames
    path_positions = np.concatenate([
        catmull_rom_spline(control_points[i - 1], control_points[i], control_points[i + 1], control_points[i + 2], n_points_per_segment)
        for i in range(1, n_segments + 1)
    ], axis=0)

    # 5. Global SLERP for rotations. Key frame k sits at time k / n_segments and the
    #    samples of segment s cover [s, s + 1] / n_segments, matching the positions.
    rotations = R.from_matrix([cam.R for cam in key_cameras])
    key_times = np.linspace(0, 1, len(key_cameras))
    slerp = Slerp(key_times, rotations)

    segment_params = np.linspace(0, 1, n_points_per_segment)
    query_times = np.concatenate([(segment + segment_params) / n_segments for segment in range(n_segments)])
    interpolated_rotations = slerp(query_times)

    # 6. Generate Camera objects
    reference_cam = existing_cameras[0]
    smooth_cameras = []

    for i, pos in enumerate(path_positions):
        R_interp = interpolated_rotations[i].as_matrix()

        smooth_cameras.append(Camera(
            R=R_interp,
            T=pos,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=(reference_cam.image_width, reference_cam.image_height),
            colmap_id=-1,
            depth_params=None,
            image=Image.fromarray(np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)),
            invdepthmap=None,
            image_name=f"fully_smooth_path_i={i}",
            uid=i
        ))

    return smooth_cameras


def plot_cameras_and_smooth_path_with_orientation(existing_cameras: List[Camera], smooth_cameras: List[Camera], scale: float = 0.1):
    """
    Show an interactive 3D plot of the input cameras and the smooth-path cameras.

    Positions are taken from each camera's `T`; the viewing direction drawn as a cone
    is the third column of each camera's `R`. Input cameras are blue, path cameras red.

    Args:
        existing_cameras (List[Camera]): List of original input cameras.
        smooth_cameras (List[Camera]): List of smooth path cameras.
        scale (float): Length of orientation arrows.

    Returns:
        None
    """
    fig = go.Figure()

    def add_positions(cameras, **scatter_options):
        # One Scatter3d trace holding the positions of all given cameras.
        xyz = np.array([cam.T for cam in cameras])
        fig.add_trace(go.Scatter3d(x=xyz[:, 0], y=xyz[:, 1], z=xyz[:, 2], **scatter_options))

    def add_direction_cones(cameras, color, label):
        # One single-colored cone per camera, anchored at the camera position.
        for cam in cameras:
            origin = cam.T
            forward = cam.R[:, 2]  # Forward direction
            fig.add_trace(go.Cone(
                x=[origin[0]], y=[origin[1]], z=[origin[2]],
                u=[forward[0]], v=[forward[1]], w=[forward[2]],
                colorscale=[[0, color], [1, color]],
                sizemode="absolute",
                sizeref=scale,
                anchor="tail",
                showscale=False,
                name=label
            ))

    # Positions first: markers for the inputs, a connected line for the path.
    add_positions(
        existing_cameras,
        mode='markers',
        marker=dict(size=4, color='blue'),
        name='Input Cameras'
    )
    add_positions(
        smooth_cameras,
        mode='lines+markers',
        line=dict(color='red', width=3),
        marker=dict(size=2, color='red'),
        name='Smooth Path Cameras'
    )

    # Then the orientations, in the same order.
    add_direction_cones(existing_cameras, 'blue', 'Input Camera Direction')
    add_direction_cones(smooth_cameras, 'red', 'Smooth Camera Direction')

    scene_options = dict(
        xaxis_title='X',
        yaxis_title='Y',
        zaxis_title='Z',
        aspectmode='data'
    )
    fig.update_layout(
        scene=scene_options,
        title="Input Cameras and Smooth Path with Orientations",
        margin=dict(l=0, r=0, b=0, t=30)
    )

    fig.show()


def solve_tsp_nearest_neighbor(points: np.ndarray):
    """
    Solve TSP approximately using nearest neighbor heuristic.

    The tour starts at point 0 and repeatedly moves to the closest point that has not
    been visited yet. Ties are resolved in favour of the lowest index. The result is a
    heuristic order, not a guaranteed optimum.

    Args:
        points (np.ndarray): (N, 3) array of points.

    Returns:
        List[int]: Visiting order of the points (a permutation of 0..N-1); empty for N == 0.
    """
    n_points = points.shape[0]
    if n_points == 0:
        return []

    dist = distance_matrix(points, points)
    unvisited = np.ones(n_points, dtype=bool)
    unvisited[0] = False
    visited = [0]

    for _ in range(n_points - 1):
        # Restrict the search to unvisited points; argmin returns the first minimum.
        candidates = np.flatnonzero(unvisited)
        next_city = int(candidates[np.argmin(dist[visited[-1], candidates])])
        visited.append(next_city)
        unvisited[next_city] = False

    return visited

def solve_tsp_2opt(points: np.ndarray, n_iter: int = 1000) -> np.ndarray:
    """
    Solve TSP approximately using Nearest Neighbor + 2-Opt.

    The path is open (it does not return to the start) and always begins at point 0.
    After the nearest-neighbor construction, 2-opt moves (reversing a stretch of the
    path) are applied as long as they shorten the path; the first improving move found
    is taken and the search restarts.

    Args:
        points (np.ndarray): Array of shape (N, D) with points.
        n_iter (int): Maximum number of accepted 2-opt improvements.

    Returns:
        np.ndarray: Ordered list of indices (empty integer array for N == 0).
    """
    n_points = points.shape[0]
    if n_points == 0:
        return np.array([], dtype=int)

    # === 1. Start with Nearest Neighbor
    unvisited = list(range(1, n_points))
    current = 0
    path = [current]

    while unvisited:
        dists = np.linalg.norm(points[unvisited] - points[current], axis=1)
        current = unvisited.pop(int(np.argmin(dists)))
        path.append(current)

    # === 2. Apply 2-Opt improvements
    def path_length(candidate):
        # Sum of the distances between consecutive points of the open path.
        ordered = points[candidate]
        return float(np.linalg.norm(ordered[1:] - ordered[:-1], axis=1).sum())

    best_length = path_length(path)
    improved = True

    for _ in range(n_iter):
        if not improved:
            break
        improved = False
        for i in range(1, n_points - 2):
            for j in range(i + 1, n_points):
                if j - i == 1:
                    continue  # reversing a single element changes nothing
                new_path = path[:i] + path[i:j][::-1] + path[j:]
                new_length = path_length(new_path)
                if new_length < best_length:
                    path = new_path
                    best_length = new_length
                    improved = True
                    break
            if improved:
                break

    return np.array(path)

def generate_fully_smooth_cameras_with_tsp(existing_cameras: List[Camera], 
                                           n_selected: int = 30, 
                                           n_points_per_segment: int = 20, 
                                           d: float = 2.0,
                                           closed: bool = False) -> List[Camera]:
    """
    Generate a fully smooth camera path using TSP ordering, global Catmull-Rom spline for positions, and global SLERP for orientations.

    The selected cameras act as key frames. Positions are interpolated with one
    Catmull-Rom segment between each pair of consecutive key frames and rotations with
    SLERP between the same key frames, so that position and orientation reach every key
    frame at the same point of the path.

    Args:
        existing_cameras (List[Camera]): List of input cameras.
        n_selected (int): Number of cameras to select after ordering (at least 2).
        n_points_per_segment (int): Number of interpolated points per spline segment
            (both segment ends included).
        d (float): Currently unused; kept for interface compatibility.
        closed (bool): Whether to close the path. If True, an additional segment leads
            from the last key frame back to the first one (position and rotation).

    Returns:
        List[Camera]: List of smoothly moving Camera objects. The path consists of
        `n_selected - 1` segments (open) or `n_selected` segments (closed), each with
        `n_points_per_segment` cameras.

    Raises:
        ValueError: If `n_selected` is smaller than 2.
    """
    if n_selected < 2:
        raise ValueError("n_selected must be at least 2 to interpolate a camera path")

    positions = np.array([cam.T for cam in existing_cameras])

    # 1. Solve approximate TSP (nearest-neighbor order starting at the first camera)
    order = solve_tsp_nearest_neighbor(positions)
    ordered_cameras = [existing_cameras[i] for i in order]
    ordered_positions = positions[order]

    # 2. Subsample uniformly
    idx = np.linspace(0, len(ordered_positions) - 1, n_selected).astype(int)
    sampled_positions = ordered_positions[idx]
    sampled_cameras = [ordered_cameras[i] for i in idx]

    # 3. Pad the control points so that every segment has a neighbour on both sides.
    #    Open path: repeat the end points once (n_selected - 1 segments).
    #    Closed path: wrap around (n_selected segments, the last one returns to the start).
    if closed:
        control_points = np.vstack([sampled_positions[-1], sampled_positions, sampled_positions[0], sampled_positions[1]])
        key_cameras = sampled_cameras + [sampled_cameras[0]]
    else:
        control_points = np.vstack([sampled_positions[0], sampled_positions, sampled_positions[-1]])
        key_cameras = sampled_cameras
    n_segments = len(control_points) - 3

    # 4. Generate smooth path positions, one Catmull-Rom segment per pair of key frames
    path_positions = np.concatenate([
        catmull_rom_spline(control_points[i - 1], control_points[i], control_points[i + 1], control_points[i + 2], n_points_per_segment)
        for i in range(1, n_segments + 1)
    ], axis=0)

    # 5. Global SLERP for rotations. Key frame k sits at time k / n_segments and the
    #    samples of segment s cover [s, s + 1] / n_segments, matching the positions.
    rotations = R.from_matrix([cam.R for cam in key_cameras])
    key_times = np.linspace(0, 1, len(key_cameras))
    slerp = Slerp(key_times, rotations)

    segment_params = np.linspace(0, 1, n_points_per_segment)
    query_times = np.concatenate([(segment + segment_params) / n_segments for segment in range(n_segments)])
    interpolated_rotations = slerp(query_times)

    # 6. Generate Camera objects
    reference_cam = existing_cameras[0]
    smooth_cameras = []

    for i, pos in enumerate(path_positions):
        R_interp = interpolated_rotations[i].as_matrix()

        smooth_cameras.append(Camera(
            R=R_interp,
            T=pos,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=(reference_cam.image_width, reference_cam.image_height),
            colmap_id=-1,
            depth_params=None,
            image=Image.fromarray(np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)),
            invdepthmap=None,
            image_name=f"fully_smooth_path_i={i}",
            uid=i
        ))

    return smooth_cameras

from typing import List
import numpy as np
from sklearn.mixture import GaussianMixture
from scipy.spatial.transform import Rotation as R, Slerp
from PIL import Image

def generate_clustered_smooth_cameras_with_tsp(existing_cameras: List[Camera], 
                                                n_selected: int = 30, 
                                                n_points_per_segment: int = 20, 
                                                d: float = 2.0,
                                                n_clusters: int = 5,
                                                closed: bool = False) -> List[Camera]:
    """
    Generate a fully smooth camera path using clustering + TSP between cluster centers + TSP inside clusters.
    Positions are normalized before clustering and denormalized before generating final cameras.

    The cameras are grouped with a Gaussian mixture model on their normalized positions.
    The clusters are visited in the order given by a 2-opt TSP over the cluster centers,
    and the cameras inside each cluster in nearest-neighbor order. Key frames subsampled
    from that sequence are interpolated with Catmull-Rom segments (positions) and SLERP
    (rotations), aligned so that both reach every key frame at the same point of the path.

    Args:
        existing_cameras (List[Camera]): List of input cameras.
        n_selected (int): Number of cameras to select after ordering (at least 2).
        n_points_per_segment (int): Number of interpolated points per spline segment
            (both segment ends included).
        d (float): Currently unused; kept for interface compatibility.
        n_clusters (int): Number of GMM clusters.
        closed (bool): Whether to close the path. If True, an additional segment leads
            from the last key frame back to the first one (position and rotation).

    Returns:
        List[Camera]: Smooth path of Camera objects. The path consists of
        `n_selected - 1` segments (open) or `n_selected` segments (closed), each with
        `n_points_per_segment` cameras.

    Raises:
        ValueError: If `n_selected` is smaller than 2.
    """
    if n_selected < 2:
        raise ValueError("n_selected must be at least 2 to interpolate a camera path")

    # Extract positions
    positions = np.array([cam.T for cam in existing_cameras])

    # === Normalize positions
    mean_pos = np.mean(positions, axis=0)
    scale_pos = np.std(positions, axis=0)
    scale_pos[scale_pos == 0] = 1.0  # avoid division by zero

    positions_normalized = (positions - mean_pos) / scale_pos

    # === Features for clustering (only positions, not rotations)
    features = positions_normalized

    # === 1. GMM clustering
    gmm = GaussianMixture(n_components=n_clusters, covariance_type='full', random_state=42)
    cluster_labels = gmm.fit_predict(features)

    # Members and centers of the non-empty clusters, kept in two parallel lists so that
    # an index into `cluster_centers` always refers to the matching member array even
    # when some mixture component received no camera.
    cluster_members = []
    cluster_centers = []

    for cluster_id in range(n_clusters):
        member_indices = np.where(cluster_labels == cluster_id)[0]
        if len(member_indices) == 0:
            continue
        cluster_members.append(member_indices)
        cluster_centers.append(np.mean(features[member_indices], axis=0))

    cluster_centers = np.stack(cluster_centers)

    # === 2. Solve TSP between cluster centers (indices into the two lists above)
    cluster_order = solve_tsp_2opt(cluster_centers)

    # === 3. For each cluster, solve TSP inside cluster
    final_indices = []
    for center_index in cluster_order:
        member_indices = cluster_members[center_index]

        if len(member_indices) == 1:
            final_indices.append(member_indices[0])
            continue

        local_order = solve_tsp_nearest_neighbor(features[member_indices])
        final_indices.extend(member_indices[local_order])

    ordered_cameras = [existing_cameras[i] for i in final_indices]
    ordered_positions = positions_normalized[final_indices]

    # === 4. Subsample uniformly
    idx = np.linspace(0, len(ordered_positions) - 1, n_selected).astype(int)
    sampled_positions = ordered_positions[idx]
    sampled_cameras = [ordered_cameras[i] for i in idx]

    # === 5. Pad the control points so that every segment has a neighbour on both sides.
    #        Open path: repeat the end points once (n_selected - 1 segments).
    #        Closed path: wrap around (n_selected segments, the last one returns to the start).
    if closed:
        control_points = np.vstack([sampled_positions[-1], sampled_positions, sampled_positions[0], sampled_positions[1]])
        key_cameras = sampled_cameras + [sampled_cameras[0]]
    else:
        control_points = np.vstack([sampled_positions[0], sampled_positions, sampled_positions[-1]])
        key_cameras = sampled_cameras
    n_segments = len(control_points) - 3

    # === 6. Smooth path positions, one Catmull-Rom segment per pair of key frames
    path_positions = np.concatenate([
        catmull_rom_spline(control_points[i - 1], control_points[i], control_points[i + 1], control_points[i + 2], n_points_per_segment)
        for i in range(1, n_segments + 1)
    ], axis=0)

    # === 7. Denormalize
    path_positions = path_positions * scale_pos + mean_pos

    # === 8. SLERP for rotations. Key frame k sits at time k / n_segments and the
    #        samples of segment s cover [s, s + 1] / n_segments, matching the positions.
    rotations = R.from_matrix([cam.R for cam in key_cameras])
    key_times = np.linspace(0, 1, len(key_cameras))
    slerp = Slerp(key_times, rotations)

    segment_params = np.linspace(0, 1, n_points_per_segment)
    query_times = np.concatenate([(segment + segment_params) / n_segments for segment in range(n_segments)])
    interpolated_rotations = slerp(query_times)

    # === 9. Generate Camera objects
    reference_cam = existing_cameras[0]
    smooth_cameras = []

    for i, pos in enumerate(path_positions):
        R_interp = interpolated_rotations[i].as_matrix()

        smooth_cameras.append(Camera(
            R=R_interp,
            T=pos,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=(reference_cam.image_width, reference_cam.image_height),
            colmap_id=-1,
            depth_params=None,
            image=Image.fromarray(np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)),
            invdepthmap=None,
            image_name=f"clustered_smooth_path_i={i}",
            uid=i
        ))

    return smooth_cameras


# def generate_clustered_path(existing_cameras: List[Camera], 
#                              n_points_per_segment: int = 20, 
#                              d: float = 2.0,
#                              n_clusters: int = 5,
#                              closed: bool = False) -> List[Camera]:
#     """
#     Generate a smooth camera path using GMM clustering and TSP on cluster centers.

#     Args:
#         existing_cameras (List[Camera]): List of input cameras.
#         n_points_per_segment (int): Number of interpolated points per spline segment.
#         d (float): Distance ahead for estimating center of view.
#         n_clusters (int): Number of GMM clusters (zones).
#         closed (bool): Whether to close the path.

#     Returns:
#         List[Camera]: Smooth path of Camera objects.
#     """
#     # Extract positions and rotations
#     positions = np.array([cam.T for cam in existing_cameras])

#     # === Normalize positions
#     mean_pos = np.mean(positions, axis=0)
#     scale_pos = np.std(positions, axis=0)
#     scale_pos[scale_pos == 0] = 1.0

#     positions_normalized = (positions - mean_pos) / scale_pos

#     # === 1. GMM clustering (only positions)
#     gmm = GaussianMixture(n_components=n_clusters, covariance_type='full', random_state=42)
#     cluster_labels = gmm.fit_predict(positions_normalized)

#     cluster_centers = []
#     for cluster_id in range(n_clusters):
#         cluster_indices = np.where(cluster_labels == cluster_id)[0]
#         if len(cluster_indices) == 0:
#             continue
#         cluster_center = np.mean(positions_normalized[cluster_indices], axis=0)
#         cluster_centers.append(cluster_center)

#     cluster_centers = np.stack(cluster_centers)

#     # === 2. Solve TSP between cluster centers
#     cluster_order = solve_tsp_2opt(cluster_centers)

#     # === 3. Reorder cluster centers
#     ordered_centers = cluster_centers[cluster_order]

#     # === 4. Prepare Catmull-Rom spline
#     if closed:
#         ordered_centers = np.vstack([ordered_centers[-1], ordered_centers, ordered_centers[0], ordered_centers[1]])
#     else:
#         ordered_centers = np.vstack([ordered_centers[0], ordered_centers, ordered_centers[-1], ordered_centers[-1]])

#     # === 5. Generate smooth path positions
#     path_positions = []
#     for i in range(1, len(ordered_centers) - 2):
#         segment = catmull_rom_spline(ordered_centers[i-1], ordered_centers[i], ordered_centers[i+1], ordered_centers[i+2], n_points_per_segment)
#         path_positions.append(segment)
#     path_positions = np.concatenate(path_positions, axis=0)

#     # === 6. Denormalize back
#     path_positions = path_positions * scale_pos + mean_pos

#     # === 7. Generate dummy rotations (constant forward facing)
#     reference_cam = existing_cameras[0]
#     default_rotation = R.from_matrix(reference_cam.R)

#     # For simplicity, fixed rotation for all
#     smooth_cameras = []

#     for i, pos in enumerate(path_positions):
#         R_interp = default_rotation.as_matrix()

#         smooth_cameras.append(Camera(
#             R=R_interp,
#             T=pos,
#             FoVx=reference_cam.FoVx,
#             FoVy=reference_cam.FoVy,
#             resolution=(reference_cam.image_width, reference_cam.image_height),
#             colmap_id=-1,
#             depth_params=None,
#             image=Image.fromarray(np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)),
#             invdepthmap=None,
#             image_name=f"cluster_path_i={i}",
#             uid=i
#         ))

#     return smooth_cameras

from typing import List
import numpy as np
from sklearn.cluster import KMeans
from scipy.spatial.transform import Rotation as R, Slerp
from PIL import Image

def generate_clustered_path(existing_cameras: List[Camera],
                            n_points_per_segment: int = 20,
                            d: float = 2.0,
                            n_clusters: int = 5,
                            closed: bool = False) -> List[Camera]:
    """
    Generate a smooth camera path through K-Means cluster centers of the input camera positions.

    The `T` vectors of the input cameras are normalized per axis, clustered with
    K-Means, the cluster centers are ordered with `solve_tsp_2opt`, and consecutive
    centers are joined with `catmull_rom_spline`. Every generated camera reuses the
    rotation, field of view and resolution of the first input camera and carries an
    all-black placeholder image.

    Args:
        existing_cameras (List[Camera]): List of input cameras.
        n_points_per_segment (int): Number of interpolated points per spline segment.
        d (float): Currently unused by this function; kept for interface compatibility.
        n_clusters (int): Requested number of KMeans clusters (zones). Clamped to the
            number of input cameras, since KMeans cannot fit more clusters than samples.
        closed (bool): Whether to close the path (wrap from the last center back to the first).

    Returns:
        List[Camera]: Smooth path of Camera objects. Empty if `existing_cameras` is empty.
    """
    # Nothing to cluster: return an empty path instead of failing inside numpy.
    if len(existing_cameras) == 0:
        return []

    # Extract positions
    positions = np.array([cam.T for cam in existing_cameras])

    # === Normalize positions (per-axis zero mean / unit std)
    mean_pos = np.mean(positions, axis=0)
    scale_pos = np.std(positions, axis=0)
    # Axes with no spread would divide by zero; leave them unscaled.
    scale_pos[scale_pos == 0] = 1.0

    positions_normalized = (positions - mean_pos) / scale_pos

    # === 1. K-Means clustering (only positions)
    # KMeans raises when asked for more clusters than samples, so cap the request.
    n_clusters = min(n_clusters, len(existing_cameras))
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
    cluster_labels = kmeans.fit_predict(positions_normalized)

    # Average the members of every non-empty cluster; empty clusters are skipped.
    cluster_centers = []
    for cluster_id in range(n_clusters):
        members = positions_normalized[cluster_labels == cluster_id]
        if len(members) == 0:
            continue
        cluster_centers.append(np.mean(members, axis=0))

    cluster_centers = np.stack(cluster_centers)

    # === 2. Solve TSP between cluster centers
    cluster_order = solve_tsp_2opt(cluster_centers)

    # === 3. Reorder cluster centers
    ordered_centers = cluster_centers[cluster_order]

    # === 4. Prepare Catmull-Rom spline (pad with extra control points at both ends)
    if closed:
        # Wrap around: previous-of-first is the last center, then first and second
        # follow the last one. The modulo keeps a single-center path from indexing
        # past the end.
        wrap_second = ordered_centers[1 % len(ordered_centers)]
        ordered_centers = np.vstack([ordered_centers[-1], ordered_centers, ordered_centers[0], wrap_second])
    else:
        # NOTE(review): the tail is padded with the last center twice, so the final
        # segment below has identical 2nd, 3rd and 4th control points. Kept as-is to
        # preserve the number of generated cameras; confirm this is intended.
        ordered_centers = np.vstack([ordered_centers[0], ordered_centers, ordered_centers[-1], ordered_centers[-1]])

    # === 5. Generate smooth path positions
    path_positions = np.concatenate(
        [
            catmull_rom_spline(ordered_centers[i - 1], ordered_centers[i],
                               ordered_centers[i + 1], ordered_centers[i + 2],
                               n_points_per_segment)
            for i in range(1, len(ordered_centers) - 2)
        ],
        axis=0,
    )

    # === 6. Denormalize back
    path_positions = path_positions * scale_pos + mean_pos

    # === 7. Generate dummy rotations (constant: the first camera's rotation)
    reference_cam = existing_cameras[0]
    # Round-trip through scipy's Rotation exactly once; the result is loop-invariant.
    fixed_rotation = R.from_matrix(reference_cam.R).as_matrix()

    smooth_cameras = []
    for i, pos in enumerate(path_positions):
        # Black placeholder image with the reference camera's resolution.
        blank_image = Image.fromarray(
            np.zeros((reference_cam.image_height, reference_cam.image_width, 3), dtype=np.uint8)
        )
        smooth_cameras.append(Camera(
            R=fixed_rotation.copy(),  # separate array per camera, as before
            T=pos,
            FoVx=reference_cam.FoVx,
            FoVy=reference_cam.FoVy,
            resolution=(reference_cam.image_width, reference_cam.image_height),
            colmap_id=-1,
            depth_params=None,
            image=blank_image,
            invdepthmap=None,
            image_name=f"cluster_path_i={i}",
            uid=i
        ))

    return smooth_cameras




def visualize_image_with_points(image, points):
    """
    Display an image and draw a set of 2D points over it. Handy for inspecting correspondences.

    Parameters:
    - image: PIL Image object (converted with np.array before display)
    - points: Numpy array of shape [N, 2]; column 0 is used as x, column 1 as y

    Returns:
    - None (the figure is shown through pyplot)
    """
    # Turn the image into an array that imshow can draw
    pixels = np.array(image)

    # One square 7x7 figure with a single axes
    _, axes = plt.subplots(figsize=(7, 7))
    axes.imshow(pixels)

    # Overlay the points as small red dots
    xs = points[:, 0]
    ys = points[:, 1]
    axes.scatter(xs, ys, color='red', marker='o', s=1)

    plt.show()


def visualize_correspondences(image1, points1, image2, points2):
    """
    Show two images side by side with their key points and a line joining each matched pair.

    Parameters:
    - image1: PIL Image object (left image); its `width` is used to shift the right-hand points
    - points1: Numpy array of shape [N, 2] containing (x, y) coordinates of key points for image1
    - image2: PIL Image object (right image)
    - points2: Numpy array of shape [N, 2] containing (x, y) coordinates of key points for image2

    Returns:
    - None (the figure is shown through pyplot)
    """
    # Stack both images next to each other along the width axis
    left_pixels = np.array(image1)
    right_pixels = np.array(image2)
    side_by_side = np.concatenate((left_pixels, right_pixels), axis=1)

    # One square 10x10 figure holding the combined image
    _, axes = plt.subplots(figsize=(10, 10))
    axes.imshow(side_by_side)

    # Left key points in red
    axes.scatter(points1[:, 0], points1[:, 1], color='red', marker='o', s=10)

    # Right key points in blue, shifted by the left image's width
    x_shift = image1.width
    axes.scatter(points2[:, 0] + x_shift, points2[:, 1], color='blue', marker='o', s=10)

    # Connect the i-th point of points1 with the i-th point of points2;
    # no color is passed, so matplotlib picks one per line.
    for idx, left_pt in enumerate(points1):
        right_pt = points2[idx]
        axes.plot([left_pt[0], right_pt[0] + x_shift], [left_pt[1], right_pt[1]])

    plt.show()