xinjie.wang committed
Commit 146eff7 · 1 Parent(s): 0111b97
asset3d_gen/data/backproject_v2.py CHANGED
@@ -2,7 +2,7 @@ import argparse
 import logging
 import math
 import os
-
+import spaces
 import cv2
 import numpy as np
 import nvdiffrast.torch as dr
@@ -247,7 +247,7 @@ class TextureBacker:
             (2 / 512) * max(self.render_wh[0], self.render_wh[1])
         )
 
-    def load_mesh(self, mesh: trimesh.Trimesh) -> None:
+    def load_mesh(self, mesh: trimesh.Trimesh) -> trimesh.Trimesh:
         mesh.vertices, scale, center = normalize_vertices_array(mesh.vertices)
         self.scale, self.center = scale, center
 
@@ -257,9 +257,7 @@ class TextureBacker:
         mesh.faces = indices
         mesh.visual.uv = uvs
 
-        self.vertices = torch.from_numpy(mesh.vertices).to(self.device).float()
-        self.faces = torch.from_numpy(mesh.faces).to(self.device).to(torch.int)
-        self.uv_map = torch.from_numpy(mesh.visual.uv).to(self.device).float()
+        return mesh
 
     def get_mesh_np_attrs(
         self,
@@ -397,32 +395,32 @@ class TextureBacker:
         return texture_merge, trust_map_merge > 1e-8
 
     def uv_inpaint(
-        self, texture: torch.Tensor, mask: torch.Tensor
+        self, texture: np.ndarray, mask: np.ndarray
     ) -> np.ndarray:
-        texture_np = texture.cpu().numpy()
-        mask_np = (mask.squeeze(-1).cpu().numpy() * 255).astype(np.uint8)
         vertices, faces, uv_map = self.get_mesh_np_attrs()
 
-        texture_np, mask_np = _texture_inpaint_smooth(
-            texture_np, mask_np, vertices, faces, uv_map
+        texture, mask = _texture_inpaint_smooth(
+            texture, mask, vertices, faces, uv_map
         )
-        texture_np = texture_np.clip(0, 1)
-        texture_np = cv2.inpaint(
-            (texture_np * 255).astype(np.uint8),
-            255 - mask_np,
+        texture = texture.clip(0, 1)
+        texture = cv2.inpaint(
+            (texture * 255).astype(np.uint8),
+            255 - mask,
             3,
             cv2.INPAINT_NS,
         )
 
-        return texture_np
+        return texture
 
-    def __call__(
+    @spaces.GPU
+    def cuda_forward(
         self,
         colors: list[Image.Image],
         mesh: trimesh.Trimesh,
-        output_path: str,
     ) -> trimesh.Trimesh:
-        self.load_mesh(mesh)
+        self.vertices = torch.from_numpy(mesh.vertices).to(self.device).float()
+        self.faces = torch.from_numpy(mesh.faces).to(self.device).to(torch.int)
+        self.uv_map = torch.from_numpy(mesh.visual.uv).to(self.device).float()
         rendered_depth, masks = self.renderer.render_depth(
             self.vertices, self.faces
         )
@@ -448,12 +446,26 @@ class TextureBacker:
             weighted_cos_maps.append(weight * (cos_map**4))
 
         texture, mask = self.fast_bake_texture(textures, weighted_cos_maps)
-        texture_np = self.uv_inpaint(texture, mask)
+
+        texture_np = texture.cpu().numpy()
+        mask_np = (mask.squeeze(-1).cpu().numpy() * 255).astype(np.uint8)
+
+        return texture_np, mask_np
+
+    def __call__(
+        self,
+        colors: list[Image.Image],
+        mesh: trimesh.Trimesh,
+        output_path: str,
+    ) -> trimesh.Trimesh:
+        mesh = self.load_mesh(mesh)
+        texture_np, mask_np = self.cuda_forward(colors, mesh)
+
+        texture_np = self.uv_inpaint(texture_np, mask_np)
         texture_np = post_process_texture(texture_np)
         vertices, faces, uv_map = self.get_mesh_np_attrs(
             self.scale, self.center
         )
-
         textured_mesh = save_mesh_with_mtl(
             vertices, faces, uv_map, texture_np, output_path
         )
@@ -567,7 +579,6 @@ def entrypoint(
         )
         save_dir = os.path.dirname(args.output_path)
         os.makedirs(save_dir, exist_ok=True)
-        color_grid.save(f"{save_dir}/color_grid.png")
         color_grid = delight_model(color_grid)
         color_grid.save(f"{save_dir}/color_grid_delight.png")
 
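The net effect of the change above is to split TextureBacker.__call__ into a CPU-only mesh preparation step (load_mesh), a GPU stage (cuda_forward) marked with the Hugging Face ZeroGPU spaces.GPU decorator, and CPU post-processing (uv_inpaint and mesh export). Below is a minimal, self-contained sketch of that pattern, assuming the spaces package is available; the class and method names are illustrative and are not part of this repository.

import numpy as np
import spaces
import torch


class GpuStageSketch:
    """Keep data as numpy on the CPU; touch CUDA only inside the decorated stage."""

    def __init__(self, device: str = "cuda"):
        self.device = device

    def prepare(self, array: np.ndarray) -> np.ndarray:
        # CPU-only preparation: no CUDA calls here, so a ZeroGPU worker can run
        # this before a GPU has been attached to the process.
        return np.ascontiguousarray(array, dtype=np.float32)

    @spaces.GPU  # a GPU is attached only for the duration of this call
    def cuda_stage(self, array: np.ndarray) -> np.ndarray:
        tensor = torch.from_numpy(array).to(self.device)
        result = tensor * 2  # stand-in for the rendering / baking work
        return result.cpu().numpy()  # hand plain numpy back to the CPU side

    def __call__(self, array: np.ndarray) -> np.ndarray:
        prepared = self.prepare(array)
        baked = self.cuda_stage(prepared)
        return baked  # CPU post-processing (inpainting, saving) would go here

This also explains why uv_inpaint now takes numpy arrays and why cuda_forward returns texture_np and mask_np: everything crossing the decorated boundary is plain numpy, so inpainting and mesh export no longer need a GPU.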
asset3d_gen/data/backup/backproject_v2 copy.py DELETED
@@ -1,652 +0,0 @@
1
- import argparse
2
- import logging
3
- import math
4
- import os
5
-
6
- import cv2
7
- import numpy as np
8
- import nvdiffrast.torch as dr
9
- import torch
10
- import torch.nn.functional as F
11
- try:
12
- from torchvision.transforms import functional as tF
13
- except ImportError as e:
14
- tF = None
15
- import trimesh
16
- import xatlas
17
- from PIL import Image
18
- from asset3d_gen.data.mesh_operator import MeshFixer
19
- from asset3d_gen.data.utils import (
20
- CameraSetting,
21
- DiffrastRender,
22
- get_images_from_grid,
23
- init_kal_camera,
24
- normalize_vertices_array,
25
- post_process_texture,
26
- save_mesh_with_mtl,
27
- )
28
- from asset3d_gen.models.delight_model import DelightingModel
29
- from asset3d_gen.models.sr_model import ImageRealESRGAN
30
-
31
- logging.basicConfig(
32
- format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
33
- )
34
- logger = logging.getLogger(__name__)
35
-
36
-
37
- __all__ = [
38
- "TextureBacker",
39
- ]
40
-
41
-
42
- def transform_vertices(
43
- mtx: torch.Tensor, pos: torch.Tensor, keepdim: bool = False
44
- ) -> torch.Tensor:
45
- """Transform 3D vertices using a projection matrix."""
46
- t_mtx = torch.as_tensor(mtx, device=pos.device, dtype=pos.dtype)
47
- if pos.size(-1) == 3:
48
- pos = torch.cat([pos, torch.ones_like(pos[..., :1])], dim=-1)
49
-
50
- result = pos @ t_mtx.T
51
-
52
- return result if keepdim else result.unsqueeze(0)
53
-
54
-
55
- def _bilinear_interpolation_scattering(
56
- image_h: int, image_w: int, coords: torch.Tensor, values: torch.Tensor
57
- ) -> torch.Tensor:
58
- """Bilinear interpolation scattering for grid-based value accumulation."""
59
- device = values.device
60
- dtype = values.dtype
61
- C = values.shape[-1]
62
-
63
- indices = coords * torch.tensor(
64
- [image_h - 1, image_w - 1], dtype=dtype, device=device
65
- )
66
- i, j = indices.unbind(-1)
67
-
68
- i0, j0 = (
69
- indices.floor()
70
- .long()
71
- .clamp(0, image_h - 2)
72
- .clamp(0, image_w - 2)
73
- .unbind(-1)
74
- )
75
- i1, j1 = i0 + 1, j0 + 1
76
-
77
- w_i = i - i0.float()
78
- w_j = j - j0.float()
79
- weights = torch.stack(
80
- [(1 - w_i) * (1 - w_j), (1 - w_i) * w_j, w_i * (1 - w_j), w_i * w_j],
81
- dim=1,
82
- )
83
-
84
- indices_comb = torch.stack(
85
- [
86
- torch.stack([i0, j0], dim=1),
87
- torch.stack([i0, j1], dim=1),
88
- torch.stack([i1, j0], dim=1),
89
- torch.stack([i1, j1], dim=1),
90
- ],
91
- dim=1,
92
- )
93
-
94
- grid = torch.zeros(image_h, image_w, C, device=device, dtype=dtype)
95
- cnt = torch.zeros(image_h, image_w, 1, device=device, dtype=dtype)
96
-
97
- for k in range(4):
98
- idx = indices_comb[:, k]
99
- w = weights[:, k].unsqueeze(-1)
100
-
101
- stride = torch.tensor([image_w, 1], device=device, dtype=torch.long)
102
- flat_idx = (idx * stride).sum(-1)
103
-
104
- grid.view(-1, C).scatter_add_(
105
- 0, flat_idx.unsqueeze(-1).expand(-1, C), values * w
106
- )
107
- cnt.view(-1, 1).scatter_add_(0, flat_idx.unsqueeze(-1), w)
108
-
109
- mask = cnt.squeeze(-1) > 0
110
- grid[mask] = grid[mask] / cnt[mask].repeat(1, C)
111
-
112
- return grid
113
-
114
-
115
- def _texture_inpaint_smooth(
116
- texture: np.ndarray,
117
- mask: np.ndarray,
118
- vertices: np.ndarray,
119
- faces: np.ndarray,
120
- uv_map: np.ndarray,
121
- ) -> tuple[np.ndarray, np.ndarray]:
122
- """Perform texture inpainting using vertex-based color propagation."""
123
- image_h, image_w, C = texture.shape
124
- N = vertices.shape[0]
125
-
126
- # Initialize vertex data structures
127
- vtx_mask = np.zeros(N, dtype=np.float32)
128
- vtx_colors = np.zeros((N, C), dtype=np.float32)
129
- unprocessed = []
130
- adjacency = [[] for _ in range(N)]
131
-
132
- # Build adjacency graph and initial color assignment
133
- for face_idx in range(faces.shape[0]):
134
- for k in range(3):
135
- uv_idx_k = faces[face_idx, k]
136
- v_idx = faces[face_idx, k]
137
-
138
- # Convert UV to pixel coordinates with boundary clamping
139
- u = np.clip(
140
- int(round(uv_map[uv_idx_k, 0] * (image_w - 1))), 0, image_w - 1
141
- )
142
- v = np.clip(
143
- int(round((1.0 - uv_map[uv_idx_k, 1]) * (image_h - 1))),
144
- 0,
145
- image_h - 1,
146
- )
147
-
148
- if mask[v, u]:
149
- vtx_mask[v_idx] = 1.0
150
- vtx_colors[v_idx] = texture[v, u]
151
- elif v_idx not in unprocessed:
152
- unprocessed.append(v_idx)
153
-
154
- # Build undirected adjacency graph
155
- neighbor = faces[face_idx, (k + 1) % 3]
156
- if neighbor not in adjacency[v_idx]:
157
- adjacency[v_idx].append(neighbor)
158
- if v_idx not in adjacency[neighbor]:
159
- adjacency[neighbor].append(v_idx)
160
-
161
- # Color propagation with dynamic stopping
162
- remaining_iters, prev_count = 2, 0
163
- while remaining_iters > 0:
164
- current_unprocessed = []
165
-
166
- for v_idx in unprocessed:
167
- valid_neighbors = [n for n in adjacency[v_idx] if vtx_mask[n] > 0]
168
- if not valid_neighbors:
169
- current_unprocessed.append(v_idx)
170
- continue
171
-
172
- # Calculate inverse square distance weights
173
- neighbors_pos = vertices[valid_neighbors]
174
- dist_sq = np.sum((vertices[v_idx] - neighbors_pos) ** 2, axis=1)
175
- weights = 1 / np.maximum(dist_sq, 1e-8)
176
-
177
- vtx_colors[v_idx] = np.average(
178
- vtx_colors[valid_neighbors], weights=weights, axis=0
179
- )
180
- vtx_mask[v_idx] = 1.0
181
-
182
- # Update iteration control
183
- if len(current_unprocessed) == prev_count:
184
- remaining_iters -= 1
185
- else:
186
- remaining_iters = min(remaining_iters + 1, 2)
187
- prev_count = len(current_unprocessed)
188
- unprocessed = current_unprocessed
189
-
190
- # Generate output texture
191
- inpainted_texture, updated_mask = texture.copy(), mask.copy()
192
- for face_idx in range(faces.shape[0]):
193
- for k in range(3):
194
- v_idx = faces[face_idx, k]
195
- if not vtx_mask[v_idx]:
196
- continue
197
-
198
- # UV coordinate conversion
199
- uv_idx_k = faces[face_idx, k]
200
- u = np.clip(
201
- int(round(uv_map[uv_idx_k, 0] * (image_w - 1))), 0, image_w - 1
202
- )
203
- v = np.clip(
204
- int(round((1.0 - uv_map[uv_idx_k, 1]) * (image_h - 1))),
205
- 0,
206
- image_h - 1,
207
- )
208
-
209
- inpainted_texture[v, u] = vtx_colors[v_idx]
210
- updated_mask[v, u] = 255
211
-
212
- return inpainted_texture, updated_mask
213
-
214
-
215
- def interp_tensers(tensors: list[torch.Tensor], target_wh: tuple[int, int]) -> list[torch.Tensor]:
216
- for idx in range(len(tensors)):
217
- tensor = tensors[idx].permute(2, 0, 1)
218
- tensor = tF.resize(tensor, target_wh[::-1], antialias=True)
219
- tensors[idx] = tensor.permute(1, 2, 0)
220
-
221
- return tensors
222
-
223
-
224
- class TextureBacker:
225
- """Texture baking pipeline for multi-view projection and fusion."""
226
-
227
- def __init__(
228
- self,
229
- camera_params: CameraSetting,
230
- view_weights: list[float],
231
- render_wh: tuple[int, int] = (2048, 2048),
232
- texture_wh: tuple[int, int] = (2048, 2048),
233
- bake_angle_thresh: int = 75,
234
- mask_thresh: float = 0.5,
235
- ):
236
- camera = init_kal_camera(camera_params)
237
- mv = camera.view_matrix() # (n 4 4) world2cam
238
- p = camera.intrinsics.projection_matrix()
239
- # NOTE: add a negative sign at P[0, 2] as the y axis is flipped in `nvdiffrast` output. # noqa
240
- p[:, 1, 1] = -p[:, 1, 1]
241
- self.renderer = DiffrastRender(
242
- p_matrix=p,
243
- mv_matrix=mv,
244
- resolution_hw=camera_params.resolution_hw,
245
- context=dr.RasterizeCudaContext(),
246
- mask_thresh=mask_thresh,
247
- grad_db=False,
248
- device=camera_params.device,
249
- antialias_mask=True,
250
- )
251
- self.camera = camera
252
- self.view_weights = view_weights
253
- self.device = camera_params.device
254
- self.render_wh = render_wh
255
- self.texture_wh = texture_wh
256
-
257
- self.bake_angle_thresh = bake_angle_thresh
258
- self.bake_unreliable_kernel_size = int(
259
- (2 / 512) * max(self.render_wh[0], self.render_wh[1])
260
- )
261
-
262
- def load_mesh(self, mesh: trimesh.Trimesh) -> None:
263
- mesh.vertices, scale, center = normalize_vertices_array(mesh.vertices)
264
- self.scale, self.center = scale, center
265
-
266
- vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)
267
- uvs[:, 1] = 1 - uvs[:, 1]
268
- mesh.vertices = mesh.vertices[vmapping]
269
- mesh.faces = indices
270
- mesh.visual.uv = uvs
271
-
272
- self.vertices = torch.from_numpy(mesh.vertices).to(self.device).float()
273
- self.faces = torch.from_numpy(mesh.faces).to(self.device).to(torch.int)
274
- self.uv_map = torch.from_numpy(mesh.visual.uv).to(self.device).float()
275
-
276
- def get_mesh_np_attrs(
277
- self,
278
- scale: float = None,
279
- center: np.ndarray = None,
280
- ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
281
- vertices = self.vertices.cpu().numpy()
282
- faces = self.faces.cpu().numpy()
283
- uv_map = self.uv_map.cpu().numpy()
284
- uv_map[:, 1] = 1.0 - uv_map[:, 1]
285
-
286
- if scale is not None:
287
- vertices = vertices / scale
288
- if center is not None:
289
- vertices = vertices + center
290
-
291
- return vertices, faces, uv_map
292
-
293
- def _render_depth_edges(self, depth_image: torch.Tensor) -> torch.Tensor:
294
- depth_image_np = depth_image.cpu().numpy()
295
- depth_image_np = (depth_image_np * 255).astype(np.uint8)
296
- depth_edges = cv2.Canny(depth_image_np, 30, 80)
297
- sketch_image = (
298
- torch.from_numpy(depth_edges).to(depth_image.device).float() / 255
299
- )
300
- sketch_image = sketch_image.unsqueeze(-1)
301
-
302
- return sketch_image
303
-
304
- def compute_enhanced_viewnormal(
305
- self, mv_mtx: torch.Tensor, vertices: torch.Tensor, faces: torch.Tensor
306
- ) -> torch.Tensor:
307
- rast, _ = self.renderer.compute_dr_raster(vertices, faces)
308
- rendered_view_normals = []
309
- for idx in range(len(mv_mtx)):
310
- pos_cam = transform_vertices(mv_mtx[idx], vertices, keepdim=True)
311
- pos_cam = pos_cam[:, :3] / pos_cam[:, 3:]
312
- v0, v1, v2 = (pos_cam[faces[:, i]] for i in range(3))
313
- face_norm = F.normalize(
314
- torch.cross(v1 - v0, v2 - v0, dim=-1), dim=-1
315
- )
316
- vertex_norm = (
317
- torch.from_numpy(
318
- trimesh.geometry.mean_vertex_normals(
319
- len(pos_cam), faces.cpu(), face_norm.cpu()
320
- )
321
- )
322
- .to(vertices.device)
323
- .contiguous()
324
- )
325
- im_base_normals, _ = dr.interpolate(
326
- vertex_norm[None, ...].float(),
327
- rast[idx : idx + 1],
328
- faces.to(torch.int32),
329
- )
330
- rendered_view_normals.append(im_base_normals)
331
-
332
- rendered_view_normals = torch.cat(rendered_view_normals, dim=0)
333
-
334
- return rendered_view_normals
335
-
336
- def back_project(
337
- self, image, vis_mask, depth, normal, uv
338
- ) -> tuple[torch.Tensor, torch.Tensor]:
339
- image = np.array(image)
340
- image = torch.as_tensor(image, device=self.device, dtype=torch.float32)
341
- if image.ndim == 2:
342
- image = image.unsqueeze(-1)
343
- image = image / 255
344
-
345
- depth_inv = (1.0 - depth) * vis_mask
346
- sketch_image = self._render_depth_edges(depth_inv)
347
-
348
- cos = F.cosine_similarity(
349
- torch.tensor([[0, 0, 1]], device=self.device),
350
- normal.view(-1, 3),
351
- ).view_as(normal[..., :1])
352
- cos[cos < np.cos(np.radians(self.bake_angle_thresh))] = 0
353
-
354
- k = self.bake_unreliable_kernel_size * 2 + 1
355
- kernel = torch.ones((1, 1, k, k), device=self.device)
356
-
357
- vis_mask = vis_mask.permute(2, 0, 1).unsqueeze(0).float()
358
- vis_mask = F.conv2d(
359
- 1.0 - vis_mask,
360
- kernel,
361
- padding=k // 2,
362
- )
363
- vis_mask = 1.0 - (vis_mask > 0).float()
364
- vis_mask = vis_mask.squeeze(0).permute(1, 2, 0)
365
-
366
- sketch_image = sketch_image.permute(2, 0, 1).unsqueeze(0)
367
- sketch_image = F.conv2d(sketch_image, kernel, padding=k // 2)
368
- sketch_image = (sketch_image > 0).float()
369
- sketch_image = sketch_image.squeeze(0).permute(1, 2, 0)
370
- vis_mask = vis_mask * (sketch_image < 0.5)
371
-
372
- cos[vis_mask == 0] = 0
373
- valid_pixels = (vis_mask != 0).view(-1)
374
-
375
- return (
376
- self._scatter_texture(uv, image, valid_pixels),
377
- self._scatter_texture(uv, cos, valid_pixels),
378
- )
379
-
380
- def _scatter_texture(self, uv, data, mask):
381
- def __filter_data(data, mask):
382
- return data.view(-1, data.shape[-1])[mask]
383
-
384
- return _bilinear_interpolation_scattering(
385
- self.texture_wh[1],
386
- self.texture_wh[0],
387
- __filter_data(uv, mask)[..., [1, 0]],
388
- __filter_data(data, mask),
389
- )
390
-
391
- @torch.no_grad()
392
- def fast_bake_texture(
393
- self, textures: list[torch.Tensor], confidence_maps: list[torch.Tensor]
394
- ) -> tuple[torch.Tensor, torch.Tensor]:
395
- channel = textures[0].shape[-1]
396
- texture_merge = torch.zeros(self.texture_wh + [channel]).to(
397
- self.device
398
- )
399
- trust_map_merge = torch.zeros(self.texture_wh + [1]).to(self.device)
400
- for texture, cos_map in zip(textures, confidence_maps):
401
- view_sum = (cos_map > 0).sum()
402
- painted_sum = ((cos_map > 0) * (trust_map_merge > 0)).sum()
403
- if painted_sum / view_sum > 0.99:
404
- continue
405
- texture_merge += texture * cos_map
406
- trust_map_merge += cos_map
407
- texture_merge = texture_merge / torch.clamp(trust_map_merge, min=1e-8)
408
-
409
- return texture_merge, trust_map_merge > 1e-8
410
-
411
- def uv_inpaint(
412
- self, texture: torch.Tensor, mask: torch.Tensor
413
- ) -> np.ndarray:
414
- texture_np = texture.cpu().numpy()
415
- mask_np = (mask.squeeze(-1).cpu().numpy() * 255).astype(np.uint8)
416
- vertices, faces, uv_map = self.get_mesh_np_attrs()
417
-
418
- texture_np, mask_np = _texture_inpaint_smooth(
419
- texture_np, mask_np, vertices, faces, uv_map
420
- )
421
- texture_np = texture_np.clip(0, 1)
422
- texture_np = cv2.inpaint(
423
- (texture_np * 255).astype(np.uint8),
424
- 255 - mask_np,
425
- 3,
426
- cv2.INPAINT_NS,
427
- )
428
-
429
- return texture_np
430
-
431
- def __call__(
432
- self,
433
- colors: list[Image.Image],
434
- mesh: trimesh.Trimesh,
435
- output_path: str,
436
- ) -> trimesh.Trimesh:
437
- import time
438
- start = time.time()
439
- self.load_mesh(mesh)
440
- print("load_mesh", time.time() - start)
441
-
442
- start = time.time()
443
- rendered_depth, masks = self.renderer.render_depth(
444
- self.vertices, self.faces
445
- )
446
- norm_deps = self.renderer.normalize_map_by_mask(rendered_depth, masks)
447
- render_uvs, _ = self.renderer.render_uv(
448
- self.vertices, self.faces, self.uv_map
449
- )
450
- view_normals = self.compute_enhanced_viewnormal(
451
- self.renderer.mv_mtx, self.vertices, self.faces
452
- )
453
- print("0", time.time() - start)
454
-
455
- textures, weighted_cos_maps = [], []
456
-
457
- start = time.time()
458
- for color, mask, dep, normal, uv, weight in zip(
459
- colors,
460
- masks,
461
- norm_deps,
462
- view_normals,
463
- render_uvs,
464
- self.view_weights,
465
- ):
466
- mask, dep, normal, uv = interp_tensers([mask, dep, normal, uv], self.render_wh)
467
- texture, cos_map = self.back_project(color, mask, dep, normal, uv)
468
- textures.append(texture)
469
- weighted_cos_maps.append(weight * (cos_map**4))
470
- print("1", time.time() - start)
471
- start = time.time()
472
- texture, mask = self.fast_bake_texture(textures, weighted_cos_maps)
473
- print("2", time.time() - start)
474
- start = time.time()
475
- texture_np = self.uv_inpaint(texture, mask)
476
- print("3", time.time() - start)
477
- start = time.time()
478
- texture_np = post_process_texture(texture_np)
479
- vertices, faces, uv_map = self.get_mesh_np_attrs(
480
- self.scale, self.center
481
- )
482
-
483
- textured_mesh = save_mesh_with_mtl(
484
- vertices, faces, uv_map, texture_np, output_path
485
- )
486
- print("4", time.time() - start)
487
-
488
- return textured_mesh
489
-
490
-
491
- def parse_args():
492
- parser = argparse.ArgumentParser(description="Backproject texture")
493
- parser.add_argument(
494
- "--color_path",
495
- type=str,
496
- help="Multiview color image in 6x512x512 file path",
497
- )
498
- parser.add_argument(
499
- "--mesh_path",
500
- type=str,
501
- help="Mesh path, .obj, .glb or .ply",
502
- )
503
- parser.add_argument(
504
- "--output_path",
505
- type=str,
506
- help="Output mesh path with suffix",
507
- )
508
- parser.add_argument(
509
- "--num_images", type=int, default=6, help="Number of images to render."
510
- )
511
- parser.add_argument(
512
- "--elevation",
513
- nargs=2,
514
- type=float,
515
- default=[20.0, -10.0],
516
- help="Elevation angles for the camera (default: [20.0, -10.0])",
517
- )
518
- parser.add_argument(
519
- "--distance",
520
- type=float,
521
- default=5,
522
- help="Camera distance (default: 5)",
523
- )
524
- parser.add_argument(
525
- "--resolution_hw",
526
- type=int,
527
- nargs=2,
528
- default=(2048, 2048),
529
- help="Resolution of the mesh rendering",
530
- )
531
- parser.add_argument(
532
- "--target_hw",
533
- type=int,
534
- nargs=2,
535
- default=(2048, 2048),
536
- help="Target rendering images resolution",
537
- )
538
- parser.add_argument(
539
- "--fov",
540
- type=float,
541
- default=30,
542
- help="Field of view in degrees (default: 30)",
543
- )
544
- parser.add_argument(
545
- "--device",
546
- type=str,
547
- choices=["cpu", "cuda"],
548
- default="cuda",
549
- help="Device to run on (default: `cuda`)",
550
- )
551
- parser.add_argument(
552
- "--skip_fix_mesh", action="store_true", help="Fix mesh geometry."
553
- )
554
- parser.add_argument(
555
- "--texture_wh",
556
- nargs=2,
557
- type=int,
558
- default=[2048, 2048],
559
- help="Texture resolution width and height",
560
- )
561
- parser.add_argument(
562
- "--mesh_sipmlify_ratio",
563
- type=float,
564
- default=0.9,
565
- help="Mesh simplification ratio (default: 0.9)",
566
- )
567
- parser.add_argument(
568
- "--delight", action="store_true", help="Use delighting model."
569
- )
570
- args = parser.parse_args()
571
-
572
- return args
573
-
574
-
575
- def entrypoint(
576
- delight_model: DelightingModel = None,
577
- imagesr_model: ImageRealESRGAN = None,
578
- **kwargs,
579
- ) -> trimesh.Trimesh:
580
- args = parse_args()
581
- for k, v in kwargs.items():
582
- if hasattr(args, k) and v is not None:
583
- setattr(args, k, v)
584
-
585
- # Setup camera parameters.
586
- camera_params = CameraSetting(
587
- num_images=args.num_images,
588
- elevation=args.elevation,
589
- distance=args.distance,
590
- resolution_hw=args.resolution_hw,
591
- fov=math.radians(args.fov),
592
- device=args.device,
593
- )
594
- view_weights = [1, 0.1, 0.02, 0.1, 1, 0.02]
595
-
596
- color_grid = Image.open(args.color_path)
597
- if args.delight:
598
- if delight_model is None:
599
- delight_model = DelightingModel(
600
- model_path="/horizon-bucket/robot_lab/users/xinjie.wang/weights/hunyuan3d-delight-v2-0" # noqa
601
- )
602
- save_dir = os.path.dirname(args.output_path)
603
- os.makedirs(save_dir, exist_ok=True)
604
- color_grid.save(f"{save_dir}/color_grid.png")
605
- color_grid = delight_model(color_grid)
606
- color_grid.save(f"{save_dir}/color_grid_delight.png")
607
-
608
- multiviews = get_images_from_grid(color_grid, img_size=512)
609
-
610
- # Use RealESRGAN_x4plus for x4 (512->2048) image super resolution.
611
- if imagesr_model is None:
612
- imagesr_model = ImageRealESRGAN(outscale=4)
613
- multiviews = [imagesr_model(img.convert("RGB")) for img in multiviews]
614
- multiviews = [img.resize(args.target_hw[::-1]) for img in multiviews]
615
-
616
- mesh = trimesh.load(args.mesh_path)
617
- if isinstance(mesh, trimesh.Scene):
618
- mesh = mesh.dump(concatenate=True)
619
-
620
- if not args.skip_fix_mesh:
621
- mesh.vertices, scale, center = normalize_vertices_array(mesh.vertices)
622
- mesh_fixer = MeshFixer(mesh.vertices, mesh.faces, args.device)
623
- mesh.vertices, mesh.faces = mesh_fixer(
624
- filter_ratio=args.mesh_sipmlify_ratio,
625
- max_hole_size=0.04,
626
- resolution=1024,
627
- num_views=1000,
628
- norm_mesh_ratio=0.5,
629
- )
630
- # Restore scale.
631
- mesh.vertices = mesh.vertices / scale
632
- mesh.vertices = mesh.vertices + center
633
-
634
- # Baking texture to mesh.
635
- import time
636
- start = time.time()
637
- texture_backer = TextureBacker(
638
- camera_params=camera_params,
639
- view_weights=view_weights,
640
- render_wh=args.target_hw,
641
- texture_wh=args.texture_wh,
642
- )
643
- print(time.time()-start)
644
- start = time.time()
645
- textured_mesh = texture_backer(multiviews, mesh, args.output_path)
646
- print(f"Texture backproject time: {time.time() - start:.2f}s")
647
-
648
- return textured_mesh
649
-
650
-
651
- if __name__ == "__main__":
652
- entrypoint()
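For reference, the weighted fusion performed by fast_bake_texture in the deleted file above can be summarised with a small numpy sketch: each view's texture is blended with a confidence proportional to the fourth power of the view-angle cosine, and texels never covered by a confident view are left for UV inpainting. The function below is illustrative only and omits the early skip for views that are already more than 99% painted.

import numpy as np


def fuse_views(textures, cos_maps, view_weights, eps=1e-8):
    """Blend (H, W, 3) per-view textures using (H, W, 1) cosine confidence maps."""
    acc = np.zeros_like(textures[0])
    trust = np.zeros_like(cos_maps[0])
    for tex, cos, weight in zip(textures, cos_maps, view_weights):
        conf = weight * cos**4      # sharply down-weight grazing viewing angles
        acc += tex * conf
        trust += conf
    fused = acc / np.clip(trust, eps, None)
    return fused, trust > eps       # fused texture and its "trusted texel" mask


# Toy usage with two random views.
h = w = 4
textures = [np.random.rand(h, w, 3) for _ in range(2)]
cos_maps = [np.random.rand(h, w, 1) for _ in range(2)]
fused, trusted = fuse_views(textures, cos_maps, view_weights=[1.0, 0.5])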
asset3d_gen/data/backup/backproject_v2.py DELETED
@@ -1,700 +0,0 @@
1
- import logging
2
- import math
3
- from typing import Union
4
-
5
- import custom_rasterizer as cr
6
- import cv2
7
- import numpy as np
8
- import torch
9
- import torch.nn.functional as F
10
- import trimesh
11
- import xatlas
12
- from PIL import Image
13
- from asset3d_gen.data.utils import (
14
- get_images_from_file,
15
- normalize_vertices_array,
16
- post_process_texture,
17
- save_mesh_with_mtl,
18
- )
19
-
20
- logging.basicConfig(
21
- format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
22
- )
23
- logger = logging.getLogger(__name__)
24
-
25
-
26
- __all__ = ["TextureBacker", "Image_Super_Net", "Image_GANNet"]
27
-
28
-
29
- import math
30
- import numpy as np
31
-
32
-
33
- def get_perspective_projection(
34
- fov: float, aspect_wh: float, near: float = 0.01, far: float = 100
35
- ) -> np.ndarray:
36
- """Compute the perspective projection matrix for 3D rendering."""
37
- fov_rad = math.radians(fov)
38
- tan_half_fov = math.tan(fov_rad / 2.0)
39
-
40
- return np.array(
41
- [
42
- [1.0 / (tan_half_fov * aspect_wh), 0.0, 0.0, 0.0],
43
- [0.0, 1.0 / tan_half_fov, 0.0, 0.0],
44
- [
45
- 0.0,
46
- 0.0,
47
- -(far + near) / (far - near),
48
- -(2.0 * far * near) / (far - near),
49
- ],
50
- [0.0, 0.0, -1.0, 0.0],
51
- ],
52
- dtype=np.float32,
53
- )
54
-
55
-
56
- def transform_vertices(
57
- mtx: torch.Tensor, pos: torch.Tensor, keepdim: bool = False
58
- ) -> torch.Tensor:
59
- """Transform 3D vertices using a projection matrix."""
60
- t_mtx = torch.as_tensor(mtx, device=pos.device, dtype=pos.dtype)
61
- if pos.size(-1) == 3:
62
- pos = torch.cat([pos, torch.ones_like(pos[..., :1])], dim=-1)
63
-
64
- result = pos @ t_mtx.T
65
-
66
- return result if keepdim else result.unsqueeze(0)
67
-
68
-
69
- def compute_w2c_matrix(
70
- elev_deg: float, azim_deg: float, cam_dist: float
71
- ) -> np.ndarray:
72
- """Compute w2c 4x4 transformation matrix from spherical coordinates."""
73
-
74
- elev_rad = math.radians(-elev_deg)
75
- azim_rad = math.radians(azim_deg)
76
-
77
- sin_elev = math.sin(elev_rad)
78
- cos_elev = math.cos(elev_rad)
79
- sin_azim = math.sin(azim_rad)
80
- cos_azim = math.cos(azim_rad)
81
-
82
- cam_pos = np.array(
83
- [
84
- cam_dist * cos_elev * cos_azim,
85
- cam_dist * cos_elev * sin_azim,
86
- cam_dist * sin_elev,
87
- ]
88
- )
89
-
90
- look_dir = -cam_pos / np.linalg.norm(cam_pos)
91
- right_dir = np.cross(look_dir, [0, 0, 1])
92
- right_dir /= np.linalg.norm(right_dir)
93
- up_dir = np.cross(right_dir, look_dir)
94
-
95
- c2w = np.eye(4)
96
- c2w[:3, 0] = right_dir
97
- c2w[:3, 1] = up_dir
98
- c2w[:3, 2] = -look_dir
99
- c2w[:3, 3] = cam_pos
100
-
101
- try:
102
- w2c = np.linalg.inv(c2w)
103
- except np.linalg.LinAlgError as e:
104
- raise ArithmeticError("Failed to invert camera-to-world matrix") from e
105
-
106
- return w2c.astype(np.float32)
107
-
108
-
109
- def _bilinear_interpolation_scattering(
110
- image_h: int, image_w: int, coords: torch.Tensor, values: torch.Tensor
111
- ) -> torch.Tensor:
112
- """Bilinear interpolation scattering for grid-based value accumulation."""
113
- device = values.device
114
- dtype = values.dtype
115
- C = values.shape[-1]
116
-
117
- indices = coords * torch.tensor(
118
- [image_h - 1, image_w - 1], dtype=dtype, device=device
119
- )
120
- i, j = indices.unbind(-1)
121
-
122
- i0, j0 = (
123
- indices.floor()
124
- .long()
125
- .clamp(0, image_h - 2)
126
- .clamp(0, image_w - 2)
127
- .unbind(-1)
128
- )
129
- i1, j1 = i0 + 1, j0 + 1
130
-
131
- w_i = i - i0.float()
132
- w_j = j - j0.float()
133
- weights = torch.stack(
134
- [(1 - w_i) * (1 - w_j), (1 - w_i) * w_j, w_i * (1 - w_j), w_i * w_j],
135
- dim=1,
136
- )
137
-
138
- indices_comb = torch.stack(
139
- [
140
- torch.stack([i0, j0], dim=1),
141
- torch.stack([i0, j1], dim=1),
142
- torch.stack([i1, j0], dim=1),
143
- torch.stack([i1, j1], dim=1),
144
- ],
145
- dim=1,
146
- )
147
-
148
- grid = torch.zeros(image_h, image_w, C, device=device, dtype=dtype)
149
- cnt = torch.zeros(image_h, image_w, 1, device=device, dtype=dtype)
150
-
151
- for k in range(4):
152
- idx = indices_comb[:, k]
153
- w = weights[:, k].unsqueeze(-1)
154
-
155
- stride = torch.tensor([image_w, 1], device=device, dtype=torch.long)
156
- flat_idx = (idx * stride).sum(-1)
157
-
158
- grid.view(-1, C).scatter_add_(
159
- 0, flat_idx.unsqueeze(-1).expand(-1, C), values * w
160
- )
161
- cnt.view(-1, 1).scatter_add_(0, flat_idx.unsqueeze(-1), w)
162
-
163
- mask = cnt.squeeze(-1) > 0
164
- grid[mask] = grid[mask] / cnt[mask].repeat(1, C)
165
-
166
- return grid
167
-
168
-
169
- def _texture_inpaint_smooth(
170
- texture: np.ndarray,
171
- mask: np.ndarray,
172
- vertices: np.ndarray,
173
- faces: np.ndarray,
174
- uv_map: np.ndarray,
175
- ) -> tuple[np.ndarray, np.ndarray]:
176
- """Perform texture inpainting using vertex-based color propagation."""
177
- image_h, image_w, C = texture.shape
178
- N = vertices.shape[0]
179
-
180
- # Initialize vertex data structures
181
- vtx_mask = np.zeros(N, dtype=np.float32)
182
- vtx_colors = np.zeros((N, C), dtype=np.float32)
183
- unprocessed = []
184
- adjacency = [[] for _ in range(N)]
185
-
186
- # Build adjacency graph and initial color assignment
187
- for face_idx in range(faces.shape[0]):
188
- for k in range(3):
189
- uv_idx_k = faces[face_idx, k]
190
- v_idx = faces[face_idx, k]
191
-
192
- # Convert UV to pixel coordinates with boundary clamping
193
- u = np.clip(
194
- int(round(uv_map[uv_idx_k, 0] * (image_w - 1))), 0, image_w - 1
195
- )
196
- v = np.clip(
197
- int(round((1.0 - uv_map[uv_idx_k, 1]) * (image_h - 1))),
198
- 0,
199
- image_h - 1,
200
- )
201
-
202
- if mask[v, u]:
203
- vtx_mask[v_idx] = 1.0
204
- vtx_colors[v_idx] = texture[v, u]
205
- elif v_idx not in unprocessed:
206
- unprocessed.append(v_idx)
207
-
208
- # Build undirected adjacency graph
209
- neighbor = faces[face_idx, (k + 1) % 3]
210
- if neighbor not in adjacency[v_idx]:
211
- adjacency[v_idx].append(neighbor)
212
- if v_idx not in adjacency[neighbor]:
213
- adjacency[neighbor].append(v_idx)
214
-
215
- # Color propagation with dynamic stopping
216
- remaining_iters, prev_count = 2, 0
217
- while remaining_iters > 0:
218
- current_unprocessed = []
219
-
220
- for v_idx in unprocessed:
221
- valid_neighbors = [n for n in adjacency[v_idx] if vtx_mask[n] > 0]
222
- if not valid_neighbors:
223
- current_unprocessed.append(v_idx)
224
- continue
225
-
226
- # Calculate inverse square distance weights
227
- neighbors_pos = vertices[valid_neighbors]
228
- dist_sq = np.sum((vertices[v_idx] - neighbors_pos) ** 2, axis=1)
229
- weights = 1 / np.maximum(dist_sq, 1e-8)
230
-
231
- vtx_colors[v_idx] = np.average(
232
- vtx_colors[valid_neighbors], weights=weights, axis=0
233
- )
234
- vtx_mask[v_idx] = 1.0
235
-
236
- # Update iteration control
237
- if len(current_unprocessed) == prev_count:
238
- remaining_iters -= 1
239
- else:
240
- remaining_iters = min(remaining_iters + 1, 2)
241
- prev_count = len(current_unprocessed)
242
- unprocessed = current_unprocessed
243
-
244
- # Generate output texture
245
- inpainted_texture, updated_mask = texture.copy(), mask.copy()
246
- for face_idx in range(faces.shape[0]):
247
- for k in range(3):
248
- v_idx = faces[face_idx, k]
249
- if not vtx_mask[v_idx]:
250
- continue
251
-
252
- # UV coordinate conversion
253
- uv_idx_k = faces[face_idx, k]
254
- u = np.clip(
255
- int(round(uv_map[uv_idx_k, 0] * (image_w - 1))), 0, image_w - 1
256
- )
257
- v = np.clip(
258
- int(round((1.0 - uv_map[uv_idx_k, 1]) * (image_h - 1))),
259
- 0,
260
- image_h - 1,
261
- )
262
-
263
- inpainted_texture[v, u] = vtx_colors[v_idx]
264
- updated_mask[v, u] = 255
265
-
266
- return inpainted_texture, updated_mask
267
-
268
-
269
- class TextureBacker:
270
- """Texture baking pipeline for multi-view projection and fusion."""
271
-
272
- def __init__(
273
- self,
274
- camera_elevs: list[float],
275
- camera_azims: list[float],
276
- camera_distance: int,
277
- camera_fov: float,
278
- view_weights: list[float] = None,
279
- render_wh: tuple[int, int] = (2048, 2048),
280
- texture_wh: tuple[int, int] = (2048, 2048),
281
- use_antialias: bool = True,
282
- bake_angle_thres: int = 75,
283
- device="cuda",
284
- ):
285
- self.camera_elevs = camera_elevs
286
- self.camera_azims = camera_azims
287
- self.view_weights = (
288
- view_weights
289
- if view_weights is not None
290
- else [1] * len(camera_elevs)
291
- )
292
- self.device = device
293
- self.render_wh = render_wh
294
- self.texture_wh = texture_wh
295
-
296
- self.camera_distance = camera_distance
297
- self.use_antialias = use_antialias
298
-
299
- self.bake_angle_thres = bake_angle_thres
300
- self.bake_unreliable_kernel_size = int(
301
- (2 / 512) * max(self.render_wh[0], self.render_wh[1])
302
- )
303
-
304
- self.camera_proj_mat = get_perspective_projection(
305
- camera_fov,
306
- self.render_wh[1] / self.render_wh[0],
307
- )
308
- self.cnt = 0
309
-
310
- def rasterize_mesh(
311
- self,
312
- vertex: torch.Tensor,
313
- face: torch.Tensor,
314
- resolution: tuple[int, int],
315
- ) -> torch.Tensor:
316
- vertex = vertex[None] if vertex.ndim == 2 else vertex
317
- indices, weights = cr.rasterize(vertex, face, resolution)
318
-
319
- return torch.cat(
320
- [weights, indices.unsqueeze(-1).to(weights.dtype)], dim=-1
321
- ).unsqueeze(0)
322
-
323
- def raster_interpolate(
324
- self, uv: torch.Tensor, rast_out: torch.Tensor, faces: torch.Tensor
325
- ) -> torch.Tensor:
326
- barycentric = rast_out[0, ..., :-1]
327
- findices = rast_out[0, ..., -1]
328
- if uv.dim() == 2:
329
- uv = uv.unsqueeze(0)
330
-
331
- return cr.interpolate(uv, findices, barycentric, faces)[0]
332
-
333
- def load_mesh(self, mesh_path: str) -> None:
334
- mesh = trimesh.load(mesh_path)
335
- if isinstance(mesh, trimesh.Scene):
336
- mesh = mesh.dump(concatenate=True)
337
-
338
- mesh.vertices, scale, center = normalize_vertices_array(mesh.vertices)
339
- self.scale, self.center = scale, center
340
-
341
- vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)
342
- mesh.vertices = mesh.vertices[vmapping]
343
- mesh.faces = indices
344
- mesh.visual.uv = uvs
345
-
346
- self.vertices = torch.from_numpy(mesh.vertices).to(self.device).float()
347
- self.faces = torch.from_numpy(mesh.faces).to(self.device).to(torch.int)
348
- self.uv_map = torch.from_numpy(mesh.visual.uv).to(self.device).float()
349
-
350
- # Transformation of coordinate system
351
- self.vertices[:, [0, 1]] = -self.vertices[:, [0, 1]]
352
- self.vertices[:, [1, 2]] = self.vertices[:, [2, 1]]
353
- self.uv_map[:, 1] = 1 - self.uv_map[:, 1]
354
-
355
- def get_mesh_attrs(
356
- self,
357
- scale: float = None,
358
- center: np.ndarray = None,
359
- ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
360
- vertices = self.vertices.cpu().numpy()
361
- faces = self.faces.cpu().numpy()
362
- uv_map = self.uv_map.cpu().numpy()
363
-
364
- # Inverse transformation of coordinate system
365
- vertices[:, [1, 2]] = vertices[:, [2, 1]]
366
- vertices[:, [0, 1]] = -vertices[:, [0, 1]]
367
- uv_map[:, 1] = 1.0 - uv_map[:, 1]
368
-
369
- if scale is not None:
370
- vertices = vertices / scale
371
- if center is not None:
372
- vertices = vertices + center
373
-
374
- return vertices, faces, uv_map
375
-
376
- def _render_depth_edges(self, depth_image: torch.Tensor) -> torch.Tensor:
377
- depth_image_np = depth_image.cpu().numpy()
378
- depth_image_np = (depth_image_np * 255).astype(np.uint8)
379
- depth_edges = cv2.Canny(depth_image_np, 30, 80)
380
- combined_edges = depth_edges
381
- sketch_image = (
382
- torch.from_numpy(combined_edges).to(depth_image.device).float()
383
- / 255
384
- )
385
- sketch_image = sketch_image.unsqueeze(-1)
386
-
387
- return sketch_image
388
-
389
- def back_project(
390
- self, image: Image.Image, elev: float, azim: float
391
- ) -> tuple[torch.Tensor, torch.Tensor]:
392
- if isinstance(image, Image.Image):
393
- image = np.array(image)
394
- image = torch.as_tensor(image, device=self.device, dtype=torch.float32)
395
- if image.ndim == 2:
396
- image = image.unsqueeze(-1)
397
- image = image / 255.0
398
-
399
- view_mat = compute_w2c_matrix(elev, azim, self.camera_distance)
400
- import pdb
401
-
402
- pdb.set_trace()
403
- pos_cam = transform_vertices(view_mat, self.vertices, keepdim=True)
404
- pos_clip = transform_vertices(self.camera_proj_mat, pos_cam)
405
- pos_cam = pos_cam[:, :3] / pos_cam[:, 3:]
406
-
407
- v0, v1, v2 = (pos_cam[self.faces[:, i]] for i in range(3))
408
- face_norm = F.normalize(torch.cross(v1 - v0, v2 - v0, dim=-1), dim=-1)
409
- vertex_norm = (
410
- torch.from_numpy(
411
- trimesh.geometry.mean_vertex_normals(
412
- len(pos_cam), self.faces.cpu(), face_norm.cpu()
413
- )
414
- )
415
- .to(self.device)
416
- .contiguous()
417
- )
418
-
419
- rast_out = self.rasterize_mesh(pos_clip, self.faces, image.shape[:2])
420
- vis_mask = torch.clamp(rast_out[..., -1:], 0, 1)[0]
421
-
422
- interp_data = {
423
- "normal": self.raster_interpolate(
424
- vertex_norm[None], rast_out, self.faces
425
- ),
426
- "uv": self.raster_interpolate(
427
- self.uv_map[None], rast_out, self.faces
428
- ),
429
- "depth": self.raster_interpolate(
430
- pos_cam[:, 2].reshape(1, -1, 1), rast_out, self.faces
431
- ),
432
- }
433
-
434
- valid_depth = interp_data["depth"][vis_mask > 0]
435
- depth_norm = (interp_data["depth"] - valid_depth.min()) / (
436
- valid_depth.max() - valid_depth.min()
437
- )
438
- # depth_norm[vis_mask <= 0] = 0
439
- sketch_image = self._render_depth_edges(depth_norm * vis_mask)
440
-
441
- # ddd = depth_norm * vis_mask
442
- # cv2.imwrite(f"v2_depth_d{self.cnt}.png", (ddd.cpu().numpy() * 255).astype(np.uint8))
443
-
444
- cv2.imwrite(
445
- f"v2_vis_mask{self.cnt}.png",
446
- (vis_mask.cpu().numpy() * 255).astype(np.uint8),
447
- )
448
- cv2.imwrite(
449
- f"v2_normal{self.cnt}.png",
450
- (interp_data["normal"].cpu().numpy() * 255).astype(np.uint8),
451
- )
452
- cv2.imwrite(
453
- f"v2_depth{self.cnt}.png",
454
- (depth_norm.cpu().numpy() * 255).astype(np.uint8),
455
- )
456
- cv2.imwrite(
457
- f"v2_uv{self.cnt}.png",
458
- (interp_data["uv"][..., 0].cpu().numpy() * 255).astype(np.uint8),
459
- )
460
- cv2.imwrite(
461
- f"v2_sketch{self.cnt}.png",
462
- (sketch_image.cpu().numpy() * 255).astype(np.uint8),
463
- )
464
-
465
- self.cnt += 1
466
-
467
- cos = F.cosine_similarity(
468
- torch.tensor([[0, 0, -1]], device=self.device),
469
- interp_data["normal"].view(-1, 3),
470
- ).view_as(interp_data["normal"][..., :1])
471
- cos[cos < np.cos(np.radians(self.bake_angle_thres))] = 0
472
-
473
- cv2.imwrite(
474
- f"v2_cos{self.cnt}.png", (cos.cpu().numpy() * 255).astype(np.uint8)
475
- )
476
-
477
- k = self.bake_unreliable_kernel_size * 2 + 1
478
- kernel = torch.ones((1, 1, k, k), device=self.device)
479
-
480
- vis_mask = vis_mask.permute(2, 0, 1).unsqueeze(0).float()
481
- vis_mask = F.conv2d(
482
- 1.0 - vis_mask,
483
- kernel,
484
- padding=k // 2,
485
- )
486
- vis_mask = 1.0 - (vis_mask > 0).float()
487
- vis_mask = vis_mask.squeeze(0).permute(1, 2, 0)
488
-
489
- sketch_image = sketch_image.permute(2, 0, 1).unsqueeze(0)
490
- sketch_image = F.conv2d(sketch_image, kernel, padding=k // 2)
491
- sketch_image = (sketch_image > 0).float()
492
- sketch_image = sketch_image.squeeze(0).permute(1, 2, 0)
493
- vis_mask = vis_mask * (sketch_image < 0.5)
494
-
495
- cos[vis_mask == 0] = 0
496
-
497
- vis_mask = cv2.imread(
498
- f"v3_db_mask{self.cnt}.png", cv2.IMREAD_GRAYSCALE
499
- )
500
- vis_mask = (
501
- torch.from_numpy(vis_mask[..., None]).to(self.device).float() / 255
502
- )
503
- # cos2 = cv2.imread(f"v3_db_cos{self.cnt}.png", cv2.IMREAD_GRAYSCALE)
504
- # cos2 = torch.from_numpy(cos2[..., None]).to(self.device).float() / 255
505
- # cos = cos2
506
-
507
- valid_pixels = (vis_mask != 0).view(-1)
508
- # import pdb; pdb.set_trace()
509
-
510
- cv2.imwrite(
511
- f"v2_db_sketch{self.cnt}.png",
512
- (sketch_image.cpu().numpy() * 255).astype(np.uint8),
513
- )
514
- cv2.imwrite(
515
- f"v2_db_uv{self.cnt}.png",
516
- (interp_data["uv"][..., 0].cpu().numpy() * 255).astype(np.uint8),
517
- )
518
- cv2.imwrite(
519
- f"v2_db_uv2{self.cnt}.png",
520
- (interp_data["uv"][..., 1].cpu().numpy() * 255).astype(np.uint8),
521
- )
522
- cv2.imwrite(
523
- f"v2_db_color{self.cnt}.png",
524
- (image.cpu().numpy() * 255).astype(np.uint8),
525
- )
526
- cv2.imwrite(
527
- f"v2_db_cos{self.cnt}.png",
528
- (cos.cpu().numpy() * 255).astype(np.uint8),
529
- )
530
- cv2.imwrite(
531
- f"v2_db_mask{self.cnt}.png",
532
- (vis_mask.cpu().numpy() * 255).astype(np.uint8),
533
- )
534
- # import pdb; pdb.set_trace()
535
- return (
536
- self._scatter_texture(interp_data["uv"], image, valid_pixels),
537
- self._scatter_texture(interp_data["uv"], cos, valid_pixels),
538
- )
539
-
540
- def _scatter_texture(self, uv, data, mask):
541
- def __filter_data(data, mask):
542
- return data.view(-1, data.shape[-1])[mask]
543
-
544
- return _bilinear_interpolation_scattering(
545
- self.texture_wh[1],
546
- self.texture_wh[0],
547
- __filter_data(uv, mask)[..., [1, 0]],
548
- __filter_data(data, mask),
549
- )
550
-
551
- @torch.no_grad()
552
- def fast_bake_texture(
553
- self, textures: list[torch.Tensor], confidence_maps: list[torch.Tensor]
554
- ) -> tuple[torch.Tensor, torch.Tensor]:
555
- channel = textures[0].shape[-1]
556
- texture_merge = torch.zeros(self.texture_wh + (channel,)).to(
557
- self.device
558
- )
559
- trust_map_merge = torch.zeros(self.texture_wh + (1,)).to(self.device)
560
- for texture, cos_map in zip(textures, confidence_maps):
561
- view_sum = (cos_map > 0).sum()
562
- painted_sum = ((cos_map > 0) * (trust_map_merge > 0)).sum()
563
- if painted_sum / view_sum > 0.99:
564
- continue
565
- texture_merge += texture * cos_map
566
- trust_map_merge += cos_map
567
- texture_merge = texture_merge / torch.clamp(trust_map_merge, min=1e-8)
568
-
569
- return texture_merge, trust_map_merge > 1e-8
570
-
571
- def uv_inpaint(
572
- self, texture: torch.Tensor, mask: torch.Tensor
573
- ) -> np.ndarray:
574
- texture_np = texture.cpu().numpy()
575
- mask_np = (mask.squeeze(-1).cpu().numpy() * 255).astype(np.uint8)
576
- vertices, faces, uv_map = self.get_mesh_attrs()
577
- # import pdb; pdb.set_trace()
578
- texture_np, mask_np = _texture_inpaint_smooth(
579
- texture_np, mask_np, vertices, faces, uv_map
580
- )
581
- texture_np = texture_np.clip(0, 1)
582
- texture_np = cv2.inpaint(
583
- (texture_np * 255).astype(np.uint8),
584
- 255 - mask_np,
585
- 3,
586
- cv2.INPAINT_NS,
587
- )
588
-
589
- return texture_np
590
-
591
- def __call__(
592
- self, colors: list[Image.Image], input_mesh: str, output_path: str
593
- ) -> trimesh.Trimesh:
594
- self.load_mesh(input_mesh)
595
-
596
- textures, weighted_cos_maps = [], []
597
- for color, cam_elev, cam_azim, weight in zip(
598
- colors, self.camera_elevs, self.camera_azims, self.view_weights
599
- ):
600
- texture, cos_map = self.back_project(color, cam_elev, cam_azim)
601
- cv2.imwrite(
602
- f"v2_texture{self.cnt}.png",
603
- (texture.cpu().numpy() * 255).astype(np.uint8),
604
- )
605
- cv2.imwrite(
606
- f"v2_texture_cos{self.cnt}.png",
607
- (cos_map.cpu().numpy() * 255).astype(np.uint8),
608
- )
609
- # import pdb; pdb.set_trace()
610
- textures.append(texture)
611
- weighted_cos_maps.append(weight * (cos_map**4))
612
-
613
- texture, mask = self.fast_bake_texture(textures, weighted_cos_maps)
614
- texture_np = self.uv_inpaint(texture, mask)
615
- texture_np = post_process_texture(texture_np)
616
- vertices, faces, uvs = self.get_mesh_attrs(self.scale, self.center)
617
- # import pdb; pdb.set_trace()
618
- cv2.imwrite("v2_texture_np.png", texture_np)
619
-
620
- textured_mesh = save_mesh_with_mtl(
621
- vertices, faces, uvs, texture_np, output_path
622
- )
623
-
624
- return textured_mesh
625
-
626
-
627
- class Image_Super_Net:
628
- def __init__(self, device="cuda"):
629
- from diffusers import StableDiffusionUpscalePipeline
630
-
631
- self.up_pipeline_x4 = StableDiffusionUpscalePipeline.from_pretrained(
632
- "stabilityai/stable-diffusion-x4-upscaler",
633
- torch_dtype=torch.float16,
634
- ).to(device)
635
- self.up_pipeline_x4.set_progress_bar_config(disable=True)
636
-
637
- def __call__(self, image, prompt=""):
638
- with torch.no_grad():
639
- upscaled_image = self.up_pipeline_x4(
640
- prompt=[prompt],
641
- image=image,
642
- num_inference_steps=10,
643
- ).images[0]
644
-
645
- return upscaled_image
646
-
647
-
648
- class Image_GANNet:
649
- def __init__(self, outscale: int):
650
- from basicsr.archs.rrdbnet_arch import RRDBNet
651
- from realesrgan import RealESRGANer
652
-
653
- self.outscale = outscale
654
- model = RRDBNet(
655
- num_in_ch=3,
656
- num_out_ch=3,
657
- num_feat=64,
658
- num_block=23,
659
- num_grow_ch=32,
660
- scale=4,
661
- )
662
- self.upsampler = RealESRGANer(
663
- scale=4,
664
- model_path="/horizon-bucket/robot_lab/users/xinjie.wang/weights/super_resolution/RealESRGAN_x4plus.pth", # noqa
665
- model=model,
666
- pre_pad=0,
667
- half=True,
668
- )
669
-
670
- def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
671
- if isinstance(image, Image.Image):
672
- image = np.array(image)
673
- output, _ = self.upsampler.enhance(image, outscale=self.outscale)
674
-
675
- return Image.fromarray(output)
676
-
677
-
678
- if __name__ == "__main__":
679
- device = "cuda"
680
- color_path = "outputs/texture_mesh_gen/multi_view/color_sample0.png"
681
- mesh_path = "outputs/texture_mesh_gen/texture_mesh/kettle_color.glb"
682
- output_path = "robot_test_v2/robot.obj"
683
- target_image_size = (2048, 2048)
684
-
685
- super_model = Image_GANNet(outscale=4)
686
- multiviews = get_images_from_file(color_path, img_size=512)
687
-
688
- texture_backer = TextureBacker(
689
- camera_elevs=[20, 20, 20, -10, -10, -10],
690
- camera_azims=[-180, -60, 60, -120, 0, 120],
691
- view_weights=[1, 0.2, 0.2, 0.2, 1, 0.2],
692
- camera_distance=5,
693
- camera_fov=30,
694
- render_wh=(2048, 2048),
695
- texture_wh=(2048, 2048),
696
- )
697
-
698
- multiviews = [super_model(img) for img in multiviews]
699
- multiviews = [img.convert("RGB") for img in multiviews]
700
- textured_mesh = texture_backer(multiviews, mesh_path, output_path)
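The deleted file above builds its cameras from get_perspective_projection and compute_w2c_matrix. As a quick orientation, the sketch below shows how a world-space point would be mapped to normalized device coordinates with those two 4x4 matrices; the wrapper function and the sample values are illustrative and assume the helpers above are importable.

import numpy as np


def project_point(point_xyz, w2c, proj):
    """Map a world-space point (3,) to NDC using 4x4 world-to-camera and projection matrices."""
    p = np.append(np.asarray(point_xyz, dtype=np.float32), 1.0)  # homogeneous coordinates
    cam = w2c @ p      # world space -> camera space
    clip = proj @ cam  # camera space -> clip space
    return clip[:3] / clip[3]  # perspective divide -> NDC in [-1, 1]


# Example with hypothetical values, assuming the helpers defined above:
# w2c = compute_w2c_matrix(elev_deg=20.0, azim_deg=0.0, cam_dist=5.0)
# proj = get_perspective_projection(fov=30.0, aspect_wh=1.0)
# ndc = project_point([0.0, 0.0, 0.0], w2c, proj)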
asset3d_gen/data/backup/backproject_v3.py DELETED
@@ -1,866 +0,0 @@
1
- import logging
2
- import math
3
- from typing import Union
4
-
5
- import custom_rasterizer as cr
6
- import cv2
7
- import numpy as np
8
- import torch
9
- import torch.nn.functional as F
10
- import trimesh
11
- import xatlas
12
- from PIL import Image
13
- from asset3d_gen.data.utils import (
14
- get_images_from_file,
15
- normalize_vertices_array,
16
- post_process_texture,
17
- save_mesh_with_mtl,
18
- )
19
-
20
- logging.basicConfig(
21
- format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO
22
- )
23
- logger = logging.getLogger(__name__)
24
-
25
-
26
- __all__ = ["TextureBacker", "Image_Super_Net", "Image_GANNet"]
27
-
28
-
29
- import math
30
- import numpy as np
31
-
32
-
33
- def get_perspective_projection(
34
- fov: float, aspect_wh: float, near: float = 0.01, far: float = 100
35
- ) -> np.ndarray:
36
- """Compute the perspective projection matrix for 3D rendering."""
37
- fov_rad = math.radians(fov)
38
- tan_half_fov = math.tan(fov_rad / 2.0)
39
-
40
- return np.array(
41
- [
42
- [1.0 / (tan_half_fov * aspect_wh), 0.0, 0.0, 0.0],
43
- [0.0, 1.0 / tan_half_fov, 0.0, 0.0],
44
- [
45
- 0.0,
46
- 0.0,
47
- -(far + near) / (far - near),
48
- -(2.0 * far * near) / (far - near),
49
- ],
50
- [0.0, 0.0, -1.0, 0.0],
51
- ],
52
- dtype=np.float32,
53
- )
54
-
55
-
56
- def transform_vertices(
57
- mtx: torch.Tensor, pos: torch.Tensor, keepdim: bool = False
58
- ) -> torch.Tensor:
59
- """Transform 3D vertices using a projection matrix."""
60
- t_mtx = torch.as_tensor(mtx, device=pos.device, dtype=pos.dtype)
61
- if pos.size(-1) == 3:
62
- pos = torch.cat([pos, torch.ones_like(pos[..., :1])], dim=-1)
63
-
64
- result = pos @ t_mtx.T
65
-
66
- return result if keepdim else result.unsqueeze(0)
67
-
68
-
69
- def compute_w2c_matrix(
70
- elev_deg: float, azim_deg: float, cam_dist: float
71
- ) -> np.ndarray:
72
- """Compute w2c 4x4 transformation matrix from spherical coordinates."""
73
-
74
- elev_rad = math.radians(-elev_deg)
75
- azim_rad = math.radians(azim_deg)
76
-
77
- sin_elev = math.sin(elev_rad)
78
- cos_elev = math.cos(elev_rad)
79
- sin_azim = math.sin(azim_rad)
80
- cos_azim = math.cos(azim_rad)
81
-
82
- cam_pos = np.array(
83
- [
84
- cam_dist * cos_elev * cos_azim,
85
- cam_dist * cos_elev * sin_azim,
86
- cam_dist * sin_elev,
87
- ]
88
- )
89
-
90
- look_dir = -cam_pos / np.linalg.norm(cam_pos)
91
- right_dir = np.cross(look_dir, [0, 0, 1])
92
- right_dir /= np.linalg.norm(right_dir)
93
- up_dir = np.cross(right_dir, look_dir)
94
-
95
- c2w = np.eye(4)
96
- c2w[:3, 0] = right_dir
97
- c2w[:3, 1] = up_dir
98
- c2w[:3, 2] = -look_dir
99
- c2w[:3, 3] = cam_pos
100
-
101
- try:
102
- w2c = np.linalg.inv(c2w)
103
- except np.linalg.LinAlgError as e:
104
- raise ArithmeticError("Failed to invert camera-to-world matrix") from e
105
-
106
- return w2c.astype(np.float32)
107
-
108
-
109
- def _bilinear_interpolation_scattering(
110
- image_h: int, image_w: int, coords: torch.Tensor, values: torch.Tensor
111
- ) -> torch.Tensor:
112
- """Bilinear interpolation scattering for grid-based value accumulation."""
113
- device = values.device
114
- dtype = values.dtype
115
- C = values.shape[-1]
116
-
117
- indices = coords * torch.tensor(
118
- [image_h - 1, image_w - 1], dtype=dtype, device=device
119
- )
120
- i, j = indices.unbind(-1)
121
-
122
- i0, j0 = (
123
- indices.floor()
124
- .long()
125
- .clamp(0, image_h - 2)
126
- .clamp(0, image_w - 2)
127
- .unbind(-1)
128
- )
129
- i1, j1 = i0 + 1, j0 + 1
130
-
131
- w_i = i - i0.float()
132
- w_j = j - j0.float()
133
- weights = torch.stack(
134
- [(1 - w_i) * (1 - w_j), (1 - w_i) * w_j, w_i * (1 - w_j), w_i * w_j],
135
- dim=1,
136
- )
137
-
138
- indices_comb = torch.stack(
139
- [
140
- torch.stack([i0, j0], dim=1),
141
- torch.stack([i0, j1], dim=1),
142
- torch.stack([i1, j0], dim=1),
143
- torch.stack([i1, j1], dim=1),
144
- ],
145
- dim=1,
146
- )
147
-
148
- grid = torch.zeros(image_h, image_w, C, device=device, dtype=dtype)
149
- cnt = torch.zeros(image_h, image_w, 1, device=device, dtype=dtype)
150
-
151
- for k in range(4):
152
- idx = indices_comb[:, k]
153
- w = weights[:, k].unsqueeze(-1)
154
-
155
- stride = torch.tensor([image_w, 1], device=device, dtype=torch.long)
156
- flat_idx = (idx * stride).sum(-1)
157
-
158
- grid.view(-1, C).scatter_add_(
159
- 0, flat_idx.unsqueeze(-1).expand(-1, C), values * w
160
- )
161
- cnt.view(-1, 1).scatter_add_(0, flat_idx.unsqueeze(-1), w)
162
-
163
- mask = cnt.squeeze(-1) > 0
164
- grid[mask] = grid[mask] / cnt[mask].repeat(1, C)
165
-
166
- return grid
167
-
168
-
169
- def _texture_inpaint_smooth(
170
- texture: np.ndarray,
171
- mask: np.ndarray,
172
- vertices: np.ndarray,
173
- faces: np.ndarray,
174
- uv_map: np.ndarray,
175
- ) -> tuple[np.ndarray, np.ndarray]:
176
- """Perform texture inpainting using vertex-based color propagation."""
177
- image_h, image_w, C = texture.shape
178
- N = vertices.shape[0]
179
-
180
- # Initialize vertex data structures
181
- vtx_mask = np.zeros(N, dtype=np.float32)
182
- vtx_colors = np.zeros((N, C), dtype=np.float32)
183
- unprocessed = []
184
- adjacency = [[] for _ in range(N)]
185
-
186
- # Build adjacency graph and initial color assignment
187
- for face_idx in range(faces.shape[0]):
188
- for k in range(3):
189
- uv_idx_k = faces[face_idx, k]
190
- v_idx = faces[face_idx, k]
191
-
192
- # Convert UV to pixel coordinates with boundary clamping
193
- u = np.clip(
194
- int(round(uv_map[uv_idx_k, 0] * (image_w - 1))), 0, image_w - 1
195
- )
196
- v = np.clip(
197
- int(round((1.0 - uv_map[uv_idx_k, 1]) * (image_h - 1))),
198
- 0,
199
- image_h - 1,
200
- )
201
-
202
- if mask[v, u]:
203
- vtx_mask[v_idx] = 1.0
204
- vtx_colors[v_idx] = texture[v, u]
205
- elif v_idx not in unprocessed:
206
- unprocessed.append(v_idx)
207
-
208
- # Build undirected adjacency graph
209
- neighbor = faces[face_idx, (k + 1) % 3]
210
- if neighbor not in adjacency[v_idx]:
211
- adjacency[v_idx].append(neighbor)
212
- if v_idx not in adjacency[neighbor]:
213
- adjacency[neighbor].append(v_idx)
214
-
215
- # Color propagation with dynamic stopping
216
- remaining_iters, prev_count = 2, 0
217
- while remaining_iters > 0:
218
- current_unprocessed = []
219
-
220
- for v_idx in unprocessed:
221
- valid_neighbors = [n for n in adjacency[v_idx] if vtx_mask[n] > 0]
222
- if not valid_neighbors:
223
- current_unprocessed.append(v_idx)
224
- continue
225
-
226
- # Calculate inverse square distance weights
227
- neighbors_pos = vertices[valid_neighbors]
228
- dist_sq = np.sum((vertices[v_idx] - neighbors_pos) ** 2, axis=1)
229
- weights = 1 / np.maximum(dist_sq, 1e-8)
230
-
231
- vtx_colors[v_idx] = np.average(
232
- vtx_colors[valid_neighbors], weights=weights, axis=0
233
- )
234
- vtx_mask[v_idx] = 1.0
235
-
236
- # Update iteration control
237
- if len(current_unprocessed) == prev_count:
238
- remaining_iters -= 1
239
- else:
240
- remaining_iters = min(remaining_iters + 1, 2)
241
- prev_count = len(current_unprocessed)
242
- unprocessed = current_unprocessed
243
-
244
- # Generate output texture
245
- inpainted_texture, updated_mask = texture.copy(), mask.copy()
246
- for face_idx in range(faces.shape[0]):
247
- for k in range(3):
248
- v_idx = faces[face_idx, k]
249
- if not vtx_mask[v_idx]:
250
- continue
251
-
252
- # UV coordinate conversion
253
- uv_idx_k = faces[face_idx, k]
254
- u = np.clip(
255
- int(round(uv_map[uv_idx_k, 0] * (image_w - 1))), 0, image_w - 1
256
- )
257
- v = np.clip(
258
- int(round((1.0 - uv_map[uv_idx_k, 1]) * (image_h - 1))),
259
- 0,
260
- image_h - 1,
261
- )
262
-
263
- inpainted_texture[v, u] = vtx_colors[v_idx]
264
- updated_mask[v, u] = 255
265
-
266
- return inpainted_texture, updated_mask
267
-
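- # In `uv_inpaint` below, this vertex-propagation pass fills texels at vertex UV
- # locations first; the texels that remain unfilled are handled by cv2.inpaint.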
268
-
269
- class TextureBacker:
270
- """Texture baking pipeline for multi-view projection and fusion."""
271
-
272
- def __init__(
273
- self,
274
- camera_elevs: list[float],
275
- camera_azims: list[float],
276
- camera_distance: int,
277
- camera_fov: float,
278
- view_weights: list[float] = None,
279
- render_wh: tuple[int, int] = (2048, 2048),
280
- texture_wh: tuple[int, int] = (2048, 2048),
281
- use_antialias: bool = True,
282
- bake_angle_thresh: int = 75,
283
- device="cuda",
284
- ):
285
- self.camera_elevs = camera_elevs
286
- self.camera_azims = camera_azims
287
- self.view_weights = (
288
- view_weights
289
- if view_weights is not None
290
- else [1] * len(camera_elevs)
291
- )
292
- self.device = device
293
- self.render_wh = render_wh
294
- self.texture_wh = texture_wh
295
-
296
- self.camera_distance = camera_distance
297
- self.use_antialias = use_antialias
298
-
299
- self.bake_angle_thresh = bake_angle_thresh
300
- self.bake_unreliable_kernel_size = int(
301
- (2 / 512) * max(self.render_wh[0], self.render_wh[1])
302
- )
303
-
304
- self.camera_proj_mat = get_perspective_projection(
305
- camera_fov,
306
- self.render_wh[1] / self.render_wh[0],
307
- )
308
- self.cnt = 0
309
-
310
- def rasterize_mesh(
311
- self,
312
- vertex: torch.Tensor,
313
- face: torch.Tensor,
314
- resolution: tuple[int, int],
315
- ) -> torch.Tensor:
316
- vertex = vertex[None] if vertex.ndim == 2 else vertex
317
- indices, weights = cr.rasterize(vertex, face, resolution)
318
-
319
- return torch.cat(
320
- [weights, indices.unsqueeze(-1).to(weights.dtype)], dim=-1
321
- ).unsqueeze(0)
322
-
323
- def raster_interpolate(
324
- self, uv: torch.Tensor, rast_out: torch.Tensor, faces: torch.Tensor
325
- ) -> torch.Tensor:
326
- barycentric = rast_out[0, ..., :-1]
327
- findices = rast_out[0, ..., -1]
328
- if uv.dim() == 2:
329
- uv = uv.unsqueeze(0)
330
-
331
- return cr.interpolate(uv, findices, barycentric, faces)[0]
332
-
333
- def load_mesh(self, mesh_path: str) -> None:
334
- mesh = trimesh.load(mesh_path)
335
- if isinstance(mesh, trimesh.Scene):
336
- mesh = mesh.dump(concatenate=True)
337
-
338
- mesh.vertices, scale, center = normalize_vertices_array(mesh.vertices)
339
- self.scale, self.center = scale, center
340
-
341
- vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)
342
- mesh.vertices = mesh.vertices[vmapping]
343
- mesh.faces = indices
344
- mesh.visual.uv = uvs
345
-
346
- self.vertices = torch.from_numpy(mesh.vertices).to(self.device).float()
347
- self.faces = torch.from_numpy(mesh.faces).to(self.device).to(torch.int)
348
- self.uv_map = torch.from_numpy(mesh.visual.uv).to(self.device).float()
349
-
350
- # Transformation of coordinate system
351
- self.vertices[:, [0, 1]] = -self.vertices[:, [0, 1]]
352
- self.vertices[:, [1, 2]] = self.vertices[:, [2, 1]]
353
- self.uv_map[:, 1] = 1 - self.uv_map[:, 1]
354
-
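- # NOTE: `get_mesh_attrs` below returns vertices still in this flipped rendering
- # frame; only scale and center are undone, not the axis swaps above.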
355
- def get_mesh_attrs(
356
- self,
357
- scale: float = None,
358
- center: np.ndarray = None,
359
- ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
360
- vertices = self.vertices.cpu().numpy()
361
- faces = self.faces.cpu().numpy()
362
- uv_map = self.uv_map.cpu().numpy()
363
-
364
- if scale is not None:
365
- vertices = vertices / scale
366
- if center is not None:
367
- vertices = vertices + center
368
-
369
- return vertices, faces, uv_map
370
-
371
- def _render_depth_edges(self, depth_image: torch.Tensor) -> torch.Tensor:
372
- depth_image_np = depth_image.cpu().numpy()
373
- depth_image_np = (depth_image_np * 255).astype(np.uint8)
374
- depth_edges = cv2.Canny(depth_image_np, 30, 80)
375
- sketch_image = (
376
- torch.from_numpy(depth_edges).to(depth_image.device).float() / 255
377
- )
378
- sketch_image = sketch_image.unsqueeze(-1)
379
-
380
- return sketch_image
381
-
382
- def back_project(
383
- self, image: Image.Image, elev: float, azim: float
384
- ) -> tuple[torch.Tensor, torch.Tensor]:
385
- if isinstance(image, Image.Image):
386
- image = np.array(image)
387
- image = torch.as_tensor(image, device=self.device, dtype=torch.float32)
388
- if image.ndim == 2:
389
- image = image.unsqueeze(-1)
390
- image = image / 255.0
391
-
392
- view_mat = compute_w2c_matrix(elev, azim, self.camera_distance)
393
- pos_cam = transform_vertices(view_mat, self.vertices, keepdim=True)
394
- pos_clip = transform_vertices(self.camera_proj_mat, pos_cam)
395
- pos_cam = pos_cam[:, :3] / pos_cam[:, 3:]
396
-
397
- v0, v1, v2 = (pos_cam[self.faces[:, i]] for i in range(3))
398
- face_norm = F.normalize(torch.cross(v1 - v0, v2 - v0, dim=-1), dim=-1)
399
- vertex_norm = (
400
- torch.from_numpy(
401
- trimesh.geometry.mean_vertex_normals(
402
- len(pos_cam), self.faces.cpu(), face_norm.cpu()
403
- )
404
- )
405
- .to(self.device)
406
- .contiguous()
407
- )
408
-
409
- rast_out = self.rasterize_mesh(pos_clip, self.faces, image.shape[:2])
410
- vis_mask = torch.clamp(rast_out[..., -1:], 0, 1)[0]
411
-
412
- interp_data = {
413
- "normal": self.raster_interpolate(
414
- vertex_norm[None], rast_out, self.faces
415
- ),
416
- "uv": self.raster_interpolate(
417
- self.uv_map[None], rast_out, self.faces
418
- ),
419
- "depth": self.raster_interpolate(
420
- pos_cam[:, 2].reshape(1, -1, 1), rast_out, self.faces
421
- ),
422
- }
423
-
424
- valid_depth = interp_data["depth"][vis_mask > 0]
425
- depth_norm = (interp_data["depth"] - valid_depth.min()) / (
426
- valid_depth.max() - valid_depth.min()
427
- )
428
- depth_norm[vis_mask <= 0] = 0
429
- sketch_image = self._render_depth_edges(depth_norm * vis_mask)
430
-
431
- # cv2.imwrite("vis_mask.png", (vis_mask.cpu().numpy() * 255).astype(np.uint8))
432
- # cv2.imwrite("normal.png", (interp_data['normal'].cpu().numpy() * 255).astype(np.uint8))
433
- # cv2.imwrite("depth.png", (depth_norm.cpu().numpy() * 255).astype(np.uint8))
434
- # cv2.imwrite("uv.png", (interp_data['uv'][..., 0].cpu().numpy() * 255).astype(np.uint8))
435
- # import pdb; pdb.set_trace()
436
-
437
- cos = F.cosine_similarity(
438
- torch.tensor([[0, 0, -1]], device=self.device),
439
- interp_data["normal"].view(-1, 3),
440
- ).view_as(interp_data["normal"][..., :1])
441
- cos[cos < np.cos(np.radians(self.bake_angle_thresh))] = 0
442
-
443
- k = self.bake_unreliable_kernel_size * 2 + 1
444
- kernel = torch.ones((1, 1, k, k), device=self.device)
445
-
446
- vis_mask = vis_mask.permute(2, 0, 1).unsqueeze(0).float()
447
- vis_mask = F.conv2d(
448
- 1.0 - vis_mask,
449
- kernel,
450
- padding=k // 2,
451
- )
452
- vis_mask = 1.0 - (vis_mask > 0).float()
453
- vis_mask = vis_mask.squeeze(0).permute(1, 2, 0)
454
-
455
- sketch_image = sketch_image.permute(2, 0, 1).unsqueeze(0)
456
- sketch_image = F.conv2d(sketch_image, kernel, padding=k // 2)
457
- sketch_image = (sketch_image > 0).float()
458
- sketch_image = sketch_image.squeeze(0).permute(1, 2, 0)
459
- vis_mask = vis_mask * (sketch_image < 0.5)
460
-
461
- cos[vis_mask == 0] = 0
462
- valid_pixels = (vis_mask != 0).view(-1)
463
-
464
- return (
465
- self._scatter_texture(interp_data["uv"], image, valid_pixels),
466
- self._scatter_texture(interp_data["uv"], cos, valid_pixels),
467
- )
468
-
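- # back_project returns a UV-space color texture plus a cos-confidence map;
- # texels viewed beyond `bake_angle_thresh` or near depth discontinuities
- # (dilated Canny edges of the depth render) are zeroed before scattering.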
469
- def back_project2(
470
- self, image, vis_mask, depth, normal, uv
471
- ) -> tuple[torch.Tensor, torch.Tensor]:
472
- if isinstance(image, Image.Image):
473
- image = np.array(image)
474
- image = torch.as_tensor(image, device=self.device, dtype=torch.float32)
475
- if image.ndim == 2:
476
- image = image.unsqueeze(-1)
477
- image = image / 255.0
478
-
479
- depth_inv = (1.0 - depth) * vis_mask
480
- sketch_image = self._render_depth_edges(depth_inv)
481
-
482
- cv2.imwrite(
483
- f"v3_depth_inv{self.cnt}.png",
484
- (depth_inv.cpu().numpy() * 255).astype(np.uint8),
485
- )
486
-
487
- cos = F.cosine_similarity(
488
- torch.tensor([[0, 0, 1]], device=self.device),
489
- normal.view(-1, 3),
490
- ).view_as(normal[..., :1])
491
- cos[cos < np.cos(np.radians(self.bake_angle_thresh))] = 0
492
- # import pdb; pdb.set_trace()
493
- # cv2.imwrite(f"v3_cos{self.cnt}.png", (cos.cpu().numpy() * 255).astype(np.uint8))
494
- # cv2.imwrite(f"v3_sketch{self.cnt}.png", (sketch_image.cpu().numpy() * 255).astype(np.uint8))
495
-
496
- # cos2 = cv2.imread(f"v2_cos{self.cnt+1}.png", cv2.IMREAD_GRAYSCALE)
497
- # cos2 = torch.from_numpy(cos2[..., None]).to(self.device).float() / 255
498
- # cos = cos2
499
-
500
- self.cnt += 1
501
-
502
- k = self.bake_unreliable_kernel_size * 2 + 1
503
- kernel = torch.ones((1, 1, k, k), device=self.device)
504
-
505
- vis_mask = vis_mask.permute(2, 0, 1).unsqueeze(0).float()
506
- vis_mask = F.conv2d(
507
- 1.0 - vis_mask,
508
- kernel,
509
- padding=k // 2,
510
- )
511
- vis_mask = 1.0 - (vis_mask > 0).float()
512
- vis_mask = vis_mask.squeeze(0).permute(1, 2, 0)
513
-
514
- sketch_image = sketch_image.permute(2, 0, 1).unsqueeze(0)
515
- sketch_image = F.conv2d(sketch_image, kernel, padding=k // 2)
516
- sketch_image = (sketch_image > 0).float()
517
- sketch_image = sketch_image.squeeze(0).permute(1, 2, 0)
518
- vis_mask = vis_mask * (sketch_image < 0.5)
519
- # import pdb; pdb.set_trace()
520
- cv2.imwrite(
521
- f"v3_db_sketch{self.cnt}.png",
522
- (sketch_image.cpu().numpy() * 255).astype(np.uint8),
523
- )
524
-
525
- cos[vis_mask == 0] = 0
526
- # import pdb; pdb.set_trace()
527
- # vis_mask = cv2.imread(f"v2_db_mask{self.cnt}.png", cv2.IMREAD_GRAYSCALE)
528
- # vis_mask = torch.from_numpy(vis_mask[..., None]).to(self.device).float() / 255
529
- # cos2 = cv2.imread(f"v2_db_cos{self.cnt}.png", cv2.IMREAD_GRAYSCALE)
530
- # cos2 = torch.from_numpy(cos2[..., None]).to(self.device).float() / 255
531
- # cos = cos2
532
-
533
- valid_pixels = (vis_mask != 0).view(-1)
534
- # import pdb; pdb.set_trace()
535
- cv2.imwrite(
536
- f"v3_db_uv{self.cnt}.png",
537
- (uv[..., 0].cpu().numpy() * 255).astype(np.uint8),
538
- )
539
- cv2.imwrite(
540
- f"v3_db_uv2{self.cnt}.png",
541
- (uv[..., 1].cpu().numpy() * 255).astype(np.uint8),
542
- )
543
- cv2.imwrite(
544
- f"v3_db_color{self.cnt}.png",
545
- (image.cpu().numpy() * 255).astype(np.uint8),
546
- )
547
- cv2.imwrite(
548
- f"v3_db_cos{self.cnt}.png",
549
- (cos.cpu().numpy() * 255).astype(np.uint8),
550
- )
551
- cv2.imwrite(
552
- f"v3_db_mask{self.cnt}.png",
553
- (vis_mask.cpu().numpy() * 255).astype(np.uint8),
554
- )
555
-
556
- return (
557
- self._scatter_texture(uv, image, valid_pixels),
558
- self._scatter_texture(uv, cos, valid_pixels),
559
- )
560
-
561
- def _scatter_texture(self, uv, data, mask):
562
- def __filter_data(data, mask):
563
- return data.view(-1, data.shape[-1])[mask]
564
-
565
- return _bilinear_interpolation_scattering(
566
- self.texture_wh[1],
567
- self.texture_wh[0],
568
- __filter_data(uv, mask)[..., [1, 0]],
569
- __filter_data(data, mask),
570
- )
571
-
572
- @torch.no_grad()
573
- def fast_bake_texture(
574
- self, textures: list[torch.Tensor], confidence_maps: list[torch.Tensor]
575
- ) -> tuple[torch.Tensor, torch.Tensor]:
576
- channel = textures[0].shape[-1]
577
- texture_merge = torch.zeros(self.texture_wh + (channel,)).to(
578
- self.device
579
- )
580
- trust_map_merge = torch.zeros(self.texture_wh + (1,)).to(self.device)
581
- for texture, cos_map in zip(textures, confidence_maps):
582
- view_sum = (cos_map > 0).sum()
583
- painted_sum = ((cos_map > 0) * (trust_map_merge > 0)).sum()
584
- if painted_sum / view_sum > 0.99:
585
- continue
586
- texture_merge += texture * cos_map
587
- trust_map_merge += cos_map
588
- texture_merge = texture_merge / torch.clamp(trust_map_merge, min=1e-8)
589
-
590
- return texture_merge, trust_map_merge > 1e-8
591
-
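- # The confidence maps passed in are already `view_weight * cos**4` (see
- # __call__ / forward); a view adding less than ~1% new coverage is skipped,
- # and the returned mask marks texels with any accumulated confidence.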
592
- def uv_inpaint(
593
- self, texture: torch.Tensor, mask: torch.Tensor
594
- ) -> np.ndarray:
595
- texture_np = texture.cpu().numpy()
596
- mask_np = (mask.squeeze(-1).cpu().numpy() * 255).astype(np.uint8)
597
- vertices, faces, uv_map = self.get_mesh_attrs()
598
- # import pdb; pdb.set_trace()
599
- texture_np, mask_np = _texture_inpaint_smooth(
600
- texture_np, mask_np, vertices, faces, uv_map
601
- )
602
- texture_np = texture_np.clip(0, 1)
603
- texture_np = cv2.inpaint(
604
- (texture_np * 255).astype(np.uint8),
605
- 255 - mask_np,
606
- 3,
607
- cv2.INPAINT_NS,
608
- )
609
-
610
- return texture_np
611
-
612
- def __call__(
613
- self, colors: list[Image.Image], input_mesh: str, output_path: str
614
- ) -> trimesh.Trimesh:
615
- self.load_mesh(input_mesh)
616
-
617
- textures, weighted_cos_maps = [], []
618
- for color, cam_elev, cam_azim, weight in zip(
619
- colors, self.camera_elevs, self.camera_azims, self.view_weights
620
- ):
621
- texture, cos_map = self.back_project(color, cam_elev, cam_azim)
622
- textures.append(texture)
623
- weighted_cos_maps.append(weight * (cos_map**4))
624
-
625
- texture, mask = self.fast_bake_texture(textures, weighted_cos_maps)
626
- texture_np = self.uv_inpaint(texture, mask)
627
- texture_np = post_process_texture(texture_np)
628
- vertices, faces, uv_map = self.get_mesh_attrs(self.scale, self.center)
629
- # import pdb; pdb.set_trace()
630
- textured_mesh = save_mesh_with_mtl(
631
- vertices, faces, uv_map, texture_np, output_path
632
- )
633
-
634
- return textured_mesh
635
-
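- # Usage sketch (illustrative paths and values):
- #   backer = TextureBacker(
- #       camera_elevs=[20, -10], camera_azims=[0, 180],
- #       camera_distance=5, camera_fov=30, view_weights=[1, 1],
- #   )
- #   textured = backer(multiview_images, "mesh.glb", "out/textured.obj")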
636
- def forward(
637
- self,
638
- colors: list[Image.Image],
639
- masks,
640
- depths,
641
- normals,
642
- uvs,
643
- ) -> trimesh.Trimesh:
644
- textures, weighted_cos_maps = [], []
645
- for color, mask, depth, normal, uv, weight in zip(
646
- colors, masks, depths, normals, uvs, self.view_weights
647
- ):
648
- texture, cos_map = self.back_project2(
649
- color, mask, depth, normal, uv
650
- )
651
- cv2.imwrite(
652
- f"v3_texture{self.cnt}.png",
653
- (texture.cpu().numpy() * 255).astype(np.uint8),
654
- )
655
- cv2.imwrite(
656
- f"v3_texture_cos{self.cnt}.png",
657
- (cos_map.cpu().numpy() * 255).astype(np.uint8),
658
- )
659
-
660
- textures.append(texture)
661
- weighted_cos_maps.append(weight * (cos_map**4))
662
-
663
- texture, mask = self.fast_bake_texture(textures, weighted_cos_maps)
664
- texture_np = self.uv_inpaint(texture, mask)
665
- texture_np = post_process_texture(texture_np)
666
- vertices, faces, uv_map = self.get_mesh_attrs(self.scale, self.center)
667
- # import pdb; pdb.set_trace()
668
- cv2.imwrite("v3_texture_np.png", texture_np)
669
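- # NOTE: `output_path` is not a parameter of forward(); it refers to the
- # module-level variable defined in the __main__ block below.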
- textured_mesh = save_mesh_with_mtl(
670
- vertices, faces, uv_map, texture_np, output_path
671
- )
672
-
673
- return textured_mesh
674
-
675
-
676
- class Image_Super_Net:
677
- def __init__(self, device="cuda"):
678
- from diffusers import StableDiffusionUpscalePipeline
679
-
680
- self.up_pipeline_x4 = StableDiffusionUpscalePipeline.from_pretrained(
681
- "stabilityai/stable-diffusion-x4-upscaler",
682
- torch_dtype=torch.float16,
683
- ).to(device)
684
- self.up_pipeline_x4.set_progress_bar_config(disable=True)
685
-
686
- def __call__(self, image, prompt=""):
687
- with torch.no_grad():
688
- upscaled_image = self.up_pipeline_x4(
689
- prompt=[prompt],
690
- image=image,
691
- num_inference_steps=10,
692
- ).images[0]
693
-
694
- return upscaled_image
695
-
696
-
697
- class Image_GANNet:
698
- def __init__(self, outscale: int):
699
- from basicsr.archs.rrdbnet_arch import RRDBNet
700
- from realesrgan import RealESRGANer
701
-
702
- self.outscale = outscale
703
- model = RRDBNet(
704
- num_in_ch=3,
705
- num_out_ch=3,
706
- num_feat=64,
707
- num_block=23,
708
- num_grow_ch=32,
709
- scale=4,
710
- )
711
- self.upsampler = RealESRGANer(
712
- scale=4,
713
- model_path="/horizon-bucket/robot_lab/users/xinjie.wang/weights/super_resolution/RealESRGAN_x4plus.pth", # noqa
714
- model=model,
715
- pre_pad=0,
716
- half=True,
717
- )
718
-
719
- def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
720
- if isinstance(image, Image.Image):
721
- image = np.array(image)
722
- output, _ = self.upsampler.enhance(image, outscale=self.outscale)
723
-
724
- return Image.fromarray(output)
725
-
726
-
727
- if __name__ == "__main__":
728
- device = "cuda"
729
- color_path = "outputs/texture_mesh_gen/multi_view/color_sample0.png"
730
- mesh_path = "outputs/texture_mesh_gen/texture_mesh/kettle_color.glb"
731
- output_path = "robot_test_v6/robot.obj"
732
- target_image_size = (2048, 2048)
733
-
734
- super_model = Image_GANNet(outscale=4)
735
- multiviews = get_images_from_file(color_path, img_size=512)
736
- multiviews = [super_model(img) for img in multiviews]
737
- multiviews = [img.convert("RGB") for img in multiviews]
738
-
739
- from asset3d_gen.data.utils import (
740
- CameraSetting,
741
- init_kal_camera,
742
- DiffrastRender,
743
- )
744
- import nvdiffrast.torch as dr
745
-
746
- camera_params = CameraSetting(
747
- num_images=6,
748
- elevation=[20.0, -10.0],
749
- distance=5,
750
- resolution_hw=(2048, 2048),
751
- fov=math.radians(30),
752
- device="cuda",
753
- )
754
- camera = init_kal_camera(camera_params)
755
- mv = camera.view_matrix() # (n 4 4) world2cam
756
- p = camera.intrinsics.projection_matrix()
757
- # NOTE: negate P[1, 1] because the y axis is flipped in `nvdiffrast` output. # noqa
758
- p[:, 1, 1] = -p[:, 1, 1]
759
- renderer = DiffrastRender(
760
- p_matrix=p,
761
- mv_matrix=mv,
762
- resolution_hw=camera_params.resolution_hw,
763
- context=dr.RasterizeCudaContext(),
764
- mask_thresh=0.5,
765
- grad_db=False,
766
- device=camera_params.device,
767
- antialias_mask=True,
768
- )
769
-
770
- mesh = trimesh.load(mesh_path)
771
- if isinstance(mesh, trimesh.Scene):
772
- mesh = mesh.dump(concatenate=True)
773
-
774
- mesh.vertices, scale, center = normalize_vertices_array(mesh.vertices)
775
- vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)
776
- uvs[:, 1] = 1 - uvs[:, 1]
777
- mesh.vertices = mesh.vertices[vmapping]
778
- mesh.faces = indices
779
- mesh.visual.uv = uvs
780
-
781
- vertices = torch.from_numpy(mesh.vertices).to(camera_params.device).float()
782
- faces = (
783
- torch.from_numpy(mesh.faces).to(camera_params.device).to(torch.int64)
784
- )
785
- uvs = torch.from_numpy(mesh.visual.uv).to(camera_params.device).float()
786
-
787
- rendered_view_normals = []
788
- rast, vertices_clip = renderer.compute_dr_raster(vertices, faces)
789
- for idx in range(len(mv)):
790
- pos_cam = transform_vertices(mv[idx], vertices, keepdim=True)
791
- pos_cam = pos_cam[:, :3] / pos_cam[:, 3:]
792
- v0, v1, v2 = (pos_cam[faces[:, i]] for i in range(3))
793
- face_norm = F.normalize(torch.cross(v1 - v0, v2 - v0, dim=-1), dim=-1)
794
- vertex_norm = (
795
- torch.from_numpy(
796
- trimesh.geometry.mean_vertex_normals(
797
- len(pos_cam), faces.cpu(), face_norm.cpu()
798
- )
799
- )
800
- .to(camera_params.device)
801
- .contiguous()
802
- )
803
- im_base_normals, _ = dr.interpolate(
804
- vertex_norm[None, ...].float(),
805
- rast[idx : idx + 1],
806
- faces.to(torch.int32),
807
- )
808
- rendered_view_normals.append(im_base_normals)
809
-
810
- rendered_view_normals = torch.cat(rendered_view_normals, dim=0)
811
-
812
- rendered_depth, masks = renderer.render_depth(vertices, faces)
813
- norm_depths = []
814
- for idx in range(len(rendered_depth)):
815
- norm_depth = renderer.normalize_map_by_mask(
816
- rendered_depth[idx : idx + 1], masks[idx : idx + 1]
817
- )
818
- norm_depths.append(norm_depth)
819
- norm_depths = torch.cat(norm_depths, dim=0)
820
- render_uvs, _ = renderer.render_uv(vertices, faces, uvs)
821
-
822
- for index in range(6):
823
- cv2.imwrite(
824
- f"v3_mask{index}.png",
825
- (masks[index] * 255).cpu().numpy().astype(np.uint8),
826
- )
827
- cv2.imwrite(
828
- f"v3_normalv2{index}.png",
829
- (rendered_view_normals[index] * 255)
830
- .cpu()
831
- .numpy()
832
- .astype(np.uint8)[..., ::-1],
833
- )
834
- cv2.imwrite(
835
- f"v3_depth{index}.png",
836
- (norm_depths[index] * 255).cpu().numpy().astype(np.uint8),
837
- )
838
- cv2.imwrite(
839
- f"v3_uv{index}.png",
840
- (render_uvs[index, ..., 0] * 255).cpu().numpy().astype(np.uint8),
841
- )
842
- multiviews[index].save(f"v3_color{index}.png")
843
-
844
- texture_backer = TextureBacker(
845
- camera_elevs=[20, 20, 20, -10, -10, -10],
846
- camera_azims=[-180, -60, 60, -120, 0, 120],
847
- view_weights=[1, 0.2, 0.2, 0.2, 1, 0.2],
848
- camera_distance=5,
849
- camera_fov=30,
850
- render_wh=(2048, 2048),
851
- texture_wh=(2048, 2048),
852
- )
853
- texture_backer.vertices = vertices
854
- texture_backer.faces = faces
855
- uvs[:, 1] = 1.0 - uvs[:, 1]
856
- texture_backer.uv_map = uvs
857
- texture_backer.center = center
858
- texture_backer.scale = scale
859
-
860
- textured_mesh = texture_backer.forward(
861
- multiviews, masks, norm_depths, rendered_view_normals, render_uvs
862
- )
863
-
864
- # multiviews = [super_model(img) for img in multiviews]
865
- # multiviews = [img.convert("RGB") for img in multiviews]
866
- # textured_mesh = texture_backer(multiviews, mesh_path, output_path)
asset3d_gen/data/backup/backprojectv2.py DELETED
@@ -1,835 +0,0 @@
1
- from PIL import Image
2
- import torch
3
- import torch.nn.functional as F
4
- import numpy as np
5
- import math
6
- import trimesh
7
- import cv2
8
- import xatlas
9
- from typing import Union
10
-
11
-
12
- def get_perspective_projection_matrix(fovy, aspect_wh, near, far):
13
- fovy_rad = math.radians(fovy)
14
- return np.array(
15
- [
16
- [1.0 / (math.tan(fovy_rad / 2.0) * aspect_wh), 0, 0, 0],
17
- [0, 1.0 / math.tan(fovy_rad / 2.0), 0, 0],
18
- [
19
- 0,
20
- 0,
21
- -(far + near) / (far - near),
22
- -2.0 * far * near / (far - near),
23
- ],
24
- [0, 0, -1, 0],
25
- ]
26
- ).astype(np.float32)
27
-
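- # Standard OpenGL-style perspective projection: `fovy` is in degrees,
- # `aspect_wh` is the image aspect ratio, and view-space z between `near`
- # and `far` is mapped into clip space (the w-divide happens after the matmul).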
28
-
29
- def load_mesh(mesh):
30
- vtx_pos = mesh.vertices if hasattr(mesh, "vertices") else None
31
- pos_idx = mesh.faces if hasattr(mesh, "faces") else None
32
-
33
- vtx_uv = mesh.visual.uv if hasattr(mesh.visual, "uv") else None
34
- uv_idx = mesh.faces if hasattr(mesh, "faces") else None
35
-
36
- texture_data = None
37
-
38
- return vtx_pos, pos_idx, vtx_uv, uv_idx, texture_data
39
-
40
-
41
- def save_mesh(mesh, texture_data):
42
- material = trimesh.visual.texture.SimpleMaterial(
43
- image=texture_data, diffuse=(255, 255, 255)
44
- )
45
- texture_visuals = trimesh.visual.TextureVisuals(
46
- uv=mesh.visual.uv, image=texture_data, material=material
47
- )
48
- mesh.visual = texture_visuals
49
- return mesh
50
-
51
-
52
- def transform_pos(mtx, pos, keepdim=False):
53
- t_mtx = (
54
- torch.from_numpy(mtx).to(pos.device)
55
- if isinstance(mtx, np.ndarray)
56
- else mtx
57
- )
58
- if pos.shape[-1] == 3:
59
- posw = torch.cat(
60
- [pos, torch.ones([pos.shape[0], 1]).to(pos.device)], axis=1
61
- )
62
- else:
63
- posw = pos
64
-
65
- if keepdim:
66
- return torch.matmul(posw, t_mtx.t())[...]
67
- else:
68
- return torch.matmul(posw, t_mtx.t())[None, ...]
69
-
70
-
71
- def get_mv_matrix(elev, azim, camera_distance, center=None):
72
- elev = -elev
73
-
74
- elev_rad = math.radians(elev)
75
- azim_rad = math.radians(azim)
76
-
77
- camera_position = np.array(
78
- [
79
- camera_distance * math.cos(elev_rad) * math.cos(azim_rad),
80
- camera_distance * math.cos(elev_rad) * math.sin(azim_rad),
81
- camera_distance * math.sin(elev_rad),
82
- ]
83
- )
84
-
85
- if center is None:
86
- center = np.array([0, 0, 0])
87
- else:
88
- center = np.array(center)
89
-
90
- lookat = center - camera_position
91
- lookat = lookat / np.linalg.norm(lookat)
92
-
93
- up = np.array([0, 0, 1.0])
94
- right = np.cross(lookat, up)
95
- right = right / np.linalg.norm(right)
96
- up = np.cross(right, lookat)
97
- up = up / np.linalg.norm(up)
98
-
99
- c2w = np.concatenate(
100
- [np.stack([right, up, -lookat], axis=-1), camera_position[:, None]],
101
- axis=-1,
102
- )
103
-
104
- w2c = np.zeros((4, 4))
105
- w2c[:3, :3] = np.transpose(c2w[:3, :3], (1, 0))
106
- w2c[:3, 3:] = -np.matmul(np.transpose(c2w[:3, :3], (1, 0)), c2w[:3, 3:])
107
- w2c[3, 3] = 1.0
108
-
109
- return w2c.astype(np.float32)
110
-
111
-
112
- def stride_from_shape(shape):
113
- stride = [1]
114
- for x in reversed(shape[1:]):
115
- stride.append(stride[-1] * x)
116
- return list(reversed(stride))
117
-
118
-
119
- def scatter_add_nd_with_count(input, count, indices, values, weights=None):
120
- # input: [..., C], D dimension + C channel
121
- # count: [..., 1], D dimension
122
- # indices: [N, D], long
123
- # values: [N, C]
124
-
125
- D = indices.shape[-1]
126
- C = input.shape[-1]
127
- size = input.shape[:-1]
128
- stride = stride_from_shape(size)
129
-
130
- assert len(size) == D
131
-
132
- input = input.view(-1, C) # [HW, C]
133
- count = count.view(-1, 1)
134
-
135
- flatten_indices = (
136
- indices * torch.tensor(stride, dtype=torch.long, device=indices.device)
137
- ).sum(
138
- -1
139
- ) # [N]
140
-
141
- if weights is None:
142
- weights = torch.ones_like(values[..., :1])
143
-
144
- input.scatter_add_(0, flatten_indices.unsqueeze(1).repeat(1, C), values)
145
- count.scatter_add_(0, flatten_indices.unsqueeze(1), weights)
146
-
147
- return input.view(*size, C), count.view(*size, 1)
148
-
149
-
150
- def linear_grid_put_2d(H, W, coords, values, return_count=False):
151
- # coords: [N, 2], float in [0, 1]
152
- # values: [N, C]
153
-
154
- C = values.shape[-1]
155
-
156
- indices = coords * torch.tensor(
157
- [H - 1, W - 1], dtype=torch.float32, device=coords.device
158
- )
159
- indices_00 = indices.floor().long() # [N, 2]
160
- indices_00[:, 0].clamp_(0, H - 2)
161
- indices_00[:, 1].clamp_(0, W - 2)
162
- indices_01 = indices_00 + torch.tensor(
163
- [0, 1], dtype=torch.long, device=indices.device
164
- )
165
- indices_10 = indices_00 + torch.tensor(
166
- [1, 0], dtype=torch.long, device=indices.device
167
- )
168
- indices_11 = indices_00 + torch.tensor(
169
- [1, 1], dtype=torch.long, device=indices.device
170
- )
171
-
172
- h = indices[..., 0] - indices_00[..., 0].float()
173
- w = indices[..., 1] - indices_00[..., 1].float()
174
- w_00 = (1 - h) * (1 - w)
175
- w_01 = (1 - h) * w
176
- w_10 = h * (1 - w)
177
- w_11 = h * w
178
-
179
- result = torch.zeros(
180
- H, W, C, device=values.device, dtype=values.dtype
181
- ) # [H, W, C]
182
- count = torch.zeros(
183
- H, W, 1, device=values.device, dtype=values.dtype
184
- ) # [H, W, 1]
185
- weights = torch.ones_like(values[..., :1]) # [N, 1]
186
-
187
- result, count = scatter_add_nd_with_count(
188
- result,
189
- count,
190
- indices_00,
191
- values * w_00.unsqueeze(1),
192
- weights * w_00.unsqueeze(1),
193
- )
194
- result, count = scatter_add_nd_with_count(
195
- result,
196
- count,
197
- indices_01,
198
- values * w_01.unsqueeze(1),
199
- weights * w_01.unsqueeze(1),
200
- )
201
- result, count = scatter_add_nd_with_count(
202
- result,
203
- count,
204
- indices_10,
205
- values * w_10.unsqueeze(1),
206
- weights * w_10.unsqueeze(1),
207
- )
208
- result, count = scatter_add_nd_with_count(
209
- result,
210
- count,
211
- indices_11,
212
- values * w_11.unsqueeze(1),
213
- weights * w_11.unsqueeze(1),
214
- )
215
-
216
- if return_count:
217
- return result, count
218
-
219
- mask = count.squeeze(-1) > 0
220
- result[mask] = result[mask] / count[mask].repeat(1, C)
221
-
222
- return result
223
-
224
-
225
- def meshVerticeInpaint_smooth(texture, mask, vtx_pos, vtx_uv, pos_idx, uv_idx):
226
- texture_height, texture_width, texture_channel = texture.shape
227
- vtx_num = vtx_pos.shape[0]
228
-
229
- vtx_mask = np.zeros(vtx_num, dtype=np.float32)
230
- vtx_color = [
231
- np.zeros(texture_channel, dtype=np.float32) for _ in range(vtx_num)
232
- ]
233
- uncolored_vtxs = []
234
- G = [[] for _ in range(vtx_num)]
235
-
236
- for i in range(uv_idx.shape[0]):
237
- for k in range(3):
238
- vtx_uv_idx = uv_idx[i, k]
239
- vtx_idx = pos_idx[i, k]
240
- uv_v = int(round(vtx_uv[vtx_uv_idx, 0] * (texture_width - 1)))
241
- uv_u = int(
242
- round((1.0 - vtx_uv[vtx_uv_idx, 1]) * (texture_height - 1))
243
- )
244
- if mask[uv_u, uv_v] > 0:
245
- vtx_mask[vtx_idx] = 1.0
246
- vtx_color[vtx_idx] = texture[uv_u, uv_v]
247
- else:
248
- uncolored_vtxs.append(vtx_idx)
249
- G[pos_idx[i, k]].append(pos_idx[i, (k + 1) % 3])
250
-
251
- smooth_count = 2
252
- last_uncolored_vtx_count = 0
253
- while smooth_count > 0:
254
- uncolored_vtx_count = 0
255
- for vtx_idx in uncolored_vtxs:
256
- sum_color = np.zeros(texture_channel, dtype=np.float32)
257
- total_weight = 0.0
258
- vtx_0 = vtx_pos[vtx_idx]
259
- for connected_idx in G[vtx_idx]:
260
- if vtx_mask[connected_idx] > 0:
261
- vtx1 = vtx_pos[connected_idx]
262
- dist = np.sqrt(np.sum((vtx_0 - vtx1) ** 2))
263
- dist_weight = 1.0 / max(dist, 1e-4)
264
- dist_weight *= dist_weight
265
- sum_color += vtx_color[connected_idx] * dist_weight
266
- total_weight += dist_weight
267
- if total_weight > 0:
268
- vtx_color[vtx_idx] = sum_color / total_weight
269
- vtx_mask[vtx_idx] = 1.0
270
- else:
271
- uncolored_vtx_count += 1
272
-
273
- if last_uncolored_vtx_count == uncolored_vtx_count:
274
- smooth_count -= 1
275
- else:
276
- smooth_count += 1
277
- last_uncolored_vtx_count = uncolored_vtx_count
278
-
279
- new_texture = texture.copy()
280
- new_mask = mask.copy()
281
- for face_idx in range(uv_idx.shape[0]):
282
- for k in range(3):
283
- vtx_uv_idx = uv_idx[face_idx, k]
284
- vtx_idx = pos_idx[face_idx, k]
285
- if vtx_mask[vtx_idx] == 1.0:
286
- uv_v = int(round(vtx_uv[vtx_uv_idx, 0] * (texture_width - 1)))
287
- uv_u = int(
288
- round((1.0 - vtx_uv[vtx_uv_idx, 1]) * (texture_height - 1))
289
- )
290
- new_texture[uv_u, uv_v] = vtx_color[vtx_idx]
291
- new_mask[uv_u, uv_v] = 255
292
-
293
- return new_texture, new_mask
294
-
295
-
296
- def mesh_uv_wrap(mesh):
297
- if isinstance(mesh, trimesh.Scene):
298
- mesh = mesh.dump(concatenate=True)
299
-
300
- if len(mesh.faces) > 500000000:
301
- raise ValueError(
302
- "The mesh has more than 500,000,000 faces, which is not supported."
303
- )
304
-
305
- vmapping, indices, uvs = xatlas.parametrize(mesh.vertices, mesh.faces)
306
-
307
- mesh.vertices = mesh.vertices[vmapping]
308
- mesh.faces = indices
309
- mesh.visual.uv = uvs
310
-
311
- return mesh
312
-
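- # xatlas returns `vmapping` so that vertices are duplicated along UV seams;
- # faces and UVs are re-indexed consistently, which is why the vertex array
- # is rewritten here rather than only attaching UVs.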
313
-
314
- class MeshRender:
315
- def __init__(
316
- self,
317
- camera_distance=1.45,
318
- default_resolution=1024,
319
- texture_size=1024,
320
- use_antialias=True,
321
- max_mip_level=None,
322
- filter_mode="linear",
323
- bake_mode="linear",
324
- raster_mode="cr",
325
- device="cuda",
326
- ):
327
-
328
- self.device = device
329
-
330
- self.set_default_render_resolution(default_resolution)
331
- self.set_default_texture_resolution(texture_size)
332
-
333
- self.camera_distance = camera_distance
334
- self.use_antialias = use_antialias
335
- self.max_mip_level = max_mip_level
336
- self.filter_mode = filter_mode
337
-
338
- self.bake_angle_thres = 75
339
- self.bake_unreliable_kernel_size = int(
340
- (2 / 512)
341
- * max(self.default_resolution[0], self.default_resolution[1])
342
- )
343
- self.bake_mode = bake_mode
344
-
345
- self.raster_mode = raster_mode
346
- if self.raster_mode == "cr":
347
- import custom_rasterizer as cr
348
-
349
- self.raster = cr
350
- else:
351
- raise ValueError(f"No raster named {self.raster_mode}")
352
-
353
- fov = 30
354
- self.camera_proj_mat = get_perspective_projection_matrix(
355
- fov,
356
- self.default_resolution[1] / self.default_resolution[0],
357
- 0.01,
358
- 100.0,
359
- )
360
-
361
- def raster_rasterize(
362
- self, pos, tri, resolution, ranges=None, grad_db=True
363
- ):
364
-
365
- if self.raster_mode == "cr":
366
- rast_out_db = None
367
- if pos.dim() == 2:
368
- pos = pos.unsqueeze(0)
369
- findices, barycentric = self.raster.rasterize(pos, tri, resolution)
370
- rast_out = torch.cat((barycentric, findices.unsqueeze(-1)), dim=-1)
371
- rast_out = rast_out.unsqueeze(0)
372
- else:
373
- raise ValueError(f"No raster named {self.raster_mode}")
374
-
375
- return rast_out, rast_out_db
376
-
377
- def raster_interpolate(
378
- self, uv, rast_out, uv_idx, rast_db=None, diff_attrs=None
379
- ):
380
-
381
- if self.raster_mode == "cr":
382
- textd = None
383
- barycentric = rast_out[0, ..., :-1]
384
- findices = rast_out[0, ..., -1]
385
- if uv.dim() == 2:
386
- uv = uv.unsqueeze(0)
387
- textc = self.raster.interpolate(uv, findices, barycentric, uv_idx)
388
- else:
389
- raise ValueError(f"No raster named {self.raster_mode}")
390
-
391
- return textc, textd
392
-
393
- def load_mesh(
394
- self,
395
- mesh,
396
- ):
397
- vtx_pos, pos_idx, vtx_uv, uv_idx, texture_data = load_mesh(mesh)
398
- self.mesh_copy = mesh
399
- self.set_mesh(
400
- vtx_pos,
401
- pos_idx,
402
- vtx_uv=vtx_uv,
403
- uv_idx=uv_idx,
404
- )
405
- if texture_data is not None:
406
- self.set_texture(texture_data)
407
-
408
- def save_mesh(self):
409
- texture_data = self.get_texture()
410
- texture_data = Image.fromarray((texture_data * 255).astype(np.uint8))
411
- return save_mesh(self.mesh_copy, texture_data)
412
-
413
- def set_mesh(
414
- self,
415
- vtx_pos,
416
- pos_idx,
417
- vtx_uv=None,
418
- uv_idx=None,
419
- ):
420
-
421
- self.vtx_pos = torch.from_numpy(vtx_pos).to(self.device).float()
422
- self.pos_idx = torch.from_numpy(pos_idx).to(self.device).to(torch.int)
423
- if (vtx_uv is not None) and (uv_idx is not None):
424
- self.vtx_uv = torch.from_numpy(vtx_uv).to(self.device).float()
425
- self.uv_idx = (
426
- torch.from_numpy(uv_idx).to(self.device).to(torch.int)
427
- )
428
- else:
429
- self.vtx_uv = None
430
- self.uv_idx = None
431
-
432
- self.vtx_pos[:, [0, 1]] = -self.vtx_pos[:, [0, 1]]
433
- self.vtx_pos[:, [1, 2]] = self.vtx_pos[:, [2, 1]]
434
- if (vtx_uv is not None) and (uv_idx is not None):
435
- self.vtx_uv[:, 1] = 1.0 - self.vtx_uv[:, 1]
436
-
437
- def set_texture(self, tex):
438
- if isinstance(tex, np.ndarray):
439
- tex = Image.fromarray((tex * 255).astype(np.uint8))
440
- elif isinstance(tex, torch.Tensor):
441
- tex = tex.cpu().numpy()
442
- tex = Image.fromarray((tex * 255).astype(np.uint8))
443
-
444
- tex = tex.resize(self.texture_size).convert("RGB")
445
- tex = np.array(tex) / 255.0
446
- self.tex = torch.from_numpy(tex).to(self.device)
447
- self.tex = self.tex.float()
448
-
449
- def set_default_render_resolution(self, default_resolution):
450
- if isinstance(default_resolution, int):
451
- default_resolution = (default_resolution, default_resolution)
452
- self.default_resolution = default_resolution
453
-
454
- def set_default_texture_resolution(self, texture_size):
455
- if isinstance(texture_size, int):
456
- texture_size = (texture_size, texture_size)
457
- self.texture_size = texture_size
458
-
459
- def get_mesh(self):
460
- vtx_pos = self.vtx_pos.cpu().numpy()
461
- pos_idx = self.pos_idx.cpu().numpy()
462
- vtx_uv = self.vtx_uv.cpu().numpy()
463
- uv_idx = self.uv_idx.cpu().numpy()
464
-
465
- # Invert the coordinate-system transform applied in set_mesh.
466
- vtx_pos[:, [1, 2]] = vtx_pos[:, [2, 1]]
467
- vtx_pos[:, [0, 1]] = -vtx_pos[:, [0, 1]]
468
-
469
- vtx_uv[:, 1] = 1.0 - vtx_uv[:, 1]
470
- return vtx_pos, pos_idx, vtx_uv, uv_idx
471
-
472
- def get_texture(self):
473
- return self.tex.cpu().numpy()
474
-
475
- def render_sketch_from_depth(self, depth_image):
476
- depth_image_np = depth_image.cpu().numpy()
477
- depth_image_np = (depth_image_np * 255).astype(np.uint8)
478
- depth_edges = cv2.Canny(depth_image_np, 30, 80)
479
- combined_edges = depth_edges
480
- sketch_image = (
481
- torch.from_numpy(combined_edges).to(depth_image.device).float()
482
- / 255.0
483
- )
484
- sketch_image = sketch_image.unsqueeze(-1)
485
- return sketch_image
486
-
487
- def back_project(
488
- self, image, elev, azim, camera_distance=None, center=None, method=None
489
- ):
490
- if isinstance(image, Image.Image):
491
- image = torch.tensor(np.array(image) / 255.0)
492
- elif isinstance(image, np.ndarray):
493
- image = torch.tensor(image)
494
- if image.dim() == 2:
495
- image = image.unsqueeze(-1)
496
- image = image.float().to(self.device)
497
- resolution = image.shape[:2]
498
- channel = image.shape[-1]
499
- texture = torch.zeros(self.texture_size + (channel,)).to(self.device)
500
- cos_map = torch.zeros(self.texture_size + (1,)).to(self.device)
501
-
502
- proj = self.camera_proj_mat
503
- r_mv = get_mv_matrix(
504
- elev=elev,
505
- azim=azim,
506
- camera_distance=(
507
- self.camera_distance
508
- if camera_distance is None
509
- else camera_distance
510
- ),
511
- center=center,
512
- )
513
- pos_camera = transform_pos(r_mv, self.vtx_pos, keepdim=True)
514
- pos_clip = transform_pos(proj, pos_camera)
515
- pos_camera = pos_camera[:, :3] / pos_camera[:, 3:4]
516
- v0 = pos_camera[self.pos_idx[:, 0], :]
517
- v1 = pos_camera[self.pos_idx[:, 1], :]
518
- v2 = pos_camera[self.pos_idx[:, 2], :]
519
- face_normals = F.normalize(
520
- torch.cross(v1 - v0, v2 - v0, dim=-1), dim=-1
521
- )
522
- vertex_normals = trimesh.geometry.mean_vertex_normals(
523
- vertex_count=self.vtx_pos.shape[0],
524
- faces=self.pos_idx.cpu(),
525
- face_normals=face_normals.cpu(),
526
- )
527
- vertex_normals = (
528
- torch.from_numpy(vertex_normals)
529
- .float()
530
- .to(self.device)
531
- .contiguous()
532
- )
533
- tex_depth = pos_camera[:, 2].reshape(1, -1, 1).contiguous()
534
- rast_out, rast_out_db = self.raster_rasterize(
535
- pos_clip, self.pos_idx, resolution=resolution
536
- )
537
- visible_mask = torch.clamp(rast_out[..., -1:], 0, 1)[0, ...]
538
-
539
- normal, _ = self.raster_interpolate(
540
- vertex_normals[None, ...], rast_out, self.pos_idx
541
- )
542
- normal = normal[0, ...]
543
-
544
- uv, _ = self.raster_interpolate(
545
- self.vtx_uv[None, ...], rast_out, self.uv_idx
546
- )
547
- depth, _ = self.raster_interpolate(tex_depth, rast_out, self.pos_idx)
548
- depth = depth[0, ...]
549
-
550
- depth_max, depth_min = (
551
- depth[visible_mask > 0].max(),
552
- depth[visible_mask > 0].min(),
553
- )
554
- depth_normalized = (depth - depth_min) / (depth_max - depth_min)
555
- depth_image = depth_normalized * visible_mask # Mask out background.
556
-
557
- sketch_image = self.render_sketch_from_depth(depth_image)
558
-
559
- cv2.imwrite("d_depth.png", depth_image.cpu().numpy() * 255)
560
- cv2.imwrite("d_normal.png", normal.cpu().numpy() * 255)
561
- cv2.imwrite(
562
- "d_image.png", image.cpu().numpy()[..., :3][..., ::-1] * 255
563
- )
564
- cv2.imwrite("d_sketch_image.png", sketch_image.cpu().numpy() * 255)
565
- cv2.imwrite("d_uv1.png", uv.cpu().numpy()[0, ..., 0] * 255)
566
- cv2.imwrite("d_uv2.png", uv.cpu().numpy()[0, ..., 1] * 255)
567
- # p uv[0,...,0].mean(axis=0)
568
- # import pdb; pdb.set_trace()
569
-
570
- # depth_image = None
571
- # normal = None
572
- # image = None
573
-
574
- sketch_image = self.render_sketch_from_depth(depth_image)
575
- channel = image.shape[-1]
576
-
577
- lookat = torch.tensor([[0, 0, -1]], device=self.device)
578
- cos_image = torch.nn.functional.cosine_similarity(
579
- lookat, normal.view(-1, 3)
580
- )
581
- cos_image = cos_image.view(normal.shape[0], normal.shape[1], 1)
582
-
583
- cos_thres = np.cos(self.bake_angle_thres / 180 * np.pi)
584
- cos_image[cos_image < cos_thres] = 0
585
-
586
- # shrink
587
- kernel_size = self.bake_unreliable_kernel_size * 2 + 1
588
- kernel = torch.ones(
589
- (1, 1, kernel_size, kernel_size), dtype=torch.float32
590
- ).to(sketch_image.device)
591
-
592
- visible_mask = visible_mask.permute(2, 0, 1).unsqueeze(0).float()
593
- visible_mask = F.conv2d(
594
- 1.0 - visible_mask, kernel, padding=kernel_size // 2
595
- )
596
- visible_mask = 1.0 - (visible_mask > 0).float()  # binarize
597
- visible_mask = visible_mask.squeeze(0).permute(1, 2, 0)
598
-
599
- sketch_image = sketch_image.permute(2, 0, 1).unsqueeze(0)
600
- sketch_image = F.conv2d(sketch_image, kernel, padding=kernel_size // 2)
601
- sketch_image = (sketch_image > 0).float()  # binarize
602
- sketch_image = sketch_image.squeeze(0).permute(1, 2, 0)
603
- visible_mask = visible_mask * (sketch_image < 0.5)
604
-
605
- cos_image[visible_mask == 0] = 0
606
- proj_mask = (visible_mask != 0).view(-1)
607
- uv = uv.squeeze(0).contiguous().view(-1, 2)[proj_mask]
608
- image = image.squeeze(0).contiguous().view(-1, channel)[proj_mask]
609
- cos_image = cos_image.contiguous().view(-1, 1)[proj_mask]
610
- sketch_image = sketch_image.contiguous().view(-1, 1)[proj_mask]
611
- import pdb
612
-
613
- pdb.set_trace()
614
- texture = linear_grid_put_2d(
615
- self.texture_size[1], self.texture_size[0], uv[..., [1, 0]], image
616
- )
617
- cos_map = linear_grid_put_2d(
618
- self.texture_size[1],
619
- self.texture_size[0],
620
- uv[..., [1, 0]],
621
- cos_image,
622
- )
623
- boundary_map = linear_grid_put_2d(
624
- self.texture_size[1],
625
- self.texture_size[0],
626
- uv[..., [1, 0]],
627
- sketch_image,
628
- )
629
-
630
- return texture, cos_map, boundary_map
631
-
632
- @torch.no_grad()
633
- def fast_bake_texture(self, textures, cos_maps):
634
-
635
- channel = textures[0].shape[-1]
636
- texture_merge = torch.zeros(self.texture_size + (channel,)).to(
637
- self.device
638
- )
639
- trust_map_merge = torch.zeros(self.texture_size + (1,)).to(self.device)
640
- for texture, cos_map in zip(textures, cos_maps):
641
- view_sum = (cos_map > 0).sum()
642
- painted_sum = ((cos_map > 0) * (trust_map_merge > 0)).sum()
643
- if painted_sum / view_sum > 0.99:
644
- continue
645
- texture_merge += texture * cos_map
646
- trust_map_merge += cos_map
647
- texture_merge = texture_merge / torch.clamp(trust_map_merge, min=1e-8)
648
-
649
- return texture_merge, trust_map_merge > 1e-8
650
-
651
- def uv_inpaint(self, texture, mask):
652
-
653
- if isinstance(texture, torch.Tensor):
654
- texture_np = texture.cpu().numpy()
655
- elif isinstance(texture, np.ndarray):
656
- texture_np = texture
657
- elif isinstance(texture, Image.Image):
658
- texture_np = np.array(texture) / 255.0
659
-
660
- vtx_pos, pos_idx, vtx_uv, uv_idx = self.get_mesh()
661
-
662
- texture_np, mask = meshVerticeInpaint_smooth(
663
- texture_np, mask, vtx_pos, vtx_uv, pos_idx, uv_idx
664
- )
665
-
666
- texture_np = cv2.inpaint(
667
- (texture_np * 255).astype(np.uint8), 255 - mask, 3, cv2.INPAINT_NS
668
- )
669
-
670
- return texture_np
671
-
672
-
673
- def get_images_from_file(img_path: str, img_size: int) -> list[np.ndarray]:
674
- input_image = Image.open(img_path)
675
- view_images = np.array(input_image)
676
- view_images = np.concatenate(
677
- [view_images[:img_size, ...], view_images[img_size:, ...]], axis=1
678
- )
679
- images = np.split(view_images, view_images.shape[1] // img_size, axis=1)
680
-
681
- return images
682
-
683
-
684
- def bake_from_multiview(
685
- render, views, camera_elevs, camera_azims, view_weights, method="fast"
686
- ):
687
- project_textures, project_weighted_cos_maps = [], []
688
- project_boundary_maps = []
689
- for view, camera_elev, camera_azim, weight in zip(
690
- views, camera_elevs, camera_azims, view_weights
691
- ):
692
- project_texture, project_cos_map, project_boundary_map = (
693
- render.back_project(view, camera_elev, camera_azim)
694
- )
695
- project_cos_map = weight * (project_cos_map**4)
696
- project_textures.append(project_texture)
697
- project_weighted_cos_maps.append(project_cos_map)
698
- project_boundary_maps.append(project_boundary_map)
699
-
700
- if method == "fast":
701
- texture, ori_trust_map = render.fast_bake_texture(
702
- project_textures, project_weighted_cos_maps
703
- )
704
- else:
705
- raise ValueError(f"no method {method}")
706
-
707
- return texture, ori_trust_map > 1e-8
708
-
709
-
710
- def post_process(texture: np.ndarray, iter: int = 2) -> np.ndarray:
711
- for _ in range(iter):
712
- texture = cv2.fastNlMeansDenoisingColored(texture, None, 11, 11, 9, 25)
713
- texture = cv2.bilateralFilter(
714
- texture, d=7, sigmaColor=80, sigmaSpace=80
715
- )
716
-
717
- return texture
718
-
719
-
720
- class Image_Super_Net:
721
- def __init__(self, device="cuda"):
722
- from diffusers import StableDiffusionUpscalePipeline
723
-
724
- self.up_pipeline_x4 = StableDiffusionUpscalePipeline.from_pretrained(
725
- "stabilityai/stable-diffusion-x4-upscaler",
726
- torch_dtype=torch.float16,
727
- ).to(device)
728
- self.up_pipeline_x4.set_progress_bar_config(disable=True)
729
-
730
- def __call__(self, image, prompt=""):
731
- with torch.no_grad():
732
- upscaled_image = self.up_pipeline_x4(
733
- prompt=[prompt],
734
- image=image,
735
- num_inference_steps=10,
736
- ).images[0]
737
-
738
- return upscaled_image
739
-
740
-
741
- class Image_GANNet:
742
- def __init__(self, outscale: int):
743
- from realesrgan import RealESRGANer
744
- from basicsr.archs.rrdbnet_arch import RRDBNet
745
-
746
- self.outscale = outscale
747
- model = RRDBNet(
748
- num_in_ch=3,
749
- num_out_ch=3,
750
- num_feat=64,
751
- num_block=23,
752
- num_grow_ch=32,
753
- scale=4,
754
- )
755
- self.upsampler = RealESRGANer(
756
- scale=4,
757
- model_path="/home/users/xinjie.wang/xinjie/Real-ESRGAN/weights/RealESRGAN_x4plus.pth",
758
- model=model,
759
- pre_pad=0,
760
- half=True,
761
- )
762
-
763
- def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
764
- if isinstance(image, Image.Image):
765
- image = np.array(image)
766
- output, _ = self.upsampler.enhance(image, outscale=self.outscale)
767
-
768
- return Image.fromarray(output)
769
-
770
-
771
- if __name__ == "__main__":
772
- device = "cuda"
773
-
774
- # super_model = Image_Super_Net(device)
775
- super_model = Image_GANNet(outscale=4)
776
-
777
- selected_camera_elevs = [20, 20, 20, -10, -10, -10]
778
- selected_camera_azims = [-180, -60, 60, -120, 0, 120]
779
- selected_view_weights = [1, 0.2, 0.2, 0.2, 1, 0.2]
780
- # selected_view_weights = [1, 0.1, 0.5, 0.1, 0.05, 0.05]
781
-
782
- multiviews = get_images_from_file(
783
- "scripts/apps/texture_sessions/mfq4e7u4ko/multi_view/color_sample1.png",
784
- 512,
785
- )
786
- target_image_size = (2048, 2048)
787
-
788
- render = MeshRender(
789
- camera_distance=5,
790
- default_resolution=2048,
791
- texture_size=2048,
792
- )
793
-
794
- mesh = trimesh.load("scripts/apps/assets/example_texture/meshes/robot.obj")
795
- from asset3d_gen.data.utils import normalize_vertices_array
796
-
797
- mesh.vertices, scale, center = normalize_vertices_array(mesh.vertices)
798
- mesh = mesh_uv_wrap(mesh)
799
- render.load_mesh(mesh)
800
-
801
- # multiviews = [Image.fromarray(img) for img in multiviews]
802
- # multiviews = [Image.fromarray(img).convert("RGB") for img in multiviews]
803
- # for idx, img in enumerate(multiviews):
804
- # img.save(f"robot/raw/res_{idx}.png")
805
-
806
- multiviews = [super_model(img) for img in multiviews]
807
- multiviews = [img.convert("RGB") for img in multiviews]
808
- for idx, img in enumerate(multiviews):
809
- img.save(f"robot/super_gan_res_{idx}.png")
810
-
811
- texture, mask = bake_from_multiview(
812
- render,
813
- multiviews,
814
- selected_camera_elevs,
815
- selected_camera_azims,
816
- selected_view_weights,
817
- )
818
-
819
- texture_np = (texture.cpu().numpy() * 255).astype(np.uint8)[..., :3][
820
- ..., ::-1
821
- ]
822
- cv2.imwrite("robot/raw_texture.png", texture_np)
823
- print("texture done.")
824
-
825
- mask_np = (mask.squeeze(-1).cpu().numpy() * 255).astype(np.uint8)
826
- texture_np = render.uv_inpaint(texture, mask_np)
827
- cv2.imwrite("robot/inpaint_texture.png", texture_np[..., ::-1])
828
- # texture_np = post_process(texture_np, 2)
829
- # cv2.imwrite("robot/inpaint_conv_texture.png", texture_np[..., ::-1])
830
- print("inpaint done.")
831
-
832
- texture = torch.tensor(texture_np / 255).float().to(texture.device)
833
- render.set_texture(texture)
834
- textured_mesh = render.save_mesh()
835
- _ = textured_mesh.export("robot/robot.obj")
asset3d_gen/data/backup/gpt_qwen.py DELETED
@@ -1,70 +0,0 @@
1
- import torch
2
- from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
3
- from qwen_vl_utils import process_vision_info
4
- import os
5
- os.environ["https_proxy"] = "10.9.0.31:8838"
6
-
7
-
8
- # # default: Load the model on the available device(s)
9
- # model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
10
- # "Qwen/Qwen2.5-VL-7B-Instruct", torch_dtype="auto", device_map="auto"
11
- # )
12
-
13
- # We recommend enabling flash_attention_2 for better acceleration and memory saving, especially in multi-image and video scenarios.
14
- model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
15
- "Qwen/Qwen2.5-VL-7B-Instruct",
16
- torch_dtype=torch.bfloat16,
17
- attn_implementation="flash_attention_2",
18
- device_map="auto",
19
- )
20
-
21
-
22
- # default processor
23
- processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
24
-
25
- # The default range for the number of visual tokens per image in the model is 4-16384.
26
- # You can set min_pixels and max_pixels according to your needs, such as a token range of 256-1280, to balance performance and cost.
27
- # min_pixels = 256*28*28
28
- # max_pixels = 1280*28*28
29
- # processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct", min_pixels=min_pixels, max_pixels=max_pixels)
30
-
31
- messages = [
32
- {
33
- "role": "user",
34
- "content": [
35
- {
36
- "type": "image",
37
- "image": "outputs/text2image/demo_objects/bed/sample_0.jpg",
38
- },
39
- {
40
- "type": "image",
41
- "image": "outputs/imageto3d/v2/cups/sample_69/URDF_sample_69/qa_renders/image_color/003.png",
42
- },
43
- {"type": "text", "text": "Describe the second image."},
44
- ],
45
- }
46
- ]
47
-
48
- # Preparation for inference
49
- text = processor.apply_chat_template(
50
- messages, tokenize=False, add_generation_prompt=True
51
- )
52
- image_inputs, video_inputs = process_vision_info(messages)
53
- inputs = processor(
54
- text=[text],
55
- images=image_inputs,
56
- videos=video_inputs,
57
- padding=True,
58
- return_tensors="pt",
59
- )
60
- inputs = inputs.to("cuda")
61
-
62
- # Inference: Generation of the output
63
- generated_ids = model.generate(**inputs, max_new_tokens=128)
64
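- # Strip the prompt tokens from each sequence so only the newly generated
- # tokens are decoded below.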
- generated_ids_trimmed = [
65
- out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
66
- ]
67
- output_text = processor.batch_decode(
68
- generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
69
- )
70
- print(output_text)
asset3d_gen/data/backup/quat.py DELETED
@@ -1,49 +0,0 @@
1
- import numpy as np
2
-
3
- def quaternion_rotation_x_counterclockwise(angle_degrees):
4
- angle_radians = np.radians(angle_degrees)
5
- w = np.cos(angle_radians / 2)
6
- x = np.sin(angle_radians / 2)
7
- y, z = 0.0, 0.0
8
- return np.array([x, y, z, w]).round(4).tolist()
9
-
10
-
11
- def quaternion_rotation_y_counterclockwise(angle_degrees):
12
- angle_radians = np.radians(angle_degrees)
13
- w = np.cos(angle_radians / 2)
14
- y = np.sin(angle_radians / 2)
15
- x, z = 0.0, 0.0
16
- return np.array([x, y, z, w]).round(4).tolist()
17
-
18
-
19
- def quaternion_rotation_z_counterclockwise(angle_degrees):
20
- angle_radians = np.radians(angle_degrees)
21
- w = np.cos(angle_radians / 2)
22
- z = np.sin(angle_radians / 2)
23
- x, y = 0.0, 0.0
24
- return np.array([x, y, z, w]).round(4).tolist()
25
-
26
-
27
- def quaternion_multiply(q1, q2):
28
- x1, y1, z1, w1 = q1
29
- x2, y2, z2, w2 = q2
30
- w = w1*w2 - x1*x2 - y1*y2 - z1*z2
31
- x = w1*x2 + x1*w2 + y1*z2 - z1*y2
32
- y = w1*y2 - x1*z2 + y1*w2 + z1*x2
33
- z = w1*z2 + x1*y2 - y1*x2 + z1*w2
34
- return np.array([x, y, z, w])  # keep the [x, y, z, w] order used by the helpers above
35
-
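- # Worked example in the [x, y, z, w] convention used above: a 180 degree
- # rotation about X is [1, 0, 0, 0] and about Z is [0, 0, 1, 0]; their product
- # (Z applied after X) is [0, 1, 0, 0], a 180 degree rotation about Y, which is
- # the combination computed at the bottom of this file.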
36
-
37
-
38
- angle = 180
39
-
40
- print(f"X-axis counterclockwise rotation by {angle} degrees: {quaternion_rotation_x_counterclockwise(angle)}")
41
- print(f"Y-axis counterclockwise rotation by {angle} degrees: {quaternion_rotation_y_counterclockwise(angle)}")
42
- print(f"Z-axis counterclockwise rotation by {angle} degrees: {quaternion_rotation_z_counterclockwise(angle)}")
43
-
44
-
45
- q_1 = np.array([1.0, 0.0, 0.0, 0.0])
46
- q_2 = np.array([0.0, 0.0, 1.0, 0.0])
47
-
48
- q_total = quaternion_multiply(q_2, q_1)
49
- print(q_total.round(4).tolist())
asset3d_gen/data/differentiable_render.py CHANGED
@@ -353,13 +353,11 @@ def parse_args():
353
  "--mesh_path",
354
  type=str,
355
  nargs="+",
356
- required=True,
357
  help="Paths to the mesh files for rendering.",
358
  )
359
  parser.add_argument(
360
  "--output_root",
361
  type=str,
362
- required=True,
363
  help="Root directory for output",
364
  )
365
  parser.add_argument(
@@ -446,7 +444,7 @@ def parse_args():
446
 
447
  args = parser.parse_args()
448
 
449
- if args.uuid is None:
450
  args.uuid = []
451
  for path in args.mesh_path:
452
  uuid = os.path.basename(path).split(".")[0]
@@ -455,8 +453,11 @@ def parse_args():
455
  return args
456
 
457
 
458
- def entrypoint() -> None:
459
  args = parse_args()
 
 
 
460
 
461
  camera_settings = CameraSetting(
462
  num_images=args.num_images,
 
353
  "--mesh_path",
354
  type=str,
355
  nargs="+",
 
356
  help="Paths to the mesh files for rendering.",
357
  )
358
  parser.add_argument(
359
  "--output_root",
360
  type=str,
 
361
  help="Root directory for output",
362
  )
363
  parser.add_argument(
 
444
 
445
  args = parser.parse_args()
446
 
447
+ if args.uuid is None and args.mesh_path is not None:
448
  args.uuid = []
449
  for path in args.mesh_path:
450
  uuid = os.path.basename(path).split(".")[0]
 
453
  return args
454
 
455
 
456
+ def entrypoint(**kwargs) -> None:
457
  args = parse_args()
458
+ for k, v in kwargs.items():
459
+ if hasattr(args, k) and v is not None:
460
+ setattr(args, k, v)
461
 
462
  camera_settings = CameraSetting(
463
  num_images=args.num_images,
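
The differentiable_render.py change above relaxes the required CLI flags and lets entrypoint(**kwargs) override any parsed argument, which is what allows common.py (below) to call the renderer in-process instead of shelling out to drender-cli. A hedged sketch of the call pattern (paths are placeholders):

```python
from asset3d_gen.data.differentiable_render import entrypoint as render_api

# Keyword names mirror the argparse flags; any non-None keyword that matches an
# existing args attribute replaces the parsed default.
render_api(
    mesh_path=["outputs/sample.obj"],  # nargs="+" flag, so a list of paths
    output_root="outputs/condition",
    uuid="sample",
)
```
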
asset3d_gen/data/mesh_operator.py CHANGED
@@ -1,6 +1,6 @@
1
  import logging
2
  from typing import Tuple, Union
3
-
4
  import igraph
5
  import numpy as np
6
  import pyvista as pv
@@ -384,6 +384,7 @@ class MeshFixer(object):
384
  dtype=torch.int32,
385
  )
386
 
 
387
  def __call__(
388
  self,
389
  filter_ratio: float,
 
1
  import logging
2
  from typing import Tuple, Union
3
+ import spaces
4
  import igraph
5
  import numpy as np
6
  import pyvista as pv
 
384
  dtype=torch.int32,
385
  )
386
 
387
+ @spaces.GPU
388
  def __call__(
389
  self,
390
  filter_ratio: float,
asset3d_gen/models/delight_model.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  from typing import Union
3
-
4
  import cv2
5
  import numpy as np
6
  import torch
@@ -102,6 +102,7 @@ class DelightingModel(object):
102
 
103
  return new_image
104
 
 
105
  @torch.no_grad()
106
  def __call__(
107
  self,
 
1
  import os
2
  from typing import Union
3
+ import spaces
4
  import cv2
5
  import numpy as np
6
  import torch
 
102
 
103
  return new_image
104
 
105
+ @spaces.GPU
106
  @torch.no_grad()
107
  def __call__(
108
  self,
asset3d_gen/models/sr_model.py CHANGED
@@ -1,7 +1,7 @@
1
  import logging
2
  import os
3
  from typing import Union
4
-
5
  import numpy as np
6
  import torch
7
  from huggingface_hub import snapshot_download
@@ -35,6 +35,7 @@ class ImageStableSR:
35
  self.up_pipeline_x4.set_progress_bar_config(disable=True)
36
  # self.up_pipeline_x4.enable_model_cpu_offload()
37
 
 
38
  def __call__(
39
  self,
40
  image: Union[Image.Image, np.ndarray],
@@ -105,6 +106,7 @@ class ImageRealESRGAN:
105
  half=True,
106
  )
107
 
 
108
  def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
109
  if isinstance(image, Image.Image):
110
  image = np.array(image)
 
1
  import logging
2
  import os
3
  from typing import Union
4
+ import spaces
5
  import numpy as np
6
  import torch
7
  from huggingface_hub import snapshot_download
 
35
  self.up_pipeline_x4.set_progress_bar_config(disable=True)
36
  # self.up_pipeline_x4.enable_model_cpu_offload()
37
 
38
+ @spaces.GPU
39
  def __call__(
40
  self,
41
  image: Union[Image.Image, np.ndarray],
 
106
  half=True,
107
  )
108
 
109
+ @spaces.GPU
110
  def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
111
  if isinstance(image, Image.Image):
112
  image = np.array(image)
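
The recurring pattern in mesh_operator.py, delight_model.py, and sr_model.py above is the Hugging Face ZeroGPU idiom: import spaces at module level and decorate the GPU-touching callables with @spaces.GPU, so a GPU is attached only while those calls run. A minimal sketch of the shape being applied (the class is illustrative, not from the repo):

```python
import numpy as np
import spaces
import torch


class ExampleGPUStage:
    """Illustrative only; mirrors how the model __call__ methods are wrapped."""

    @spaces.GPU          # ZeroGPU attaches a GPU for the duration of this call
    @torch.no_grad()
    def __call__(self, image: np.ndarray) -> np.ndarray:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tensor = torch.from_numpy(image).float().to(device)
        # A real model would run inference here.
        return tensor.cpu().numpy()
```
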
asset3d_gen/scripts/render_gs.py CHANGED
@@ -2,7 +2,7 @@ import argparse
2
  import logging
3
  import math
4
  import os
5
-
6
  import cv2
7
  import numpy as np
8
  import torch
@@ -94,6 +94,7 @@ def load_gs_model(
94
  return gs_model
95
 
96
 
 
97
  def entrypoint(input_gs: str = None, output_path: str = None) -> None:
98
  args = parse_args()
99
  if isinstance(input_gs, str):
 
2
  import logging
3
  import math
4
  import os
5
+ import spaces
6
  import cv2
7
  import numpy as np
8
  import torch
 
94
  return gs_model
95
 
96
 
97
+ @spaces.GPU
98
  def entrypoint(input_gs: str = None, output_path: str = None) -> None:
99
  args = parse_args()
100
  if isinstance(input_gs, str):
common.py CHANGED
@@ -16,6 +16,7 @@ from easydict import EasyDict as edict
16
  from PIL import Image
17
  from tqdm import tqdm
18
  from asset3d_gen.data.backproject_v2 import entrypoint as backproject_api
 
19
  from asset3d_gen.models.delight_model import DelightingModel
20
  from asset3d_gen.models.gs_model import GaussianOperator
21
  from asset3d_gen.models.segment_model import (
@@ -71,7 +72,7 @@ os.environ["TORCH_EXTENSIONS_DIR"] = os.path.expanduser(
71
  "~/.cache/torch_extensions"
72
  )
73
  os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
74
- os.environ['SPCONV_ALGO'] = 'native'
75
 
76
  MAX_SEED = 100000
77
  DELIGHT = DelightingModel()
@@ -82,18 +83,25 @@ def download_kolors_weights() -> None:
82
  logger.info(f"Download kolors weights from huggingface...")
83
  subprocess.run(
84
  [
85
- "huggingface-cli", "download", "--resume-download",
86
- "Kwai-Kolors/Kolors", "--local-dir", "weights/Kolors"
 
 
 
 
87
  ],
88
- check=True
89
  )
90
  subprocess.run(
91
  [
92
- "huggingface-cli", "download", "--resume-download",
93
- "Kwai-Kolors/Kolors-IP-Adapter-Plus", "--local-dir",
94
- "weights/Kolors-IP-Adapter-Plus"
 
 
 
95
  ],
96
- check=True
97
  )
98
 
99
 
@@ -121,9 +129,7 @@ elif os.getenv("GRADIO_APP") == "textto3d":
121
  if not os.path.exists(text_model_dir):
122
  download_kolors_weights()
123
 
124
- PIPELINE_IMG_IP = build_text2img_ip_pipeline(
125
- text_model_dir, ref_scale=0.3
126
- )
127
  PIPELINE_IMG = build_text2img_pipeline(text_model_dir)
128
  SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
129
  GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
@@ -156,7 +162,7 @@ os.makedirs(TMP_DIR, exist_ok=True)
156
  lighting_css = """
157
  <style>
158
  #lighter_mesh canvas {
159
- filter: brightness(1.6) !important;
160
  }
161
  </style>
162
  """
@@ -299,7 +305,6 @@ def get_cached_image(image_path: str) -> Image.Image:
299
  return Image.open(image_path).resize((512, 512))
300
 
301
 
302
- @spaces.GPU
303
  def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
304
  return {
305
  "gaussian": {
@@ -318,7 +323,7 @@ def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
318
 
319
 
320
  @spaces.GPU
321
- def unpack_state(state: dict) -> tuple[Gaussian, edict, str]:
322
  gs = Gaussian(
323
  aabb=state["gaussian"]["aabb"],
324
  sh_degree=state["gaussian"]["sh_degree"],
@@ -327,17 +332,17 @@ def unpack_state(state: dict) -> tuple[Gaussian, edict, str]:
327
  opacity_bias=state["gaussian"]["opacity_bias"],
328
  scaling_activation=state["gaussian"]["scaling_activation"],
329
  )
330
- gs._xyz = torch.tensor(state["gaussian"]["_xyz"], device="cuda")
331
  gs._features_dc = torch.tensor(
332
- state["gaussian"]["_features_dc"], device="cuda"
333
  )
334
- gs._scaling = torch.tensor(state["gaussian"]["_scaling"], device="cuda")
335
- gs._rotation = torch.tensor(state["gaussian"]["_rotation"], device="cuda")
336
- gs._opacity = torch.tensor(state["gaussian"]["_opacity"], device="cuda")
337
 
338
  mesh = edict(
339
- vertices=torch.tensor(state["mesh"]["vertices"], device="cuda"),
340
- faces=torch.tensor(state["mesh"]["faces"], device="cuda"),
341
  )
342
 
343
  return gs, mesh
@@ -484,7 +489,6 @@ def extract_3d_representations(
484
  return mesh_glb_path, gs_path, mesh_obj_path, aligned_gs_path
485
 
486
 
487
- @spaces.GPU
488
  def extract_3d_representations_v2(
489
  state: dict,
490
  enable_delight: bool,
@@ -492,7 +496,7 @@ def extract_3d_representations_v2(
492
  ):
493
  output_root = TMP_DIR
494
  user_dir = os.path.join(output_root, str(req.session_hash))
495
- gs_model, mesh_model = unpack_state(state)
496
 
497
  filename = "sample"
498
  gs_path = os.path.join(user_dir, f"{filename}_gs.ply")
@@ -538,12 +542,9 @@ def extract_3d_representations_v2(
538
  mesh_glb_path = os.path.join(user_dir, f"{filename}.glb")
539
  mesh.export(mesh_glb_path)
540
 
541
- torch.cuda.empty_cache()
542
-
543
  return mesh_glb_path, gs_path, mesh_obj_path, aligned_gs_path
544
 
545
 
546
- @spaces.GPU
547
  def extract_urdf(
548
  gs_path: str,
549
  mesh_obj_path: str,
@@ -556,7 +557,8 @@ def extract_urdf(
556
  output_root = TMP_DIR
557
  if req is not None:
558
  output_root = os.path.join(output_root, str(req.session_hash))
559
- # Convert to URDF and recover attrs by gpt4o
 
560
  filename = "sample"
561
  urdf_convertor = URDFGenerator(GPT_CLIENT, render_view_num=4)
562
  asset_attrs = {
@@ -635,8 +637,6 @@ def extract_urdf(
635
  output_zip=f"{output_root}/urdf_{filename}.zip",
636
  )
637
 
638
- torch.cuda.empty_cache()
639
-
640
  estimated_type = urdf_convertor.estimated_attrs["category"]
641
  estimated_height = urdf_convertor.estimated_attrs["height"]
642
  estimated_mass = urdf_convertor.estimated_attrs["mass"]
@@ -660,7 +660,6 @@ def text2image_fn(
660
  ip_adapt_scale: float = 0.3,
661
  image_wh: int | tuple[int, int] = [1024, 1024],
662
  n_sample: int = 3,
663
- postprocess: bool = True,
664
  req: gr.Request = None,
665
  ):
666
  if isinstance(image_wh, int):
@@ -683,10 +682,10 @@ def text2image_fn(
683
  image_wh=image_wh,
684
  infer_step=infer_step,
685
  )
686
- if postprocess:
687
- for idx in range(len(images)):
688
- image = images[idx]
689
- images[idx] = preprocess_image_fn(image, req)
690
 
691
  save_paths = []
692
  for idx, image in enumerate(images):
@@ -705,18 +704,11 @@ def text2image_fn(
705
  @spaces.GPU
706
  def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
707
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
708
- command = [
709
- "drender-cli",
710
- "--mesh_path",
711
- mesh_path,
712
- "--output_root",
713
- f"{output_root}/condition",
714
- "--uuid",
715
- f"{uuid}",
716
- ]
717
 
718
- _ = subprocess.run(
719
- command, capture_output=True, text=True, encoding="utf-8"
 
 
720
  )
721
 
722
  gc.collect()
@@ -764,7 +756,6 @@ def generate_texture_mvimages(
764
  return img_save_paths + img_save_paths
765
 
766
 
767
- @spaces.GPU
768
  def backproject_texture(
769
  mesh_path: str,
770
  input_image: str,
@@ -864,32 +855,19 @@ def render_result_video(
864
  ) -> str:
865
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
866
  output_dir = os.path.join(output_root, "texture_mesh")
867
- command = [
868
- "drender-cli",
869
- "--mesh_path",
870
- mesh_path,
871
- "--output_root",
872
- output_dir,
873
- "--num_images",
874
- "90",
875
- "--elevation",
876
- "20",
877
- "--with_mtl",
878
- "--pbr_light_factor",
879
- "1.",
880
- "--uuid",
881
- f"{uuid}",
882
- "--gen_color_mp4",
883
- "--gen_glonormal_mp4",
884
- "--distance",
885
- "5.5",
886
- "--resolution_hw",
887
- f"{video_size}",
888
- f"{video_size}",
889
- ]
890
 
891
- _ = subprocess.run(
892
- command, capture_output=True, text=True, encoding="utf-8"
 
893
  )
894
 
895
  gc.collect()
 
16
  from PIL import Image
17
  from tqdm import tqdm
18
  from asset3d_gen.data.backproject_v2 import entrypoint as backproject_api
19
+ from asset3d_gen.data.differentiable_render import entrypoint as render_api
20
  from asset3d_gen.models.delight_model import DelightingModel
21
  from asset3d_gen.models.gs_model import GaussianOperator
22
  from asset3d_gen.models.segment_model import (
 
72
  "~/.cache/torch_extensions"
73
  )
74
  os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
75
+ os.environ["SPCONV_ALGO"] = "native"
76
 
77
  MAX_SEED = 100000
78
  DELIGHT = DelightingModel()
 
83
  logger.info(f"Download kolors weights from huggingface...")
84
  subprocess.run(
85
  [
86
+ "huggingface-cli",
87
+ "download",
88
+ "--resume-download",
89
+ "Kwai-Kolors/Kolors",
90
+ "--local-dir",
91
+ "weights/Kolors",
92
  ],
93
+ check=True,
94
  )
95
  subprocess.run(
96
  [
97
+ "huggingface-cli",
98
+ "download",
99
+ "--resume-download",
100
+ "Kwai-Kolors/Kolors-IP-Adapter-Plus",
101
+ "--local-dir",
102
+ "weights/Kolors-IP-Adapter-Plus",
103
  ],
104
+ check=True,
105
  )
106
 
107
 
 
129
  if not os.path.exists(text_model_dir):
130
  download_kolors_weights()
131
 
132
+ PIPELINE_IMG_IP = build_text2img_ip_pipeline(text_model_dir, ref_scale=0.3)
 
 
133
  PIPELINE_IMG = build_text2img_pipeline(text_model_dir)
134
  SEG_CHECKER = ImageSegChecker(GPT_CLIENT)
135
  GEO_CHECKER = MeshGeoChecker(GPT_CLIENT)
 
162
  lighting_css = """
163
  <style>
164
  #lighter_mesh canvas {
165
+ filter: brightness(1.8) !important;
166
  }
167
  </style>
168
  """
 
305
  return Image.open(image_path).resize((512, 512))
306
 
307
 
 
308
  def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
309
  return {
310
  "gaussian": {
 
323
 
324
 
325
  @spaces.GPU
326
+ def unpack_state(state: dict, device: str = "cuda") -> tuple[Gaussian, dict]:
327
  gs = Gaussian(
328
  aabb=state["gaussian"]["aabb"],
329
  sh_degree=state["gaussian"]["sh_degree"],
 
332
  opacity_bias=state["gaussian"]["opacity_bias"],
333
  scaling_activation=state["gaussian"]["scaling_activation"],
334
  )
335
+ gs._xyz = torch.tensor(state["gaussian"]["_xyz"], device=device)
336
  gs._features_dc = torch.tensor(
337
+ state["gaussian"]["_features_dc"], device=device
338
  )
339
+ gs._scaling = torch.tensor(state["gaussian"]["_scaling"], device=device)
340
+ gs._rotation = torch.tensor(state["gaussian"]["_rotation"], device=device)
341
+ gs._opacity = torch.tensor(state["gaussian"]["_opacity"], device=device)
342
 
343
  mesh = edict(
344
+ vertices=torch.tensor(state["mesh"]["vertices"], device=device),
345
+ faces=torch.tensor(state["mesh"]["faces"], device=device),
346
  )
347
 
348
  return gs, mesh
 
489
  return mesh_glb_path, gs_path, mesh_obj_path, aligned_gs_path
490
 
491
 
 
492
  def extract_3d_representations_v2(
493
  state: dict,
494
  enable_delight: bool,
 
496
  ):
497
  output_root = TMP_DIR
498
  user_dir = os.path.join(output_root, str(req.session_hash))
499
+ gs_model, mesh_model = unpack_state(state, device="cpu")
500
 
501
  filename = "sample"
502
  gs_path = os.path.join(user_dir, f"{filename}_gs.ply")
 
542
  mesh_glb_path = os.path.join(user_dir, f"{filename}.glb")
543
  mesh.export(mesh_glb_path)
544
 
 
 
545
  return mesh_glb_path, gs_path, mesh_obj_path, aligned_gs_path
546
 
547
 
 
548
  def extract_urdf(
549
  gs_path: str,
550
  mesh_obj_path: str,
 
557
  output_root = TMP_DIR
558
  if req is not None:
559
  output_root = os.path.join(output_root, str(req.session_hash))
560
+
561
+ # Convert to URDF and recover attrs by GPT.
562
  filename = "sample"
563
  urdf_convertor = URDFGenerator(GPT_CLIENT, render_view_num=4)
564
  asset_attrs = {
 
637
  output_zip=f"{output_root}/urdf_{filename}.zip",
638
  )
639
 
 
 
640
  estimated_type = urdf_convertor.estimated_attrs["category"]
641
  estimated_height = urdf_convertor.estimated_attrs["height"]
642
  estimated_mass = urdf_convertor.estimated_attrs["mass"]
 
660
  ip_adapt_scale: float = 0.3,
661
  image_wh: int | tuple[int, int] = [1024, 1024],
662
  n_sample: int = 3,
 
663
  req: gr.Request = None,
664
  ):
665
  if isinstance(image_wh, int):
 
682
  image_wh=image_wh,
683
  infer_step=infer_step,
684
  )
685
+
686
+ for idx in range(len(images)):
687
+ image = images[idx]
688
+ images[idx], _ = preprocess_image_fn(image)
689
 
690
  save_paths = []
691
  for idx, image in enumerate(images):
 
704
  @spaces.GPU
705
  def generate_condition(mesh_path: str, req: gr.Request, uuid: str = "sample"):
706
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
 
707
 
708
+ _ = render_api(
709
+ mesh_path=mesh_path,
710
+ output_root=f"{output_root}/condition",
711
+ uuid=str(uuid),
712
  )
713
 
714
  gc.collect()
 
756
  return img_save_paths + img_save_paths
757
 
758
 
 
759
  def backproject_texture(
760
  mesh_path: str,
761
  input_image: str,
 
855
  ) -> str:
856
  output_root = os.path.join(TMP_DIR, str(req.session_hash))
857
  output_dir = os.path.join(output_root, "texture_mesh")
 
 
858
 
859
+ _ = render_api(
860
+ mesh_path=mesh_path,
861
+ output_root=output_dir,
862
+ num_images=90,
863
+ elevation=[20],
864
+ with_mtl=True,
865
+ pbr_light_factor=1,
866
+ uuid=str(uuid),
867
+ gen_color_mp4=True,
868
+ gen_glonormal_mp4=True,
869
+ distance=5.5,
870
+ resolution_hw=(video_size, video_size),
871
  )
872
 
873
  gc.collect()
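
One more common.py detail worth flagging: unpack_state gained a device argument, so functions that lost their @spaces.GPU decorator (such as extract_3d_representations_v2) now unpack the Gaussian and mesh tensors on CPU instead of assuming CUDA is available. A hedged sketch of the two call shapes inside common.py:

```python
# Under @spaces.GPU, the default device stays "cuda":
gs_model, mesh_model = unpack_state(state)

# Without @spaces.GPU, e.g. in extract_3d_representations_v2:
gs_model, mesh_model = unpack_state(state, device="cpu")
```
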
requirements.txt CHANGED
@@ -3,9 +3,10 @@
3
 
4
  torch==2.4.0
5
  torchvision==0.19.0
6
- xformers==0.0.27.post2
7
  pytorch-lightning==2.4.0
8
  spconv-cu120==2.3.6
 
9
  dataclasses_json
10
  easydict
11
  opencv-python>4.5
 
3
 
4
  torch==2.4.0
5
  torchvision==0.19.0
6
+ xformers==0.0.28.post1
7
  pytorch-lightning==2.4.0
8
  spconv-cu120==2.3.6
9
+ triton==2.1.0
10
  dataclasses_json
11
  easydict
12
  opencv-python>4.5