ReubenSun committed on
Commit 5c326b3 · 1 Parent(s): 691c14e

Revert "texture sync"


This reverts commit 55f226f582932e6ec64e096f296c54d47a59de80.

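In short, the revert removes the texture-synchronization path from IG2MVSDXLPipeline.__call__: the UV projector (UVP), the step_tex_sync call, and the random color-background compositing are deleted, and every denoising step goes back to a plain scheduler update. The snippet below is a minimal, self-contained sketch of the restored per-step update, not the pipeline's verbatim code; the shapes and step count are illustrative, chosen to match the 6-view, 768-pixel (96-latent) setup that appears elsewhere in this diff.

import torch
from diffusers import DDPMScheduler

scheduler = DDPMScheduler(num_train_timesteps=1000)
scheduler.set_timesteps(30)

latents = torch.randn(6, 4, 96, 96)          # 6 views, 4 SDXL latent channels, 768 / 8 = 96
for t in scheduler.timesteps:
    noise_pred = torch.randn_like(latents)   # stand-in for the UNet noise prediction
    # Restored behaviour (the `+` lines in the first file below). Before the revert,
    # steps inside the texture-sync window were instead routed through step_tex_sync()
    # and a shared UV latent texture, then composited over random color backgrounds.
    latents = scheduler.step(noise_pred, t, latents, return_dict=False)[0]
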
step1x3d_texture/pipelines/ig2mv_sdxl_pipeline.py CHANGED
@@ -51,20 +51,6 @@ from ..models.attention_processor import (
51
  DecoupledMVRowSelfAttnProcessor2_0,
52
  set_unet_2d_condition_attn_processor,
53
  )
54
- import random
55
- from ..texture_sync.project import UVProjection as UVP
56
- from ..texture_sync.step_sync import step_tex_sync
57
- from trimesh import Trimesh
58
- from torchvision.transforms import Compose, Resize, GaussianBlur, InterpolationMode
59
- from diffusers.utils import (
60
- BaseOutput,
61
- numpy_to_pil,
62
- pt_to_pil,
63
- is_accelerate_available,
64
- is_accelerate_version,
65
- logging,
66
- replace_example_docstring
67
- )
68
 
69
  logger = logging.get_logger(__name__) # pylint: disable=invalid-name
70
 
@@ -84,27 +70,6 @@ def retrieve_latents(
84
  raise AttributeError("Could not access latents of provided encoder_output")
85
 
86
 
87
- @torch.no_grad()
88
- def composite_rendered_view(scheduler, backgrounds, foregrounds, masks, t):
89
- composited_images = []
90
- for i, (background, foreground, mask) in enumerate(zip(backgrounds, foregrounds, masks)):
91
- if t > 0:
92
- alphas_cumprod = scheduler.alphas_cumprod[t]
93
- noise = torch.normal(0, 1, background.shape, device=background.device)
94
- background = (1-alphas_cumprod) * noise + alphas_cumprod * background
95
- composited = foreground * mask + background * (1-mask)
96
- composited_images.append(composited)
97
- composited_tensor = torch.stack(composited_images)
98
- return composited_tensor
99
-
100
-
101
- @torch.no_grad()
102
- def encode_latents(vae, imgs):
103
- imgs = (imgs-0.5)*2
104
- latents = vae.encode(imgs).latent_dist.sample()
105
- latents = vae.config.scaling_factor * latents
106
- return latents
107
-
108
  class IG2MVSDXLPipeline(StableDiffusionXLPipeline, CustomAdapterMixin):
109
  def __init__(
110
  self,
@@ -344,8 +309,6 @@ class IG2MVSDXLPipeline(StableDiffusionXLPipeline, CustomAdapterMixin):
344
  # Image condition
345
  reference_image: Optional[PipelineImageInput] = None,
346
  reference_conditioning_scale: Optional[float] = 1.0,
347
- mesh: Optional[Trimesh] = None,
348
- texture_sync_config: Optional[dict] = None,
349
  **kwargs,
350
  ):
351
  r"""
@@ -593,27 +556,6 @@ class IG2MVSDXLPipeline(StableDiffusionXLPipeline, CustomAdapterMixin):
593
  latents,
594
  )
595
 
596
- # texture params init
597
- texture_size = texture_sync_config["texture_size"]
598
- latent_size = texture_sync_config["latent_size"]
599
- elevations = texture_sync_config["elevations"]
600
- azimuths = texture_sync_config["azimuths"]
601
- texture_sync_ratio = texture_sync_config["texture_sync_ratio"]
602
- camera_poses = [(elv, azim) for elv, azim in zip(elevations, azimuths)]
603
- uvp = UVP(texture_size=texture_size, render_size=latent_size, sampling_mode="nearest", channels=4, device=self._execution_device)
604
- uvp.load_mesh(mesh, scale_factor=1.0, autouv=True)
605
- uvp.set_cameras_and_render_settings(camera_poses, centers=None, camera_distance=texture_sync_config["camera_distance"], scale=((1.0, 1.0, 1.0),))
606
-
607
- latent_tex = uvp.set_noise_texture()
608
- noise_views = uvp.render_textured_views()
609
- foregrounds = [view[:-1] for view in noise_views]
610
- masks = [view[-1:] for view in noise_views]
611
-
612
- if texture_sync_ratio>0:
613
- composited_tensor = composite_rendered_view(self.scheduler, latents, foregrounds, masks, int(timesteps[0].cpu().item())+1)
614
- latents = composited_tensor.type(latents.dtype)
615
- uvp.to("cpu")
616
-
617
  # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
618
  extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
619
 
@@ -767,36 +709,6 @@ class IG2MVSDXLPipeline(StableDiffusionXLPipeline, CustomAdapterMixin):
767
  ).to(device=device, dtype=latents.dtype)
768
 
769
  self._num_timesteps = len(timesteps)
770
-
771
-
772
- # texture sync params
773
- exp_start = texture_sync_config["exp_start"]
774
- exp_end = texture_sync_config["exp_end"]
775
- shuffle_background_change = texture_sync_config["shuffle_background_change"]
776
- shuffle_background_end = texture_sync_config["shuffle_background_end"]
777
- num_timesteps = self.scheduler.config.num_train_timesteps
778
-
779
- uvp.to(self._execution_device)
780
- color_constants = {"black": [-1, -1, -1], "white": [1, 1, 1], "maroon": [0, -1, -1],
781
- "red": [1, -1, -1], "olive": [0, 0, -1], "yellow": [1, 1, -1],
782
- "green": [-1, 0, -1], "lime": [-1 ,1, -1], "teal": [-1, 0, 0],
783
- "aqua": [-1, 1, 1], "navy": [-1, -1, 0], "blue": [-1, -1, 1],
784
- "purple": [0, -1 , 0], "fuchsia": [1, -1, 1]}
785
- color_names = list(color_constants.keys())
786
- background_colors = [random.choice(list(color_constants.keys())) for i in range(len(camera_poses))]
787
- intermediate_results = []
788
- self.upcast_vae()
789
- self.vae.config.force_upcast = True
790
- color_images = torch.FloatTensor([color_constants[name] for name in color_names]).reshape(-1,3,1,1).to(dtype=torch.float32, device=self._execution_device)
791
- color_images = torch.ones(
792
- (1,1,latent_size*8, latent_size*8),
793
- device=self._execution_device,
794
- dtype=torch.float32
795
- ) * color_images
796
- color_images = ((0.5*color_images)+0.5)
797
- color_latents = encode_latents(self.vae, color_images).to(dtype=self.text_encoder_2.dtype)
798
- color_latents = {color[0]:color[1] for color in zip(color_names, [latent for latent in color_latents])}
799
-
800
  with self.progress_bar(total=num_inference_steps) as progress_bar:
801
  for i, t in enumerate(timesteps):
802
  if self.interrupt:
@@ -856,49 +768,9 @@ class IG2MVSDXLPipeline(StableDiffusionXLPipeline, CustomAdapterMixin):
856
 
857
  # compute the previous noisy sample x_t -> x_t-1
858
  latents_dtype = latents.dtype
859
-
860
- # texture sync
861
- current_exp = ((exp_end-exp_start) * i / num_inference_steps) + exp_start
862
- if t > (1-texture_sync_ratio)*num_timesteps:
863
- step_results = step_tex_sync(
864
- scheduler=self.scheduler,
865
- uvp=uvp,
866
- model_output=noise_pred,
867
- timestep=t,
868
- sample=latents,
869
- texture=latent_tex,
870
- return_dict=True,
871
- main_views=[],
872
- exp= current_exp,
873
- **extra_step_kwargs
874
- )
875
-
876
- pred_original_sample = step_results["pred_original_sample"]
877
- latents = step_results["prev_sample"]
878
- latent_tex = step_results["prev_tex"]
879
-
880
- # Composit latent foreground with random color background
881
- background_latents = [color_latents[color] for color in background_colors]
882
- composited_tensor = composite_rendered_view(self.scheduler, background_latents, latents, masks, t)
883
- latents = composited_tensor.type(latents.dtype)
884
-
885
- intermediate_results.append((latents.to("cpu"), pred_original_sample.to("cpu")))
886
- else:
887
- step_results = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=True)
888
- pred_original_sample = step_results["pred_original_sample"]
889
- latents = step_results["prev_sample"]
890
- latent_tex = None
891
- intermediate_results.append((latents.to("cpu"), pred_original_sample.to("cpu")))
892
-
893
- # 2. Shuffle background colors; only black and white are used after a certain timestep
894
- if (1-t/num_timesteps) < shuffle_background_change:
895
- background_colors = [random.choice(list(color_constants.keys())) for i in range(len(camera_poses))]
896
- elif (1-t/num_timesteps) < shuffle_background_end:
897
- background_colors = [random.choice(["black","white"]) for i in range(len(camera_poses))]
898
- else:
899
- background_colors = background_colors
900
- del noise_pred
901
-
902
  if latents.dtype != latents_dtype:
903
  if torch.backends.mps.is_available():
904
  # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
 
51
  DecoupledMVRowSelfAttnProcessor2_0,
52
  set_unet_2d_condition_attn_processor,
53
  )
54
 
55
  logger = logging.get_logger(__name__) # pylint: disable=invalid-name
56
 
 
70
  raise AttributeError("Could not access latents of provided encoder_output")
71
 
72
 
73
  class IG2MVSDXLPipeline(StableDiffusionXLPipeline, CustomAdapterMixin):
74
  def __init__(
75
  self,
 
309
  # Image condition
310
  reference_image: Optional[PipelineImageInput] = None,
311
  reference_conditioning_scale: Optional[float] = 1.0,
 
 
312
  **kwargs,
313
  ):
314
  r"""
 
556
  latents,
557
  )
558
 
559
  # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
560
  extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
561
 
 
709
  ).to(device=device, dtype=latents.dtype)
710
 
711
  self._num_timesteps = len(timesteps)
712
  with self.progress_bar(total=num_inference_steps) as progress_bar:
713
  for i, t in enumerate(timesteps):
714
  if self.interrupt:
 
768
 
769
  # compute the previous noisy sample x_t -> x_t-1
770
  latents_dtype = latents.dtype
771
+ latents = self.scheduler.step(
772
+ noise_pred, t, latents, **extra_step_kwargs, return_dict=False
773
+ )[0]
774
  if latents.dtype != latents_dtype:
775
  if torch.backends.mps.is_available():
776
  # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
step1x3d_texture/pipelines/step1x_3d_texture_synthesis_pipeline.py CHANGED
@@ -24,6 +24,7 @@ import trimesh
24
  import xatlas
25
  import scipy.sparse
26
  from scipy.sparse.linalg import spsolve
 
27
  from step1x3d_geometry.models.pipelines.pipeline_utils import smart_load_model
28
 
29
 
@@ -35,7 +36,7 @@ class Step1X3DTextureConfig:
35
  self.unet_model = None
36
  self.lora_model = None
37
  self.adapter_path = "stepfun-ai/Step1X-3D"
38
- self.scheduler = "ddpm"
39
  self.num_views = 6
40
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
41
  self.dtype = torch.float16
@@ -60,20 +61,6 @@ class Step1X3DTextureConfig:
60
  self.bake_exp = 4
61
  self.merge_method = "fast"
62
 
63
- # texture sync params
64
- self.texture_sync_config = {
65
- "texture_size": 1536,
66
- "latent_size": 768//8,
67
- "elevations": [0, 0, 0, 0, 90, -90],
68
- "azimuths": [0, 90, 180, 270, 0, 0],
69
- "texture_sync_ratio": 0.5,
70
- "exp_end": 6.0,
71
- "exp_start": 0,
72
- "shuffle_background_change": 0.4,
73
- "shuffle_background_end": 0.99,
74
- "camera_distance": 1.8
75
- }
76
-
77
 
78
  class Step1X3DTexturePipeline:
79
  def __init__(self, config):
@@ -133,9 +120,11 @@ class Step1X3DTexturePipeline:
133
  if unet_model is not None:
134
  pipe_kwargs["unet"] = UNet2DConditionModel.from_pretrained(unet_model)
135
 
 
136
  # Prepare pipeline
137
  pipe = IG2MVSDXLPipeline.from_pretrained(base_model, **pipe_kwargs)
138
 
 
139
  # Load scheduler if provided
140
  scheduler_class = None
141
  if scheduler == "ddpm":
@@ -149,11 +138,14 @@ class Step1X3DTexturePipeline:
149
  shift_scale=8.0,
150
  scheduler_class=scheduler_class,
151
  )
 
152
  pipe.init_custom_adapter(
153
  num_views=num_views,
154
  self_attn_processor=DecoupledMVRowColSelfAttnProcessor2_0,
155
  )
 
156
  pipe.load_custom_adapter(adapter_path, "step1x-3d-ig2v.safetensors")
 
157
  pipe.to(device=device, dtype=dtype)
158
  pipe.cond_encoder.to(device=device, dtype=dtype)
159
 
@@ -290,7 +282,6 @@ class Step1X3DTexturePipeline:
290
  negative_prompt=negative_prompt,
291
  cross_attention_kwargs={"scale": lora_scale},
292
  mesh=mesh_bp,
293
- texture_sync_config=self.config.texture_sync_config,
294
  **pipe_kwargs,
295
  ).images
296
 
@@ -368,7 +359,7 @@ class Step1X3DTexturePipeline:
368
  width=768,
369
  num_inference_steps=self.config.num_inference_steps,
370
  guidance_scale=self.config.guidance_scale,
371
- seed= seed if seed is not None else self.config.seed,
372
  lora_scale=self.config.lora_scale,
373
  reference_conditioning_scale=self.config.reference_conditioning_scale,
374
  negative_prompt=self.config.negative_prompt,
 
24
  import xatlas
25
  import scipy.sparse
26
  from scipy.sparse.linalg import spsolve
27
+
28
  from step1x3d_geometry.models.pipelines.pipeline_utils import smart_load_model
29
 
30
 
 
36
  self.unet_model = None
37
  self.lora_model = None
38
  self.adapter_path = "stepfun-ai/Step1X-3D"
39
+ self.scheduler = None
40
  self.num_views = 6
41
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
42
  self.dtype = torch.float16
 
61
  self.bake_exp = 4
62
  self.merge_method = "fast"
63
 
64
 
65
  class Step1X3DTexturePipeline:
66
  def __init__(self, config):
 
120
  if unet_model is not None:
121
  pipe_kwargs["unet"] = UNet2DConditionModel.from_pretrained(unet_model)
122
 
123
+ print('VAE Loaded!')
124
  # Prepare pipeline
125
  pipe = IG2MVSDXLPipeline.from_pretrained(base_model, **pipe_kwargs)
126
 
127
+ print('Base model Loaded!')
128
  # Load scheduler if provided
129
  scheduler_class = None
130
  if scheduler == "ddpm":
 
138
  shift_scale=8.0,
139
  scheduler_class=scheduler_class,
140
  )
141
+ print('Scheduler Loaded!')
142
  pipe.init_custom_adapter(
143
  num_views=num_views,
144
  self_attn_processor=DecoupledMVRowColSelfAttnProcessor2_0,
145
  )
146
+ print(f'Load adapter from {adapter_path}/step1x-3d-ig2v.safetensors')
147
  pipe.load_custom_adapter(adapter_path, "step1x-3d-ig2v.safetensors")
148
+ print(f'Load adapter succeeded!')
149
  pipe.to(device=device, dtype=dtype)
150
  pipe.cond_encoder.to(device=device, dtype=dtype)
151
 
 
282
  negative_prompt=negative_prompt,
283
  cross_attention_kwargs={"scale": lora_scale},
284
  mesh=mesh_bp,
 
285
  **pipe_kwargs,
286
  ).images
287
 
 
359
  width=768,
360
  num_inference_steps=self.config.num_inference_steps,
361
  guidance_scale=self.config.guidance_scale,
362
+ seed=seed if seed is not None else self.config.seed,
363
  lora_scale=self.config.lora_scale,
364
  reference_conditioning_scale=self.config.reference_conditioning_scale,
365
  negative_prompt=self.config.negative_prompt,
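
On the synthesis-pipeline side, the revert drops the texture_sync_config defaults and the texture_sync_config= call argument, and Step1X3DTextureConfig.scheduler now defaults to None instead of "ddpm" (the DDPM branch in the loading code is still selectable). A hedged sketch of how this surfaces to a caller, assuming the pipeline can be built directly from a config as its __init__(self, config) signature suggests:

from step1x3d_texture.pipelines.step1x_3d_texture_synthesis_pipeline import (
    Step1X3DTextureConfig,
    Step1X3DTexturePipeline,
)

config = Step1X3DTextureConfig()
config.scheduler = "ddpm"   # default is now None; "ddpm" re-enables the DDPM scheduler branch
# config.texture_sync_config no longer exists after this revert
pipeline = Step1X3DTexturePipeline(config)   # loads the base model, scheduler, and IG2MV adapter
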
step1x3d_texture/{texture_sync → renderer}/__init__.py RENAMED
File without changes
step1x3d_texture/renderer/geometry.py ADDED
@@ -0,0 +1,151 @@
1
+ import torch
2
+ import pytorch3d
3
+ import torch.nn.functional as F
4
+
5
+ from pytorch3d.ops import interpolate_face_attributes
6
+
7
+ from pytorch3d.renderer import (
8
+ look_at_view_transform,
9
+ FoVPerspectiveCameras,
10
+ AmbientLights,
11
+ PointLights,
12
+ DirectionalLights,
13
+ Materials,
14
+ RasterizationSettings,
15
+ MeshRenderer,
16
+ MeshRasterizer,
17
+ SoftPhongShader,
18
+ SoftSilhouetteShader,
19
+ HardPhongShader,
20
+ TexturesVertex,
21
+ TexturesUV,
22
+ Materials,
23
+ )
24
+ from pytorch3d.renderer.blending import BlendParams, hard_rgb_blend
25
+ from pytorch3d.renderer.utils import convert_to_tensors_and_broadcast, TensorProperties
26
+ from pytorch3d.renderer.mesh.shader import ShaderBase
27
+
28
+
29
+ def get_cos_angle(points, normals, camera_position):
30
+ """
31
+ calculate cosine similarity between view->surface and surface normal.
32
+ """
33
+
34
+ if points.shape != normals.shape:
35
+ msg = "Expected points and normals to have the same shape: got %r, %r"
36
+ raise ValueError(msg % (points.shape, normals.shape))
37
+
38
+ # Ensure all inputs have same batch dimension as points
39
+ matched_tensors = convert_to_tensors_and_broadcast(
40
+ points, camera_position, device=points.device
41
+ )
42
+ _, camera_position = matched_tensors
43
+
44
+ # Reshape direction and color so they have all the arbitrary intermediate
45
+ # dimensions as points. Assume first dim = batch dim and last dim = 3.
46
+ points_dims = points.shape[1:-1]
47
+ expand_dims = (-1,) + (1,) * len(points_dims)
48
+
49
+ if camera_position.shape != normals.shape:
50
+ camera_position = camera_position.view(expand_dims + (3,))
51
+
52
+ normals = F.normalize(normals, p=2, dim=-1, eps=1e-6)
53
+
54
+ # Calculate the cosine value.
55
+ view_direction = camera_position - points
56
+ view_direction = F.normalize(view_direction, p=2, dim=-1, eps=1e-6)
57
+ cos_angle = torch.sum(view_direction * normals, dim=-1, keepdim=True)
58
+ cos_angle = cos_angle.clamp(0, 1)
59
+
60
+ # Cosine of the angle between the reflected light ray and the viewer
61
+ return cos_angle
62
+
63
+
64
+ def _geometry_shading_with_pixels(
65
+ meshes, fragments, lights, cameras, materials, texels
66
+ ):
67
+ """
68
+ Render pixel space vertex position, normal(world), depth, and cos angle
69
+
70
+ Args:
71
+ meshes: Batch of meshes
72
+ fragments: Fragments named tuple with the outputs of rasterization
73
+ lights: Lights class containing a batch of lights
74
+ cameras: Cameras class containing a batch of cameras
75
+ materials: Materials class containing a batch of material properties
76
+ texels: texture per pixel of shape (N, H, W, K, 3)
77
+
78
+ Returns:
79
+ colors: (N, H, W, K, 3)
80
+ pixel_coords: (N, H, W, K, 3), camera coordinates of each intersection.
81
+ """
82
+ verts = meshes.verts_packed() # (V, 3)
83
+ faces = meshes.faces_packed() # (F, 3)
84
+ vertex_normals = meshes.verts_normals_packed() # (V, 3)
85
+ faces_verts = verts[faces]
86
+ faces_normals = vertex_normals[faces]
87
+ pixel_coords_in_camera = interpolate_face_attributes(
88
+ fragments.pix_to_face, fragments.bary_coords, faces_verts
89
+ )
90
+ pixel_normals = interpolate_face_attributes(
91
+ fragments.pix_to_face, fragments.bary_coords, faces_normals
92
+ )
93
+
94
+ cos_angles = get_cos_angle(
95
+ pixel_coords_in_camera, pixel_normals, cameras.get_camera_center()
96
+ )
97
+
98
+ return pixel_coords_in_camera, pixel_normals, fragments.zbuf[..., None], cos_angles
99
+
100
+
101
+ class HardGeometryShader(ShaderBase):
102
+ """
103
+ Renders common geometric information.
104
+
105
+
106
+ """
107
+
108
+ def forward(self, fragments, meshes, **kwargs):
109
+ cameras = super()._get_cameras(**kwargs)
110
+ texels = self.texel_from_uv(fragments, meshes)
111
+
112
+ lights = kwargs.get("lights", self.lights)
113
+ materials = kwargs.get("materials", self.materials)
114
+ blend_params = kwargs.get("blend_params", self.blend_params)
115
+ verts, normals, depths, cos_angles = _geometry_shading_with_pixels(
116
+ meshes=meshes,
117
+ fragments=fragments,
118
+ texels=texels,
119
+ lights=lights,
120
+ cameras=cameras,
121
+ materials=materials,
122
+ )
123
+ texels = meshes.sample_textures(fragments)
124
+ verts = hard_rgb_blend(verts, fragments, blend_params)
125
+ normals = hard_rgb_blend(normals, fragments, blend_params)
126
+ depths = hard_rgb_blend(depths, fragments, blend_params)
127
+ cos_angles = hard_rgb_blend(cos_angles, fragments, blend_params)
128
+ from IPython import embed
129
+
130
+ embed()
131
+ texels = hard_rgb_blend(texels, fragments, blend_params)
132
+ return verts, normals, depths, cos_angles, texels, fragments
133
+
134
+ def texel_from_uv(self, fragments, meshes):
135
+ texture_tmp = meshes.textures
136
+ maps_tmp = texture_tmp.maps_padded()
137
+ uv_color = [[[1, 0], [1, 1]], [[0, 0], [0, 1]]]
138
+ uv_color = (
139
+ torch.FloatTensor(uv_color).to(maps_tmp[0].device).type(maps_tmp[0].dtype)
140
+ )
141
+ uv_texture = TexturesUV(
142
+ [uv_color.clone() for t in maps_tmp],
143
+ texture_tmp.faces_uvs_padded(),
144
+ texture_tmp.verts_uvs_padded(),
145
+ sampling_mode="bilinear",
146
+ )
147
+ meshes.textures = uv_texture
148
+ texels = meshes.sample_textures(fragments)
149
+ meshes.textures = texture_tmp
150
+ texels = torch.cat((texels, texels[..., -1:] * 0), dim=-1)
151
+ return texels
step1x3d_texture/renderer/project.py ADDED
@@ -0,0 +1,875 @@
1
+ import torch
2
+ import pytorch3d
3
+
4
+
5
+ from pytorch3d.io import load_objs_as_meshes, load_obj, save_obj, IO
6
+
7
+ from pytorch3d.structures import Meshes
8
+ from pytorch3d.renderer import (
9
+ look_at_view_transform,
10
+ FoVPerspectiveCameras,
11
+ FoVOrthographicCameras,
12
+ AmbientLights,
13
+ PointLights,
14
+ DirectionalLights,
15
+ Materials,
16
+ RasterizationSettings,
17
+ MeshRenderer,
18
+ MeshRasterizer,
19
+ TexturesUV,
20
+ )
21
+
22
+ from .geometry import HardGeometryShader
23
+ from .shader import HardNChannelFlatShader
24
+ from .voronoi import voronoi_solve
25
+ import torch.nn.functional as F
26
+ import open3d as o3d
27
+ import pdb
28
+ import kaolin as kal
29
+ import numpy as np
30
+
31
+
32
+ import torch
33
+ from pytorch3d.renderer.cameras import FoVOrthographicCameras
34
+ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
35
+ from pytorch3d.common.datatypes import Device
36
+ import math
37
+ import torch.nn.functional as F
38
+ from trimesh import Trimesh
39
+ from pytorch3d.structures import Meshes
40
+ import os
41
+
42
+ LIST_TYPE = Union[list, np.ndarray, torch.Tensor]
43
+
44
+ _R = torch.eye(3)[None] # (1, 3, 3)
45
+ _T = torch.zeros(1, 3) # (1, 3)
46
+ _BatchFloatType = Union[float, Sequence[float], torch.Tensor]
47
+
48
+
49
+ class CustomOrthographicCameras(FoVOrthographicCameras):
50
+ def compute_projection_matrix(
51
+ self, znear, zfar, max_x, min_x, max_y, min_y, scale_xyz
52
+ ) -> torch.Tensor:
53
+ """
54
+ Custom orthographic projection-matrix computation; inherits from the parent and adjusts the depth-channel parameters.
55
+ Parameter shapes:
56
+ - znear/zfar: (N,)
57
+ - max_x/min_x: (N,)
58
+ - max_y/min_y: (N,)
59
+ - scale_xyz: (N, 3)
60
+ """
61
+ K = torch.zeros((self._N, 4, 4), dtype=torch.float32, device=self.device)
62
+
63
+ ones = torch.ones((self._N), dtype=torch.float32, device=self.device)
64
+ # NOTE: OpenGL flips handedness of coordinate system between camera
65
+ # space and NDC space so z sign is -ve. In PyTorch3D we maintain a
66
+ # right handed coordinate system throughout.
67
+ z_sign = +1.0
68
+
69
+ K[:, 0, 0] = (2.0 / (max_x - min_x)) * scale_xyz[:, 0]
70
+ K[:, 1, 1] = (2.0 / (max_y - min_y)) * scale_xyz[:, 1]
71
+ K[:, 0, 3] = -(max_x + min_x) / (max_x - min_x)
72
+ K[:, 1, 3] = -(max_y + min_y) / (max_y - min_y)
73
+ K[:, 3, 3] = ones
74
+
75
+ # NOTE: This maps the z coordinate to the range [0, 1] and replaces the
76
+ # OpenGL z normalization to [-1, 1]
77
+ K[:, 2, 2] = -2 * (1.0 / (zfar - znear)) * scale_xyz[:, 2]
78
+ K[:, 2, 3] = -(znear + zfar) / (zfar - znear)
79
+
80
+ return K
81
+
82
+ def __init__(
83
+ self,
84
+ znear: _BatchFloatType = 1.0,
85
+ zfar: _BatchFloatType = 100.0,
86
+ max_y: _BatchFloatType = 1.0,
87
+ min_y: _BatchFloatType = -1.0,
88
+ max_x: _BatchFloatType = 1.0,
89
+ min_x: _BatchFloatType = -1.0,
90
+ scale_xyz=((1.0, 1.0, 1.0),), # (N, 3)
91
+ R: torch.Tensor = _R,
92
+ T: torch.Tensor = _T,
93
+ K: Optional[torch.Tensor] = None,
94
+ device: Device = "cpu",
95
+ ):
96
+ # Reuse the parent-class initialization logic
97
+ super().__init__(
98
+ znear=znear,
99
+ zfar=zfar,
100
+ max_y=max_y,
101
+ min_y=min_y,
102
+ max_x=max_x,
103
+ min_x=min_x,
104
+ scale_xyz=scale_xyz,
105
+ R=R,
106
+ T=T,
107
+ K=K,
108
+ device=device,
109
+ )
110
+
111
+
112
+ def erode_torch_batch(binary_img_batch, kernel_size):
113
+ pad = (kernel_size - 1) // 2
114
+ bin_img = F.pad(
115
+ binary_img_batch.unsqueeze(1), pad=[pad, pad, pad, pad], mode="reflect"
116
+ )
117
+ out = -F.max_pool2d(-bin_img, kernel_size=kernel_size, stride=1, padding=0)
118
+ out = out.squeeze(1)
119
+ return out
120
+
121
+
122
+ def dilate_torch_batch(binary_img_batch, kernel_size):
123
+ pad = (kernel_size - 1) // 2
124
+ bin_img = F.pad(binary_img_batch, pad=[pad, pad, pad, pad], mode="reflect")
125
+ out = F.max_pool2d(bin_img, kernel_size=kernel_size, stride=1, padding=0)
126
+ out = out.squeeze()
127
+ return out
128
+
129
+
130
+ # Pytorch3D based renderering functions, managed in a class
131
+ # Render size is recommended to be the same as your latent view size
132
+ # DO NOT USE "bilinear" sampling when you are handling latents.
133
+ # Stable Diffusion has 4 latent channels so use channels=4
134
+
135
+
136
+ class UVProjection:
137
+ def __init__(
138
+ self,
139
+ texture_size=96,
140
+ render_size=64,
141
+ sampling_mode="nearest",
142
+ channels=3,
143
+ device=None,
144
+ ):
145
+ self.channels = channels
146
+ self.device = device or torch.device("cpu")
147
+ self.lights = AmbientLights(
148
+ ambient_color=((1.0,) * channels,), device=self.device
149
+ )
150
+ self.target_size = (texture_size, texture_size)
151
+ self.render_size = render_size
152
+ self.sampling_mode = sampling_mode
153
+
154
+ # Load obj mesh, rescale the mesh to fit into the bounding box
155
+ def load_mesh(self, mesh, scale_factor=2.0, auto_center=True, autouv=False):
156
+ if isinstance(mesh, Trimesh):
157
+ vertices = torch.tensor(mesh.vertices, dtype=torch.float32).to(self.device)
158
+ faces = torch.tensor(mesh.faces, dtype=torch.int64).to(self.device)
159
+ mesh = Meshes(verts=[vertices], faces=[faces])
160
+ verts = mesh.verts_packed()
161
+ mesh = mesh.update_padded(verts[None, :, :])
162
+ elif isinstance(mesh, str) and os.path.isfile(mesh):
163
+ mesh = load_objs_as_meshes([mesh], device=self.device)  # `mesh` is the file path in this branch
164
+ if auto_center:
165
+ verts = mesh.verts_packed()
166
+ max_bb = (verts - 0).max(0)[0]
167
+ min_bb = (verts - 0).min(0)[0]
168
+ scale = (max_bb - min_bb).max() / 2
169
+ center = (max_bb + min_bb) / 2
170
+ mesh.offset_verts_(-center)
171
+ mesh.scale_verts_((scale_factor / float(scale)))
172
+ else:
173
+ mesh.scale_verts_((scale_factor))
174
+
175
+ if autouv or (mesh.textures is None):
176
+ mesh = self.uv_unwrap(mesh)
177
+ self.mesh = mesh
178
+
179
+ def load_glb_mesh(
180
+ self, mesh_path, trimesh, scale_factor=1.0, auto_center=True, autouv=False
181
+ ):
182
+ from pytorch3d.io.experimental_gltf_io import MeshGlbFormat
183
+
184
+ io = IO()
185
+ io.register_meshes_format(MeshGlbFormat())
186
+ with open(mesh_path, "rb") as f:
187
+ mesh = io.load_mesh(f, include_textures=True, device=self.device)
188
+ if auto_center:
189
+ verts = mesh.verts_packed()
190
+
191
+ max_bb = (verts - 0).max(0)[0]
192
+ min_bb = (verts - 0).min(0)[0]
193
+ scale = (max_bb - min_bb).max() / 2
194
+ center = (max_bb + min_bb) / 2
195
+ mesh.offset_verts_(-center)
196
+ mesh.scale_verts_((scale_factor / float(scale)))
197
+ verts = mesh.verts_packed()
198
+ # T = torch.tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]], device=verts.device, dtype=verts.dtype)
199
+ # T = torch.tensor([[0, 0, 1], [0, 1, 0], [-1, 0, 0]], device=verts.device, dtype=verts.dtype)
200
+ # verts = verts @ T
201
+ mesh = mesh.update_padded(verts[None, :, :])
202
+ else:
203
+ mesh.scale_verts_((scale_factor))
204
+ if autouv or (mesh.textures is None):
205
+ mesh = self.uv_unwrap(mesh)
206
+ self.mesh = mesh
207
+
208
+ # Save obj mesh
209
+ def save_mesh(self, mesh_path, texture):
210
+ save_obj(
211
+ mesh_path,
212
+ self.mesh.verts_list()[0],
213
+ self.mesh.faces_list()[0],
214
+ verts_uvs=self.mesh.textures.verts_uvs_list()[0],
215
+ faces_uvs=self.mesh.textures.faces_uvs_list()[0],
216
+ texture_map=texture,
217
+ )
218
+
219
+ # Code referred to TEXTure code (https://github.com/TEXTurePaper/TEXTurePaper.git)
220
+ def uv_unwrap(self, mesh):
221
+ verts_list = mesh.verts_list()[0]
222
+ faces_list = mesh.faces_list()[0]
223
+
224
+ import xatlas
225
+ import numpy as np
226
+
227
+ v_np = verts_list.cpu().numpy()
228
+ f_np = faces_list.int().cpu().numpy()
229
+ atlas = xatlas.Atlas()
230
+ atlas.add_mesh(v_np, f_np)
231
+ chart_options = xatlas.ChartOptions()
232
+ chart_options.max_iterations = 4
233
+ atlas.generate(chart_options=chart_options)
234
+ vmapping, ft_np, vt_np = atlas[0] # [N], [M, 3], [N, 2]
235
+
236
+ vt = (
237
+ torch.from_numpy(vt_np.astype(np.float32))
238
+ .type(verts_list.dtype)
239
+ .to(mesh.device)
240
+ )
241
+ ft = (
242
+ torch.from_numpy(ft_np.astype(np.int64))
243
+ .type(faces_list.dtype)
244
+ .to(mesh.device)
245
+ )
246
+
247
+ new_map = torch.zeros(self.target_size + (self.channels,), device=mesh.device)
248
+ new_tex = TexturesUV([new_map], [ft], [vt], sampling_mode=self.sampling_mode)
249
+
250
+ mesh.textures = new_tex
251
+ return mesh
252
+
253
+ """
254
+ A function that disconnects faces in the mesh according to
255
+ its UV seams. The number of vertices is made equal to the
256
+ number of unique vertices in its UV layout, while the faces list
257
+ is intact.
258
+ """
259
+
260
+ def disconnect_faces(self):
261
+ mesh = self.mesh
262
+ verts_list = mesh.verts_list()
263
+ faces_list = mesh.faces_list()
264
+ verts_uvs_list = mesh.textures.verts_uvs_list()
265
+ faces_uvs_list = mesh.textures.faces_uvs_list()
266
+ packed_list = [v[f] for v, f in zip(verts_list, faces_list)]
267
+ verts_disconnect_list = [
268
+ torch.zeros(
269
+ (verts_uvs_list[i].shape[0], 3),
270
+ dtype=verts_list[0].dtype,
271
+ device=verts_list[0].device,
272
+ )
273
+ for i in range(len(verts_list))
274
+ ]
275
+ for i in range(len(verts_list)):
276
+ verts_disconnect_list[i][faces_uvs_list] = packed_list[i]
277
+ assert not mesh.has_verts_normals(), "Not implemented for vertex normals"
278
+ self.mesh_d = Meshes(verts_disconnect_list, faces_uvs_list, mesh.textures)
279
+ return self.mesh_d
280
+
281
+ """
282
+ A function that constructs a temp mesh for back-projection.
283
+ Given a disconnected mesh and a rasterizer, the function uses
284
+ the projected faces as the UV layout and its original UV with a pseudo
285
+ z value as the world-space geometry.
286
+ """
287
+
288
+ def construct_uv_mesh(self):
289
+ mesh = self.mesh_d
290
+ verts_list = mesh.verts_list()
291
+ verts_uvs_list = mesh.textures.verts_uvs_list()
292
+ # faces_list = [torch.flip(faces, [-1]) for faces in mesh.faces_list()]
293
+ new_verts_list = []
294
+ for i, (verts, verts_uv) in enumerate(zip(verts_list, verts_uvs_list)):
295
+ verts = verts.clone()
296
+ verts_uv = verts_uv.clone()
297
+ verts[..., 0:2] = verts_uv[..., :]
298
+ verts = (verts - 0.5) * 2
299
+ verts[..., 2] *= 1
300
+ new_verts_list.append(verts)
301
+ textures_uv = mesh.textures.clone()
302
+ self.mesh_uv = Meshes(new_verts_list, mesh.faces_list(), textures_uv)
303
+ return self.mesh_uv
304
+
305
+ # Set texture for the current mesh.
306
+ def set_texture_map(self, texture):
307
+ new_map = texture.permute(1, 2, 0)
308
+ new_map = new_map.to(self.device)
309
+ new_tex = TexturesUV(
310
+ [new_map],
311
+ self.mesh.textures.faces_uvs_padded(),
312
+ self.mesh.textures.verts_uvs_padded(),
313
+ sampling_mode=self.sampling_mode,
314
+ )
315
+ self.mesh.textures = new_tex
316
+
317
+ # Set the initial normal noise texture
318
+ # No generator here for replication of the experiment result. Add one as you wish
319
+ def set_noise_texture(self, channels=None):
320
+ if not channels:
321
+ channels = self.channels
322
+ noise_texture = torch.normal(
323
+ 0, 1, (channels,) + self.target_size, device=self.device
324
+ )
325
+ self.set_texture_map(noise_texture)
326
+ return noise_texture
327
+
328
+ # Set the cameras given the camera poses and centers
329
+ def set_cameras(self, camera_poses, centers=None, camera_distance=2.7, scale=None):
330
+ elev = torch.FloatTensor([pose[0] for pose in camera_poses])
331
+ azim = torch.FloatTensor([pose[1] for pose in camera_poses])
332
+ print("camera_distance:{}".format(camera_distance))
333
+ R, T = look_at_view_transform(
334
+ dist=camera_distance, elev=elev, azim=azim, at=centers or ((0, 0, 0),)
335
+ )
336
+ # flip_mat = torch.from_numpy(np.diag([-1.0, 1.0, -1.0]) ).type(torch.FloatTensor).to(R.device)
337
+ # R = R@flip_mat
338
+ # R = R.permute(0, 2, 1)
339
+ # T = T*torch.from_numpy(np.array([-1.0, 1.0, -1.0])).type(torch.FloatTensor).to(R.device)
340
+ # print("v R size:{}, v T size:{}".format(R.size(), T.size()))
341
+ # c2w = self.get_c2w(elev, [camera_distance]*len(elev), azim)
342
+ # w2c = torch.linalg.inv(c2w)
343
+ # R, T= w2c[:, :3, :3], w2c[:, :3, 3]
344
+ print("R size:{}, T size:{}".format(R.size(), T.size()))
345
+ # self.cameras = CustomOrthographicCameras(device=self.device, R=R, T=T, scale_xyz=scale or ((1,1,1),), znear=0.1, min_x=-0.55, max_x=0.55, min_y=-0.55, max_y=0.55)
346
+ self.cameras = FoVOrthographicCameras(
347
+ device=self.device, R=R, T=T, scale_xyz=scale or ((1, 1, 1),)
348
+ )
349
+
350
+ # Set all necessary internal data for rendering and texture baking
351
+ # Can be used to refresh after changing camera positions
352
+ def set_cameras_and_render_settings(
353
+ self,
354
+ camera_poses,
355
+ centers=None,
356
+ camera_distance=2.7,
357
+ render_size=None,
358
+ scale=None,
359
+ ):
360
+ self.set_cameras(camera_poses, centers, camera_distance, scale=scale)
361
+ if render_size is None:
362
+ render_size = self.render_size
363
+ if not hasattr(self, "renderer"):
364
+ self.setup_renderer(size=render_size)
365
+ if not hasattr(self, "mesh_d"):
366
+ self.disconnect_faces()
367
+ if not hasattr(self, "mesh_uv"):
368
+ self.construct_uv_mesh()
369
+ self.calculate_tex_gradient()
370
+ self.calculate_visible_triangle_mask()
371
+ _, _, _, cos_maps, _, _ = self.render_geometry()
372
+ self.calculate_cos_angle_weights(cos_maps)
373
+
374
+ # Setup renderers for rendering
375
+ # max faces per bin set to 30000 to avoid overflow in many test cases.
376
+ # You can use default value to let pytorch3d handle that for you.
377
+ def setup_renderer(
378
+ self,
379
+ size=64,
380
+ blur=0.0,
381
+ face_per_pix=1,
382
+ perspective_correct=False,
383
+ channels=None,
384
+ ):
385
+ if not channels:
386
+ channels = self.channels
387
+
388
+ self.raster_settings = RasterizationSettings(
389
+ image_size=size,
390
+ blur_radius=blur,
391
+ faces_per_pixel=face_per_pix,
392
+ perspective_correct=perspective_correct,
393
+ cull_backfaces=True,
394
+ max_faces_per_bin=30000,
395
+ )
396
+
397
+ self.renderer = MeshRenderer(
398
+ rasterizer=MeshRasterizer(
399
+ cameras=self.cameras,
400
+ raster_settings=self.raster_settings,
401
+ ),
402
+ shader=HardNChannelFlatShader(
403
+ device=self.device,
404
+ cameras=self.cameras,
405
+ lights=self.lights,
406
+ channels=channels,
407
+ # materials=materials
408
+ ),
409
+ )
410
+
411
+ # Bake screen-space cosine weights to UV space
412
+ # May be able to reimplement using the generic "bake_texture" function, but it works so leave it here for now
413
+ @torch.enable_grad()
414
+ def calculate_cos_angle_weights(self, cos_angles, fill=True, channels=None):
415
+ if not channels:
416
+ channels = self.channels
417
+ cos_maps = []
418
+ tmp_mesh = self.mesh.clone()
419
+ for i in range(len(self.cameras)):
420
+
421
+ zero_map = torch.zeros(
422
+ self.target_size + (channels,), device=self.device, requires_grad=True
423
+ )
424
+ optimizer = torch.optim.SGD([zero_map], lr=1, momentum=0)
425
+ optimizer.zero_grad()
426
+ zero_tex = TexturesUV(
427
+ [zero_map],
428
+ self.mesh.textures.faces_uvs_padded(),
429
+ self.mesh.textures.verts_uvs_padded(),
430
+ sampling_mode=self.sampling_mode,
431
+ )
432
+ tmp_mesh.textures = zero_tex
433
+
434
+ images_predicted = self.renderer(
435
+ tmp_mesh, cameras=self.cameras[i], lights=self.lights
436
+ )
437
+
438
+ loss = torch.sum((cos_angles[i, :, :, 0:1] ** 1 - images_predicted) ** 2)
439
+ loss.backward()
440
+ optimizer.step()
441
+
442
+ if fill:
443
+ zero_map = zero_map.detach() / (self.gradient_maps[i] + 1e-8)
444
+ zero_map = voronoi_solve(
445
+ zero_map, self.gradient_maps[i][..., 0], self.device
446
+ )
447
+ else:
448
+ zero_map = zero_map.detach() / (self.gradient_maps[i] + 1e-8)
449
+ cos_maps.append(zero_map)
450
+ self.cos_maps = cos_maps
451
+
452
+ # Get geometric info from fragment shader
453
+ # Can be used for generating conditioning image and cosine weights
454
+ # Returns some information you may not need, remember to release them for memory saving
455
+ @torch.no_grad()
456
+ def render_geometry(self, image_size=None):
457
+ if image_size:
458
+ size = self.renderer.rasterizer.raster_settings.image_size
459
+ self.renderer.rasterizer.raster_settings.image_size = image_size
460
+ shader = self.renderer.shader
461
+ self.renderer.shader = HardGeometryShader(
462
+ device=self.device, cameras=self.cameras[0], lights=self.lights
463
+ )
464
+ tmp_mesh = self.mesh.clone()
465
+
466
+ verts, normals, depths, cos_angles, texels, fragments = self.renderer(
467
+ tmp_mesh.extend(len(self.cameras)), cameras=self.cameras, lights=self.lights
468
+ )
469
+ self.renderer.shader = shader
470
+
471
+ if image_size:
472
+ self.renderer.rasterizer.raster_settings.image_size = size
473
+
474
+ return verts, normals, depths, cos_angles, texels, fragments
475
+
476
+ # Project world normal to view space and normalize
477
+ @torch.no_grad()
478
+ def decode_view_normal(self, normals):
479
+ w2v_mat = self.cameras.get_full_projection_transform()
480
+ normals_view = torch.clone(normals)[:, :, :, 0:3]
481
+ normals_view = normals_view.reshape(normals_view.shape[0], -1, 3)
482
+ normals_view = w2v_mat.transform_normals(normals_view)
483
+ normals_view = normals_view.reshape(normals.shape[0:3] + (3,))
484
+ normals_view[:, :, :, 2] *= -1
485
+ normals = (normals_view[..., 0:3] + 1) * normals[
486
+ ..., 3:
487
+ ] / 2 + torch.FloatTensor(((((0.5, 0.5, 1))))).to(self.device) * (
488
+ 1 - normals[..., 3:]
489
+ )
490
+ # normals = torch.cat([normal for normal in normals], dim=1)
491
+ normals = normals.clamp(0, 1)
492
+ return normals
493
+
494
+ # Normalize absolute depth to inverse depth
495
+ @torch.no_grad()
496
+ def decode_normalized_depth(self, depths, batched_norm=False):
497
+ view_z, mask = depths.unbind(-1)
498
+ view_z = view_z * mask + 100 * (1 - mask)
499
+ inv_z = 1 / view_z
500
+ inv_z_min = inv_z * mask + 100 * (1 - mask)
501
+ if not batched_norm:
502
+ max_ = torch.max(inv_z, 1, keepdim=True)
503
+ max_ = torch.max(max_[0], 2, keepdim=True)[0]
504
+
505
+ min_ = torch.min(inv_z_min, 1, keepdim=True)
506
+ min_ = torch.min(min_[0], 2, keepdim=True)[0]
507
+ else:
508
+ max_ = torch.max(inv_z)
509
+ min_ = torch.min(inv_z_min)
510
+ inv_z = (inv_z - min_) / (max_ - min_)
511
+ inv_z = inv_z.clamp(0, 1)
512
+ inv_z = inv_z[..., None].repeat(1, 1, 1, 3)
513
+
514
+ return inv_z
515
+
516
+ # Multiple screen pixels could pass gradient to a same texel
517
+ # We can precalculate this gradient strength and use it to normalize gradients when we bake textures
518
+ @torch.enable_grad()
519
+ def calculate_tex_gradient(self, channels=None):
520
+ if not channels:
521
+ channels = self.channels
522
+ tmp_mesh = self.mesh.clone()
523
+ gradient_maps = []
524
+ for i in range(len(self.cameras)):
525
+ zero_map = torch.zeros(
526
+ self.target_size + (channels,), device=self.device, requires_grad=True
527
+ )
528
+ optimizer = torch.optim.SGD([zero_map], lr=1, momentum=0)
529
+ optimizer.zero_grad()
530
+ zero_tex = TexturesUV(
531
+ [zero_map],
532
+ self.mesh.textures.faces_uvs_padded(),
533
+ self.mesh.textures.verts_uvs_padded(),
534
+ sampling_mode=self.sampling_mode,
535
+ )
536
+ tmp_mesh.textures = zero_tex
537
+ images_predicted = self.renderer(
538
+ tmp_mesh, cameras=self.cameras[i], lights=self.lights
539
+ )
540
+ loss = torch.sum((1 - images_predicted) ** 2)
541
+ loss.backward()
542
+ optimizer.step()
543
+
544
+ gradient_maps.append(zero_map.detach())
545
+
546
+ self.gradient_maps = gradient_maps
547
+
548
+ # Get the UV space masks of triangles visible in each view
549
+ # First get face ids from each view, then filter pixels on UV space to generate masks
550
+
551
+ @torch.no_grad()
552
+ def get_c2w(
553
+ self,
554
+ elevation_deg: LIST_TYPE,
555
+ distance: LIST_TYPE,
556
+ azimuth_deg: Optional[LIST_TYPE],
557
+ num_views: Optional[int] = 1,
558
+ device: Optional[str] = None,
559
+ ) -> torch.FloatTensor:
560
+ if azimuth_deg is None:
561
+ assert (
562
+ num_views is not None
563
+ ), "num_views must be provided if azimuth_deg is None."
564
+ azimuth_deg = torch.linspace(
565
+ 0, 360, num_views + 1, dtype=torch.float32, device=device
566
+ )[:-1]
567
+ else:
568
+ num_views = len(azimuth_deg)
569
+
570
+ def list_to_pt(
571
+ x: LIST_TYPE,
572
+ dtype: Optional[torch.dtype] = None,
573
+ device: Optional[str] = None,
574
+ ) -> torch.Tensor:
575
+ if isinstance(x, list) or isinstance(x, np.ndarray):
576
+ return torch.tensor(x, dtype=dtype, device=device)
577
+ return x.to(dtype=dtype)
578
+
579
+ azimuth_deg = list_to_pt(azimuth_deg, dtype=torch.float32, device=device)
580
+ elevation_deg = list_to_pt(elevation_deg, dtype=torch.float32, device=device)
581
+ camera_distances = list_to_pt(distance, dtype=torch.float32, device=device)
582
+ elevation = elevation_deg * math.pi / 180
583
+ azimuth = azimuth_deg * math.pi / 180
584
+ camera_positions = torch.stack(
585
+ [
586
+ camera_distances * torch.cos(elevation) * torch.cos(azimuth),
587
+ camera_distances * torch.cos(elevation) * torch.sin(azimuth),
588
+ camera_distances * torch.sin(elevation),
589
+ ],
590
+ dim=-1,
591
+ )
592
+ center = torch.zeros_like(camera_positions)
593
+ up = torch.tensor([0, 0, 1], dtype=torch.float32, device=device)[
594
+ None, :
595
+ ].repeat(num_views, 1)
596
+ lookat = F.normalize(center - camera_positions, dim=-1)
597
+ right = F.normalize(torch.cross(lookat, up, dim=-1), dim=-1)
598
+ up = F.normalize(torch.cross(right, lookat, dim=-1), dim=-1)
599
+ c2w3x4 = torch.cat(
600
+ [torch.stack([right, up, -lookat], dim=-1), camera_positions[:, :, None]],
601
+ dim=-1,
602
+ )
603
+ c2w = torch.cat([c2w3x4, torch.zeros_like(c2w3x4[:, :1])], dim=1)
604
+ c2w[:, 3, 3] = 1.0
605
+ return c2w
606
+
607
+ @torch.no_grad()
608
+ def calculate_visible_triangle_mask(self, channels=None, image_size=(512, 512)):
609
+ if not channels:
610
+ channels = self.channels
611
+
612
+ pix2face_list = []
613
+ for i in range(len(self.cameras)):
614
+ self.renderer.rasterizer.raster_settings.image_size = image_size
615
+ pix2face = self.renderer.rasterizer(
616
+ self.mesh_d, cameras=self.cameras[i]
617
+ ).pix_to_face
618
+ self.renderer.rasterizer.raster_settings.image_size = self.render_size
619
+ pix2face_list.append(pix2face)
620
+
621
+ if not hasattr(self, "mesh_uv"):
622
+ self.construct_uv_mesh()
623
+
624
+ raster_settings = RasterizationSettings(
625
+ image_size=self.target_size,
626
+ blur_radius=0,
627
+ faces_per_pixel=1,
628
+ perspective_correct=False,
629
+ cull_backfaces=False,
630
+ max_faces_per_bin=30000,
631
+ )
632
+
633
+ R, T = look_at_view_transform(dist=2, elev=0, azim=0)
634
+ # flip_mat = torch.from_numpy(np.diag([-1.0, 1.0, -1.0]) ).type(torch.FloatTensor).to(R.device)
635
+ # R = R@flip_mat
636
+ # T = T*torch.tensor(np.array([-1.0, 1.0, -1.0])).type(torch.FloatTensor).to(R.device)
637
+ # c2w = self.get_c2w([0], [1.8], [0])
638
+ # w2c = torch.linalg.inv(c2w)[:, :3,:]
639
+ # R, T= w2c[:, :3,:3], w2c[:, :3, 3]
640
+ # print("R size:{}, T size:{}".format(R.size(), T.size()))
641
+ cameras = FoVOrthographicCameras(device=self.device, R=R, T=T)
642
+ # cameras = CustomOrthographicCameras(device=self.device, R=R, T=T)
643
+
644
+ # cameras = CustomOrthographicCameras(device=self.device, R=R, T=T, znear=0.1, min_x=-0.55, max_x=0.55, min_y=-0.55, max_y=0.55)
645
+
646
+ rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings)
647
+ uv_pix2face = rasterizer(self.mesh_uv).pix_to_face
648
+
649
+ visible_triangles = []
650
+ for i in range(len(pix2face_list)):
651
+ valid_faceid = torch.unique(pix2face_list[i])
652
+ valid_faceid = valid_faceid[1:] if valid_faceid[0] == -1 else valid_faceid
653
+ mask = torch.isin(uv_pix2face[0], valid_faceid, assume_unique=False)
654
+ # uv_pix2face[0][~mask] = -1
655
+ triangle_mask = torch.ones(self.target_size + (1,), device=self.device)
656
+ triangle_mask[~mask] = 0
657
+
658
+ triangle_mask[:, 1:][triangle_mask[:, :-1] > 0] = 1
659
+ triangle_mask[:, :-1][triangle_mask[:, 1:] > 0] = 1
660
+ triangle_mask[1:, :][triangle_mask[:-1, :] > 0] = 1
661
+ triangle_mask[:-1, :][triangle_mask[1:, :] > 0] = 1
662
+ visible_triangles.append(triangle_mask)
663
+
664
+ self.visible_triangles = visible_triangles
665
+
666
+ # Render the current mesh and texture from current cameras
667
+ def render_textured_views(self):
668
+ meshes = self.mesh.extend(len(self.cameras))
669
+ images_predicted = self.renderer(
670
+ meshes, cameras=self.cameras, lights=self.lights
671
+ )
672
+
673
+ return [image.permute(2, 0, 1) for image in images_predicted]
674
+
675
+ @torch.no_grad()
676
+ def get_point_validation_by_o3d(
677
+ self, points, eye_position, hidden_point_removal_radius=200
678
+ ):
679
+ point_visibility = torch.zeros((points.shape[0]), device=points.device).bool()
680
+
681
+ pcd = o3d.geometry.PointCloud(
682
+ points=o3d.utility.Vector3dVector(points.cpu().numpy())
683
+ )
684
+ camera_pose = (
685
+ eye_position.get_camera_center().squeeze().cpu().numpy().astype(np.float64)
686
+ )
687
+ # o3d_camera = [0, 0, diameter]
688
+ diameter = np.linalg.norm(
689
+ np.asarray(pcd.get_max_bound()) - np.asarray(pcd.get_min_bound())
690
+ )
691
+ radius = diameter * 200  # The radius of the spherical projection
692
+ _, pt_map = pcd.hidden_point_removal(camera_pose, radius)
693
+
694
+ visible_point_ids = np.array(pt_map)
695
+
696
+ point_visibility[visible_point_ids] = True
697
+ return point_visibility
698
+
699
+ @torch.no_grad()
700
+ def hidden_judge(self, camera, texture_dim):
701
+ mesh = self.mesh
702
+
703
+ verts = mesh.verts_packed()
704
+ faces = mesh.faces_packed()
705
+ verts_uv = mesh.textures.verts_uvs_padded()[0]  # padded per-vertex UV coordinates (V, 2)
706
+ faces_uv = mesh.textures.faces_uvs_padded()[0]
707
+ uv_face_attr = torch.index_select(
708
+ verts_uv, 0, faces_uv.view(-1)
709
+ )  # gather the UV coordinates of the corresponding face vertices
710
+ uv_face_attr = uv_face_attr.view(
711
+ faces.shape[0], faces_uv.shape[1], 2
712
+ ).unsqueeze(0)
713
+ x, y, z = verts[:, 0], verts[:, 1], verts[:, 2]
714
+ mesh_out_of_range = False
715
+ if (
716
+ x.min() < -1
717
+ or x.max() > 1
718
+ or y.min() < -1
719
+ or y.max() > 1
720
+ or z.min() < -1
721
+ or z.max() > 1
722
+ ):
723
+ mesh_out_of_range = True
724
+ face_vertices_world = kal.ops.mesh.index_vertices_by_faces(
725
+ verts.unsqueeze(0), faces
726
+ )
727
+ face_vertices_z = torch.zeros_like(
728
+ face_vertices_world[:, :, :, -1], device=verts.device
729
+ )
730
+ uv_position, face_idx = kal.render.mesh.rasterize(
731
+ texture_dim,
732
+ texture_dim,
733
+ face_vertices_z,
734
+ uv_face_attr * 2 - 1,
735
+ face_features=face_vertices_world,
736
+ )
737
+ uv_position = torch.clamp(uv_position, -1, 1)
738
+ uv_position[face_idx == -1] = 0
739
+
740
+ points = uv_position.reshape(-1, 3)
741
+ mask = points[:, 0] != 0
742
+ valid_points = points[mask]
743
+ # np.save("tmp/pcd.npy", valid_points.cpu().numpy())
744
+ # print(camera.get_camera_center())
745
+
746
+ points_visibility = self.get_point_validation_by_o3d(
747
+ valid_points, camera
748
+ ).float()
749
+ visibility_map = torch.zeros((texture_dim * texture_dim,)).to(self.device)
750
+ visibility_map[mask] = points_visibility
751
+ visibility_map = visibility_map.reshape((texture_dim, texture_dim))
752
+ return visibility_map
753
+
754
+ @torch.enable_grad()
755
+ def bake_texture(
756
+ self,
757
+ views=None,
758
+ main_views=[],
759
+ cos_weighted=True,
760
+ channels=None,
761
+ exp=None,
762
+ noisy=False,
763
+ generator=None,
764
+ smooth_colorize=False,
765
+ ):
766
+ if not exp:
767
+ exp = 1
768
+ if not channels:
769
+ channels = self.channels
770
+ views = [view.permute(1, 2, 0) for view in views]
771
+
772
+ tmp_mesh = self.mesh
773
+ bake_maps = [
774
+ torch.zeros(
775
+ self.target_size + (views[0].shape[2],),
776
+ device=self.device,
777
+ requires_grad=True,
778
+ )
779
+ for view in views
780
+ ]
781
+ optimizer = torch.optim.SGD(bake_maps, lr=1, momentum=0)
782
+ optimizer.zero_grad()
783
+ loss = 0
784
+ for i in range(len(self.cameras)):
785
+ bake_tex = TexturesUV(
786
+ [bake_maps[i]],
787
+ tmp_mesh.textures.faces_uvs_padded(),
788
+ tmp_mesh.textures.verts_uvs_padded(),
789
+ sampling_mode=self.sampling_mode,
790
+ )
791
+ tmp_mesh.textures = bake_tex
792
+ images_predicted = self.renderer(
793
+ tmp_mesh,
794
+ cameras=self.cameras[i],
795
+ lights=self.lights,
796
+ device=self.device,
797
+ )
798
+ predicted_rgb = images_predicted[..., :-1]
799
+ loss += (((predicted_rgb[...] - views[i])) ** 2).sum()
800
+ loss.backward(retain_graph=False)
801
+ optimizer.step()
802
+
803
+ total_weights = 0
804
+ baked = 0
805
+ for i in range(len(bake_maps)):
806
+ normalized_baked_map = bake_maps[i].detach() / (
807
+ self.gradient_maps[i] + 1e-8
808
+ )
809
+ bake_map = voronoi_solve(
810
+ normalized_baked_map, self.gradient_maps[i][..., 0], self.device
811
+ )
812
+ # bake_map = voronoi_solve(normalized_baked_map, self.visible_triangles[i].squeeze())
813
+
814
+ weight = self.visible_triangles[i] * (self.cos_maps[i]) ** exp
815
+ if smooth_colorize:
816
+ visibility_map = self.hidden_judge(
817
+ self.cameras[i], self.target_size[0]
818
+ ).unsqueeze(-1)
819
+ weight *= visibility_map
820
+ if noisy:
821
+ noise = (
822
+ torch.rand(weight.shape[:-1] + (1,), generator=generator)
823
+ .type(weight.dtype)
824
+ .to(weight.device)
825
+ )
826
+ weight *= noise
827
+ total_weights += weight
828
+
829
+ baked += bake_map * weight
830
+ baked /= total_weights + 1e-8
831
+
832
+ whole_visible_mask = None
833
+ if not smooth_colorize:
834
+ baked = voronoi_solve(baked, total_weights[..., 0], self.device)
835
+ tmp_mesh.textures = TexturesUV(
836
+ [baked],
837
+ tmp_mesh.textures.faces_uvs_padded(),
838
+ tmp_mesh.textures.verts_uvs_padded(),
839
+ sampling_mode=self.sampling_mode,
840
+ )
841
+ else: # smooth colorize
842
+ baked = voronoi_solve(baked, total_weights[..., 0], self.device)
843
+ whole_visible_mask = self.visible_triangles[0].to(torch.int32)
844
+ for tensor in self.visible_triangles[1:]:
845
+ whole_visible_mask = torch.bitwise_or(
846
+ whole_visible_mask, tensor.to(torch.int32)
847
+ )
848
+
849
+ baked *= whole_visible_mask
850
+ tmp_mesh.textures = TexturesUV(
851
+ [baked],
852
+ tmp_mesh.textures.faces_uvs_padded(),
853
+ tmp_mesh.textures.verts_uvs_padded(),
854
+ sampling_mode=self.sampling_mode,
855
+ )
856
+
857
+ extended_mesh = tmp_mesh.extend(len(self.cameras))
858
+ images_predicted = self.renderer(
859
+ extended_mesh, cameras=self.cameras, lights=self.lights
860
+ )
861
+ learned_views = [image.permute(2, 0, 1) for image in images_predicted]
862
+
863
+ return learned_views, baked.permute(2, 0, 1), total_weights.permute(2, 0, 1)
864
+
865
+ # Move the internel data to a specific device
866
+ def to(self, device):
867
+ for mesh_name in ["mesh", "mesh_d", "mesh_uv"]:
868
+ if hasattr(self, mesh_name):
869
+ mesh = getattr(self, mesh_name)
870
+ setattr(self, mesh_name, mesh.to(device))
871
+ for list_name in ["visible_triangles", "visibility_maps", "cos_maps"]:
872
+ if hasattr(self, list_name):
873
+ map_list = getattr(self, list_name)
874
+ for i in range(len(map_list)):
875
+ map_list[i] = map_list[i].to(device)
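
The UVProjection class above (moved from texture_sync/ to renderer/) wraps PyTorch3D mesh loading, orthographic cameras, and UV texture baking. Below is a hedged usage sketch that follows the call sequence the reverted pipeline code used; the mesh path is hypothetical, and the repo's rendering dependencies (pytorch3d, kaolin, open3d, xatlas) must be installed:

import torch
import trimesh
from step1x3d_texture.renderer.project import UVProjection

device = "cuda" if torch.cuda.is_available() else "cpu"
mesh = trimesh.load("example_mesh.glb", force="mesh")   # hypothetical input mesh

uvp = UVProjection(texture_size=1536, render_size=96,
                   sampling_mode="nearest", channels=4, device=device)
uvp.load_mesh(mesh, scale_factor=1.0, autouv=True)

# Six views: front/right/back/left plus top and bottom, as in the old texture_sync_config
camera_poses = list(zip([0, 0, 0, 0, 90, -90], [0, 90, 180, 270, 0, 0]))
uvp.set_cameras_and_render_settings(camera_poses, centers=None,
                                    camera_distance=1.8, scale=((1.0, 1.0, 1.0),))

latent_tex = uvp.set_noise_texture()      # (4, 1536, 1536) Gaussian noise texture
views = uvp.render_textured_views()       # list of (channels + alpha, 96, 96) tensors
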
step1x3d_texture/renderer/shader.py ADDED
@@ -0,0 +1,127 @@
1
+ from typing import Optional
2
+
3
+ import torch
4
+ import pytorch3d
5
+
6
+
7
+ from pytorch3d.io import load_objs_as_meshes, load_obj, save_obj
8
+ from pytorch3d.ops import interpolate_face_attributes
9
+
10
+ from pytorch3d.structures import Meshes
11
+ from pytorch3d.renderer import (
12
+ look_at_view_transform,
13
+ FoVPerspectiveCameras,
14
+ AmbientLights,
15
+ PointLights,
16
+ DirectionalLights,
17
+ Materials,
18
+ RasterizationSettings,
19
+ MeshRenderer,
20
+ MeshRasterizer,
21
+ SoftPhongShader,
22
+ SoftSilhouetteShader,
23
+ HardPhongShader,
24
+ TexturesVertex,
25
+ TexturesUV,
26
+ Materials,
27
+ )
28
+ from pytorch3d.renderer.blending import BlendParams, hard_rgb_blend
29
+ from pytorch3d.renderer.utils import convert_to_tensors_and_broadcast, TensorProperties
30
+
31
+ from pytorch3d.renderer.lighting import AmbientLights
32
+ from pytorch3d.renderer.materials import Materials
33
+ from pytorch3d.renderer.mesh.shader import ShaderBase
34
+ from pytorch3d.renderer.mesh.shading import _apply_lighting, flat_shading
35
+ from pytorch3d.renderer.mesh.rasterizer import Fragments
36
+
37
+
38
+ """
39
+ Customized the original pytorch3d hard flat shader to support N channel flat shading
40
+ """
41
+
42
+
43
+ class HardNChannelFlatShader(ShaderBase):
44
+ """
45
+ Per face lighting - the lighting model is applied using the average face
46
+ position and the face normal. The blending function hard assigns
47
+ the color of the closest face for each pixel.
48
+
49
+ To use the default values, simply initialize the shader with the desired
50
+ device e.g.
51
+
52
+ .. code-block::
53
+
54
+ shader = HardFlatShader(device=torch.device("cuda:0"))
55
+ """
56
+
57
+ def __init__(
58
+ self,
59
+ device="cpu",
60
+ cameras: Optional[TensorProperties] = None,
61
+ lights: Optional[TensorProperties] = None,
62
+ materials: Optional[Materials] = None,
63
+ blend_params: Optional[BlendParams] = None,
64
+ channels: int = 3,
65
+ ):
66
+ self.channels = channels
67
+ ones = ((1.0,) * channels,)
68
+ zeros = ((0.0,) * channels,)
69
+
70
+ if (
71
+ not isinstance(lights, AmbientLights)
72
+ or not lights.ambient_color.shape[-1] == channels
73
+ ):
74
+ lights = AmbientLights(
75
+ ambient_color=ones,
76
+ device=device,
77
+ )
78
+
79
+ if not materials or not materials.ambient_color.shape[-1] == channels:
80
+ materials = Materials(
81
+ device=device,
82
+ diffuse_color=zeros,
83
+ ambient_color=ones,
84
+ specular_color=zeros,
85
+ shininess=0.0,
86
+ )
87
+
88
+ blend_params_new = BlendParams(background_color=(1.0,) * channels)
89
+ if not isinstance(blend_params, BlendParams):
90
+ blend_params = blend_params_new
91
+ else:
92
+ background_color_ = blend_params.background_color
93
+ if (
94
+ isinstance(background_color_, (list, tuple))  # a subscripted Sequence cannot be used with isinstance
95
+ and not len(background_color_) == channels
96
+ ):
97
+ blend_params = blend_params_new
98
+ if (
99
+ isinstance(background_color_, torch.Tensor)
100
+ and not background_color_.shape[-1] == channels
101
+ ):
102
+ blend_params = blend_params_new
103
+
104
+ super().__init__(
105
+ device,
106
+ cameras,
107
+ lights,
108
+ materials,
109
+ blend_params,
110
+ )
111
+
112
+ def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
113
+ cameras = super()._get_cameras(**kwargs)
114
+ texels = meshes.sample_textures(fragments)
115
+ lights = kwargs.get("lights", self.lights)
116
+ materials = kwargs.get("materials", self.materials)
117
+ blend_params = kwargs.get("blend_params", self.blend_params)
118
+ colors = flat_shading(
119
+ meshes=meshes,
120
+ fragments=fragments,
121
+ texels=texels,
122
+ lights=lights,
123
+ cameras=cameras,
124
+ materials=materials,
125
+ )
126
+ images = hard_rgb_blend(colors, fragments, blend_params)
127
+ return images
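For orientation, the sketch below shows one way the restored HardNChannelFlatShader can be dropped into a standard PyTorch3D MeshRenderer to rasterize a 4-channel (latent) texture. It is an illustrative sketch rather than code from this commit; `my_mesh` is a placeholder for a Meshes object carrying a 4-channel TexturesUV, and the camera and raster settings are arbitrary example values.

import torch
from pytorch3d.renderer import (
    AmbientLights,
    FoVOrthographicCameras,
    MeshRasterizer,
    MeshRenderer,
    RasterizationSettings,
    look_at_view_transform,
)

device = torch.device("cuda:0")
channels = 4  # e.g. Stable Diffusion latent channels

R, T = look_at_view_transform(dist=2.7, elev=0, azim=0)
cameras = FoVOrthographicCameras(device=device, R=R, T=T)
raster_settings = RasterizationSettings(image_size=64, faces_per_pixel=1)

renderer = MeshRenderer(
    rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
    shader=HardNChannelFlatShader(
        device=device,
        cameras=cameras,
        lights=AmbientLights(ambient_color=((1.0,) * channels,), device=device),
        channels=channels,
    ),
)
# images = renderer(my_mesh)  # -> (N, H, W, channels + 1); the last channel is the hard alpha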
step1x3d_texture/{texture_sync → renderer}/voronoi.py RENAMED
File without changes
step1x3d_texture/texture_sync/geometry.py DELETED
@@ -1,141 +0,0 @@
1
- import torch
2
- import pytorch3d
3
- import torch.nn.functional as F
4
-
5
- from pytorch3d.ops import interpolate_face_attributes
6
-
7
- from pytorch3d.renderer import (
8
- look_at_view_transform,
9
- FoVPerspectiveCameras,
10
- AmbientLights,
11
- PointLights,
12
- DirectionalLights,
13
- Materials,
14
- RasterizationSettings,
15
- MeshRenderer,
16
- MeshRasterizer,
17
- SoftPhongShader,
18
- SoftSilhouetteShader,
19
- HardPhongShader,
20
- TexturesVertex,
21
- TexturesUV,
22
- Materials,
23
-
24
- )
25
- from pytorch3d.renderer.blending import BlendParams,hard_rgb_blend
26
- from pytorch3d.renderer.utils import convert_to_tensors_and_broadcast, TensorProperties
27
- from pytorch3d.renderer.mesh.shader import ShaderBase
28
-
29
-
30
- def get_cos_angle(
31
- points, normals, camera_position
32
- ):
33
- '''
34
- calculate cosine similarity between view->surface and surface normal.
35
- '''
36
-
37
- if points.shape != normals.shape:
38
- msg = "Expected points and normals to have the same shape: got %r, %r"
39
- raise ValueError(msg % (points.shape, normals.shape))
40
-
41
- # Ensure all inputs have same batch dimension as points
42
- matched_tensors = convert_to_tensors_and_broadcast(
43
- points, camera_position, device=points.device
44
- )
45
- _, camera_position = matched_tensors
46
-
47
- # Reshape direction and color so they have all the arbitrary intermediate
48
- # dimensions as points. Assume first dim = batch dim and last dim = 3.
49
- points_dims = points.shape[1:-1]
50
- expand_dims = (-1,) + (1,) * len(points_dims)
51
-
52
- if camera_position.shape != normals.shape:
53
- camera_position = camera_position.view(expand_dims + (3,))
54
-
55
- normals = F.normalize(normals, p=2, dim=-1, eps=1e-6)
56
-
57
- # Calculate the cosine value.
58
- view_direction = camera_position - points
59
- view_direction = F.normalize(view_direction, p=2, dim=-1, eps=1e-6)
60
- cos_angle = torch.sum(view_direction * normals, dim=-1, keepdim=True)
61
- cos_angle = cos_angle.clamp(0, 1)
62
-
63
- # Cosine of the angle between the reflected light ray and the viewer
64
- return cos_angle
65
-
66
-
67
- def _geometry_shading_with_pixels(
68
- meshes, fragments, lights, cameras, materials, texels
69
- ):
70
- """
71
- Render pixel space vertex position, normal(world), depth, and cos angle
72
-
73
- Args:
74
- meshes: Batch of meshes
75
- fragments: Fragments named tuple with the outputs of rasterization
76
- lights: Lights class containing a batch of lights
77
- cameras: Cameras class containing a batch of cameras
78
- materials: Materials class containing a batch of material properties
79
- texels: texture per pixel of shape (N, H, W, K, 3)
80
-
81
- Returns:
82
- colors: (N, H, W, K, 3)
83
- pixel_coords: (N, H, W, K, 3), camera coordinates of each intersection.
84
- """
85
- verts = meshes.verts_packed() # (V, 3)
86
- faces = meshes.faces_packed() # (F, 3)
87
- vertex_normals = meshes.verts_normals_packed() # (V, 3)
88
- faces_verts = verts[faces]
89
- faces_normals = vertex_normals[faces]
90
- pixel_coords_in_camera = interpolate_face_attributes(
91
- fragments.pix_to_face, fragments.bary_coords, faces_verts
92
- )
93
- pixel_normals = interpolate_face_attributes(
94
- fragments.pix_to_face, fragments.bary_coords, faces_normals
95
- )
96
-
97
- cos_angles = get_cos_angle(pixel_coords_in_camera, pixel_normals, cameras.get_camera_center())
98
-
99
- return pixel_coords_in_camera, pixel_normals, fragments.zbuf[...,None], cos_angles
100
-
101
-
102
- class HardGeometryShader(ShaderBase):
103
- """
104
- renders common geometric informations.
105
-
106
-
107
- """
108
-
109
- def forward(self, fragments, meshes, **kwargs):
110
- cameras = super()._get_cameras(**kwargs)
111
- texels = self.texel_from_uv(fragments, meshes)
112
-
113
- lights = kwargs.get("lights", self.lights)
114
- materials = kwargs.get("materials", self.materials)
115
- blend_params = kwargs.get("blend_params", self.blend_params)
116
- verts, normals, depths, cos_angles = _geometry_shading_with_pixels(
117
- meshes=meshes,
118
- fragments=fragments,
119
- texels=texels,
120
- lights=lights,
121
- cameras=cameras,
122
- materials=materials,
123
- )
124
- verts = hard_rgb_blend(verts, fragments, blend_params)
125
- normals = hard_rgb_blend(normals, fragments, blend_params)
126
- depths = hard_rgb_blend(depths, fragments, blend_params)
127
- cos_angles = hard_rgb_blend(cos_angles, fragments, blend_params)
128
- texels = hard_rgb_blend(texels, fragments, blend_params)
129
- return verts, normals, depths, cos_angles, texels, fragments
130
-
131
- def texel_from_uv(self, fragments, meshes):
132
- texture_tmp = meshes.textures
133
- maps_tmp = texture_tmp.maps_padded()
134
- uv_color = [ [[1,0],[1,1]],[[0,0],[0,1]] ]
135
- uv_color = torch.FloatTensor(uv_color).to(maps_tmp[0].device).type(maps_tmp[0].dtype)
136
- uv_texture = TexturesUV([uv_color.clone() for t in maps_tmp], texture_tmp.faces_uvs_padded(), texture_tmp.verts_uvs_padded(), sampling_mode="bilinear")
137
- meshes.textures = uv_texture
138
- texels = meshes.sample_textures(fragments)
139
- meshes.textures = texture_tmp
140
- texels = torch.cat((texels, texels[...,-1:]*0), dim=-1)
141
- return texels
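For reference, the HardGeometryShader deleted above was not used on its own; the deleted project.py (further below) swapped it into an existing renderer to rasterize per-pixel geometry instead of colors, then restored the original shader. A minimal sketch of that pattern, assuming `renderer`, `cameras`, `lights`, `mesh`, and `device` are already set up as in that module:

# Swap the shader temporarily so the renderer returns geometry buffers.
color_shader = renderer.shader
renderer.shader = HardGeometryShader(device=device, cameras=cameras[0], lights=lights)

verts, normals, depths, cos_angles, texels, fragments = renderer(
    mesh.extend(len(cameras)), cameras=cameras, lights=lights
)
renderer.shader = color_shader

# cos_angles[..., 0:1] is the per-pixel cosine between the view direction and the
# surface normal; project.py baked it into UV space as a per-view blending weight.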
 
step1x3d_texture/texture_sync/project.py DELETED
@@ -1,521 +0,0 @@
1
- import torch
2
- import pytorch3d
3
-
4
-
5
- from pytorch3d.io import load_objs_as_meshes, load_obj, save_obj, IO
6
-
7
- from pytorch3d.structures import Meshes
8
- from pytorch3d.renderer import (
9
- look_at_view_transform,
10
- FoVPerspectiveCameras,
11
- FoVOrthographicCameras,
12
- AmbientLights,
13
- PointLights,
14
- DirectionalLights,
15
- Materials,
16
- RasterizationSettings,
17
- MeshRenderer,
18
- MeshRasterizer,
19
- TexturesUV
20
- )
21
-
22
- from .geometry import HardGeometryShader
23
- from .shader import HardNChannelFlatShader
24
- from .voronoi import voronoi_solve
25
- from trimesh import Trimesh
26
-
27
- # Pytorch3D based renderering functions, managed in a class
28
- # Render size is recommended to be the same as your latent view size
29
- # DO NOT USE "bilinear" sampling when you are handling latents.
30
- # Stable Diffusion has 4 latent channels so use channels=4
31
-
32
- class UVProjection():
33
- def __init__(self, texture_size=96, render_size=64, sampling_mode="nearest", channels=3, device=None):
34
- self.channels = channels
35
- self.device = device or torch.device("cpu")
36
- self.lights = AmbientLights(ambient_color=((1.0,)*channels,), device=self.device)
37
- self.target_size = (texture_size,texture_size)
38
- self.render_size = render_size
39
- self.sampling_mode = sampling_mode
40
-
41
-
42
- # # Load obj mesh, rescale the mesh to fit into the bounding box
43
- # def load_mesh(self, mesh_path, scale_factor=2.0, auto_center=True, autouv=False):
44
- # mesh = load_objs_as_meshes([mesh_path], device=self.device)
45
- # if auto_center:
46
- # verts = mesh.verts_packed()
47
- # max_bb = (verts - 0).max(0)[0]
48
- # min_bb = (verts - 0).min(0)[0]
49
- # scale = (max_bb - min_bb).max()/2
50
- # center = (max_bb+min_bb) /2
51
- # mesh.offset_verts_(-center)
52
- # mesh.scale_verts_((scale_factor / float(scale)))
53
- # else:
54
- # mesh.scale_verts_((scale_factor))
55
-
56
- # if autouv or (mesh.textures is None):
57
- # mesh = self.uv_unwrap(mesh)
58
- # self.mesh = mesh
59
- # Load obj mesh, rescale the mesh to fit into the bounding box
60
- def load_mesh(self, mesh, scale_factor=2.0, auto_center=True, autouv=False, normals=None):
61
- if isinstance(mesh, Trimesh):
62
- vertices = torch.tensor(mesh.vertices, dtype=torch.float32).to(self.device)
63
- faces = torch.tensor(mesh.faces, dtype=torch.int64).to(self.device)
64
- if faces.ndim == 1:
65
- faces = faces.unsqueeze(0)
66
- mesh = Meshes(
67
- verts=[vertices],
68
- faces=[faces]
69
- )
70
- verts = mesh.verts_packed()
71
- mesh = mesh.update_padded(verts[None,:, :])
72
- # from pytorch3d.renderer.mesh.textures import TexturesVertex
73
- # if normals is None:
74
- # normals = mesh.verts_normals_packed()
75
- # # set normals as vertext colors
76
- # mesh.textures = TexturesVertex(verts_features=[normals / 2 + 0.5])
77
- elif isinstance(mesh, str) and os.path.isfile(mesh):
78
- mesh = load_objs_as_meshes([mesh_path], device=self.device)
79
- if auto_center:
80
- verts = mesh.verts_packed()
81
- max_bb = (verts - 0).max(0)[0]
82
- min_bb = (verts - 0).min(0)[0]
83
- scale = (max_bb - min_bb).max()/2
84
- center = (max_bb+min_bb) /2
85
- mesh.offset_verts_(-center)
86
- mesh.scale_verts_((scale_factor / float(scale)))
87
- else:
88
- mesh.scale_verts_((scale_factor))
89
-
90
- if autouv or (mesh.textures is None):
91
- mesh = self.uv_unwrap(mesh)
92
- self.mesh = mesh
93
-
94
- def load_glb_mesh(self, mesh_path, scale_factor=2.0, auto_center=True, autouv=False):
95
- from pytorch3d.io.experimental_gltf_io import MeshGlbFormat
96
- io = IO()
97
- io.register_meshes_format(MeshGlbFormat())
98
- with open(mesh_path, "rb") as f:
99
- mesh = io.load_mesh(f, include_textures=True, device=self.device)
100
- if auto_center:
101
- verts = mesh.verts_packed()
102
- max_bb = (verts - 0).max(0)[0]
103
- min_bb = (verts - 0).min(0)[0]
104
- scale = (max_bb - min_bb).max()/2
105
- center = (max_bb+min_bb) /2
106
- mesh.offset_verts_(-center)
107
- mesh.scale_verts_((scale_factor / float(scale)))
108
- else:
109
- mesh.scale_verts_((scale_factor))
110
- if autouv or (mesh.textures is None):
111
- mesh = self.uv_unwrap(mesh)
112
- self.mesh = mesh
113
-
114
-
115
- # Save obj mesh
116
- def save_mesh(self, mesh_path, texture):
117
- save_obj(mesh_path,
118
- self.mesh.verts_list()[0],
119
- self.mesh.faces_list()[0],
120
- verts_uvs= self.mesh.textures.verts_uvs_list()[0],
121
- faces_uvs= self.mesh.textures.faces_uvs_list()[0],
122
- texture_map=texture)
123
-
124
- # Code referred to TEXTure code (https://github.com/TEXTurePaper/TEXTurePaper.git)
125
- def uv_unwrap(self, mesh):
126
- verts_list = mesh.verts_list()[0]
127
- faces_list = mesh.faces_list()[0]
128
-
129
-
130
- import xatlas
131
- import numpy as np
132
- v_np = verts_list.cpu().numpy()
133
- f_np = faces_list.int().cpu().numpy()
134
- atlas = xatlas.Atlas()
135
- atlas.add_mesh(v_np, f_np)
136
- chart_options = xatlas.ChartOptions()
137
- chart_options.max_iterations = 4
138
- atlas.generate(chart_options=chart_options)
139
- vmapping, ft_np, vt_np = atlas[0] # [N], [M, 3], [N, 2]
140
-
141
- vt = torch.from_numpy(vt_np.astype(np.float32)).type(verts_list.dtype).to(mesh.device)
142
- ft = torch.from_numpy(ft_np.astype(np.int64)).type(faces_list.dtype).to(mesh.device)
143
-
144
- new_map = torch.zeros(self.target_size+(self.channels,), device=mesh.device)
145
- new_tex = TexturesUV(
146
- [new_map],
147
- [ft],
148
- [vt],
149
- sampling_mode=self.sampling_mode
150
- )
151
-
152
- mesh.textures = new_tex
153
- return mesh
154
-
155
-
156
- '''
157
- A functions that disconnect faces in the mesh according to
158
- its UV seams. The number of vertices are made equal to the
159
- number of unique vertices its UV layout, while the faces list
160
- is intact.
161
- '''
162
- def disconnect_faces(self):
163
- mesh = self.mesh
164
- verts_list = mesh.verts_list()
165
- faces_list = mesh.faces_list()
166
- verts_uvs_list = mesh.textures.verts_uvs_list()
167
- faces_uvs_list = mesh.textures.faces_uvs_list()
168
- packed_list = [v[f] for v,f in zip(verts_list, faces_list)]
169
- verts_disconnect_list = [
170
- torch.zeros(
171
- (verts_uvs_list[i].shape[0], 3),
172
- dtype=verts_list[0].dtype,
173
- device=verts_list[0].device
174
- )
175
- for i in range(len(verts_list))]
176
- for i in range(len(verts_list)):
177
- verts_disconnect_list[i][faces_uvs_list] = packed_list[i]
178
- assert not mesh.has_verts_normals(), "Not implemented for vertex normals"
179
- self.mesh_d = Meshes(verts_disconnect_list, faces_uvs_list, mesh.textures)
180
- return self.mesh_d
181
-
182
-
183
- '''
184
- A function that construct a temp mesh for back-projection.
185
- Take a disconnected mesh and a rasterizer, the function calculates
186
- the projected faces as the UV, as use its original UV with pseudo
187
- z value as world space geometry.
188
- '''
189
- def construct_uv_mesh(self):
190
- mesh = self.mesh_d
191
- verts_list = mesh.verts_list()
192
- verts_uvs_list = mesh.textures.verts_uvs_list()
193
- # faces_list = [torch.flip(faces, [-1]) for faces in mesh.faces_list()]
194
- new_verts_list = []
195
- for i, (verts, verts_uv) in enumerate(zip(verts_list, verts_uvs_list)):
196
- verts = verts.clone()
197
- verts_uv = verts_uv.clone()
198
- verts[...,0:2] = verts_uv[...,:]
199
- verts = (verts - 0.5) * 2
200
- verts[...,2] *= 1
201
- new_verts_list.append(verts)
202
- textures_uv = mesh.textures.clone()
203
- self.mesh_uv = Meshes(new_verts_list, mesh.faces_list(), textures_uv)
204
- return self.mesh_uv
205
-
206
-
207
- # Set texture for the current mesh.
208
- def set_texture_map(self, texture):
209
- new_map = texture.permute(1, 2, 0)
210
- new_map = new_map.to(self.device)
211
- new_tex = TexturesUV(
212
- [new_map],
213
- self.mesh.textures.faces_uvs_padded(),
214
- self.mesh.textures.verts_uvs_padded(),
215
- sampling_mode=self.sampling_mode
216
- )
217
- self.mesh.textures = new_tex
218
-
219
-
220
- # Set the initial normal noise texture
221
- # No generator here for replication of the experiment result. Add one as you wish
222
- def set_noise_texture(self, channels=None):
223
- if not channels:
224
- channels = self.channels
225
- noise_texture = torch.normal(0, 1, (channels,) + self.target_size, device=self.device)
226
- self.set_texture_map(noise_texture)
227
- return noise_texture
228
-
229
-
230
- # Set the cameras given the camera poses and centers
231
- def set_cameras(self, camera_poses, centers=None, camera_distance=2.7, scale=None):
232
- elev = torch.FloatTensor([pose[0] for pose in camera_poses])
233
- azim = torch.FloatTensor([pose[1] for pose in camera_poses])
234
- R, T = look_at_view_transform(dist=camera_distance, elev=elev, azim=azim, at=centers or ((0,0,0),))
235
- # self.cameras = FoVOrthographicCameras(device=self.device, R=R, T=T, scale_xyz=scale or ((1,1,1),))
236
- self.cameras = FoVOrthographicCameras(device=self.device, R=R, T=T, scale_xyz=scale or ((1,1,1),), znear=0.1, min_x=-0.55, max_x=0.55, min_y=-0.55, max_y=0.55)
237
-
238
- # Set all necessary internal data for rendering and texture baking
239
- # Can be used to refresh after changing camera positions
240
- def set_cameras_and_render_settings(self, camera_poses, centers=None, camera_distance=2.7, render_size=None, scale=None):
241
- self.set_cameras(camera_poses, centers, camera_distance, scale=scale)
242
- if render_size is None:
243
- render_size = self.render_size
244
- if not hasattr(self, "renderer"):
245
- self.setup_renderer(size=render_size)
246
- if not hasattr(self, "mesh_d"):
247
- self.disconnect_faces()
248
- if not hasattr(self, "mesh_uv"):
249
- self.construct_uv_mesh()
250
- self.calculate_tex_gradient()
251
- self.calculate_visible_triangle_mask()
252
- _,_,_,cos_maps,_, _ = self.render_geometry()
253
- self.calculate_cos_angle_weights(cos_maps)
254
-
255
-
256
- # Setup renderers for rendering
257
- # max faces per bin set to 30000 to avoid overflow in many test cases.
258
- # You can use default value to let pytorch3d handle that for you.
259
- def setup_renderer(self, size=64, blur=0.0, face_per_pix=1, perspective_correct=False, channels=None):
260
- if not channels:
261
- channels = self.channels
262
-
263
- self.raster_settings = RasterizationSettings(
264
- image_size=size,
265
- blur_radius=blur,
266
- faces_per_pixel=face_per_pix,
267
- perspective_correct=perspective_correct,
268
- cull_backfaces=True,
269
- max_faces_per_bin=30000,
270
- )
271
-
272
- self.renderer = MeshRenderer(
273
- rasterizer=MeshRasterizer(
274
- cameras=self.cameras,
275
- raster_settings=self.raster_settings,
276
-
277
- ),
278
- shader=HardNChannelFlatShader(
279
- device=self.device,
280
- cameras=self.cameras,
281
- lights=self.lights,
282
- channels=channels
283
- # materials=materials
284
- )
285
- )
286
-
287
-
288
- # Bake screen-space cosine weights to UV space
289
- # May be able to reimplement using the generic "bake_texture" function, but it works so leave it here for now
290
- @torch.enable_grad()
291
- def calculate_cos_angle_weights(self, cos_angles, fill=True, channels=None):
292
- if not channels:
293
- channels = self.channels
294
- cos_maps = []
295
- tmp_mesh = self.mesh.clone()
296
- for i in range(len(self.cameras)):
297
-
298
- zero_map = torch.zeros(self.target_size+(channels,), device=self.device, requires_grad=True)
299
- optimizer = torch.optim.SGD([zero_map], lr=1, momentum=0)
300
- optimizer.zero_grad()
301
- zero_tex = TexturesUV([zero_map], self.mesh.textures.faces_uvs_padded(), self.mesh.textures.verts_uvs_padded(), sampling_mode=self.sampling_mode)
302
- tmp_mesh.textures = zero_tex
303
-
304
- images_predicted = self.renderer(tmp_mesh, cameras=self.cameras[i], lights=self.lights)
305
-
306
- loss = torch.sum((cos_angles[i,:,:,0:1]**1 - images_predicted)**2)
307
- loss.backward()
308
- optimizer.step()
309
-
310
- if fill:
311
- zero_map = zero_map.detach() / (self.gradient_maps[i] + 1E-8)
312
- zero_map = voronoi_solve(zero_map, self.gradient_maps[i][...,0])
313
- else:
314
- zero_map = zero_map.detach() / (self.gradient_maps[i]+1E-8)
315
- cos_maps.append(zero_map)
316
- self.cos_maps = cos_maps
317
-
318
-
319
- # Get geometric info from fragment shader
320
- # Can be used for generating conditioning image and cosine weights
321
- # Returns some information you may not need, remember to release them for memory saving
322
- @torch.no_grad()
323
- def render_geometry(self, image_size=None):
324
- if image_size:
325
- size = self.renderer.rasterizer.raster_settings.image_size
326
- self.renderer.rasterizer.raster_settings.image_size = image_size
327
- shader = self.renderer.shader
328
- self.renderer.shader = HardGeometryShader(device=self.device, cameras=self.cameras[0], lights=self.lights)
329
- tmp_mesh = self.mesh.clone()
330
-
331
- verts, normals, depths, cos_angles, texels, fragments = self.renderer(tmp_mesh.extend(len(self.cameras)), cameras=self.cameras, lights=self.lights)
332
- self.renderer.shader = shader
333
-
334
- if image_size:
335
- self.renderer.rasterizer.raster_settings.image_size = size
336
-
337
- return verts, normals, depths, cos_angles, texels, fragments
338
-
339
-
340
- # Project world normal to view space and normalize
341
- @torch.no_grad()
342
- def decode_view_normal(self, normals):
343
- w2v_mat = self.cameras.get_full_projection_transform()
344
- normals_view = torch.clone(normals)[:,:,:,0:3]
345
- normals_view = normals_view.reshape(normals_view.shape[0], -1, 3)
346
- normals_view = w2v_mat.transform_normals(normals_view)
347
- normals_view = normals_view.reshape(normals.shape[0:3]+(3,))
348
- normals_view[:,:,:,2] *= -1
349
- normals = (normals_view[...,0:3]+1) * normals[...,3:] / 2 + torch.FloatTensor(((((0.5,0.5,1))))).to(self.device) * (1 - normals[...,3:])
350
- # normals = torch.cat([normal for normal in normals], dim=1)
351
- normals = normals.clamp(0, 1)
352
- return normals
353
-
354
-
355
- # Normalize absolute depth to inverse depth
356
- @torch.no_grad()
357
- def decode_normalized_depth(self, depths, batched_norm=False):
358
- view_z, mask = depths.unbind(-1)
359
- view_z = view_z * mask + 100 * (1-mask)
360
- inv_z = 1 / view_z
361
- inv_z_min = inv_z * mask + 100 * (1-mask)
362
- if not batched_norm:
363
- max_ = torch.max(inv_z, 1, keepdim=True)
364
- max_ = torch.max(max_[0], 2, keepdim=True)[0]
365
-
366
- min_ = torch.min(inv_z_min, 1, keepdim=True)
367
- min_ = torch.min(min_[0], 2, keepdim=True)[0]
368
- else:
369
- max_ = torch.max(inv_z)
370
- min_ = torch.min(inv_z_min)
371
- inv_z = (inv_z - min_) / (max_ - min_)
372
- inv_z = inv_z.clamp(0,1)
373
- inv_z = inv_z[...,None].repeat(1,1,1,3)
374
-
375
- return inv_z
376
-
377
-
378
- # Multiple screen pixels could pass gradient to a same texel
379
- # We can precalculate this gradient strength and use it to normalize gradients when we bake textures
380
- @torch.enable_grad()
381
- def calculate_tex_gradient(self, channels=None):
382
- if not channels:
383
- channels = self.channels
384
- tmp_mesh = self.mesh.clone()
385
- gradient_maps = []
386
- for i in range(len(self.cameras)):
387
- zero_map = torch.zeros(self.target_size+(channels,), device=self.device, requires_grad=True)
388
- optimizer = torch.optim.SGD([zero_map], lr=1, momentum=0)
389
- optimizer.zero_grad()
390
- zero_tex = TexturesUV([zero_map], self.mesh.textures.faces_uvs_padded(), self.mesh.textures.verts_uvs_padded(), sampling_mode=self.sampling_mode)
391
- tmp_mesh.textures = zero_tex
392
- images_predicted = self.renderer(tmp_mesh, cameras=self.cameras[i], lights=self.lights)
393
- loss = torch.sum((1 - images_predicted)**2)
394
- loss.backward()
395
- optimizer.step()
396
-
397
- gradient_maps.append(zero_map.detach())
398
-
399
- self.gradient_maps = gradient_maps
400
-
401
-
402
- # Get the UV space masks of triangles visible in each view
403
- # First get face ids from each view, then filter pixels on UV space to generate masks
404
- @torch.no_grad()
405
- def calculate_visible_triangle_mask(self, channels=None, image_size=(512,512)):
406
- if not channels:
407
- channels = self.channels
408
-
409
- pix2face_list = []
410
- for i in range(len(self.cameras)):
411
- self.renderer.rasterizer.raster_settings.image_size=image_size
412
- pix2face = self.renderer.rasterizer(self.mesh_d, cameras=self.cameras[i]).pix_to_face
413
- self.renderer.rasterizer.raster_settings.image_size=self.render_size
414
- pix2face_list.append(pix2face)
415
-
416
- if not hasattr(self, "mesh_uv"):
417
- self.construct_uv_mesh()
418
-
419
- raster_settings = RasterizationSettings(
420
- image_size=self.target_size,
421
- blur_radius=0,
422
- faces_per_pixel=1,
423
- perspective_correct=False,
424
- cull_backfaces=False,
425
- max_faces_per_bin=30000,
426
- )
427
-
428
- R, T = look_at_view_transform(dist=2, elev=0, azim=0)
429
- cameras = FoVOrthographicCameras(device=self.device, R=R, T=T)
430
-
431
- rasterizer=MeshRasterizer(
432
- cameras=cameras,
433
- raster_settings=raster_settings
434
- )
435
- uv_pix2face = rasterizer(self.mesh_uv).pix_to_face
436
-
437
- visible_triangles = []
438
- for i in range(len(pix2face_list)):
439
- valid_faceid = torch.unique(pix2face_list[i])
440
- valid_faceid = valid_faceid[1:] if valid_faceid[0]==-1 else valid_faceid
441
- mask = torch.isin(uv_pix2face[0], valid_faceid, assume_unique=False)
442
- # uv_pix2face[0][~mask] = -1
443
- triangle_mask = torch.ones(self.target_size+(1,), device=self.device)
444
- triangle_mask[~mask] = 0
445
-
446
- triangle_mask[:,1:][triangle_mask[:,:-1] > 0] = 1
447
- triangle_mask[:,:-1][triangle_mask[:,1:] > 0] = 1
448
- triangle_mask[1:,:][triangle_mask[:-1,:] > 0] = 1
449
- triangle_mask[:-1,:][triangle_mask[1:,:] > 0] = 1
450
- visible_triangles.append(triangle_mask)
451
-
452
- self.visible_triangles = visible_triangles
453
-
454
-
455
-
456
- # Render the current mesh and texture from current cameras
457
- def render_textured_views(self):
458
- meshes = self.mesh.extend(len(self.cameras))
459
- images_predicted = self.renderer(meshes, cameras=self.cameras, lights=self.lights)
460
-
461
- return [image.permute(2, 0, 1) for image in images_predicted]
462
-
463
-
464
- # Bake views into a texture
465
- # First bake into individual textures then combine based on cosine weight
466
- @torch.enable_grad()
467
- def bake_texture(self, views=None, main_views=[], cos_weighted=True, channels=None, exp=None, noisy=False, generator=None):
468
- if not exp:
469
- exp=1
470
- if not channels:
471
- channels = self.channels
472
- views = [view.permute(1, 2, 0) for view in views]
473
-
474
- tmp_mesh = self.mesh
475
- bake_maps = [torch.zeros(self.target_size+(views[0].shape[2],), device=self.device, requires_grad=True) for view in views]
476
- optimizer = torch.optim.SGD(bake_maps, lr=1, momentum=0)
477
- optimizer.zero_grad()
478
- loss = 0
479
- for i in range(len(self.cameras)):
480
- bake_tex = TexturesUV([bake_maps[i]], tmp_mesh.textures.faces_uvs_padded(), tmp_mesh.textures.verts_uvs_padded(), sampling_mode=self.sampling_mode)
481
- tmp_mesh.textures = bake_tex
482
- images_predicted = self.renderer(tmp_mesh, cameras=self.cameras[i], lights=self.lights, device=self.device)
483
- predicted_rgb = images_predicted[..., :-1]
484
- loss += (((predicted_rgb[...] - views[i]))**2).sum()
485
- loss.backward(retain_graph=False)
486
- optimizer.step()
487
-
488
- total_weights = 0
489
- baked = 0
490
- for i in range(len(bake_maps)):
491
- normalized_baked_map = bake_maps[i].detach() / (self.gradient_maps[i] + 1E-8)
492
- bake_map = voronoi_solve(normalized_baked_map, self.gradient_maps[i][...,0])
493
- weight = self.visible_triangles[i] * (self.cos_maps[i]) ** exp
494
- if noisy:
495
- noise = torch.rand(weight.shape[:-1]+(1,), generator=generator).type(weight.dtype).to(weight.device)
496
- weight *= noise
497
- total_weights += weight
498
- baked += bake_map * weight
499
- baked /= total_weights + 1E-8
500
- baked = voronoi_solve(baked, total_weights[...,0])
501
-
502
- bake_tex = TexturesUV([baked], tmp_mesh.textures.faces_uvs_padded(), tmp_mesh.textures.verts_uvs_padded(), sampling_mode=self.sampling_mode)
503
- tmp_mesh.textures = bake_tex
504
- extended_mesh = tmp_mesh.extend(len(self.cameras))
505
- images_predicted = self.renderer(extended_mesh, cameras=self.cameras, lights=self.lights)
506
- learned_views = [image.permute(2, 0, 1) for image in images_predicted]
507
-
508
- return learned_views, baked.permute(2, 0, 1), total_weights.permute(2, 0, 1)
509
-
510
-
511
- # Move the internel data to a specific device
512
- def to(self, device):
513
- for mesh_name in ["mesh", "mesh_d", "mesh_uv"]:
514
- if hasattr(self, mesh_name):
515
- mesh = getattr(self, mesh_name)
516
- setattr(self, mesh_name, mesh.to(device))
517
- for list_name in ["visible_triangles", "visibility_maps", "cos_maps"]:
518
- if hasattr(self, list_name):
519
- map_list = getattr(self, list_name)
520
- for i in range(len(map_list)):
521
- map_list[i] = map_list[i].to(device)
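To make the removed workflow concrete, here is a rough usage sketch of the UVProjection class deleted above, following its own comments (nearest sampling and channels=4 when operating on latents, with render_size matching the latent resolution). The mesh `my_trimesh`, the camera poses, and the numeric values are illustrative placeholders, not taken from the repository:

import torch

device = torch.device("cuda:0")

uvp = UVProjection(texture_size=96, render_size=64,
                   sampling_mode="nearest", channels=4, device=device)
uvp.load_mesh(my_trimesh, scale_factor=1.0, autouv=True)          # my_trimesh: a trimesh.Trimesh
uvp.set_cameras_and_render_settings(
    [(0, azim) for azim in (0, 90, 180, 270)],                    # (elevation, azimuth) pairs
    camera_distance=2.7,
)

latent_tex = uvp.set_noise_texture()       # (4, 96, 96) noise texture baked into UV space
views = uvp.render_textured_views()        # list of (5, 64, 64) tensors: 4 channels + alpha
learned_views, baked_tex, weights = uvp.bake_texture(
    views=[v[:-1] for v in views]          # drop the alpha channel before baking
)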
 
step1x3d_texture/texture_sync/shader.py DELETED
@@ -1,118 +0,0 @@
1
- from typing import Optional
2
-
3
- import torch
4
- import pytorch3d
5
-
6
-
7
- from pytorch3d.io import load_objs_as_meshes, load_obj, save_obj
8
- from pytorch3d.ops import interpolate_face_attributes
9
-
10
- from pytorch3d.structures import Meshes
11
- from pytorch3d.renderer import (
12
- look_at_view_transform,
13
- FoVPerspectiveCameras,
14
- AmbientLights,
15
- PointLights,
16
- DirectionalLights,
17
- Materials,
18
- RasterizationSettings,
19
- MeshRenderer,
20
- MeshRasterizer,
21
- SoftPhongShader,
22
- SoftSilhouetteShader,
23
- HardPhongShader,
24
- TexturesVertex,
25
- TexturesUV,
26
- Materials,
27
-
28
- )
29
- from pytorch3d.renderer.blending import BlendParams,hard_rgb_blend
30
- from pytorch3d.renderer.utils import convert_to_tensors_and_broadcast, TensorProperties
31
-
32
- from pytorch3d.renderer.lighting import AmbientLights
33
- from pytorch3d.renderer.materials import Materials
34
- from pytorch3d.renderer.mesh.shader import ShaderBase
35
- from pytorch3d.renderer.mesh.shading import _apply_lighting, flat_shading
36
- from pytorch3d.renderer.mesh.rasterizer import Fragments
37
-
38
-
39
- '''
40
- Customized the original pytorch3d hard flat shader to support N channel flat shading
41
- '''
42
- class HardNChannelFlatShader(ShaderBase):
43
- """
44
- Per face lighting - the lighting model is applied using the average face
45
- position and the face normal. The blending function hard assigns
46
- the color of the closest face for each pixel.
47
-
48
- To use the default values, simply initialize the shader with the desired
49
- device e.g.
50
-
51
- .. code-block::
52
-
53
- shader = HardFlatShader(device=torch.device("cuda:0"))
54
- """
55
-
56
- def __init__(
57
- self,
58
- device = "cpu",
59
- cameras: Optional[TensorProperties] = None,
60
- lights: Optional[TensorProperties] = None,
61
- materials: Optional[Materials] = None,
62
- blend_params: Optional[BlendParams] = None,
63
- channels: int = 3,
64
- ):
65
- self.channels = channels
66
- ones = ((1.0,)*channels,)
67
- zeros = ((0.0,)*channels,)
68
-
69
- if not isinstance(lights, AmbientLights) or not lights.ambient_color.shape[-1] == channels:
70
- lights = AmbientLights(
71
- ambient_color=ones,
72
- device=device,
73
- )
74
-
75
- if not materials or not materials.ambient_color.shape[-1] == channels:
76
- materials = Materials(
77
- device=device,
78
- diffuse_color=zeros,
79
- ambient_color=ones,
80
- specular_color=zeros,
81
- shininess=0.0,
82
- )
83
-
84
- blend_params_new = BlendParams(background_color=(1.0,)*channels)
85
- if not isinstance(blend_params, BlendParams):
86
- blend_params = blend_params_new
87
- else:
88
- background_color_ = blend_params.background_color
89
- if isinstance(background_color_, Sequence[float]) and not len(background_color_) == channels:
90
- blend_params = blend_params_new
91
- if isinstance(background_color_, torch.Tensor) and not background_color_.shape[-1] == channels:
92
- blend_params = blend_params_new
93
-
94
- super().__init__(
95
- device,
96
- cameras,
97
- lights,
98
- materials,
99
- blend_params,
100
- )
101
-
102
-
103
- def forward(self, fragments: Fragments, meshes: Meshes, **kwargs) -> torch.Tensor:
104
- cameras = super()._get_cameras(**kwargs)
105
- texels = meshes.sample_textures(fragments)
106
- lights = kwargs.get("lights", self.lights)
107
- materials = kwargs.get("materials", self.materials)
108
- blend_params = kwargs.get("blend_params", self.blend_params)
109
- colors = flat_shading(
110
- meshes=meshes,
111
- fragments=fragments,
112
- texels=texels,
113
- lights=lights,
114
- cameras=cameras,
115
- materials=materials,
116
- )
117
- images = hard_rgb_blend(colors, fragments, blend_params)
118
- return images
 
step1x3d_texture/texture_sync/step_sync.py DELETED
@@ -1,125 +0,0 @@
1
- import torch
2
- from diffusers.utils.torch_utils import randn_tensor
3
-
4
- '''
5
-
6
- Customized Step Function
7
- step on texture
8
- '''
9
- @torch.no_grad()
10
- def step_tex_sync(
11
- scheduler,
12
- uvp,
13
- model_output: torch.FloatTensor,
14
- timestep: int,
15
- sample: torch.FloatTensor,
16
- texture: None,
17
- generator=None,
18
- return_dict: bool = True,
19
- guidance_scale = 1,
20
- main_views = [],
21
- hires_original_views = True,
22
- exp=None,
23
- cos_weighted=True
24
- ):
25
- t = timestep
26
-
27
- prev_t = scheduler.previous_timestep(t)
28
-
29
- if model_output.shape[1] == sample.shape[1] * 2 and scheduler.variance_type in ["learned", "learned_range"]:
30
- model_output, predicted_variance = torch.split(model_output, sample.shape[1], dim=1)
31
- else:
32
- predicted_variance = None
33
-
34
- # 1. compute alphas, betas
35
- alpha_prod_t = scheduler.alphas_cumprod[t]
36
- alpha_prod_t_prev = scheduler.alphas_cumprod[prev_t] if prev_t >= 0 else scheduler.one
37
- beta_prod_t = 1 - alpha_prod_t
38
- beta_prod_t_prev = 1 - alpha_prod_t_prev
39
- current_alpha_t = alpha_prod_t / alpha_prod_t_prev
40
- current_beta_t = 1 - current_alpha_t
41
-
42
- # 2. compute predicted original sample from predicted noise also called
43
- # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf
44
- if scheduler.config.prediction_type == "epsilon":
45
- pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
46
- elif scheduler.config.prediction_type == "sample":
47
- pred_original_sample = model_output
48
- elif scheduler.config.prediction_type == "v_prediction":
49
- pred_original_sample = (alpha_prod_t**0.5) * sample - (beta_prod_t**0.5) * model_output
50
- else:
51
- raise ValueError(
52
- f"prediction_type given as {scheduler.config.prediction_type} must be one of `epsilon`, `sample` or"
53
- " `v_prediction` for the DDPMScheduler."
54
- )
55
- # 3. Clip or threshold "predicted x_0"
56
- if scheduler.config.thresholding:
57
- pred_original_sample = scheduler._threshold_sample(pred_original_sample)
58
- elif scheduler.config.clip_sample:
59
- pred_original_sample = pred_original_sample.clamp(
60
- -scheduler.config.clip_sample_range, scheduler.config.clip_sample_range
61
- )
62
-
63
- # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
64
- # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
65
- pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t
66
- current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t
67
-
68
- '''
69
- Add multidiffusion here
70
- '''
71
-
72
- if texture is None:
73
- sample_views = [view for view in sample]
74
- sample_views, texture, _ = uvp.bake_texture(views=sample_views, main_views=main_views, exp=exp)
75
- sample_views = torch.stack(sample_views, axis=0)[:,:-1,...]
76
-
77
-
78
- original_views = [view for view in pred_original_sample]
79
- original_views, original_tex, visibility_weights = uvp.bake_texture(views=original_views, main_views=main_views, exp=exp)
80
- uvp.set_texture_map(original_tex)
81
- original_views = uvp.render_textured_views()
82
- original_views = torch.stack(original_views, axis=0)[:,:-1,...]
83
-
84
- # 5. Compute predicted previous sample µ_t
85
- # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
86
- # pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample
87
- prev_tex = pred_original_sample_coeff * original_tex + current_sample_coeff * texture
88
-
89
- # 6. Add noise
90
- variance = 0
91
-
92
- if predicted_variance is not None:
93
- variance_views = [view for view in predicted_variance]
94
- variance_views, variance_tex, visibility_weights = uvp.bake_texture(views=variance_views, main_views=main_views, cos_weighted=cos_weighted, exp=exp)
95
- variance_views = torch.stack(variance_views, axis=0)[:,:-1,...]
96
- else:
97
- variance_tex = None
98
-
99
- if t > 0:
100
- device = texture.device
101
- variance_noise = randn_tensor(
102
- texture.shape, generator=generator, device=device, dtype=texture.dtype
103
- )
104
- if scheduler.variance_type == "fixed_small_log":
105
- variance = scheduler._get_variance(t, predicted_variance=variance_tex) * variance_noise
106
- elif scheduler.variance_type == "learned_range":
107
- variance = scheduler._get_variance(t, predicted_variance=variance_tex)
108
- variance = torch.exp(0.5 * variance) * variance_noise
109
- else:
110
- variance = (scheduler._get_variance(t, predicted_variance=variance_tex) ** 0.5) * variance_noise
111
- prev_tex = prev_tex + variance
112
-
113
- uvp.set_texture_map(prev_tex)
114
- prev_views = uvp.render_textured_views()
115
- pred_prev_sample = torch.clone(sample)
116
- for i, view in enumerate(prev_views):
117
- pred_prev_sample[i] = view[:-1]
118
- masks = [view[-1:] for view in prev_views]
119
-
120
- return {"prev_sample": pred_prev_sample, "pred_original_sample":pred_original_sample, "prev_tex": prev_tex}
121
-
122
- if not return_dict:
123
- return pred_prev_sample, pred_original_sample
124
- pass
125
-
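Finally, the deleted step_tex_sync above stood in for scheduler.step inside the multiview denoising loop: each DDPM step was computed on the baked UV texture and the per-view latents were re-rendered from it. A rough sketch of that call pattern, where `unet`, `latents`, `prompt_embeds`, `timesteps`, `scheduler`, and `uvp` are assumed to come from the surrounding pipeline (illustrative only, not code from the repository):

texture = None                                   # UV-space latent texture, carried across steps
for t in timesteps:
    noise_pred = unet(latents, t, encoder_hidden_states=prompt_embeds).sample
    out = step_tex_sync(scheduler, uvp, noise_pred, t, latents, texture=texture)
    latents = out["prev_sample"]                 # views re-rendered from the updated texture
    texture = out["prev_tex"]                    # becomes the input texture for the next step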