Commit 37e4080 · committed by xinjie.wang
Parent: f8d7009
Message: update

Files changed:
- asset3d_gen/data/backproject_v2.py (+21 -15)
- asset3d_gen/models/delight_model.py (+22 -27)
- asset3d_gen/models/sr_model.py (+33 -36)
asset3d_gen/data/backproject_v2.py
CHANGED

@@ -221,32 +221,37 @@ class TextureBacker:
         mask_thresh: float = 0.5,
     ):
         camera = init_kal_camera(camera_params)
-        mv = camera.view_matrix()  # (n 4 4) world2cam
-        p = camera.intrinsics.projection_matrix()
-        # NOTE: add a negative sign at P[0, 2] as the y axis is flipped in `nvdiffrast` output. # noqa
-        p[:, 1, 1] = -p[:, 1, 1]
-        renderer = DiffrastRender(
-            p_matrix=p,
-            mv_matrix=mv,
-            resolution_hw=camera_params.resolution_hw,
-            context=dr.RasterizeCudaContext(),
-            mask_thresh=mask_thresh,
-            grad_db=False,
-            device=camera_params.device,
-            antialias_mask=True,
-        )
         self.camera = camera
-        self.renderer = renderer
+        self.camera_params = camera_params
+        self.renderer = None
         self.view_weights = view_weights
         self.device = camera_params.device
         self.render_wh = render_wh
         self.texture_wh = texture_wh
+        self.mask_thresh = mask_thresh
 
         self.bake_angle_thresh = bake_angle_thresh
         self.bake_unreliable_kernel_size = int(
             (2 / 512) * max(self.render_wh[0], self.render_wh[1])
         )
 
+    def _lazy_init_render(self, camera, camera_params, mask_thresh):
+        if self.renderer is None:
+            mv = camera.view_matrix()  # (n 4 4) world2cam
+            p = camera.intrinsics.projection_matrix()
+            # NOTE: add a negative sign at P[0, 2] as the y axis is flipped in `nvdiffrast` output. # noqa
+            p[:, 1, 1] = -p[:, 1, 1]
+            self.renderer = DiffrastRender(
+                p_matrix=p,
+                mv_matrix=mv,
+                resolution_hw=camera_params.resolution_hw,
+                context=dr.RasterizeCudaContext(),
+                mask_thresh=mask_thresh,
+                grad_db=False,
+                device=camera_params.device,
+                antialias_mask=True,
+            )
+
     def load_mesh(self, mesh: trimesh.Trimesh) -> trimesh.Trimesh:
         mesh.vertices, scale, center = normalize_vertices_array(mesh.vertices)
         self.scale, self.center = scale, center

@@ -458,6 +463,7 @@ class TextureBacker:
         mesh: trimesh.Trimesh,
         output_path: str,
     ) -> trimesh.Trimesh:
+        self._lazy_init_render(self.camera, self.camera_params, self.mask_thresh)
         mesh = self.load_mesh(mesh)
         texture_np, mask_np = self.cuda_forward(colors, mesh)
 
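Note on the change above: `dr.RasterizeCudaContext()` and the `DiffrastRender` wrapper are no longer built in `__init__` but on the first bake call, via `_lazy_init_render`. This is presumably for ZeroGPU Spaces, where CUDA is only usable inside `@spaces.GPU`-decorated functions, so constructing the object at import time must stay CPU-only. A minimal sketch of the pattern, with illustrative names rather than the repo's classes:

# Lazy GPU-resource initialization: construct cheaply, defer the CUDA
# setup until the first call that is allowed to touch the GPU.
class LazyRenderer:
    def __init__(self, resolution_hw=(2048, 2048)):
        self.resolution_hw = resolution_hw
        self.renderer = None  # no CUDA context created at construction time

    def _lazy_init_render(self):
        # Idempotent: the heavy setup runs once, inside the GPU call path.
        if self.renderer is None:
            self.renderer = object()  # stand-in for DiffrastRender(...)

    def bake(self, colors, mesh):
        self._lazy_init_render()  # first call pays the setup cost
        return self.renderer

backer = LazyRenderer()               # safe at import time
assert backer.renderer is None
backer.bake(colors=None, mesh=None)   # GPU setup would happen here
assert backer.renderer is not None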
asset3d_gen/models/delight_model.py
CHANGED

@@ -28,7 +28,6 @@ class DelightingModel(object):
         device: str = "cuda",
         seed: int = 0,
     ) -> None:
-        self.model_path = model_path
         self.image_guide_scale = image_guide_scale
         self.text_guide_scale = text_guide_scale
         self.num_infer_step = num_infer_step

@@ -38,31 +37,28 @@ class DelightingModel(object):
         )
         self.seed = seed
         self.device = device
-        self.pipeline = None
-
-    def _lazy_init_pipeline(self):
-        if self.pipeline is not None:
-            return
-        model_path = self.model_path
-        if model_path is None:
-            suffix = "hunyuan3d-delight-v2-0"
-            model_path = snapshot_download(
-                repo_id="tencent/Hunyuan3D-2", allow_patterns=f"{suffix}/*"
-            )
-            model_path = os.path.join(model_path, suffix)
-
-        pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
-            model_path,
-            torch_dtype=torch.float16,
-            safety_checker=None,
-        )
-        pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
-            pipeline.scheduler.config
-        )
-        pipeline.set_progress_bar_config(disable=True)
+
+        if model_path is None:
+            suffix = "hunyuan3d-delight-v2-0"
+            model_path = snapshot_download(
+                repo_id="tencent/Hunyuan3D-2", allow_patterns=f"{suffix}/*"
+            )
+            model_path = os.path.join(model_path, suffix)
+
+        pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
+            model_path,
+            torch_dtype=torch.float16,
+            safety_checker=None,
+        )
+        pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
+            pipeline.scheduler.config
+        )
+        pipeline.set_progress_bar_config(disable=True)
 
-        pipeline.to(self.device, torch.float16)
-        self.pipeline = pipeline
+        pipeline.to(self.device, torch.float16)
+        # pipeline.enable_model_cpu_offload()
+        # pipeline.enable_xformers_memory_efficient_attention()
+        self.pipeline = pipeline
 
     def recenter_image(
         self, image: Image.Image, border_ratio: float = 0.2

@@ -113,15 +109,14 @@ class DelightingModel(object):
         preprocess: bool = False,
         target_wh: tuple[int, int] = None,
     ) -> Image.Image:
-        self._lazy_init_pipeline()
-
         if isinstance(image, str):
             image = Image.open(image)
         elif isinstance(image, np.ndarray):
             image = Image.fromarray(image)
 
         if preprocess:
-            image = self.bg_remover(image)
+            bg_remover = RembgRemover()
+            image = bg_remover(image)
             image = self.recenter_image(image)
 
         if target_wh is not None:
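This file moves the opposite way: pipeline construction leaves `_lazy_init_pipeline` and becomes eager in `__init__`, likely because a torch pipeline can be placed on the device at startup under ZeroGPU, while a raw rasterizer context (as in backproject_v2.py above) cannot. The weight lookup uses `huggingface_hub.snapshot_download` with `allow_patterns` so only the delight subfolder is fetched; a self-contained sketch of that step (the helper name is mine, the repo id and subfolder mirror the diff):

# Resolve a local directory for the delight weights, downloading only
# the needed subfolder of the Hunyuan3D-2 repo when no path is given.
import os
from huggingface_hub import snapshot_download

def resolve_delight_weights(model_path: str = None) -> str:
    if model_path is None:
        suffix = "hunyuan3d-delight-v2-0"
        root = snapshot_download(
            repo_id="tencent/Hunyuan3D-2", allow_patterns=f"{suffix}/*"
        )
        model_path = os.path.join(root, suffix)
    return model_path

With eager construction, the one-time download and fp16 load happen at process start, so the first `__call__` no longer pays that cost; the per-call `RembgRemover()` in the `preprocess` branch keeps the background remover out of GPU-resident state.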
asset3d_gen/models/sr_model.py
CHANGED

@@ -59,58 +59,55 @@ class ImageStableSR:
 
 class ImageRealESRGAN:
     def __init__(self, outscale: int, model_path: str = None) -> None:
-        #
+        # monkey_patch
         import torchvision
         from packaging import version
 
         if version.parse(torchvision.__version__) > version.parse("0.16"):
             import sys
             import types
+
             import torchvision.transforms.functional as TF
 
-            functional_tensor = types.ModuleType("torchvision.transforms.functional_tensor")
+            functional_tensor = types.ModuleType(
+                "torchvision.transforms.functional_tensor"
+            )
             functional_tensor.rgb_to_grayscale = TF.rgb_to_grayscale
-            sys.modules["torchvision.transforms.functional_tensor"] = functional_tensor
+            sys.modules["torchvision.transforms.functional_tensor"] = (
+                functional_tensor
+            )
+
+        from basicsr.archs.rrdbnet_arch import RRDBNet
+        from realesrgan import RealESRGANer
 
         self.outscale = outscale
-        self.model_path = model_path
-        self.upsampler = None
-
-    def _lazy_init(self):
-        if self.upsampler is not None:
-            return
-        from basicsr.archs.rrdbnet_arch import RRDBNet
-        from realesrgan import RealESRGANer
-
-        model = RRDBNet(
-            num_in_ch=3,
-            num_out_ch=3,
-            num_feat=64,
-            num_block=23,
-            num_grow_ch=32,
-            scale=4,
-        )
-
-        model_path = self.model_path
-        if model_path is None:
-            suffix = "super_resolution"
-            model_path = snapshot_download(
-                repo_id="xinjjj/RoboAssetGen", allow_patterns=f"{suffix}/*"
-            )
-            model_path = os.path.join(model_path, suffix, "RealESRGAN_x4plus.pth")
-
-        self.upsampler = RealESRGANer(
-            scale=4,
-            model_path=model_path,
-            model=model,
-            pre_pad=0,
-            half=True,
-        )
+        model = RRDBNet(
+            num_in_ch=3,
+            num_out_ch=3,
+            num_feat=64,
+            num_block=23,
+            num_grow_ch=32,
+            scale=4,
+        )
+        if model_path is None:
+            suffix = "super_resolution"
+            model_path = snapshot_download(
+                repo_id="xinjjj/RoboAssetGen", allow_patterns=f"{suffix}/*"
+            )
+            model_path = os.path.join(
+                model_path, suffix, "RealESRGAN_x4plus.pth"
+            )
 
+        self.upsampler = RealESRGANer(
+            scale=4,
+            model_path=model_path,
+            model=model,
+            pre_pad=0,
+            half=True,
+        )
+
     @spaces.GPU
     def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
-        self._lazy_init()
-
         if isinstance(image, Image.Image):
             image = np.array(image)
 
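The structural change here mirrors delight_model.py: the `RRDBNet`/`RealESRGANer` construction moves from a `_lazy_init` helper into `__init__`, leaving `__call__`, the `@spaces.GPU` entry point, to do only inference. The piece both versions keep (the commit only reformats it) is the torchvision shim: torchvision > 0.16 removed the private module `torchvision.transforms.functional_tensor`, which `basicsr` (pulled in by `realesrgan`) still imports, so a stub module exposing `rgb_to_grayscale` is registered in `sys.modules` before `basicsr` is imported. A standalone version of the shim:

# Compatibility shim: re-create torchvision.transforms.functional_tensor
# for libraries (e.g. basicsr) that still import it on torchvision > 0.16.
import sys
import types

import torchvision
import torchvision.transforms.functional as TF
from packaging import version

if version.parse(torchvision.__version__) > version.parse("0.16"):
    shim = types.ModuleType("torchvision.transforms.functional_tensor")
    shim.rgb_to_grayscale = TF.rgb_to_grayscale  # the symbol basicsr needs
    sys.modules["torchvision.transforms.functional_tensor"] = shim

# With the shim registered, this import succeeds again:
# from basicsr.archs.rrdbnet_arch import RRDBNet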