xinjie.wang committed on
Commit
37e4080
·
1 Parent(s): f8d7009
asset3d_gen/data/backproject_v2.py CHANGED
@@ -221,32 +221,37 @@ class TextureBacker:
221
  mask_thresh: float = 0.5,
222
  ):
223
  camera = init_kal_camera(camera_params)
224
- mv = camera.view_matrix() # (n 4 4) world2cam
225
- p = camera.intrinsics.projection_matrix()
226
- # NOTE: add a negative sign at P[0, 2] as the y axis is flipped in `nvdiffrast` output. # noqa
227
- p[:, 1, 1] = -p[:, 1, 1]
228
- renderer = DiffrastRender(
229
- p_matrix=p,
230
- mv_matrix=mv,
231
- resolution_hw=camera_params.resolution_hw,
232
- context=dr.RasterizeCudaContext(),
233
- mask_thresh=mask_thresh,
234
- grad_db=False,
235
- device=camera_params.device,
236
- antialias_mask=True,
237
- )
238
  self.camera = camera
239
- self.renderer = renderer
 
240
  self.view_weights = view_weights
241
  self.device = camera_params.device
242
  self.render_wh = render_wh
243
  self.texture_wh = texture_wh
 
244
 
245
  self.bake_angle_thresh = bake_angle_thresh
246
  self.bake_unreliable_kernel_size = int(
247
  (2 / 512) * max(self.render_wh[0], self.render_wh[1])
248
  )
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  def load_mesh(self, mesh: trimesh.Trimesh) -> trimesh.Trimesh:
251
  mesh.vertices, scale, center = normalize_vertices_array(mesh.vertices)
252
  self.scale, self.center = scale, center
@@ -458,6 +463,7 @@ class TextureBacker:
458
  mesh: trimesh.Trimesh,
459
  output_path: str,
460
  ) -> trimesh.Trimesh:
 
461
  mesh = self.load_mesh(mesh)
462
  texture_np, mask_np = self.cuda_forward(colors, mesh)
463
 
 
221
  mask_thresh: float = 0.5,
222
  ):
223
  camera = init_kal_camera(camera_params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  self.camera = camera
225
+ self.camera_params = camera_params
226
+ self.renderer = None
227
  self.view_weights = view_weights
228
  self.device = camera_params.device
229
  self.render_wh = render_wh
230
  self.texture_wh = texture_wh
231
+ self.mask_thresh = mask_thresh
232
 
233
  self.bake_angle_thresh = bake_angle_thresh
234
  self.bake_unreliable_kernel_size = int(
235
  (2 / 512) * max(self.render_wh[0], self.render_wh[1])
236
  )
237
 
238
+ def _lazy_init_render(self, camera, camera_params, mask_thresh):
239
+ if self.renderer is None:
240
+ mv = camera.view_matrix() # (n 4 4) world2cam
241
+ p = camera.intrinsics.projection_matrix()
242
+ # NOTE: negate P[1, 1] because the y axis is flipped in `nvdiffrast` output. # noqa
243
+ p[:, 1, 1] = -p[:, 1, 1]
244
+ self.renderer = DiffrastRender(
245
+ p_matrix=p,
246
+ mv_matrix=mv,
247
+ resolution_hw=camera_params.resolution_hw,
248
+ context=dr.RasterizeCudaContext(),
249
+ mask_thresh=mask_thresh,
250
+ grad_db=False,
251
+ device=camera_params.device,
252
+ antialias_mask=True,
253
+ )
254
+
255
  def load_mesh(self, mesh: trimesh.Trimesh) -> trimesh.Trimesh:
256
  mesh.vertices, scale, center = normalize_vertices_array(mesh.vertices)
257
  self.scale, self.center = scale, center
 
463
  mesh: trimesh.Trimesh,
464
  output_path: str,
465
  ) -> trimesh.Trimesh:
466
+ self._lazy_init_render(self.camera, self.camera_params, self.mask_thresh)
467
  mesh = self.load_mesh(mesh)
468
  texture_np, mask_np = self.cuda_forward(colors, mesh)
469
 
asset3d_gen/models/delight_model.py CHANGED
@@ -28,7 +28,6 @@ class DelightingModel(object):
28
  device: str = "cuda",
29
  seed: int = 0,
30
  ) -> None:
31
- self.model_path = model_path
32
  self.image_guide_scale = image_guide_scale
33
  self.text_guide_scale = text_guide_scale
34
  self.num_infer_step = num_infer_step
@@ -38,31 +37,28 @@ class DelightingModel(object):
38
  )
39
  self.seed = seed
40
  self.device = device
41
- self.bg_remover = RembgRemover()
42
- self.pipeline = None # lazy load model adapt to @spaces.GPU
43
-
44
- def _lazy_init_pipeline(self):
45
- if self.pipeline is None:
46
- model_path = self.model_path
47
- if model_path is None:
48
- suffix = "hunyuan3d-delight-v2-0"
49
- model_path = snapshot_download(
50
- repo_id="tencent/Hunyuan3D-2", allow_patterns=f"{suffix}/*"
51
- )
52
- model_path = os.path.join(model_path, suffix)
53
-
54
- pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
55
- model_path,
56
- torch_dtype=torch.float16,
57
- safety_checker=None,
58
- )
59
- pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
60
- pipeline.scheduler.config
61
  )
62
- pipeline.set_progress_bar_config(disable=True)
 
 
 
 
 
 
 
 
 
 
63
 
64
- pipeline.to(self.device, torch.float16)
65
- self.pipeline = pipeline
 
 
66
 
67
  def recenter_image(
68
  self, image: Image.Image, border_ratio: float = 0.2
@@ -113,15 +109,14 @@ class DelightingModel(object):
113
  preprocess: bool = False,
114
  target_wh: tuple[int, int] = None,
115
  ) -> Image.Image:
116
- self._lazy_init_pipeline()
117
-
118
  if isinstance(image, str):
119
  image = Image.open(image)
120
  elif isinstance(image, np.ndarray):
121
  image = Image.fromarray(image)
122
 
123
  if preprocess:
124
- image = self.bg_remover(image)
 
125
  image = self.recenter_image(image)
126
 
127
  if target_wh is not None:
 
28
  device: str = "cuda",
29
  seed: int = 0,
30
  ) -> None:
 
31
  self.image_guide_scale = image_guide_scale
32
  self.text_guide_scale = text_guide_scale
33
  self.num_infer_step = num_infer_step
 
37
  )
38
  self.seed = seed
39
  self.device = device
40
+
41
+ if model_path is None:
42
+ suffix = "hunyuan3d-delight-v2-0"
43
+ model_path = snapshot_download(
44
+ repo_id="tencent/Hunyuan3D-2", allow_patterns=f"{suffix}/*"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  )
46
+ model_path = os.path.join(model_path, suffix)
47
+
48
+ pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
49
+ model_path,
50
+ torch_dtype=torch.float16,
51
+ safety_checker=None,
52
+ )
53
+ pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
54
+ pipeline.scheduler.config
55
+ )
56
+ pipeline.set_progress_bar_config(disable=True)
57
 
58
+ pipeline.to(self.device, torch.float16)
59
+ # pipeline.enable_model_cpu_offload()
60
+ # pipeline.enable_xformers_memory_efficient_attention()
61
+ self.pipeline = pipeline
62
 
63
  def recenter_image(
64
  self, image: Image.Image, border_ratio: float = 0.2
 
109
  preprocess: bool = False,
110
  target_wh: tuple[int, int] = None,
111
  ) -> Image.Image:
 
 
112
  if isinstance(image, str):
113
  image = Image.open(image)
114
  elif isinstance(image, np.ndarray):
115
  image = Image.fromarray(image)
116
 
117
  if preprocess:
118
+ bg_remover = RembgRemover()
119
+ image = bg_remover(image)
120
  image = self.recenter_image(image)
121
 
122
  if target_wh is not None:
asset3d_gen/models/sr_model.py CHANGED
@@ -59,58 +59,55 @@ class ImageStableSR:
59
 
60
  class ImageRealESRGAN:
61
  def __init__(self, outscale: int, model_path: str = None) -> None:
62
- # monkey patch to support torchvision>=0.16
63
  import torchvision
64
  from packaging import version
65
 
66
  if version.parse(torchvision.__version__) > version.parse("0.16"):
67
  import sys
68
  import types
 
69
  import torchvision.transforms.functional as TF
70
 
71
- functional_tensor = types.ModuleType("torchvision.transforms.functional_tensor")
 
 
72
  functional_tensor.rgb_to_grayscale = TF.rgb_to_grayscale
73
- sys.modules["torchvision.transforms.functional_tensor"] = functional_tensor
 
 
 
 
 
74
 
75
  self.outscale = outscale
76
- self.model_path = model_path
77
- self.upsampler = None
78
-
79
- def _lazy_init(self):
80
- if self.upsampler is None:
81
- from basicsr.archs.rrdbnet_arch import RRDBNet
82
- from realesrgan import RealESRGANer
83
- from huggingface_hub import snapshot_download
84
-
85
- model = RRDBNet(
86
- num_in_ch=3,
87
- num_out_ch=3,
88
- num_feat=64,
89
- num_block=23,
90
- num_grow_ch=32,
91
- scale=4,
92
  )
93
-
94
- model_path = self.model_path
95
- if model_path is None:
96
- suffix = "super_resolution"
97
- model_path = snapshot_download(
98
- repo_id="xinjjj/RoboAssetGen", allow_patterns=f"{suffix}/*"
99
- )
100
- model_path = os.path.join(model_path, suffix, "RealESRGAN_x4plus.pth")
101
-
102
- self.upsampler = RealESRGANer(
103
- scale=4,
104
- model_path=model_path,
105
- model=model,
106
- pre_pad=0,
107
- half=True,
108
  )
109
 
 
 
 
 
 
 
 
 
110
  @spaces.GPU
111
  def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
112
- self._lazy_init()
113
-
114
  if isinstance(image, Image.Image):
115
  image = np.array(image)
116
 
 
59
 
60
  class ImageRealESRGAN:
61
  def __init__(self, outscale: int, model_path: str = None) -> None:
62
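+ # Monkey patch for torchvision > 0.16: register a stub `torchvision.transforms.functional_tensor` module exposing `rgb_to_grayscale`.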
63
  import torchvision
64
  from packaging import version
65
 
66
  if version.parse(torchvision.__version__) > version.parse("0.16"):
67
  import sys
68
  import types
69
+
70
  import torchvision.transforms.functional as TF
71
 
72
+ functional_tensor = types.ModuleType(
73
+ "torchvision.transforms.functional_tensor"
74
+ )
75
  functional_tensor.rgb_to_grayscale = TF.rgb_to_grayscale
76
+ sys.modules["torchvision.transforms.functional_tensor"] = (
77
+ functional_tensor
78
+ )
79
+
80
+ from basicsr.archs.rrdbnet_arch import RRDBNet
81
+ from realesrgan import RealESRGANer
82
 
83
  self.outscale = outscale
84
+ model = RRDBNet(
85
+ num_in_ch=3,
86
+ num_out_ch=3,
87
+ num_feat=64,
88
+ num_block=23,
89
+ num_grow_ch=32,
90
+ scale=4,
91
+ )
92
+ if model_path is None:
93
+ suffix = "super_resolution"
94
+ model_path = snapshot_download(
95
+ repo_id="xinjjj/RoboAssetGen", allow_patterns=f"{suffix}/*"
 
 
 
 
96
  )
97
+ model_path = os.path.join(
98
+ model_path, suffix, "RealESRGAN_x4plus.pth"
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  )
100
 
101
+ self.upsampler = RealESRGANer(
102
+ scale=4,
103
+ model_path=model_path,
104
+ model=model,
105
+ pre_pad=0,
106
+ half=True,
107
+ )
108
+
109
  @spaces.GPU
110
  def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
 
 
111
  if isinstance(image, Image.Image):
112
  image = np.array(image)
113