Commit 37e4080 · committed by xinjie.wang
Parent: f8d7009
Message: update

Files changed:
- asset3d_gen/data/backproject_v2.py (+21 -15)
- asset3d_gen/models/delight_model.py (+22 -27)
- asset3d_gen/models/sr_model.py (+33 -36)
asset3d_gen/data/backproject_v2.py
CHANGED

@@ -221,32 +221,37 @@ class TextureBacker:
         mask_thresh: float = 0.5,
     ):
         camera = init_kal_camera(camera_params)
-        mv = camera.view_matrix()  # (n 4 4) world2cam
-        p = camera.intrinsics.projection_matrix()
-        # NOTE: add a negative sign at P[0, 2] as the y axis is flipped in `nvdiffrast` output. # noqa
-        p[:, 1, 1] = -p[:, 1, 1]
-        renderer = DiffrastRender(
-            p_matrix=p,
-            mv_matrix=mv,
-            resolution_hw=camera_params.resolution_hw,
-            context=dr.RasterizeCudaContext(),
-            mask_thresh=mask_thresh,
-            grad_db=False,
-            device=camera_params.device,
-            antialias_mask=True,
-        )
         self.camera = camera
-        self.renderer = renderer
+        self.camera_params = camera_params
+        self.renderer = None
         self.view_weights = view_weights
         self.device = camera_params.device
         self.render_wh = render_wh
         self.texture_wh = texture_wh
+        self.mask_thresh = mask_thresh
 
         self.bake_angle_thresh = bake_angle_thresh
         self.bake_unreliable_kernel_size = int(
             (2 / 512) * max(self.render_wh[0], self.render_wh[1])
         )
 
+    def _lazy_init_render(self, camera, camera_params, mask_thresh):
+        if self.renderer is None:
+            mv = camera.view_matrix()  # (n 4 4) world2cam
+            p = camera.intrinsics.projection_matrix()
+            # NOTE: add a negative sign at P[0, 2] as the y axis is flipped in `nvdiffrast` output. # noqa
+            p[:, 1, 1] = -p[:, 1, 1]
+            self.renderer = DiffrastRender(
+                p_matrix=p,
+                mv_matrix=mv,
+                resolution_hw=camera_params.resolution_hw,
+                context=dr.RasterizeCudaContext(),
+                mask_thresh=mask_thresh,
+                grad_db=False,
+                device=camera_params.device,
+                antialias_mask=True,
+            )
+
     def load_mesh(self, mesh: trimesh.Trimesh) -> trimesh.Trimesh:
         mesh.vertices, scale, center = normalize_vertices_array(mesh.vertices)
         self.scale, self.center = scale, center

@@ -458,6 +463,7 @@ class TextureBacker:
         mesh: trimesh.Trimesh,
         output_path: str,
     ) -> trimesh.Trimesh:
+        self._lazy_init_render(self.camera, self.camera_params, self.mask_thresh)
         mesh = self.load_mesh(mesh)
         texture_np, mask_np = self.cuda_forward(colors, mesh)
 
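Note on the change above: `dr.RasterizeCudaContext()` and the `DiffrastRender` wrapper are no longer built in `__init__` but on the first bake call, via `_lazy_init_render`. This is presumably for ZeroGPU Spaces, where CUDA is only usable inside `@spaces.GPU`-decorated functions, so constructing the object at import time must stay CPU-only. A minimal sketch of the pattern, with illustrative names rather than the repo's classes:

# Lazy GPU-resource initialization: construct cheaply, defer the CUDA
# setup until the first call that is allowed to touch the GPU.
class LazyRenderer:
    def __init__(self, resolution_hw=(2048, 2048)):
        self.resolution_hw = resolution_hw
        self.renderer = None  # no CUDA context created at construction time

    def _lazy_init_render(self):
        # Idempotent: the heavy setup runs once, inside the GPU call path.
        if self.renderer is None:
            self.renderer = object()  # stand-in for DiffrastRender(...)

    def bake(self, colors, mesh):
        self._lazy_init_render()  # first call pays the setup cost
        return self.renderer

backer = LazyRenderer()               # safe at import time
assert backer.renderer is None
backer.bake(colors=None, mesh=None)   # GPU setup would happen here
assert backer.renderer is not None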
asset3d_gen/models/delight_model.py
CHANGED

@@ -28,7 +28,6 @@ class DelightingModel(object):
         device: str = "cuda",
         seed: int = 0,
     ) -> None:
-        self.model_path = model_path
         self.image_guide_scale = image_guide_scale
         self.text_guide_scale = text_guide_scale
         self.num_infer_step = num_infer_step

@@ -38,31 +37,28 @@ class DelightingModel(object):
         )
         self.seed = seed
         self.device = device
-        self.pipeline = None
-
-    def _lazy_init_pipeline(self):
-        if self.pipeline is not None:
-            return
-        model_path = self.model_path
-        if model_path is None:
-            suffix = "hunyuan3d-delight-v2-0"
-            model_path = snapshot_download(
-                repo_id="tencent/Hunyuan3D-2", allow_patterns=f"{suffix}/*"
-            )
-            model_path = os.path.join(model_path, suffix)
-
-        pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
-            model_path,
-            torch_dtype=torch.float16,
-            safety_checker=None,
-        )
-        pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
-            pipeline.scheduler.config
-        )
-        pipeline.set_progress_bar_config(disable=True)
+
+        if model_path is None:
+            suffix = "hunyuan3d-delight-v2-0"
+            model_path = snapshot_download(
+                repo_id="tencent/Hunyuan3D-2", allow_patterns=f"{suffix}/*"
+            )
+            model_path = os.path.join(model_path, suffix)
+
+        pipeline = StableDiffusionInstructPix2PixPipeline.from_pretrained(
+            model_path,
+            torch_dtype=torch.float16,
+            safety_checker=None,
+        )
+        pipeline.scheduler = EulerAncestralDiscreteScheduler.from_config(
+            pipeline.scheduler.config
+        )
+        pipeline.set_progress_bar_config(disable=True)
 
-        pipeline.to(self.device, torch.float16)
-        self.pipeline = pipeline
+        pipeline.to(self.device, torch.float16)
+        # pipeline.enable_model_cpu_offload()
+        # pipeline.enable_xformers_memory_efficient_attention()
+        self.pipeline = pipeline
 
     def recenter_image(
         self, image: Image.Image, border_ratio: float = 0.2

@@ -113,15 +109,14 @@ class DelightingModel(object):
         preprocess: bool = False,
         target_wh: tuple[int, int] = None,
     ) -> Image.Image:
-        self._lazy_init_pipeline()
-
         if isinstance(image, str):
             image = Image.open(image)
         elif isinstance(image, np.ndarray):
             image = Image.fromarray(image)
 
         if preprocess:
-            image = self.bg_remover(image)
+            bg_remover = RembgRemover()
+            image = bg_remover(image)
             image = self.recenter_image(image)
 
         if target_wh is not None:
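This file moves the opposite way: pipeline construction leaves `_lazy_init_pipeline` and becomes eager in `__init__`, likely because a torch pipeline can be placed on the device at startup under ZeroGPU, while a raw rasterizer context (as in backproject_v2.py above) cannot. The weight lookup uses `huggingface_hub.snapshot_download` with `allow_patterns` so only the delight subfolder is fetched; a self-contained sketch of that step (the helper name is mine, the repo id and subfolder mirror the diff):

# Resolve a local directory for the delight weights, downloading only
# the needed subfolder of the Hunyuan3D-2 repo when no path is given.
import os
from huggingface_hub import snapshot_download

def resolve_delight_weights(model_path: str = None) -> str:
    if model_path is None:
        suffix = "hunyuan3d-delight-v2-0"
        root = snapshot_download(
            repo_id="tencent/Hunyuan3D-2", allow_patterns=f"{suffix}/*"
        )
        model_path = os.path.join(root, suffix)
    return model_path

With eager construction, the one-time download and fp16 load happen at process start, so the first `__call__` no longer pays that cost; the per-call `RembgRemover()` in the `preprocess` branch keeps the background remover out of GPU-resident state.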
asset3d_gen/models/sr_model.py
CHANGED

@@ -59,58 +59,55 @@ class ImageStableSR:
 
 class ImageRealESRGAN:
     def __init__(self, outscale: int, model_path: str = None) -> None:
-        #
+        # monkey_patch
         import torchvision
         from packaging import version
 
         if version.parse(torchvision.__version__) > version.parse("0.16"):
             import sys
             import types
+
             import torchvision.transforms.functional as TF
 
-            functional_tensor = types.ModuleType("torchvision.transforms.functional_tensor")
+            functional_tensor = types.ModuleType(
+                "torchvision.transforms.functional_tensor"
+            )
             functional_tensor.rgb_to_grayscale = TF.rgb_to_grayscale
-            sys.modules["torchvision.transforms.functional_tensor"] = functional_tensor
+            sys.modules["torchvision.transforms.functional_tensor"] = (
+                functional_tensor
+            )
+
+        from basicsr.archs.rrdbnet_arch import RRDBNet
+        from realesrgan import RealESRGANer
 
         self.outscale = outscale
-        self.model_path = model_path
-        self.upsampler = None
-
-    def _lazy_init(self):
-        if self.upsampler is not None:
-            return
-        from basicsr.archs.rrdbnet_arch import RRDBNet
-        from realesrgan import RealESRGANer
-
-        model = RRDBNet(
-            num_in_ch=3,
-            num_out_ch=3,
-            num_feat=64,
-            num_block=23,
-            num_grow_ch=32,
-            scale=4,
-        )
-
-        model_path = self.model_path
-        if model_path is None:
-            suffix = "super_resolution"
-            model_path = snapshot_download(
-                repo_id="xinjjj/RoboAssetGen", allow_patterns=f"{suffix}/*"
-            )
-            model_path = os.path.join(model_path, suffix, "RealESRGAN_x4plus.pth")
-
-        self.upsampler = RealESRGANer(
-            scale=4,
-            model_path=model_path,
-            model=model,
-            pre_pad=0,
-            half=True,
-        )
+        model = RRDBNet(
+            num_in_ch=3,
+            num_out_ch=3,
+            num_feat=64,
+            num_block=23,
+            num_grow_ch=32,
+            scale=4,
+        )
+        if model_path is None:
+            suffix = "super_resolution"
+            model_path = snapshot_download(
+                repo_id="xinjjj/RoboAssetGen", allow_patterns=f"{suffix}/*"
+            )
+            model_path = os.path.join(
+                model_path, suffix, "RealESRGAN_x4plus.pth"
+            )
 
+        self.upsampler = RealESRGANer(
+            scale=4,
+            model_path=model_path,
+            model=model,
+            pre_pad=0,
+            half=True,
+        )
+
     @spaces.GPU
     def __call__(self, image: Union[Image.Image, np.ndarray]) -> Image.Image:
-        self._lazy_init()
-
         if isinstance(image, Image.Image):
             image = np.array(image)
 
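The structural change here mirrors delight_model.py: the `RRDBNet`/`RealESRGANer` construction moves from a `_lazy_init` helper into `__init__`, leaving `__call__`, the `@spaces.GPU` entry point, to do only inference. The piece both versions keep (the commit only reformats it) is the torchvision shim: torchvision > 0.16 removed the private module `torchvision.transforms.functional_tensor`, which `basicsr` (pulled in by `realesrgan`) still imports, so a stub module exposing `rgb_to_grayscale` is registered in `sys.modules` before `basicsr` is imported. A standalone version of the shim:

# Compatibility shim: re-create torchvision.transforms.functional_tensor
# for libraries (e.g. basicsr) that still import it on torchvision > 0.16.
import sys
import types

import torchvision
import torchvision.transforms.functional as TF
from packaging import version

if version.parse(torchvision.__version__) > version.parse("0.16"):
    shim = types.ModuleType("torchvision.transforms.functional_tensor")
    shim.rgb_to_grayscale = TF.rgb_to_grayscale  # the symbol basicsr needs
    sys.modules["torchvision.transforms.functional_tensor"] = shim

# With the shim registered, this import succeeds again:
# from basicsr.archs.rrdbnet_arch import RRDBNet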