Spaces:
Running
Running
File size: 3,039 Bytes
7088d16 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from itertools import product
import torch
from fvcore.common.benchmark import benchmark
from pytorch3d.renderer.points.rasterize_points import (
rasterize_points,
rasterize_points_python,
)
from pytorch3d.structures.pointclouds import Pointclouds
def _bm_python_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3):
    """Build a zero-argument callable that invokes `rasterize_points_python`.

    The random seed is fixed (231) so every benchmark configuration sees the
    same point data for a given (N, P).

    Args:
        N: size of the first dimension of the random points tensor.
        P: size of the second dimension of the random points tensor.
        img_size: forwarded unchanged to `rasterize_points_python`.
        radius: forwarded unchanged to `rasterize_points_python`.
        pts_per_pxl: forwarded unchanged to `rasterize_points_python`.

    Returns:
        A callable taking no arguments that runs `rasterize_points_python`
        on the prepared point clouds and returns its result.
    """
    torch.manual_seed(231)
    # Points are drawn from a standard normal as an (N, P, 3) tensor.
    cloud = Pointclouds(points=torch.randn(N, P, 3))

    def run():
        return rasterize_points_python(cloud, img_size, radius, pts_per_pxl)

    return run
def _bm_rasterize_points_with_init(
    N, P, img_size=32, radius=0.1, pts_per_pxl=3, device="cpu", expand_radius=False
):
    """Prepare inputs and return a zero-argument callable running `rasterize_points`.

    All setup (seeding, point generation, optional radius expansion) happens
    here so that only the rasterization call itself is timed.

    Args:
        N: size of the first dimension of the random points tensor.
        P: size of the second dimension of the random points tensor.
        img_size: forwarded unchanged to `rasterize_points`.
        radius: scalar radius. Forwarded as-is unless `expand_radius` is set.
        pts_per_pxl: forwarded unchanged to `rasterize_points`.
        device: device (string or `torch.device`) on which the points are
            created.
        expand_radius: if True, `radius` is expanded into an (N, P) tensor
            filled with the scalar value, matching the type of the padded
            points, before being forwarded.

    Returns:
        A callable taking no arguments that runs `rasterize_points` and, on a
        CUDA device, blocks until the queued GPU work has finished.
    """
    torch.manual_seed(231)
    device = torch.device(device)
    # Comparing a torch.device against the string "cuda" evaluates to False,
    # which would silently skip every synchronize below. Checking the device
    # type is correct and also covers indexed devices such as "cuda:1".
    is_cuda = device.type == "cuda"

    points = torch.randn(N, P, 3, device=device)
    pointclouds = Pointclouds(points=points)

    if expand_radius:
        points_padded = pointclouds.points_padded()
        radius = torch.full((N, P), fill_value=radius).type_as(points_padded)

    args = (pointclouds, img_size, radius, pts_per_pxl)

    # Flush pending setup work so it is not attributed to the first timed call.
    if is_cuda:
        torch.cuda.synchronize(device)

    def fn():
        rasterize_points(*args)
        # CUDA kernels launch asynchronously; wait for completion so the
        # measured time covers the actual rasterization.
        if is_cuda:
            torch.cuda.synchronize(device)

    return fn
def bm_python_vs_cpu_vs_cuda() -> None:
    """Benchmark `rasterize_points` on CPU and, when available, on CUDA.

    A grid of small configurations is timed on CPU under the name
    "RASTERIZE_CPU". The same grid plus several larger configurations
    (including non-square image sizes) is then timed on CUDA under the name
    "RASTERIZE_CUDA". The CUDA pass is skipped when no CUDA device is
    available. Despite the function name, the Python implementation is not
    benchmarked here.
    """
    num_meshes = [1]
    num_points = [10000, 2000]
    image_size = [128, 256]
    radius = [1e-3, 0.01]
    pts_per_pxl = [50, 100]
    expand = [True, False]

    cpu_kwargs = [
        {
            "N": n,
            "P": p,
            "img_size": im,
            "radius": r,
            "pts_per_pxl": pts,
            "device": "cpu",
            "expand_radius": e,
        }
        for n, p, im, r, pts, e in product(
            num_meshes, num_points, image_size, radius, pts_per_pxl, expand
        )
    ]
    benchmark(
        _bm_rasterize_points_with_init, "RASTERIZE_CPU", cpu_kwargs, warmup_iters=1
    )

    # Creating tensors on "cuda" raises on machines without a usable GPU;
    # skip the CUDA pass there instead of failing after the CPU results.
    if not torch.cuda.is_available():
        return

    # Larger workloads that are only practical on the GPU.
    large_cases = [
        {"N": 32, "P": 100000, "img_size": 128, "radius": 0.01, "pts_per_pxl": 50},
        {"N": 8, "P": 200000, "img_size": 512, "radius": 0.01, "pts_per_pxl": 50},
        {"N": 8, "P": 200000, "img_size": 256, "radius": 0.01, "pts_per_pxl": 50},
        {
            "N": 8,
            "P": 200000,
            "img_size": (512, 256),
            "radius": 0.01,
            "pts_per_pxl": 50,
        },
        {
            "N": 8,
            "P": 200000,
            "img_size": (256, 512),
            "radius": 0.01,
            "pts_per_pxl": 50,
        },
    ]
    # Build fresh dicts rather than mutating the CPU configurations in place.
    cuda_kwargs = [{**case, "device": "cuda"} for case in cpu_kwargs + large_cases]
    benchmark(
        _bm_rasterize_points_with_init, "RASTERIZE_CUDA", cuda_kwargs, warmup_iters=1
    )
# Run the rasterization benchmarks when this file is executed as a script.
if __name__ == "__main__":
    bm_python_vs_cpu_vs_cuda()
|