# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import contextlib
import gzip
import os
import unittest
from typing import List

import numpy as np
import torch
from pytorch3d.implicitron.dataset import types
from pytorch3d.implicitron.dataset.dataset_base import FrameData
from pytorch3d.implicitron.dataset.frame_data import FrameDataBuilder
from pytorch3d.implicitron.dataset.utils import (
    get_bbox_from_mask,
    load_16big_png_depth,
    load_1bit_png_mask,
    load_depth,
    load_depth_mask,
    load_image,
    load_mask,
    safe_as_tensor,
    transpose_normalize_image,
)
from pytorch3d.implicitron.tools.config import get_default_args
from pytorch3d.renderer.cameras import PerspectiveCameras

from tests.common_testing import TestCaseMixin
from tests.implicitron.common_resources import get_skateboard_data


class TestFrameDataBuilder(TestCaseMixin, unittest.TestCase):
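    """Tests for FrameDataBuilder and the frame-data loading utilities,
    run against the CO3D "skateboard" test fixture."""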

    def setUp(self):
        torch.manual_seed(42)
        category = "skateboard"
        stack = contextlib.ExitStack()
        self.dataset_root, self.path_manager = stack.enter_context(
            get_skateboard_data()
        )
        self.addCleanup(stack.close)
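        # Target size to which the builder crops/resizes each frame.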
        self.image_height = 768
        self.image_width = 512
        self.frame_data_builder = FrameDataBuilder(
            image_height=self.image_height,
            image_width=self.image_width,
            dataset_root=self.dataset_root,
            path_manager=self.path_manager,
        )
        # Load a single frame annotation from the dataset
        # (see JsonIndexDataset._load_frames()).
        frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz")
        local_file = self.path_manager.get_local_path(frame_file)
        with gzip.open(local_file, "rt", encoding="utf8") as zipfile:
            frame_annots_list = types.load_dataclass(
                zipfile, List[types.FrameAnnotation]
            )
        self.frame_annotation = frame_annots_list[0]
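        # Load the matching sequence annotation, which carries per-sequence
        # metadata such as the category and quality scores.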
        sequence_annotations_file = os.path.join(
            self.dataset_root, category, "sequence_annotations.jgz"
        )
        local_file = self.path_manager.get_local_path(sequence_annotations_file)
        with gzip.open(local_file, "rt", encoding="utf8") as zipfile:
            seq_annots_list = types.load_dataclass(
                zipfile, List[types.SequenceAnnotation]
            )
        seq_annots = {entry.sequence_name: entry for entry in seq_annots_list}
        self.seq_annotation = seq_annots[self.frame_annotation.sequence_name]
        point_cloud = self.seq_annotation.point_cloud
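        # Pre-populate a FrameData with metadata only; the image, mask, depth
        # and camera fields are filled in by the individual tests below.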
        self.frame_data = FrameData(
            frame_number=safe_as_tensor(
                self.frame_annotation.frame_number, torch.long
            ),
            frame_timestamp=safe_as_tensor(
                self.frame_annotation.frame_timestamp, torch.float
            ),
            sequence_name=self.frame_annotation.sequence_name,
            sequence_category=self.seq_annotation.category,
            camera_quality_score=safe_as_tensor(
                self.seq_annotation.viewpoint_quality_score, torch.float
            ),
            point_cloud_quality_score=(
                safe_as_tensor(point_cloud.quality_score, torch.float)
                if point_cloud is not None
                else None
            ),
        )

    def test_frame_data_builder_args(self):
        # test that FrameDataBuilder works with get_default_args
        get_default_args(FrameDataBuilder)

    def test_fix_point_cloud_path(self):
        """Some files in Co3Dv2 have an accidental absolute path stored."""
        original_path = "some_file_path"
        modified_path = self.frame_data_builder._fix_point_cloud_path(original_path)
        self.assertIn(original_path, modified_path)
        self.assertIn(self.frame_data_builder.dataset_root, modified_path)

    def test_load_and_adjust_frame_data(self):
        self.frame_data.image_size_hw = safe_as_tensor(
            self.frame_annotation.image.size, torch.long
        )
        self.frame_data.effective_image_size_hw = self.frame_data.image_size_hw
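        # Load the foreground probability mask and derive a tight bounding
        # box from it.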
        fg_mask_np, mask_path = self.frame_data_builder._load_fg_probability(
            self.frame_annotation
        )
        self.frame_data.mask_path = mask_path
        self.frame_data.fg_probability = safe_as_tensor(fg_mask_np, torch.float)
        mask_thr = self.frame_data_builder.box_crop_mask_thr
        bbox_xywh = get_bbox_from_mask(fg_mask_np, mask_thr)
        self.frame_data.bbox_xywh = safe_as_tensor(bbox_xywh, torch.long)
        self.assertIsNotNone(self.frame_data.mask_path)
        self.assertTrue(torch.is_tensor(self.frame_data.fg_probability))
        self.assertTrue(torch.is_tensor(self.frame_data.bbox_xywh))
        # the bounding box is a single (x, y, w, h) vector
        self.assertEqual(self.frame_data.bbox_xywh.shape, torch.Size([4]))
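        # Load the RGB image and post-process it into a CHW float tensor.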
        image_path = os.path.join(
            self.frame_data_builder.dataset_root, self.frame_annotation.image.path
        )
        image_np = load_image(self.frame_data_builder._local_path(image_path))
        self.assertIsInstance(image_np, np.ndarray)
        self.frame_data.image_rgb = self.frame_data_builder._postprocess_image(
            image_np, self.frame_annotation.image.size, self.frame_data.fg_probability
        )
        self.assertIsInstance(self.frame_data.image_rgb, torch.Tensor)
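        # Load the depth map together with its validity mask.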
        (
            self.frame_data.depth_map,
            depth_path,
            self.frame_data.depth_mask,
        ) = self.frame_data_builder._load_mask_depth(
            self.frame_annotation,
            self.frame_data.fg_probability,
        )
        self.assertTrue(torch.is_tensor(self.frame_data.depth_map))
        self.assertIsNotNone(depth_path)
        self.assertTrue(torch.is_tensor(self.frame_data.depth_mask))
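        # Crop around the metadata bbox (when enabled) and resize to the
        # target size; every per-pixel tensor must end up with the same
        # spatial shape.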
        new_size = (self.image_height, self.image_width)
        if self.frame_data_builder.box_crop:
            self.frame_data.crop_by_metadata_bbox_(
                self.frame_data_builder.box_crop_context,
            )
        # resize the frame; image and mask shapes are asserted below
        self.frame_data.resize_frame_(
            new_size_hw=torch.tensor(new_size, dtype=torch.long),
        )
        self.assertEqual(
            self.frame_data.mask_crop.shape,
            torch.Size([1, self.image_height, self.image_width]),
        )
        self.assertEqual(
            self.frame_data.image_rgb.shape,
            torch.Size([3, self.image_height, self.image_width]),
        )
        self.assertEqual(
            self.frame_data.fg_probability.shape,
            torch.Size([1, self.image_height, self.image_width]),
        )
        self.assertEqual(
            self.frame_data.depth_map.shape,
            torch.Size([1, self.image_height, self.image_width]),
        )
        self.assertEqual(
            self.frame_data.depth_mask.shape,
            torch.Size([1, self.image_height, self.image_width]),
        )
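        # Build the PyTorch3D camera from the frame annotation's viewpoint.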
        self.frame_data.camera = self.frame_data_builder._get_pytorch3d_camera(
            self.frame_annotation,
        )
        self.assertEqual(type(self.frame_data.camera), PerspectiveCameras)

    def test_transpose_normalize_image(self):
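        # transpose_normalize_image maps an HWC uint8 image in [0, 255] to a
        # CHW float image in [0, 1]; this helper inverts that mapping so the
        # round trip can be compared against the original input.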
        def inverse_transpose_normalize_image(image: np.ndarray) -> np.ndarray:
            im = image * 255.0
            return im.transpose((1, 2, 0)).astype(np.uint8)

        # Test 2D input: a single-channel image gains a trailing channel dim.
        input_image = np.array(
            [[10, 20, 30], [40, 50, 60], [70, 80, 90]], dtype=np.uint8
        )
        expected_input = inverse_transpose_normalize_image(
            transpose_normalize_image(input_image)
        )
        self.assertClose(input_image[..., None], expected_input)

        # Test 3D input
        input_image = np.array(
            [
                [[10, 20, 30], [40, 50, 60], [70, 80, 90]],
                [[100, 110, 120], [130, 140, 150], [160, 170, 180]],
                [[190, 200, 210], [220, 230, 240], [250, 255, 255]],
            ],
            dtype=np.uint8,
        )
        expected_input = inverse_transpose_normalize_image(
            transpose_normalize_image(input_image)
        )
        self.assertClose(input_image, expected_input)

    def test_load_image(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.image.path)
        local_path = self.path_manager.get_local_path(path)
        image = load_image(local_path)
        self.assertEqual(image.dtype, np.float32)
        self.assertLessEqual(np.max(image), 1.0)
        self.assertGreaterEqual(np.min(image), 0.0)

    def test_load_mask(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.mask.path)
        path = self.path_manager.get_local_path(path)
        mask = load_mask(path)
        self.assertEqual(mask.dtype, np.float32)
        self.assertLessEqual(np.max(mask), 1.0)
        self.assertGreaterEqual(np.min(mask), 0.0)

    def test_load_depth(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.depth.path)
        path = self.path_manager.get_local_path(path)
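        # The raw depth values are rescaled by the annotation's
        # scale_adjustment factor.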
        depth_map = load_depth(path, self.frame_annotation.depth.scale_adjustment)
        self.assertEqual(depth_map.dtype, np.float32)
        self.assertEqual(len(depth_map.shape), 3)

    def test_load_16big_png_depth(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.depth.path)
        path = self.path_manager.get_local_path(path)
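        # load_16big_png_depth decodes a 16-bit PNG into a 2D float32 depth map.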
        depth_map = load_16big_png_depth(path)
        self.assertEqual(depth_map.dtype, np.float32)
        self.assertEqual(len(depth_map.shape), 2)

    def test_load_1bit_png_mask(self):
        mask_path = os.path.join(
            self.dataset_root, self.frame_annotation.depth.mask_path
        )
        mask_path = self.path_manager.get_local_path(mask_path)
        mask = load_1bit_png_mask(mask_path)
        self.assertEqual(mask.dtype, np.float32)
        self.assertEqual(len(mask.shape), 2)

    def test_load_depth_mask(self):
        mask_path = os.path.join(
            self.dataset_root, self.frame_annotation.depth.mask_path
        )
        mask_path = self.path_manager.get_local_path(mask_path)
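        # Unlike load_1bit_png_mask, load_depth_mask returns the mask with a
        # leading channel dimension.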
        mask = load_depth_mask(mask_path)
        self.assertEqual(mask.dtype, np.float32)
        self.assertEqual(len(mask.shape), 3)