Update Code
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
- .gitattributes +3 -0
- .gitignore +157 -0
- app.py +149 -9
- assets/examples/bicycle/_DSC8679.JPG +0 -0
- assets/examples/bicycle/_DSC8689.JPG +0 -0
- assets/examples/bonsai/DSCF5565.JPG +0 -0
- assets/examples/bonsai/DSCF5575.JPG +0 -0
- assets/examples/garden/DSC07956.JPG +0 -0
- assets/examples/garden/DSC07960.JPG +0 -0
- assets/examples/kitchen/0.jpg +0 -0
- assets/examples/kitchen/64.jpg +0 -0
- assets/examples/sofa/000000.jpg +0 -0
- assets/examples/sofa/000008.jpg +0 -0
- configs/model_config.yaml +20 -0
- requirements.txt +41 -0
- scannetv2-labels.combined.tsv +608 -0
- src/datasets/megadepth.py +125 -0
- src/datasets/scannet.py +109 -0
- src/datasets/scannetpp.py +107 -0
- src/datasets_preprocess/scannet_preprocess.py +209 -0
- src/datasets_preprocess/scannetpp_preprocess.py +227 -0
- src/gaussian_head.py +142 -0
- src/infer.py +23 -0
- src/losses.py +193 -0
- src/lseg.py +171 -0
- src/model.py +176 -0
- src/ptv3.py +13 -0
- src/train.py +73 -0
- src/utils/camera_utils.py +60 -0
- src/utils/cuda_splatting.py +216 -0
- src/utils/gaussian_model.py +160 -0
- src/utils/graphics_utils.py +77 -0
- src/utils/points_process.py +37 -0
- src/utils/sh_utils.py +117 -0
- src/utils/visualization_utils.py +355 -0
- submodules/PointTransformerV3/.gitmodules +3 -0
- submodules/PointTransformerV3/LICENSE +21 -0
- submodules/PointTransformerV3/Pointcept/.github/workflows/formatter.yml +20 -0
- submodules/PointTransformerV3/Pointcept/.gitignore +16 -0
- submodules/PointTransformerV3/Pointcept/LICENSE +21 -0
- submodules/PointTransformerV3/Pointcept/README.md +896 -0
- submodules/PointTransformerV3/Pointcept/configs/_base_/dataset/scannetpp.py +104 -0
- submodules/PointTransformerV3/Pointcept/configs/_base_/default_runtime.py +39 -0
- submodules/PointTransformerV3/Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py +313 -0
- submodules/PointTransformerV3/Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py +282 -0
- submodules/PointTransformerV3/Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py +232 -0
- submodules/PointTransformerV3/Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py +176 -0
- submodules/PointTransformerV3/Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py +342 -0
- submodules/PointTransformerV3/Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py +316 -0
- submodules/PointTransformerV3/Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py +292 -0
.gitattributes
CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+wheel/*.whl filter=lfs diff=lfs merge=lfs -text
+*.whl filter=lfs diff=lfs merge=lfs -text
+checkpoints/* filter=lfs diff=lfs merge=lfs -text
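The three new rules route the bundled CUDA wheels and the model checkpoints through Git LFS. As a hedged illustration (not part of the commit), they are what `git lfs track` would append for each pattern; the snippet mirrors the subprocess/shlex idiom used in app.py further down:

import subprocess, shlex

# Illustrative only: reproduce the three added .gitattributes rules via git-lfs.
for pattern in ["wheel/*.whl", "*.whl", "checkpoints/*"]:
    subprocess.run(shlex.split(f"git lfs track '{pattern}'"), check=True)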
.gitignore
ADDED
@@ -0,0 +1,157 @@
+# *.pth
+*.pt
+submodules/diff-gaussian-rasterization
+submodules/simple-knn
+# checkpoints/
+output*
+.gradio/
+
+core.*
+logs/*
+/data/
+# checkpoints/
+video*
+train_images*
+test_images_save*
+/pl_main
+/to_be_test
+/test_lsm
+/test_img
+/figure3
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+video/
+scannet_processed_scenes_1.tar.gz
+test_results/*
+output/*
+test_images
+colmap_scannet
+/test_lseg
app.py
CHANGED
@@ -1,12 +1,152 @@
-import os
-import
+import os, subprocess, shlex, sys, gc
+import time
+import torch
+import numpy as np
+import shutil
+import argparse
 import gradio as gr
-import
-
+import uuid
+import spaces
+#
 
-
+subprocess.run(shlex.split("pip install wheel/torch_scatter-2.1.2+pt21cu121-cp310-cp310-linux_x86_64.whl"))
+subprocess.run(shlex.split("pip install wheel/flash_attn-2.6.3+cu123torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"))
+subprocess.run(shlex.split("pip install wheel/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl"))
+subprocess.run(shlex.split("pip install wheel/simple_knn-0.0.0-cp310-cp310-linux_x86_64.whl"))
+subprocess.run(shlex.split("pip install wheel/curope-0.0.0-cp310-cp310-linux_x86_64.whl"))
+subprocess.run(shlex.split("pip install wheel/pointops-1.0-cp310-cp310-linux_x86_64.whl"))
 
-
-
-
-
+from src.utils.visualization_utils import render_video_from_file
+from src.model import LSM_MASt3R
+
+model = LSM_MASt3R.from_pretrained("checkpoints/pretrained_model/checkpoint-40.pth")
+model = model.eval()
+
+
+@spaces.GPU(duration=80)
+def process(inputfiles, input_path=None):
+    # Create a unique cache directory
+    cache_dir = os.path.join('outputs', str(uuid.uuid4()))
+    os.makedirs(cache_dir, exist_ok=True)
+
+    if input_path is not None:
+        imgs_path = './assets/examples/' + input_path
+        imgs_names = sorted(os.listdir(imgs_path))
+
+        inputfiles = []
+        for imgs_name in imgs_names:
+            file_path = os.path.join(imgs_path, imgs_name)
+            print(file_path)
+            inputfiles.append(file_path)
+        print(inputfiles)
+
+    filelist = inputfiles
+    if len(filelist) != 2:
+        gr.Warning("Please select 2 images")
+        shutil.rmtree(cache_dir)  # clean up the cache directory
+        return None, None, None, None, None, None
+
+    ply_path = os.path.join(cache_dir, 'gaussians.ply')
+    # render_video_from_file(filelist, model, output_path=cache_dir, resolution=224)
+    render_video_from_file(filelist, model, output_path=cache_dir, resolution=512)
+
+    rgb_video_path = os.path.join(cache_dir, 'moved', 'output_images_video.mp4')
+    depth_video_path = os.path.join(cache_dir, 'moved', 'output_depth_video.mp4')
+    feature_video_path = os.path.join(cache_dir, 'moved', 'output_fmap_video.mp4')
+
+    return filelist, rgb_video_path, depth_video_path, feature_video_path, ply_path, ply_path
+
+
+_TITLE = 'LargeSpatialModel'
+_DESCRIPTION = '''
+<div style="display: flex; justify-content: center; align-items: center;">
+    <div style="width: 100%; text-align: center; font-size: 30px;">
+        <strong>Large Spatial Model: End-to-end Unposed Images to Semantic 3D</strong>
+    </div>
+</div>
+<p></p>
+
+<div align="center">
+    <a style="display:inline-block" href="https://arxiv.org/abs/2410.18956"><img src="https://img.shields.io/badge/ArXiv-2410.18956-b31b1b?logo=arxiv" alt='arxiv'></a>
+    <a style="display:inline-block" href="https://largespatialmodel.github.io/"><img src='https://img.shields.io/badge/Project_Page-ff7512?logo=lightning'></a>
+    <a title="Social" href="https://x.com/WayneINR" target="_blank" rel="noopener noreferrer" style="display: inline-block;">
+        <img src="https://www.obukhov.ai/img/badges/badge-social.svg" alt="social">
+    </a>
+
+</div>
+<p></p>
+
+* Official demo of: [LargeSpatialModel: End-to-end Unposed Images to Semantic 3D](https://largespatialmodel.github.io/).
+* Examples for direct viewing: you can simply click the examples (in the bottom of the page), to quickly view the results on representative data.
+'''
+
+block = gr.Blocks().queue()
+with block:
+    gr.Markdown(_DESCRIPTION)
+
+    with gr.Column(variant="panel"):
+        with gr.Tab("Input"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    inputfiles = gr.File(file_count="multiple", label="Load Images")
+                    input_path = gr.Textbox(visible=False, label="example_path")
+                with gr.Column(scale=1):
+                    image_gallery = gr.Gallery(
+                        label="Gallery",
+                        show_label=False,
+                        elem_id="gallery",
+                        columns=[2],
+                        height=300,  # fixed height
+                        object_fit="cover"  # make the images fill the available space
+                    )
+
+            button_gen = gr.Button("Start Reconstruction", elem_id="button_gen")
+            processing_msg = gr.Markdown("Processing...", visible=False, elem_id="processing_msg")
+
+
+    with gr.Column(variant="panel"):
+        with gr.Tab("Output"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    rgb_video = gr.Video(label="RGB Video", autoplay=True)
+                with gr.Column(scale=1):
+                    feature_video = gr.Video(label="Feature Video", autoplay=True)
+                with gr.Column(scale=1):
+                    depth_video = gr.Video(label="Depth Video", autoplay=True)
+            with gr.Row():
+                with gr.Group():
+                    output_model = gr.Model3D(
+                        label="3D Dense Model under Gaussian Splats Formats, need more time to visualize",
+                        interactive=False,
+                        camera_position=[0.5, 0.5, 1],  # offset slightly for a better view of the model
+                        height=600,
+                    )
+                    gr.Markdown(
+                        """
+                        <div class="model-description">
+                            Use the left mouse button to rotate, the scroll wheel to zoom, and the right mouse button to move.
+                        </div>
+                        """
+                    )
+            with gr.Row():
+                output_file = gr.File(label="PLY File")
+
+    examples = gr.Examples(
+        examples=[
+            "sofa",
+        ],
+        inputs=[input_path],
+        outputs=[image_gallery, rgb_video, depth_video, feature_video, output_model, output_file],
+        fn=lambda x: process(inputfiles=None, input_path=x),
+        cache_examples=True,
+        label="Examples"
+    )
+
+
+    button_gen.click(
+        process,
+        inputs=[inputfiles],
+        outputs=[image_gallery, rgb_video, depth_video, feature_video, output_model, output_file],
+    )
+
+block.launch(server_name="0.0.0.0", share=False)
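The new app.py installs the prebuilt CUDA wheels at startup, loads the pretrained LSM_MASt3R checkpoint once, and wraps a two-image process() function behind the ZeroGPU decorator. Below is a minimal headless sketch of the same inference path (not part of the commit), assuming the repo layout and checkpoint path shown above and taking LSM_MASt3R.from_pretrained and render_video_from_file verbatim from this file; the extension wheels must already be installed:

import os, uuid
import torch

from src.model import LSM_MASt3R
from src.utils.visualization_utils import render_video_from_file

# Load the same checkpoint the demo uses and switch to eval mode.
model = LSM_MASt3R.from_pretrained("checkpoints/pretrained_model/checkpoint-40.pth").eval()

# Two unposed views of one scene, as in the demo's "sofa" example.
pair = ["assets/examples/sofa/000000.jpg", "assets/examples/sofa/000008.jpg"]

out_dir = os.path.join("outputs", str(uuid.uuid4()))
os.makedirs(out_dir, exist_ok=True)

with torch.no_grad():
    # process() above expects this call to leave gaussians.ply in out_dir and
    # the RGB/depth/feature videos under out_dir/moved/.
    render_video_from_file(pair, model, output_path=out_dir, resolution=512)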
assets/examples/bicycle/_DSC8679.JPG
ADDED
assets/examples/bicycle/_DSC8689.JPG
ADDED
assets/examples/bonsai/DSCF5565.JPG
ADDED
assets/examples/bonsai/DSCF5575.JPG
ADDED
assets/examples/garden/DSC07956.JPG
ADDED
assets/examples/garden/DSC07960.JPG
ADDED
assets/examples/kitchen/0.jpg
ADDED
assets/examples/kitchen/64.jpg
ADDED
assets/examples/sofa/000000.jpg
ADDED
assets/examples/sofa/000008.jpg
ADDED
configs/model_config.yaml
ADDED
@@ -0,0 +1,20 @@
+mast3r_config:
+  pretrained_model_name_or_path: "checkpoints/pretrained_model/MASt3R_ViTLarge_BaseDecoder_512_catmlpdpt_metric.pth"
+
+point_transformer_config:
+  enc_depths: [1, 1, 1, 3, 1]
+  enc_channels: [32, 64, 128, 256, 512]
+  enc_num_head: [2, 4, 8, 16, 32]
+  enc_patch_size: [1024, 1024, 1024, 1024, 1024]
+  dec_depths: [1, 1, 1, 1]
+  dec_channels: [64, 64, 128, 256]
+  dec_num_head: [4, 4, 8, 16]
+  dec_patch_size: [1024, 1024, 1024, 1024]
+
+gaussian_head_config:
+  rgb_residual: true
+  d_gs_feats: 32
+
+lseg_config:
+  pretrained_model_name_or_path: "checkpoints/pretrained_model/lang_seg.ckpt"
+  half_res: true
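For reference, the nested sections above parse into plain dicts with PyYAML (listed in requirements.txt). Illustrative only; this is not the repo's own loading code:

import yaml

with open("configs/model_config.yaml") as f:
    cfg = yaml.safe_load(f)

# Each top-level section becomes a dict of plain Python values.
print(cfg["point_transformer_config"]["enc_channels"])  # [32, 64, 128, 256, 512]
print(cfg["gaussian_head_config"]["d_gs_feats"])         # 32
print(cfg["lseg_config"]["half_res"])                    # True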
requirements.txt
ADDED
@@ -0,0 +1,41 @@
+torch==2.1.2
+torchvision==0.16.2
+pytorch-lightning==2.1.2
+open3d
+roma
+gradio
+matplotlib
+tqdm
+opencv-python
+scipy
+einops
+trimesh
+tensorboard
+pyglet<2
+numpy<2.0
+huggingface-hub[torch]>=0.22
+ninja
+scikit-learn
+
+
+arrow
+pandas
+torch-tb-profiler
+jaxtyping
+ninja
+h5py
+pyyaml
+moviepy==1.0.3
+jupyter
+lpips
+torch-geometric
+spconv-cu120
+git+https://github.com/openai/CLIP.git
+sharedarray
+tensorboardx
+yapf
+addict
+plyfile
+termcolor
+timm
+
scannetv2-labels.combined.tsv
ADDED
@@ -0,0 +1,608 @@
+id raw_category category count nyu40id eigen13id nyuClass nyu40class eigen13class ModelNet40 ModelNet10 ShapeNetCore55 synsetoffset wnsynsetid wnsynsetkey mpcat40 mpcat40index
+1 wall wall 8277 1 12 wall wall Wall n04546855 wall.n.01 wall 1
+2 chair chair 4646 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
+22 books book 1678 23 2 book books Books n02870526 book.n.11 objects 39
+3 floor floor 1553 2 5 floor floor Floor n03365592 floor.n.01 floor 2
+5 door door 1483 8 12 door door Wall door n03221720 door.n.01 door 4
+1163 object object 1313 40 7 otherprop Objects objects 39
+16 window window 1209 9 13 window window Window n04587648 window.n.01 window 9
+4 table table 1170 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
+56 trash can trash can 1090 39 6 garbage bin otherfurniture Furniture trash_bin 2747177 n02747177 ashcan.n.01 objects 39
[The remaining added rows, one per raw ScanNet label (pillow, picture, ceiling, … through 1277 "laundry bag"), follow the same format and map each raw category to NYU40, Eigen13, ModelNet, ShapeNet/WordNet, and mpcat40 labels; the diff viewer cuts the file off before the end of its 608 lines — see the raw diff for the full table.]
|
414 |
+
1278 sponge sponge 3 40 7 otherprop Objects n01906749 sponge.n.04 objects 39
|
415 |
+
116 seating seat 3 39 6 furniture otherfurniture Furniture n04161981 seat.n.03 furniture 36
|
416 |
+
1184 folded chairs folded chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
|
417 |
+
1279 lotion bottle lotion bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
418 |
+
212 can can 2 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39
|
419 |
+
1280 lunch box lunch box 2 40 7 otherprop Objects objects 39
|
420 |
+
1281 food display food display 2 40 7 otherprop Objects misc 40
|
421 |
+
794 storage shelf storage shelf 2 40 7 otherprop Objects shelving 31
|
422 |
+
1282 sliding wood door sliding wood door 2 40 7 otherprop Objects door 4
|
423 |
+
955 pants pants 2 40 7 otherprop Objects n04489008 trouser.n.01 clothes 38
|
424 |
+
387 wood wood 2 40 7 otherprop Objects misc 40
|
425 |
+
69 boards board 2 38 7 board otherstructure Objects board_panel 35
|
426 |
+
65 bottles bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
427 |
+
523 washcloth washcloth 2 40 7 otherprop Objects n04554523 washcloth.n.01 towel 20
|
428 |
+
389 workbench workbench 2 39 6 bench otherfurniture Furniture bench table 4379243 n04600486 workbench.n.01 table 5
|
429 |
+
29 open kitchen cabinet kitchen cabinet 2 3 6 cabinet cabinet Furniture n02933112 cabinet.n.01 cabinet 7
|
430 |
+
1283 organizer shelf organizer shelf 2 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
|
431 |
+
146 frame frame 2 38 7 otherstructure Objects misc 40
|
432 |
+
130 cups cup 2 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
|
433 |
+
372 exercise ball exercise ball 2 40 7 ball otherprop Objects n04285146 sports_equipment.n.01 gym_equipment 33
|
434 |
+
289 easel easel 2 39 6 stand otherfurniture Furniture n03262809 easel.n.01 furniture 36
|
435 |
+
440 garbage bag garbage bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
|
436 |
+
321 roomba roomba 2 40 7 otherprop Objects objects 39
|
437 |
+
976 garage door garage door 2 38 7 garage door otherstructure Objects door door 4
|
438 |
+
1256 luggage rack luggage stand 2 39 6 stand otherfurniture Furniture n04038440 shelving 31
|
439 |
+
1284 bike lock bike lock 2 40 7 otherprop Objects objects 39
|
440 |
+
1285 briefcase briefcase 2 40 7 otherprop Objects n02900705 briefcase.n.01 objects 39
|
441 |
+
357 hand towel hand towel 2 27 7 towel towel Objects n03490006 hand_towel.n.01 towel 20
|
442 |
+
1286 bath products bath product 2 40 7 otherprop Objects objects 39
|
443 |
+
1287 star star 2 40 7 otherprop Objects n09444783 star.n.03 misc 40
|
444 |
+
365 map map 2 40 7 map otherprop Objects n03720163 map.n.01 misc 40
|
445 |
+
1288 coffee bean bag coffee bean bag 2 37 7 bag bag Objects suitcase 2773838 n02773838 bag.n.06 objects 39
|
446 |
+
81 headboard headboard 2 39 6 headboard otherfurniture Furniture n03502200 headboard.n.01 bed 11
|
447 |
+
1289 ipad ipad 2 40 7 otherprop Objects objects 39
|
448 |
+
1290 display rack display rack 2 39 6 stand otherfurniture Furniture n04038440 rack.n.05 shelving 31
|
449 |
+
948 traffic cone traffic cone 2 40 7 cone otherprop Objects cone objects 39
|
450 |
+
174 toiletry toiletry 2 40 7 otherprop Objects n04447443 toiletry.n.01 objects 39
|
451 |
+
1028 canopy canopy 2 40 7 otherprop Objects misc 40
|
452 |
+
1291 massage chair massage chair 2 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
|
453 |
+
1292 paper organizer paper organizer 2 40 7 otherprop Objects objects 39
|
454 |
+
1005 barricade barricade 2 40 7 otherprop Objects misc 40
|
455 |
+
235 platform platform 2 38 7 otherstructure Objects misc 40
|
456 |
+
1293 cap cap 2 40 7 hat otherprop Objects n03497657 hat.n.01 clothes 38
|
457 |
+
1294 dumbbell plates dumbbell plates 2 40 7 otherprop Objects objects 39
|
458 |
+
1295 elevator elevator 2 38 7 otherstructure Objects misc 40
|
459 |
+
1296 cooking pan cooking pan 2 40 7 pan otherprop Objects n03880531 pan.n.01 objects 39
|
460 |
+
1297 trash bag trash bag 2 37 7 bag bag Objects objects 39
|
461 |
+
1298 santa santa 2 40 7 otherprop Objects misc 40
|
462 |
+
1299 jewelry box jewelry box 2 29 7 box box Objects n02883344 box.n.01 objects 39
|
463 |
+
1300 boat boat 2 40 7 otherprop Objects misc 40
|
464 |
+
1301 sock sock 2 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38
|
465 |
+
1051 kinect kinect 2 40 7 kinect otherprop Objects objects 39
|
466 |
+
566 crib crib 2 39 6 crib otherfurniture Furniture furniture 36
|
467 |
+
1302 plastic storage bin plastic storage bin 2 40 7 container otherprop Objects n03094503 container.n.01 objects 39
|
468 |
+
1062 cooler cooler 2 24 6 refridgerator refridgerator Furniture n03102654 cooler.n.01 appliances 37
|
469 |
+
1303 kitchen apron kitchen apron 2 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
|
470 |
+
1304 dishwashing soap bottle dishwashing soap bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
471 |
+
1305 xbox controller xbox controller 2 40 7 otherprop Objects objects 39
|
472 |
+
1306 banana holder banana holder 2 40 7 otherprop Objects objects 39
|
473 |
+
298 ping pong paddle ping pong paddle 2 40 7 otherprop Objects table 5
|
474 |
+
1307 airplane airplane 2 40 7 otherprop Objects misc 40
|
475 |
+
1308 conditioner bottle conditioner bottle 2 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
476 |
+
1309 tea kettle tea kettle 2 40 7 tea kettle otherprop Objects n04397768 teakettle.n.01 objects 39
|
477 |
+
43 bedframe bedframe 2 39 6 otherfurniture Furniture n02822579 bedstead.n.01 bed 11
|
478 |
+
1310 wood beam wood beam 2 38 7 otherstructure Objects beam 29
|
479 |
+
593 toilet paper package toilet paper package 2 40 7 otherprop Objects objects 39
|
480 |
+
1311 wall mounted coat rack wall mounted coat rack 2 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31
|
481 |
+
1312 film light film light 2 40 7 otherprop Objects lighting 28
|
482 |
+
749 ceiling lamp ceiling lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
|
483 |
+
623 chain chain 1 40 7 otherprop Objects chair 3
|
484 |
+
1313 sofa sofa 1 6 9 sofa sofa Sofa sofa sofa sofa 4256520 n04256520 sofa.n.01 sofa 10
|
485 |
+
99 closet wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36
|
486 |
+
265 sweater sweater 1 40 7 otherprop Objects n04370048 sweater.n.01 clothes 38
|
487 |
+
1314 kitchen mixer kitchen mixer 1 40 7 otherprop Objects appliances 37
|
488 |
+
99 wardrobe wardrobe 1 39 6 wardrobe otherfurniture Furniture wardrobe n04550184 wardrobe.n.01 furniture 36
|
489 |
+
1315 water softener water softener 1 40 7 otherprop Objects misc 40
|
490 |
+
448 banister banister 1 38 7 banister otherstructure Objects n02788148 bannister.n.02 railing 30
|
491 |
+
257 trolley trolley 1 40 7 trolley otherprop Objects n04335435 streetcar.n.01 misc 40
|
492 |
+
1316 pantry shelf pantry shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
|
493 |
+
786 sofa bed sofa bed 1 4 1 bed bed Bed bed bed bed 2818832 n02818832 bed.n.01 bed 11
|
494 |
+
801 loofa loofa 1 40 7 otherprop Objects objects 39
|
495 |
+
972 shower faucet handle shower faucet handle 1 40 7 handle otherprop Objects shower 23
|
496 |
+
1317 toy piano toy piano 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39
|
497 |
+
1318 fish fish 1 40 7 otherprop Objects n02512053 fish.n.01 objects 39
|
498 |
+
75 file cabinets file cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n03337140 file.n.03 cabinet 7
|
499 |
+
657 cat litter box cat litter box 1 29 7 box box Objects objects 39
|
500 |
+
561 electric panel electric panel 1 40 7 otherprop Objects misc 40
|
501 |
+
93 suitcases suitcase 1 40 7 luggage otherprop Objects n02774630 baggage.n.01 objects 39
|
502 |
+
513 curtain rod curtain rod 1 38 7 curtain rod otherstructure Objects curtain 12
|
503 |
+
411 bunk bed bunk bed 1 39 6 bunk bed otherfurniture Furniture bed bed bed 2818832 n02920259 bunk_bed.n.01 bed 11
|
504 |
+
1122 chandelier chandelier 1 38 7 chandelier otherstructure Objects n03005285 chandelier.n.01 lighting 28
|
505 |
+
922 tape tape 1 40 7 tape otherprop Objects objects 39
|
506 |
+
88 plates plate 1 40 7 otherprop Objects n03959485 plate.n.04 objects 39
|
507 |
+
518 alarm alarm 1 40 7 alarm otherprop Objects clock 3046257 n02694662 alarm_clock.n.01 objects 39
|
508 |
+
814 fire hose fire hose 1 40 7 otherprop Objects n03346004 fire_hose.n.01 misc 40
|
509 |
+
1319 toy dinosaur toy dinosaur 1 40 7 toy otherprop Objects n03964744 plaything.n.01 objects 39
|
510 |
+
1320 cone cone 1 40 7 otherprop Objects objects 39
|
511 |
+
649 glass doors glass door 1 8 12 door door Wall door n03221720 door.n.01 door 4
|
512 |
+
607 hatrack hatrack 1 40 7 otherprop Objects n03059103 coatrack.n.01 shelving 31
|
513 |
+
819 subwoofer subwoofer 1 40 7 speaker otherprop Objects speaker 3691459 n04349401 subwoofer.n.01 objects 39
|
514 |
+
1321 fire sprinkler fire sprinkler 1 40 7 otherprop Objects misc 40
|
515 |
+
1322 trash cabinet trash cabinet 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
|
516 |
+
1204 pantry walls pantry wall 1 1 12 wall wall Wall n04546855 wall.n.01 wall 1
|
517 |
+
227 photo photo 1 40 7 photo otherprop Objects n03925226 photograph.n.01 picture 6
|
518 |
+
817 barrier barrier 1 40 7 otherprop Objects n02796623 barrier.n.01 misc 40
|
519 |
+
130 stacks of cups cup 1 40 7 otherprop Objects n03147509 cup.n.01 objects 39
|
520 |
+
712 beachball beachball 1 40 7 ball otherprop Objects n02814224 beach_ball.n.01 objects 39
|
521 |
+
1323 folded boxes folded boxes 1 40 7 otherprop Objects objects 39
|
522 |
+
1324 contact lens solution bottle contact lens solution bottle 1 40 7 bottle otherprop Objects bottle bottle 2876657 n02876657 bottle.n.01 objects 39
|
523 |
+
673 covered box covered box 1 29 7 box box Objects objects 39
|
524 |
+
459 folder folder 1 40 7 folder otherprop Objects n03376279 folder.n.02 objects 39
|
525 |
+
643 mail trays mail tray 1 40 7 mail tray otherprop Objects objects 39
|
526 |
+
238 slipper slipper 1 40 7 otherprop Objects n04241394 slipper.n.01 clothes 38
|
527 |
+
765 magazine rack magazine rack 1 39 6 stand otherfurniture Furniture n03704549 magazine_rack.n.01 shelving 31
|
528 |
+
1008 sticker sticker 1 40 7 sticker otherprop Objects n07272545 gummed_label.n.01 objects 39
|
529 |
+
225 lotion lotion 1 40 7 otherprop Objects n03690938 lotion.n.01 objects 39
|
530 |
+
1083 buddha buddha 1 40 7 otherprop Objects objects 39
|
531 |
+
813 file organizer file organizer 1 40 7 otherprop Objects objects 39
|
532 |
+
138 paper towel rolls paper towel roll 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20
|
533 |
+
1145 night lamp night lamp 1 35 7 lamp lamp Objects lamp lamp 3636649 n03636649 lamp.n.02 lighting 28
|
534 |
+
796 fuse box fuse box 1 40 7 otherprop Objects misc 40
|
535 |
+
1325 knife block knife block 1 40 7 otherprop Objects objects 39
|
536 |
+
363 furnace furnace 1 39 6 furnace otherfurniture Furniture n03404449 furnace.n.01
|
537 |
+
1174 cd cases cd case 1 40 7 otherprop Objects objects 39
|
538 |
+
38 stools stool 1 40 7 stool otherprop Objects stool n04326896 stool.n.01 stool 19
|
539 |
+
1326 hand sanitzer dispenser hand sanitzer dispenser 1 40 7 otherprop Objects n04254120 soap_dispenser.n.01 objects 39
|
540 |
+
997 teapot teapot 1 40 7 tea pot otherprop Objects n04398044 teapot.n.01 objects 39
|
541 |
+
1327 pen holder pen holder 1 40 7 otherprop Objects objects 39
|
542 |
+
1328 tray rack tray rack 1 40 7 otherprop Objects objects 39
|
543 |
+
1329 wig wig 1 40 7 otherprop Objects n04584207 wig.n.01 objects 39
|
544 |
+
182 switch switch 1 40 7 otherprop Objects n04372370 switch.n.01 misc 40
|
545 |
+
280 plastic containers plastic container 1 40 7 container otherprop Objects n03094503 container.n.01 objects 39
|
546 |
+
1330 night light night light 1 40 7 otherprop Objects lighting 28
|
547 |
+
1331 notepad notepad 1 40 7 otherprop Objects objects 39
|
548 |
+
1332 mail bin mail bin 1 40 7 otherprop Objects misc 40
|
549 |
+
1333 elevator button elevator button 1 40 7 otherprop Objects misc 40
|
550 |
+
939 gaming wheel gaming wheel 1 40 7 otherprop Objects objects 39
|
551 |
+
1334 drum set drum set 1 40 7 otherprop Objects objects 39
|
552 |
+
480 cosmetic bag cosmetic bag 1 37 7 bag bag Objects objects 39
|
553 |
+
907 coffee mug coffee mug 1 40 7 vessel otherprop Objects cup or mug 3797390 n03063599 coffee_mug.n.01 objects 39
|
554 |
+
1335 closet shelf closet shelf 1 15 6 shelves shelves Furniture bookshelf bookshelf 2871439 n02871439 bookshelf.n.01 shelving 31
|
555 |
+
1336 baby mobile baby mobile 1 40 7 otherprop Objects objects 39
|
556 |
+
829 diaper bin diaper bin 1 40 7 bin otherprop Objects objects 39
|
557 |
+
947 door wall door wall 1 1 12 wall wall Wall wall 1
|
558 |
+
1116 stepstool stepstool 1 40 7 step stool otherprop Objects objects 39
|
559 |
+
599 paper shredder shredder 1 40 7 otherprop Objects n04210120 shredder.n.01 objects 39
|
560 |
+
733 dress rack dress rack 1 40 7 otherprop Objects n03238762 dress_rack.n.01 misc 40
|
561 |
+
123 cover cover 1 40 7 blanket otherprop Objects objects 39
|
562 |
+
506 shopping bag shopping bag 1 37 7 bag bag Objects n04204081 shopping_bag.n.01 objects 39
|
563 |
+
569 sliding door sliding door 1 8 12 door door Wall door n04239074 sliding_door.n.01 door 4
|
564 |
+
1337 exercise bike exercise bike 1 40 7 machine otherprop Objects n04210120 shredder.n.01 gym_equipment 33
|
565 |
+
1338 recliner chair recliner chair 1 5 4 chair chair Chair chair chair chair 3001627 n03238762 dress_rack.n.01 chair 3
|
566 |
+
1314 kitchenaid mixer kitchen mixer 1 40 7 otherprop Objects appliances 37
|
567 |
+
1339 soda can soda can 1 40 7 can otherprop Objects can 2946921 n02946921 can.n.01 objects 39
|
568 |
+
1340 stovetop stovetop 1 38 7 stove otherstructure Objects stove 4330267 n04330267 stove.n.02 appliances 37
|
569 |
+
851 stepladder stepladder 1 39 6 ladder otherfurniture Furniture stairs n04315599 step_ladder.n.01 stairs 16
|
570 |
+
142 tap tap 1 40 7 faucet otherprop Objects faucet 3325088 n04559451 water_faucet.n.01 objects 39
|
571 |
+
436 cable cable 1 40 7 cables otherprop Objects objects 39
|
572 |
+
1341 baby changing station baby changing station 1 39 6 otherfurniture Furniture furniture 36
|
573 |
+
1342 costume costume 1 21 7 clothes clothes Objects n02728440 apparel.n.01 clothes 38
|
574 |
+
885 rocking chair rocking chair 1 5 4 chair chair Chair chair chair chair 3001627 n04099969 rocking_chair.n.01 chair 3
|
575 |
+
693 binder binder 1 40 7 binder otherprop Objects objects 39
|
576 |
+
815 media center media center 1 3 6 cabinet cabinet Furniture cabinet 2933112 n02933112 cabinet.n.01 cabinet 7
|
577 |
+
401 towel rack towel rack 1 40 7 otherprop Objects n04459773 towel_rack.n.01 misc 40
|
578 |
+
1343 medal medal 1 40 7 otherprop Objects objects 39
|
579 |
+
1184 stack of folded chairs folded chair 1 5 4 chair chair Chair chair chair chair 3001627 n03001627 chair.n.01 chair 3
|
580 |
+
1344 telescope telescope 1 40 7 otherprop Objects n04403638 telescope.n.01 objects 39
|
581 |
+
1345 closet doorframe closet doorframe 1 8 12 door door Wall door door 4
|
582 |
+
160 glass glass 1 38 7 glass otherstructure Objects n03438257 glass.n.02 misc 40
|
583 |
+
1126 baseball cap baseball cap 1 40 7 otherprop Objects cap 2954340 n02799323 baseball_cap.n.01 clothes 38
|
584 |
+
1346 battery disposal jar battery disposal jar 1 40 7 jar otherprop Objects jar 3593526 n03593526 jar.n.01 objects 39
|
585 |
+
332 mop mop 1 40 7 otherprop Objects n04367480 swab.n.02 objects 39
|
586 |
+
397 tank tank 1 40 7 otherprop Objects objects 39
|
587 |
+
643 mail tray mail tray 1 40 7 mail tray otherprop Objects objects 39
|
588 |
+
551 centerpiece centerpiece 1 40 7 centerpiece otherprop Objects n02994419 centerpiece.n.02 objects 39
|
589 |
+
1163 stick object 1 40 7 stick otherprop Objects objects 39
|
590 |
+
1347 closet floor closet floor 1 2 5 floor floor Floor n03365592 floor.n.01 floor 2
|
591 |
+
1348 dryer sheets dryer sheets 1 40 7 otherprop Objects objects 39
|
592 |
+
803 bycicle bycicle 1 40 7 otherprop Objects misc 40
|
593 |
+
484 flower stand flower stand 1 39 6 stand otherfurniture Furniture furniture 36
|
594 |
+
1349 air mattress air mattress 1 4 1 bed bed Bed bed bed bed 2818832 n02690809 air_mattress.n.01 bed 11
|
595 |
+
1350 clip clip 1 40 7 otherprop Objects objects 39
|
596 |
+
222 side table side table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
|
597 |
+
1253 pizza boxes pizza box 1 29 7 box box Objects n02883344 box.n.01 objects 39
|
598 |
+
1351 display display 1 39 7 otherfurniture Furniture n03211117 display.n.06 misc 40
|
599 |
+
1352 postcard postcard 1 40 7 otherprop Objects objects 39
|
600 |
+
828 display sign display sign 1 40 7 sign otherprop Objects misc 40
|
601 |
+
1353 paper towel paper towel 1 40 7 paper towel otherprop Objects n03887697 paper_towel.n.01 towel 20
|
602 |
+
612 boots boot 1 40 7 shoe otherprop Objects n04199027 shoe.n.01 clothes 38
|
603 |
+
1354 tennis racket bag tennis racket bag 1 40 7 otherprop Objects objects 39
|
604 |
+
1355 air hockey table air hockey table 1 7 10 table table Table table table table 4379243 n04379243 table.n.02 table 5
|
605 |
+
1301 socks sock 1 21 7 clothes clothes Objects n04254777 sock.n.01 clothes 38
|
606 |
+
1356 food bag food bag 1 37 7 bag bag Objects objects 39
|
607 |
+
1199 clothes hangers clothes hanger 1 40 7 otherprop Objects n03057920 coat_hanger.n.01 misc 40
|
608 |
+
1357 starbucks cup starbucks cup 1 40 7 cup otherprop Objects cup cup or mug 3797390 n03797390 mug.n.04 objects 39
|
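The rows above belong to scannetv2-labels.combined.tsv, which maps each raw ScanNet annotation label to coarser taxonomies (NYU40 ids and classes, WordNet synsets, mpcat40 groups). A minimal sketch of loading that mapping with pandas follows; the column names raw_category and nyu40id are assumed from the standard ScanNet header and should be checked against the actual file.

import pandas as pd

# Load the ScanNet label-mapping table (tab-separated).
# Column names assume the standard ScanNet header; verify against the file itself.
labels = pd.read_csv("scannetv2-labels.combined.tsv", sep="\t")

# Build a raw-label -> NYU40 class-id lookup, e.g. "umbrella" -> 40 (otherprop).
raw_to_nyu40 = dict(zip(labels["raw_category"], labels["nyu40id"]))
print(raw_to_nyu40.get("umbrella"))  # 40, matching the umbrella row above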
src/datasets/megadepth.py
ADDED
@@ -0,0 +1,125 @@
1 |
+
# Copyright (C) 2024-present Naver Corporation. All rights reserved.
|
2 |
+
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
|
3 |
+
#
|
4 |
+
# --------------------------------------------------------
|
5 |
+
# Dataloader for preprocessed MegaDepth
|
6 |
+
# dataset at https://www.cs.cornell.edu/projects/megadepth/
|
7 |
+
# See datasets_preprocess/preprocess_megadepth.py
|
8 |
+
# --------------------------------------------------------
|
9 |
+
import os.path as osp
|
10 |
+
import numpy as np
|
11 |
+
import sys
|
12 |
+
sys.path.append("submodules/mast3r/dust3r")
|
13 |
+
from dust3r.datasets.base.base_stereo_view_dataset import BaseStereoViewDataset
|
14 |
+
from dust3r.utils.image import imread_cv2
|
15 |
+
|
16 |
+
|
17 |
+
class MegaDepth(BaseStereoViewDataset):
|
18 |
+
def __init__(self, *args, split, ROOT, **kwargs):
|
19 |
+
self.ROOT = ROOT
|
20 |
+
super().__init__(*args, **kwargs)
|
21 |
+
self.num_views = 3 # render third view
|
22 |
+
self.loaded_data = self._load_data(self.split)
|
23 |
+
|
24 |
+
if self.split is None:
|
25 |
+
pass
|
26 |
+
elif self.split == 'train':
|
27 |
+
self.select_scene(('0015', '0022'), opposite=True)
|
28 |
+
elif self.split == 'val':
|
29 |
+
self.select_scene(('0015', '0022'))
|
30 |
+
else:
|
31 |
+
raise ValueError(f'bad {self.split=}')
|
32 |
+
|
33 |
+
def _load_data(self, split):
|
34 |
+
with np.load(osp.join(self.ROOT, 'all_metadata.npz')) as data:
|
35 |
+
self.all_scenes = data['scenes']
|
36 |
+
self.all_images = data['images']
|
37 |
+
self.pairs = data['pairs']
|
38 |
+
|
39 |
+
def __len__(self):
|
40 |
+
return len(self.pairs)
|
41 |
+
|
42 |
+
def get_stats(self):
|
43 |
+
return f'{len(self)} pairs from {len(self.all_scenes)} scenes'
|
44 |
+
|
45 |
+
def select_scene(self, scene, *instances, opposite=False):
|
46 |
+
scenes = (scene,) if isinstance(scene, str) else tuple(scene)
|
47 |
+
scene_id = [s.startswith(scenes) for s in self.all_scenes]
|
48 |
+
assert any(scene_id), 'no scene found'
|
49 |
+
|
50 |
+
valid = np.in1d(self.pairs['scene_id'], np.nonzero(scene_id)[0])
|
51 |
+
if instances:
|
52 |
+
image_id = [i.startswith(instances) for i in self.all_images]
|
53 |
+
image_id = np.nonzero(image_id)[0]
|
54 |
+
assert len(image_id), 'no instance found'
|
55 |
+
# both together?
|
56 |
+
if len(instances) == 2:
|
57 |
+
valid &= np.in1d(self.pairs['im1_id'], image_id) & np.in1d(self.pairs['im2_id'], image_id)
|
58 |
+
else:
|
59 |
+
valid &= np.in1d(self.pairs['im1_id'], image_id) | np.in1d(self.pairs['im2_id'], image_id)
|
60 |
+
|
61 |
+
if opposite:
|
62 |
+
valid = ~valid
|
63 |
+
assert valid.any()
|
64 |
+
self.pairs = self.pairs[valid]
|
65 |
+
|
66 |
+
def _get_views(self, pair_idx, resolution, rng):
|
67 |
+
scene_id, im1_id, im2_id, score = self.pairs[pair_idx]
|
68 |
+
im3_id = int((im1_id + im2_id) / 2)
|
69 |
+
scene, subscene = self.all_scenes[scene_id].split()
|
70 |
+
seq_path = osp.join(self.ROOT, scene, subscene)
|
71 |
+
|
72 |
+
views = []
|
73 |
+
|
74 |
+
for im_id in [im1_id, im2_id, im3_id]:  # third entry is the middle (render) view
|
75 |
+
img = self.all_images[im_id]
|
76 |
+
try:
|
77 |
+
image = imread_cv2(osp.join(seq_path, img + '.jpg'))
|
78 |
+
depthmap = imread_cv2(osp.join(seq_path, img + ".exr"))
|
79 |
+
camera_params = np.load(osp.join(seq_path, img + ".npz"))
|
80 |
+
except Exception as e:
|
81 |
+
raise OSError(f'cannot load {img}, got exception {e}')
|
82 |
+
|
83 |
+
intrinsics = np.float32(camera_params['intrinsics'])
|
84 |
+
camera_pose = np.float32(camera_params['cam2world'])
|
85 |
+
|
86 |
+
image, depthmap, intrinsics = self._crop_resize_if_necessary(
|
87 |
+
image, depthmap, intrinsics, resolution, rng, info=(seq_path, img))
|
88 |
+
|
89 |
+
views.append(dict(
|
90 |
+
img=image,
|
91 |
+
depthmap=depthmap,
|
92 |
+
camera_pose=camera_pose, # cam2world
|
93 |
+
camera_intrinsics=intrinsics,
|
94 |
+
dataset='MegaDepth',
|
95 |
+
label=osp.relpath(seq_path, self.ROOT),
|
96 |
+
instance=img))
|
97 |
+
|
98 |
+
return views
|
99 |
+
|
100 |
+
|
101 |
+
if __name__ == "__main__":
|
102 |
+
from dust3r.datasets.base.base_stereo_view_dataset import view_name
|
103 |
+
from dust3r.viz import SceneViz, auto_cam_size
|
104 |
+
from dust3r.utils.image import rgb
|
105 |
+
|
106 |
+
dataset = MegaDepth(split='train', ROOT="data/megadepth_processed", resolution=224, aug_crop=16)
|
107 |
+
|
108 |
+
for idx in np.random.permutation(len(dataset)):
|
109 |
+
views = dataset[idx]
|
110 |
+
assert len(views) == 3
|
111 |
+
print(idx, view_name(views[0]), view_name(views[1]), view_name(views[2]))
|
112 |
+
viz = SceneViz()
|
113 |
+
poses = [views[view_idx]['camera_pose'] for view_idx in [0, 1, 2]]
|
114 |
+
cam_size = max(auto_cam_size(poses), 0.001)
|
115 |
+
for view_idx in [0, 1, 2]:
|
116 |
+
pts3d = views[view_idx]['pts3d']
|
117 |
+
valid_mask = views[view_idx]['valid_mask']
|
118 |
+
colors = rgb(views[view_idx]['img'])
|
119 |
+
viz.add_pointcloud(pts3d, colors, valid_mask)
|
120 |
+
viz.add_camera(pose_c2w=views[view_idx]['camera_pose'],
|
121 |
+
focal=views[view_idx]['camera_intrinsics'][0, 0],
|
122 |
+
color=(idx * 255, (1 - idx) * 255, 0),
|
123 |
+
image=colors,
|
124 |
+
cam_size=cam_size)
|
125 |
+
viz.show()
|
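A note on the three-view convention used by this and the following dataloaders: the third view index is simply the integer midpoint of the sampled pair, so it lies between the two input frames and serves as the extra view to render. A minimal sketch of that index selection, assuming _get_views iterates over [im1_id, im2_id, im3_id]:

# Minimal sketch of the three-view index selection used by the dataloaders.
# The render target is the (integer) midpoint between the two paired frames.
def three_view_indices(im1_id: int, im2_id: int) -> list:
    im3_id = (im1_id + im2_id) // 2  # middle frame, used as the third view
    return [im1_id, im2_id, im3_id]

assert three_view_indices(10, 20) == [10, 20, 15]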
src/datasets/scannet.py
ADDED
@@ -0,0 +1,109 @@
1 |
+
import os
|
2 |
+
import os.path as osp
|
3 |
+
import sys
|
4 |
+
sys.path.append("submodules/mast3r/dust3r")
|
5 |
+
from dust3r.datasets.base.base_stereo_view_dataset import BaseStereoViewDataset
|
6 |
+
import numpy as np
|
7 |
+
import cv2
|
8 |
+
from dust3r.utils.image import imread_cv2
|
9 |
+
|
10 |
+
class Scannet(BaseStereoViewDataset):
|
11 |
+
def __init__(self, *args, ROOT, **kwargs):
|
12 |
+
self.ROOT = ROOT
|
13 |
+
super().__init__(*args, **kwargs)
|
14 |
+
self.num_views = 3 # render third view
|
15 |
+
self._load_data()
|
16 |
+
|
17 |
+
def _load_data(self):
|
18 |
+
# Traverse all the folders in the data_root
|
19 |
+
scene_names = [folder for folder in os.listdir(self.ROOT) if os.path.isdir(os.path.join(self.ROOT, folder))]
|
20 |
+
# Filter out scenes without scene_data.npz
|
21 |
+
valid_scenes = []
|
22 |
+
for scene_name in scene_names:
|
23 |
+
scene_data_path = osp.join(self.ROOT, scene_name, "scene_data.npz")
|
24 |
+
if osp.exists(scene_data_path):
|
25 |
+
valid_scenes.append(scene_name)
|
26 |
+
else:
|
27 |
+
print(f"Skipping {scene_name}: scene_data.npz not found")
|
28 |
+
scene_names = valid_scenes
|
29 |
+
scene_names.sort()
|
30 |
+
if self.split == 'train':
|
31 |
+
scene_names = scene_names[:-150]
|
32 |
+
else:
|
33 |
+
scene_names = scene_names[-150:]
|
34 |
+
# merge all pairs and images
|
35 |
+
pairs = [] # (scene_name, image_idx1, image_idx2)
|
36 |
+
images = {} # (scene_name, image_idx) -> image_path
|
37 |
+
for scene_name in scene_names:
|
38 |
+
scene_path = osp.join(self.ROOT, scene_name, "scene_data.npz")
|
39 |
+
scene_data = np.load(scene_path)
|
40 |
+
pairs.extend([(scene_name, *pair) for pair in scene_data['pairs']])
|
41 |
+
images.update({(scene_name, idx): path for idx, path in enumerate(scene_data['images'])})
|
42 |
+
self.pairs = pairs
|
43 |
+
self.images = images
|
44 |
+
|
45 |
+
def __len__(self):
|
46 |
+
return len(self.pairs)
|
47 |
+
|
48 |
+
def _get_views(self, idx, resolution, rng):
|
49 |
+
scene_name, image_idx1, image_idx2, _ = self.pairs[idx]
|
50 |
+
image_idx1 = int(image_idx1)
|
51 |
+
image_idx2 = int(image_idx2)
|
52 |
+
image_idx3 = int((image_idx1 + image_idx2) / 2)
|
53 |
+
views = []
|
54 |
+
for view_idx in [image_idx1, image_idx2, image_idx3]:
|
55 |
+
basename = self.images[(scene_name, view_idx)]
|
56 |
+
# Load RGB image
|
57 |
+
rgb_path = osp.join(self.ROOT, scene_name, 'images', f'{basename}.jpg')
|
58 |
+
rgb_image = imread_cv2(rgb_path)
|
59 |
+
# Load depthmap
|
60 |
+
depthmap_path = osp.join(self.ROOT, scene_name, 'depths', f'{basename}.png')
|
61 |
+
depthmap = imread_cv2(depthmap_path, cv2.IMREAD_UNCHANGED)
|
62 |
+
depthmap = depthmap.astype(np.float32) / 1000
|
63 |
+
depthmap[~np.isfinite(depthmap)] = 0 # invalid
|
64 |
+
# Load camera parameters
|
65 |
+
meta_path = osp.join(self.ROOT, scene_name, 'images', f'{basename}.npz')
|
66 |
+
meta = np.load(meta_path)
|
67 |
+
intrinsics = meta['camera_intrinsics']
|
68 |
+
camera_pose = meta['camera_pose']
|
69 |
+
# crop if necessary
|
70 |
+
rgb_image, depthmap, intrinsics = self._crop_resize_if_necessary(
|
71 |
+
rgb_image, depthmap, intrinsics, resolution, rng=rng, info=view_idx)
|
72 |
+
views.append(dict(
|
73 |
+
img=rgb_image,
|
74 |
+
depthmap=depthmap.astype(np.float32),
|
75 |
+
camera_pose=camera_pose.astype(np.float32),
|
76 |
+
camera_intrinsics=intrinsics.astype(np.float32),
|
77 |
+
dataset='ScanNet',
|
78 |
+
label=scene_name + '_' + basename,
|
79 |
+
instance=f'{str(idx)}_{str(view_idx)}',
|
80 |
+
))
|
81 |
+
return views
|
82 |
+
|
83 |
+
if __name__ == "__main__":
|
84 |
+
from dust3r.datasets.base.base_stereo_view_dataset import view_name
|
85 |
+
from dust3r.viz import SceneViz, auto_cam_size
|
86 |
+
from dust3r.utils.image import rgb
|
87 |
+
|
88 |
+
dataset = Scannet(split='train', ROOT="data/scannet_processed", resolution=224, aug_crop=16)
|
89 |
+
|
90 |
+
print(len(dataset))
|
91 |
+
|
92 |
+
for idx in np.random.permutation(len(dataset)):
|
93 |
+
views = dataset[idx]
|
94 |
+
assert len(views) == 3
|
95 |
+
print(view_name(views[0]), view_name(views[1]), view_name(views[2]))
|
96 |
+
viz = SceneViz()
|
97 |
+
poses = [views[view_idx]['camera_pose'] for view_idx in [0, 1, 2]]
|
98 |
+
cam_size = max(auto_cam_size(poses), 0.001)
|
99 |
+
for view_idx in [0, 1, 2]:
|
100 |
+
pts3d = views[view_idx]['pts3d']
|
101 |
+
valid_mask = views[view_idx]['valid_mask']
|
102 |
+
colors = rgb(views[view_idx]['img'])
|
103 |
+
viz.add_pointcloud(pts3d, colors, valid_mask)
|
104 |
+
viz.add_camera(pose_c2w=views[view_idx]['camera_pose'],
|
105 |
+
focal=views[view_idx]['camera_intrinsics'][0, 0],
|
106 |
+
color=(idx*255, (1 - idx)*255, 0),
|
107 |
+
image=colors,
|
108 |
+
cam_size=cam_size)
|
109 |
+
viz.show()
|
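Both ScanNet-style loaders assume a per-scene scene_data.npz written by the preprocessing scripts further below, containing a pairs array of (frame_i, frame_j, mean_iou) entries and an images array of frame basenames. A quick way to inspect one of these files (the scene path is only a placeholder):

import numpy as np

# Inspect a preprocessed scene file; the path below is a placeholder example.
data = np.load("data/scannet_processed/scene0000_00/scene_data.npz")
pairs = data["pairs"]    # rows of (frame_idx_i, frame_idx_j, mean_iou)
images = data["images"]  # frame basenames, indexed by frame index

print(f"{len(images)} frames, {len(pairs)} covisible pairs")
print("first pair:", pairs[0])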
src/datasets/scannetpp.py
ADDED
@@ -0,0 +1,107 @@
1 |
+
import os
|
2 |
+
import os.path as osp
|
3 |
+
import sys
|
4 |
+
sys.path.append("submodules/mast3r/dust3r")
|
5 |
+
from dust3r.datasets.base.base_stereo_view_dataset import BaseStereoViewDataset
|
6 |
+
import numpy as np
|
7 |
+
import cv2
|
8 |
+
from dust3r.utils.image import imread_cv2
|
9 |
+
|
10 |
+
class Scannetpp(BaseStereoViewDataset):
|
11 |
+
def __init__(self, *args, ROOT, **kwargs):
|
12 |
+
self.ROOT = ROOT
|
13 |
+
super().__init__(*args, **kwargs)
|
14 |
+
assert self.split == 'train' # just for training
|
15 |
+
self.num_views = 3 # render third view
|
16 |
+
self._load_data()
|
17 |
+
|
18 |
+
def _load_data(self):
|
19 |
+
# Traverse all the folders in the data_root
|
20 |
+
scene_names = [folder for folder in os.listdir(self.ROOT) if os.path.isdir(os.path.join(self.ROOT, folder))]
|
21 |
+
# Filter out scenes without scene_data.npz
|
22 |
+
valid_scenes = []
|
23 |
+
for scene_name in scene_names:
|
24 |
+
scene_data_path = osp.join(self.ROOT, scene_name, "scene_data.npz")
|
25 |
+
if osp.exists(scene_data_path):
|
26 |
+
valid_scenes.append(scene_name)
|
27 |
+
else:
|
28 |
+
print(f"Skipping {scene_name}: scene_data.npz not found")
|
29 |
+
scene_names = valid_scenes
|
30 |
+
scene_names.sort()
|
31 |
+
|
32 |
+
# merge all pairs and images
|
33 |
+
pairs = [] # (scene_name, image_idx1, image_idx2)
|
34 |
+
images = {} # (scene_name, image_idx) -> image_path
|
35 |
+
for scene_name in scene_names:
|
36 |
+
scene_path = osp.join(self.ROOT, scene_name, "scene_data.npz")
|
37 |
+
scene_data = np.load(scene_path)
|
38 |
+
pairs.extend([(scene_name, *pair) for pair in scene_data['pairs']])
|
39 |
+
images.update({(scene_name, idx): path for idx, path in enumerate(scene_data['images'])})
|
40 |
+
self.pairs = pairs
|
41 |
+
self.images = images
|
42 |
+
|
43 |
+
def __len__(self):
|
44 |
+
return len(self.pairs)
|
45 |
+
|
46 |
+
def _get_views(self, idx, resolution, rng):
|
47 |
+
scene_name, image_idx1, image_idx2, _ = self.pairs[idx]
|
48 |
+
image_idx1 = int(image_idx1)
|
49 |
+
image_idx2 = int(image_idx2)
|
50 |
+
image_idx3 = int((image_idx1 + image_idx2) / 2)
|
51 |
+
views = []
|
52 |
+
for view_idx in [image_idx1, image_idx2, image_idx3]:
|
53 |
+
basename = self.images[(scene_name, view_idx)]
|
54 |
+
# Load RGB image
|
55 |
+
rgb_path = osp.join(self.ROOT, scene_name, 'images', f'{basename}.JPG')
|
56 |
+
rgb_image = imread_cv2(rgb_path)
|
57 |
+
# Load depthmap
|
58 |
+
depthmap_path = osp.join(self.ROOT, scene_name, 'depths', f'{basename}.png')
|
59 |
+
depthmap = imread_cv2(depthmap_path, cv2.IMREAD_UNCHANGED)
|
60 |
+
depthmap = depthmap.astype(np.float32) / 1000
|
61 |
+
depthmap[~np.isfinite(depthmap)] = 0 # invalid
|
62 |
+
# Load camera parameters
|
63 |
+
meta_path = osp.join(self.ROOT, scene_name, 'images', f'{basename}.npz')
|
64 |
+
meta = np.load(meta_path)
|
65 |
+
intrinsics = meta['camera_intrinsics']
|
66 |
+
camera_pose = meta['camera_pose']
|
67 |
+
# crop if necessary
|
68 |
+
rgb_image, depthmap, intrinsics = self._crop_resize_if_necessary(
|
69 |
+
rgb_image, depthmap, intrinsics, resolution, rng=rng, info=view_idx)
|
70 |
+
views.append(dict(
|
71 |
+
img=rgb_image,
|
72 |
+
depthmap=depthmap.astype(np.float32),
|
73 |
+
camera_pose=camera_pose.astype(np.float32),
|
74 |
+
camera_intrinsics=intrinsics.astype(np.float32),
|
75 |
+
dataset='ScanNet++',
|
76 |
+
label=scene_name + '_' + basename,
|
77 |
+
instance=f'{str(idx)}_{str(view_idx)}',
|
78 |
+
))
|
79 |
+
return views
|
80 |
+
|
81 |
+
if __name__ == "__main__":
|
82 |
+
from dust3r.datasets.base.base_stereo_view_dataset import view_name
|
83 |
+
from dust3r.viz import SceneViz, auto_cam_size
|
84 |
+
from dust3r.utils.image import rgb
|
85 |
+
|
86 |
+
dataset = Scannetpp(split='train', ROOT="data/scannetpp_processed", resolution=224, aug_crop=16)
|
87 |
+
|
88 |
+
print(len(dataset))
|
89 |
+
|
90 |
+
for idx in np.random.permutation(len(dataset)):
|
91 |
+
views = dataset[idx]
|
92 |
+
assert len(views) == 3
|
93 |
+
print(view_name(views[0]), view_name(views[1]), view_name(views[2]))
|
94 |
+
viz = SceneViz()
|
95 |
+
poses = [views[view_idx]['camera_pose'] for view_idx in [0, 1, 2]]
|
96 |
+
cam_size = max(auto_cam_size(poses), 0.001)
|
97 |
+
for view_idx in [0, 1, 2]:
|
98 |
+
pts3d = views[view_idx]['pts3d']
|
99 |
+
valid_mask = views[view_idx]['valid_mask']
|
100 |
+
colors = rgb(views[view_idx]['img'])
|
101 |
+
viz.add_pointcloud(pts3d, colors, valid_mask)
|
102 |
+
viz.add_camera(pose_c2w=views[view_idx]['camera_pose'],
|
103 |
+
focal=views[view_idx]['camera_intrinsics'][0, 0],
|
104 |
+
color=(idx*255, (1 - idx)*255, 0),
|
105 |
+
image=colors,
|
106 |
+
cam_size=cam_size)
|
107 |
+
viz.show()
|
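The loaders above decode depth identically: the 16-bit depth PNG stores millimetres, which is converted to metres and sanitized before cropping. That step in isolation (the file path is illustrative):

import cv2
import numpy as np

# Decode a ScanNet-style 16-bit depth PNG (millimetres) into metres.
depth_raw = cv2.imread("depths/000000.png", cv2.IMREAD_UNCHANGED)  # illustrative path
depth = depth_raw.astype(np.float32) / 1000.0  # mm -> m
depth[~np.isfinite(depth)] = 0.0               # zero out invalid pixels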
src/datasets_preprocess/scannet_preprocess.py
ADDED
@@ -0,0 +1,209 @@
1 |
+
import os
|
2 |
+
import numpy as np
|
3 |
+
import cv2
|
4 |
+
import torch
|
5 |
+
import torch.multiprocessing as mp
|
6 |
+
|
7 |
+
def process_scene_on_gpu(gpu_id, scene_names, data_root, output_queue):
|
8 |
+
torch.cuda.set_device(gpu_id)
|
9 |
+
local_pairs = {}  # note: left empty; per-scene results are written to scene_data.npz inside the loop
|
10 |
+
local_images = {}
|
11 |
+
|
12 |
+
for scene_name in scene_names:
|
13 |
+
save_path = os.path.join(data_root, scene_name, "scene_data.npz")
|
14 |
+
if os.path.exists(save_path):
|
15 |
+
print(f"Scene {scene_name} already processed, skipping")
|
16 |
+
continue
|
17 |
+
pairs, images = process_scene(data_root, scene_name)
|
18 |
+
np.savez_compressed(save_path, pairs=pairs, images=images)
|
19 |
+
|
20 |
+
output_queue.put((local_pairs, local_images))
|
21 |
+
|
22 |
+
def preprocess_scannet(data_root, threads_per_gpu=4):
|
23 |
+
scene_names = [folder for folder in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, folder))]
|
24 |
+
num_gpus = torch.cuda.device_count()
|
25 |
+
total_threads = num_gpus * threads_per_gpu
|
26 |
+
|
27 |
+
# Distribute the scenes evenly across all worker threads
|
28 |
+
scenes_per_thread = [scene_names[i::total_threads] for i in range(total_threads)]
|
29 |
+
|
30 |
+
output_queue = mp.Queue()
|
31 |
+
processes = []
|
32 |
+
|
33 |
+
# Create multiple worker processes for each GPU
|
34 |
+
for gpu_id in range(num_gpus):
|
35 |
+
for thread_id in range(threads_per_gpu):
|
36 |
+
process_id = gpu_id * threads_per_gpu + thread_id
|
37 |
+
p = mp.Process(
|
38 |
+
target=process_scene_on_gpu,
|
39 |
+
args=(gpu_id, scenes_per_thread[process_id], data_root, output_queue)
|
40 |
+
)
|
41 |
+
p.start()
|
42 |
+
processes.append(p)
|
43 |
+
|
44 |
+
# Collect the results from all processes
|
45 |
+
all_pairs = {}
|
46 |
+
all_images = {}
|
47 |
+
for _ in range(total_threads):
|
48 |
+
local_pairs, local_images = output_queue.get()
|
49 |
+
all_pairs.update(local_pairs)
|
50 |
+
all_images.update(local_images)
|
51 |
+
|
52 |
+
# Wait for all processes to complete
|
53 |
+
for p in processes:
|
54 |
+
p.join()
|
55 |
+
|
56 |
+
# Save to npz file
|
57 |
+
np.savez_compressed(os.path.join(data_root, "scannet_image_pairs.npz"), **all_pairs)
|
58 |
+
np.savez_compressed(os.path.join(data_root, "scannet_images.npz"), **all_images)
|
59 |
+
|
60 |
+
# print the number of image pairs
|
61 |
+
# sum up the number of image pairs for all scenes
|
62 |
+
total_pairs = sum(len(pairs) for pairs in all_pairs.values())
|
63 |
+
print(f"Total number of image pairs: {total_pairs}")
|
64 |
+
return all_pairs, all_images
|
65 |
+
|
66 |
+
def process_scene(data_root, scene_name):
|
67 |
+
pairs = []
|
68 |
+
images_dir = os.path.join(data_root, scene_name, "images")
|
69 |
+
images = [os.path.splitext(file)[0] for file in os.listdir(images_dir) if file.endswith(".jpg")]
|
70 |
+
images.sort()
|
71 |
+
|
72 |
+
# Check validity of c2w for each image
|
73 |
+
valid_images = []
|
74 |
+
for image in images:
|
75 |
+
_, c2w, _ = load_image(data_root, scene_name, image)
|
76 |
+
if is_valid_c2w(c2w):
|
77 |
+
valid_images.append(image)
|
78 |
+
else:
|
79 |
+
print(f"Invalid c2w for image {image} in scene {scene_name}")
|
80 |
+
|
81 |
+
# generate image pairs
|
82 |
+
slide_window = 50
|
83 |
+
num_sub_intervals = 5
|
84 |
+
|
85 |
+
pairs = generate_image_pairs(data_root, scene_name, valid_images, slide_window, num_sub_intervals)
|
86 |
+
print(f"Scene {scene_name} has {len(pairs)} image pairs and {len(valid_images)} valid images out of {len(images)} total images")
|
87 |
+
return pairs, valid_images
|
88 |
+
|
89 |
+
def is_valid_c2w(c2w):
|
90 |
+
return not np.any(np.isinf(c2w)) and not np.any(np.isnan(c2w))
|
91 |
+
|
92 |
+
def generate_image_pairs(data_root, scene_name, images, slide_window, num_sub_intervals=3):
|
93 |
+
pairs = []
|
94 |
+
n = len(images)
|
95 |
+
|
96 |
+
# Define IOU sub-intervals
|
97 |
+
iou_range = (0.3, 0.8)
|
98 |
+
sub_interval_size = (iou_range[1] - iou_range[0]) / num_sub_intervals
|
99 |
+
sub_intervals = [(iou_range[0] + i * sub_interval_size, iou_range[0] + (i + 1) * sub_interval_size)
|
100 |
+
for i in range(num_sub_intervals)]
|
101 |
+
|
102 |
+
for i in range(n):
|
103 |
+
# Keep track of whether a pair has been added for each sub-interval
|
104 |
+
interval_selected = [False] * num_sub_intervals
|
105 |
+
|
106 |
+
for j in range(i+1, min(i + slide_window, n)):
|
107 |
+
# Break early if all sub-intervals have been selected
|
108 |
+
if all(interval_selected):
|
109 |
+
break
|
110 |
+
|
111 |
+
# Load image pair
|
112 |
+
depth1, c2w1, K1 = load_image(data_root, scene_name, images[i])
|
113 |
+
depth2, c2w2, K2 = load_image(data_root, scene_name, images[j])
|
114 |
+
|
115 |
+
# Calculate mean IoU
|
116 |
+
try:
|
117 |
+
iou_1 = calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2)
|
118 |
+
iou_2 = calculate_iou(depth2, c2w2, K2, depth1, c2w1, K1)
|
119 |
+
except Exception as e:
|
120 |
+
print(f"Error calculating IoU for images {images[i]} and {images[j]} in scene {scene_name}: {str(e)}")
|
121 |
+
continue
|
122 |
+
|
123 |
+
mean_iou = (iou_1 + iou_2) / 2
|
124 |
+
|
125 |
+
# Check which sub-interval the mean IoU falls into
|
126 |
+
for idx, (lower, upper) in enumerate(sub_intervals):
|
127 |
+
if lower <= mean_iou <= upper and not interval_selected[idx]:
|
128 |
+
pairs.append((i, j, mean_iou))
|
129 |
+
interval_selected[idx] = True # Mark this interval as selected
|
130 |
+
break # Move to the next pair after adding one in the current sub-interval
|
131 |
+
|
132 |
+
return pairs
|
133 |
+
|
134 |
+
|
135 |
+
def load_image(data_root, scene_name, image_id):
|
136 |
+
# load depthmap
|
137 |
+
depth_path = f"{data_root}/{scene_name}/depths/{image_id}.png"
|
138 |
+
depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 1000.0
|
139 |
+
# load camera parameters
|
140 |
+
meta_path = f"{data_root}/{scene_name}/images/{image_id}.npz"
|
141 |
+
meta = np.load(meta_path)
|
142 |
+
c2w = meta['camera_pose']
|
143 |
+
K = meta['camera_intrinsics']
|
144 |
+
return depth, c2w, K
|
145 |
+
|
146 |
+
# Unproject depthmap to point cloud and project to another camera
|
147 |
+
def calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2):
|
148 |
+
# Move data to GPU and ensure float32 dtype
|
149 |
+
depth1 = torch.from_numpy(depth1).cuda().float()
|
150 |
+
depth2 = torch.from_numpy(depth2).cuda().float()
|
151 |
+
c2w1 = torch.from_numpy(c2w1).cuda().float()
|
152 |
+
c2w2 = torch.from_numpy(c2w2).cuda().float()
|
153 |
+
K1 = torch.from_numpy(K1).cuda().float()
|
154 |
+
K2 = torch.from_numpy(K2).cuda().float()
|
155 |
+
|
156 |
+
# Get image dimensions
|
157 |
+
h, w = depth1.shape
|
158 |
+
|
159 |
+
# Create pixel coordinates
|
160 |
+
y, x = torch.meshgrid(torch.arange(h, device='cuda', dtype=torch.float32),
|
161 |
+
torch.arange(w, device='cuda', dtype=torch.float32))
|
162 |
+
pixels = torch.stack((x.flatten(), y.flatten(), torch.ones_like(x.flatten())), dim=-1).T
|
163 |
+
|
164 |
+
# Unproject pixels to 3D points
|
165 |
+
pixels_3d = torch.linalg.inv(K1) @ pixels
|
166 |
+
pixels_3d *= depth1.flatten().unsqueeze(0)
|
167 |
+
|
168 |
+
# Transform 3D points to world coordinates
|
169 |
+
pixels_world = c2w1[:3, :3] @ pixels_3d + c2w1[:3, 3:4]
|
170 |
+
|
171 |
+
# Check if c2w2[:3, :3] is invertible
|
172 |
+
if torch.det(c2w2[:3, :3]) == 0:
|
173 |
+
return 0.0  # calculation failed: rotation not invertible, treat as no overlap
|
174 |
+
|
175 |
+
# Project world points to second camera
|
176 |
+
pixels_cam2 = torch.linalg.inv(c2w2[:3, :3]) @ (pixels_world - c2w2[:3, 3:4])
|
177 |
+
pixels_img2 = K2 @ pixels_cam2
|
178 |
+
|
179 |
+
# Normalize homogeneous coordinates
|
180 |
+
pixels_img2 = pixels_img2[:2] / pixels_img2[2]
|
181 |
+
pixels_img2 = pixels_img2.T
|
182 |
+
|
183 |
+
# Filter valid pixels
|
184 |
+
valid_mask = (pixels_img2[:, 0] >= 0) & (pixels_img2[:, 0] < w) & \
|
185 |
+
(pixels_img2[:, 1] >= 0) & (pixels_img2[:, 1] < h)
|
186 |
+
|
187 |
+
pixels_img2 = pixels_img2[valid_mask].long()
|
188 |
+
|
189 |
+
# Compare depths
|
190 |
+
projected_depth = pixels_cam2[2, valid_mask]
|
191 |
+
actual_depth = depth2[pixels_img2[:, 1], pixels_img2[:, 0]]
|
192 |
+
|
193 |
+
depth_diff = torch.abs(projected_depth - actual_depth)
|
194 |
+
depth_threshold = 0.1 # 10cm threshold
|
195 |
+
|
196 |
+
overlap_mask = depth_diff < depth_threshold
|
197 |
+
|
198 |
+
# Calculate IoU
|
199 |
+
intersection = torch.sum(overlap_mask)
|
200 |
+
union = torch.sum(valid_mask) + torch.sum(depth2 > 0) - intersection
|
201 |
+
|
202 |
+
iou = intersection.float() / union.float() if union > 0 else torch.tensor(0.0, device='cuda')
|
203 |
+
|
204 |
+
return iou.item()
|
205 |
+
|
206 |
+
if __name__ == "__main__":
|
207 |
+
data_root = "data/scannet_processed"
|
208 |
+
# The number of worker threads per GPU can be set via this argument
|
209 |
+
preprocess_scannet(data_root, threads_per_gpu=12)
|
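The covisibility check above runs on the GPU with PyTorch. The same unproject-transform-reproject logic can be written in plain NumPy for a single image pair, which may be handy for debugging the geometry without CUDA; this is an illustrative re-implementation, not the script's code path:

import numpy as np

def covisibility_iou(depth1, c2w1, K1, depth2, c2w2, K2, depth_thresh=0.1):
    # NumPy sketch of the depth-consistency IoU used to pick training pairs.
    h, w = depth1.shape
    y, x = np.mgrid[0:h, 0:w].astype(np.float32)
    pix = np.stack([x.ravel(), y.ravel(), np.ones(h * w, np.float32)])  # 3 x N

    # Unproject view-1 pixels, scale by depth, lift to world coordinates.
    pts_cam1 = np.linalg.inv(K1) @ pix * depth1.ravel()
    pts_world = c2w1[:3, :3] @ pts_cam1 + c2w1[:3, 3:4]

    # Transform into camera 2 and project with its intrinsics.
    pts_cam2 = np.linalg.inv(c2w2[:3, :3]) @ (pts_world - c2w2[:3, 3:4])
    uv = K2 @ pts_cam2
    uv = uv[:2] / uv[2:3]

    # Keep projections that land inside image 2 with positive depth.
    valid = (uv[0] >= 0) & (uv[0] < w) & (uv[1] >= 0) & (uv[1] < h) & (pts_cam2[2] > 0)
    u = uv[0, valid].astype(int)
    v = uv[1, valid].astype(int)

    # A pixel is covisible when projected depth matches the observed depth.
    overlap = np.abs(pts_cam2[2, valid] - depth2[v, u]) < depth_thresh
    intersection = overlap.sum()
    union = valid.sum() + (depth2 > 0).sum() - intersection
    return float(intersection) / float(union) if union > 0 else 0.0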
src/datasets_preprocess/scannetpp_preprocess.py
ADDED
@@ -0,0 +1,227 @@
1 |
+
import os
|
2 |
+
import numpy as np
|
3 |
+
import cv2
|
4 |
+
import torch
|
5 |
+
import torch.multiprocessing as mp
|
6 |
+
import shutil
|
7 |
+
|
8 |
+
def process_scene_on_gpu(gpu_id, scene_names, data_root, target_root, output_queue):
|
9 |
+
torch.cuda.set_device(gpu_id)
|
10 |
+
local_pairs = {}
|
11 |
+
local_images = {}
|
12 |
+
|
13 |
+
for scene_name in scene_names:
|
14 |
+
save_path = os.path.join(target_root, scene_name, "scene_data.npz")
|
15 |
+
if os.path.exists(save_path):
|
16 |
+
print(f"Scene {scene_name} already processed, skipping")
|
17 |
+
continue
|
18 |
+
pairs, images = process_scene(data_root, target_root, scene_name)
|
19 |
+
np.savez_compressed(save_path, pairs=pairs, images=images)
|
20 |
+
|
21 |
+
output_queue.put((local_pairs, local_images))
|
22 |
+
|
23 |
+
def preprocess_scannetpp(data_root, target_root):
|
24 |
+
# Traverse all the folders in the data_root
|
25 |
+
scene_names = [folder for folder in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, folder))]
|
26 |
+
|
27 |
+
# Get the number of available GPUs
|
28 |
+
num_gpus = torch.cuda.device_count()
|
29 |
+
|
30 |
+
# Distribute scenes across GPUs
|
31 |
+
scenes_per_gpu = [scene_names[i::num_gpus] for i in range(num_gpus)]
|
32 |
+
|
33 |
+
# Create a multiprocessing queue to collect results
|
34 |
+
output_queue = mp.Queue()
|
35 |
+
|
36 |
+
# Launch parallel processes
|
37 |
+
processes = []
|
38 |
+
for gpu_id in range(num_gpus):
|
39 |
+
p = mp.Process(target=process_scene_on_gpu, args=(gpu_id, scenes_per_gpu[gpu_id], data_root, target_root, output_queue))
|
40 |
+
p.start()
|
41 |
+
processes.append(p)
|
42 |
+
|
43 |
+
# Collect results from all processes
|
44 |
+
all_pairs = {}
|
45 |
+
all_images = {}
|
46 |
+
for _ in range(num_gpus):
|
47 |
+
local_pairs, local_images = output_queue.get()
|
48 |
+
all_pairs.update(local_pairs)
|
49 |
+
all_images.update(local_images)
|
50 |
+
|
51 |
+
# Wait for all processes to complete
|
52 |
+
for p in processes:
|
53 |
+
p.join()
|
54 |
+
|
55 |
+
# Save to npz file
|
56 |
+
np.savez_compressed(os.path.join(data_root, "scannet_image_pairs.npz"), **all_pairs)
|
57 |
+
np.savez_compressed(os.path.join(data_root, "scannet_images.npz"), **all_images)
|
58 |
+
|
59 |
+
# print the number of image pairs
|
60 |
+
# sum up the number of image pairs for all scenes
|
61 |
+
total_pairs = sum(len(pairs) for pairs in all_pairs.values())
|
62 |
+
print(f"Total number of image pairs: {total_pairs}")
|
63 |
+
return all_pairs, all_images
|
64 |
+
|
65 |
+
# def preprocess_scannetpp(data_root, target_root):
|
66 |
+
# # Traverse all the folders in the data_root
|
67 |
+
# scene_names = [folder for folder in os.listdir(data_root) if os.path.isdir(os.path.join(data_root, folder))]
|
68 |
+
|
69 |
+
# for scene_name in scene_names:
|
70 |
+
# save_path = os.path.join(target_root, scene_name, "scene_data.npz")
|
71 |
+
# if os.path.exists(save_path):
|
72 |
+
# print(f"Scene {scene_name} already processed, skipping")
|
73 |
+
# continue
|
74 |
+
# pairs, images = process_scene(data_root, target_root, scene_name)
|
75 |
+
# np.savez_compressed(save_path, pairs=pairs, images=images)
|
76 |
+
|
77 |
+
def process_scene(data_root, target_root, scene_name):
|
78 |
+
pairs = []
|
79 |
+
images_dir = os.path.join(data_root, scene_name, "images")
|
80 |
+
images = [os.path.splitext(file)[0] for file in os.listdir(images_dir) if file.endswith(".JPG")]
|
81 |
+
images.sort()
|
82 |
+
# copy images, depths, and camera parameters to target_root
|
83 |
+
os.makedirs(os.path.join(target_root, scene_name, "images"), exist_ok=True)
|
84 |
+
os.makedirs(os.path.join(target_root, scene_name, "depths"), exist_ok=True)
|
85 |
+
for image in images:
|
86 |
+
shutil.copy(os.path.join(data_root, scene_name, "images", f"{image}.JPG"), os.path.join(target_root, scene_name, "images", f"{image}.JPG"))
|
87 |
+
shutil.copy(os.path.join(data_root, scene_name, "depths", f"{image}.png"), os.path.join(target_root, scene_name, "depths", f"{image}.png"))
|
88 |
+
shutil.copy(os.path.join(data_root, scene_name, "images", f"{image}.npz"), os.path.join(target_root, scene_name, "images", f"{image}.npz"))
|
89 |
+
|
90 |
+
# Check validity of c2w for each image
|
91 |
+
valid_images = []
|
92 |
+
for image in images:
|
93 |
+
_, c2w, _ = load_image(data_root, scene_name, image)
|
94 |
+
if is_valid_c2w(c2w):
|
95 |
+
valid_images.append(image)
|
96 |
+
else:
|
97 |
+
print(f"Invalid c2w for image {image} in scene {scene_name}")
|
98 |
+
|
99 |
+
# generate image pairs
|
100 |
+
slide_window = 100
|
101 |
+
num_sub_intervals = 5
|
102 |
+
|
103 |
+
pairs = generate_image_pairs(data_root, scene_name, valid_images, slide_window, num_sub_intervals)
|
104 |
+
print(f"Scene {scene_name} has {len(pairs)} image pairs and {len(valid_images)} valid images out of {len(images)} total images")
|
105 |
+
return pairs, valid_images
|
106 |
+
|
107 |
+
def is_valid_c2w(c2w):
|
108 |
+
return not np.any(np.isinf(c2w)) and not np.any(np.isnan(c2w))
|
109 |
+
|
110 |
+
def generate_image_pairs(data_root, scene_name, images, slide_window, num_sub_intervals=3):
|
111 |
+
pairs = []
|
112 |
+
n = len(images)
|
113 |
+
|
114 |
+
# Define IOU sub-intervals
|
115 |
+
iou_range = (0.3, 0.8)
|
116 |
+
sub_interval_size = (iou_range[1] - iou_range[0]) / num_sub_intervals
|
117 |
+
sub_intervals = [(iou_range[0] + i * sub_interval_size, iou_range[0] + (i + 1) * sub_interval_size)
|
118 |
+
for i in range(num_sub_intervals)]
|
119 |
+
|
120 |
+
for i in range(n):
|
121 |
+
# Keep track of whether a pair has been added for each sub-interval
|
122 |
+
interval_selected = [False] * num_sub_intervals
|
123 |
+
|
124 |
+
for j in range(i+1, min(i + slide_window, n)):
|
125 |
+
# Break early if all sub-intervals have been selected
|
126 |
+
if all(interval_selected):
|
127 |
+
break
|
128 |
+
|
129 |
+
# Load image pair
|
130 |
+
depth1, c2w1, K1 = load_image(data_root, scene_name, images[i])
|
131 |
+
depth2, c2w2, K2 = load_image(data_root, scene_name, images[j])
|
132 |
+
|
133 |
+
# Calculate mean IoU
|
134 |
+
try:
|
135 |
+
iou_1 = calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2)
|
136 |
+
iou_2 = calculate_iou(depth2, c2w2, K2, depth1, c2w1, K1)
|
137 |
+
except Exception as e:
|
138 |
+
print(f"Error calculating IoU for images {images[i]} and {images[j]} in scene {scene_name}: {str(e)}")
|
139 |
+
continue
|
140 |
+
|
141 |
+
mean_iou = (iou_1 + iou_2) / 2
|
142 |
+
|
143 |
+
# Check which sub-interval the mean IoU falls into
|
144 |
+
for idx, (lower, upper) in enumerate(sub_intervals):
|
145 |
+
if lower <= mean_iou <= upper and not interval_selected[idx]:
|
146 |
+
pairs.append((i, j, mean_iou))
|
147 |
+
interval_selected[idx] = True # Mark this interval as selected
|
148 |
+
break # Move to the next pair after adding one in the current sub-interval
|
149 |
+
|
150 |
+
return pairs
|
151 |
+
|
152 |
+
|
153 |
+
def load_image(data_root, scene_name, image_id):
|
154 |
+
# load depthmap
|
155 |
+
depth_path = f"{data_root}/{scene_name}/depths/{image_id}.png"
|
156 |
+
depth = cv2.imread(depth_path, cv2.IMREAD_UNCHANGED).astype(np.float32) / 1000.0
|
157 |
+
# load camera parameters
|
158 |
+
meta_path = f"{data_root}/{scene_name}/images/{image_id}.npz"
|
159 |
+
meta = np.load(meta_path)
|
160 |
+
c2w = meta['camera_pose']
|
161 |
+
K = meta['camera_intrinsics']
|
162 |
+
return depth, c2w, K
|
163 |
+
|
164 |
+
# Unproject depthmap to point cloud and project to another camera
|
165 |
+
def calculate_iou(depth1, c2w1, K1, depth2, c2w2, K2):
|
166 |
+
# Move data to GPU and ensure float32 dtype
|
167 |
+
depth1 = torch.from_numpy(depth1).cuda().float()
|
168 |
+
depth2 = torch.from_numpy(depth2).cuda().float()
|
169 |
+
c2w1 = torch.from_numpy(c2w1).cuda().float()
|
170 |
+
c2w2 = torch.from_numpy(c2w2).cuda().float()
|
171 |
+
K1 = torch.from_numpy(K1).cuda().float()
|
172 |
+
K2 = torch.from_numpy(K2).cuda().float()
|
173 |
+
|
174 |
+
# Get image dimensions
|
175 |
+
h, w = depth1.shape
|
176 |
+
|
177 |
+
# Create pixel coordinates
|
178 |
+
y, x = torch.meshgrid(torch.arange(h, device='cuda', dtype=torch.float32),
|
179 |
+
torch.arange(w, device='cuda', dtype=torch.float32))
|
180 |
+
pixels = torch.stack((x.flatten(), y.flatten(), torch.ones_like(x.flatten())), dim=-1).T
|
181 |
+
|
182 |
+
# Unproject pixels to 3D points
|
183 |
+
pixels_3d = torch.linalg.inv(K1) @ pixels
|
184 |
+
pixels_3d *= depth1.flatten().unsqueeze(0)
|
185 |
+
|
186 |
+
# Transform 3D points to world coordinates
|
187 |
+
pixels_world = c2w1[:3, :3] @ pixels_3d + c2w1[:3, 3:4]
|
188 |
+
|
189 |
+
# Check if c2w2[:3, :3] is invertible
|
190 |
+
if torch.det(c2w2[:3, :3]) == 0:
|
191 |
+
return 0, False # Calculation failed
|
192 |
+
|
193 |
+
# Project world points to second camera
|
194 |
+
pixels_cam2 = torch.linalg.inv(c2w2[:3, :3]) @ (pixels_world - c2w2[:3, 3:4])
|
195 |
+
pixels_img2 = K2 @ pixels_cam2
|
196 |
+
|
197 |
+
# Normalize homogeneous coordinates
|
198 |
+
pixels_img2 = pixels_img2[:2] / pixels_img2[2]
|
199 |
+
pixels_img2 = pixels_img2.T
|
200 |
+
|
201 |
+
# Filter valid pixels
|
202 |
+
valid_mask = (pixels_img2[:, 0] >= 0) & (pixels_img2[:, 0] < w) & \
|
203 |
+
(pixels_img2[:, 1] >= 0) & (pixels_img2[:, 1] < h)
|
204 |
+
|
205 |
+
pixels_img2 = pixels_img2[valid_mask].long()
|
206 |
+
|
207 |
+
# Compare depths
|
208 |
+
projected_depth = pixels_cam2[2, valid_mask]
|
209 |
+
actual_depth = depth2[pixels_img2[:, 1], pixels_img2[:, 0]]
|
210 |
+
|
211 |
+
depth_diff = torch.abs(projected_depth - actual_depth)
|
212 |
+
depth_threshold = 0.1 # 10cm threshold
|
213 |
+
|
214 |
+
overlap_mask = depth_diff < depth_threshold
|
215 |
+
|
216 |
+
# Calculate IoU
|
217 |
+
intersection = torch.sum(overlap_mask)
|
218 |
+
union = torch.sum(valid_mask) + torch.sum(depth2 > 0) - intersection
|
219 |
+
|
220 |
+
iou = intersection.float() / union.float() if union > 0 else torch.tensor(0.0, device='cuda')
|
221 |
+
|
222 |
+
return iou.item()
|
223 |
+
|
224 |
+
if __name__ == "__main__":
|
225 |
+
data_root = "data/scannetpp_processed"
|
226 |
+
target_root = "data/scannetpp_target"
|
227 |
+
preprocess_scannetpp(data_root, target_root)
|
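The pair generator above buckets candidate pairs by co-visibility so training sees both easy and hard overlaps. A minimal sketch of just the binning logic, with made-up IoU values rather than outputs of calculate_iou:

```python
# Illustrative sketch only: how the (0.3, 0.8) IoU range is cut into sub-intervals
# and how at most one pair per sub-interval is kept. The ious list is synthetic.
num_sub_intervals = 3
lo, hi = 0.3, 0.8
step = (hi - lo) / num_sub_intervals
sub_intervals = [(lo + k * step, lo + (k + 1) * step) for k in range(num_sub_intervals)]

ious = [(0, 1, 0.35), (0, 2, 0.55), (0, 3, 0.58), (0, 4, 0.75)]  # (i, j, mean_iou)
selected, taken = [], [False] * num_sub_intervals
for i, j, iou in ious:
    for k, (a, b) in enumerate(sub_intervals):
        if a <= iou <= b and not taken[k]:
            selected.append((i, j, iou))
            taken[k] = True
            break
print(selected)  # one pair per sub-interval: [(0, 1, 0.35), (0, 2, 0.55), (0, 4, 0.75)]
```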
src/gaussian_head.py
ADDED
@@ -0,0 +1,142 @@
import torch
import torch.nn as nn
from einops import rearrange
from src.utils.gaussian_model import build_covariance
from simple_knn._C import distCUDA2
from src.utils.sh_utils import RGB2SH

class GaussianHead(nn.Module):
    def __init__(self, d_pt_feat=64, **kwargs):
        super().__init__()
        # args
        self.args = kwargs
        self.d_means = 3
        self.d_scales = 3
        self.d_rotations = 4
        self.d_opacities = 1
        self.sh_degree = 3
        self.d_view_dep_features = 3  # RGB
        self.d_sh = (self.sh_degree + 1) ** 2
        self.d_attr = (self.d_scales + self.d_rotations + self.d_opacities + self.d_view_dep_features * self.d_sh)
        # d_gs_feats is expected in the config; the projection then also emits per-Gaussian feature channels
        if self.args.get('d_gs_feats'):
            self.d_attr += self.args['d_gs_feats']

        # Create a mask for the spherical harmonics coefficients.
        # This ensures that at initialization, the coefficients are biased
        # towards having a large DC component and small view-dependent components.
        self.register_buffer(
            "sh_mask",
            torch.ones((self.d_sh,), dtype=torch.float32),
            persistent=False,
        )
        for degree in range(1, self.sh_degree + 1):
            self.sh_mask[degree**2 : (degree + 1) ** 2] = 0.5 * 0.25**degree

        self.gaussian_proj = nn.Linear(d_pt_feat, self.d_attr)

        # Activation functions
        self.scale_activation = torch.exp
        self.rotation_activation = torch.nn.functional.normalize
        self.opacity_activation = torch.sigmoid

    def forward(self, point_transformer_output, lseg_features=None):
        pred1 = {}
        pred2 = {}

        scene_scale = point_transformer_output['scale']    # B, 1, 1
        scene_center = point_transformer_output['center']  # B, 1, 3
        B, H, W, _ = point_transformer_output['shape']
        normalized_means = point_transformer_output['coord']  # B * V * H * W, 3
        colors = point_transformer_output['color']            # B * V * H * W, 3

        # split normalized_means to 2 views
        normalized_means = rearrange(normalized_means, '(b v h w) c -> v b (h w) c', v=2, b=B, h=H, w=W)
        means = normalized_means * scene_scale + scene_center  # V, B, H * W, 3
        means = rearrange(means, 'v b (h w) c -> b (v h w) c', b=B, v=2, h=H, w=W)

        # get features
        feat = point_transformer_output['feat']
        gaussian_attr = self.gaussian_proj(feat)

        # split gaussian attributes (including the per-Gaussian feature channels)
        scales, rotations, opacities, sh_coeffs, gs_feats = torch.split(
            gaussian_attr,
            [
                self.d_scales,
                self.d_rotations,
                self.d_opacities,
                self.d_view_dep_features * self.d_sh,
                self.args['d_gs_feats']
            ],
            dim=-1)

        # scales
        # calculate the distance between each point and its nearest neighbor
        all_dist = torch.stack([torch.sqrt(torch.clamp_min(distCUDA2(pts3d), 0.0000001)) for pts3d in means])  # B, V * H * W
        median_dist = all_dist.median(dim=-1)[0][:, None, None]  # B, 1, 1
        scales = self.scale_activation(scales)
        scales = rearrange(scales, '(b v h w) c -> b (v h w) c', b=B, v=2, h=H, w=W)
        scales = scales * all_dist[..., None]
        # clip scales
        scales = torch.clamp(scales, min=0.1 * median_dist, max=3.0 * median_dist)
        scales = rearrange(scales, 'b (v h w) c -> (b v h w) c', b=B, v=2, h=H, w=W)

        # activation
        rotations = self.rotation_activation(rotations)
        opacities = self.opacity_activation(opacities)

        # build covariance matrix
        covs = build_covariance(scales, rotations)

        # sh_mask
        sh_coeffs = rearrange(sh_coeffs, '(b v h w) (c d) -> (b v h w) c d', b=B, v=2, h=H, w=W, c=self.d_sh, d=self.d_view_dep_features)
        sh_dc = sh_coeffs[..., 0, :]
        sh_rest = sh_coeffs[..., 1:, :]
        if self.args.get('rgb_residual'):
            # denormalize colors
            colors = colors * 0.5 + 0.5
            sh_rgb = RGB2SH(colors)  # (B * V * H * W, 3)
            # add rgb residual to dc component
            sh_dc = sh_dc + sh_rgb
        # concatenate dc and rest
        sh_coeffs = torch.cat([sh_dc[..., None, :], sh_rest], dim=-2)
        sh_coeffs = sh_coeffs * self.sh_mask[None, :, None]

        # lseg_features (the head learns a residual on top of the LSeg features)
        lseg_features = rearrange(lseg_features, '(v b) c h w -> (b v h w) c', b=B, v=2, h=H, w=W)
        gs_feats = gs_feats + lseg_features

        # split to 2 views
        scales = rearrange(scales, '(b v h w) ... -> v b h w ...', v=2, b=B, h=H, w=W)
        rotations = rearrange(rotations, '(b v h w) ... -> v b h w ...', v=2, b=B, h=H, w=W)
        opacities = rearrange(opacities, '(b v h w) ... -> v b h w ...', v=2, b=B, h=H, w=W)
        sh_coeffs = rearrange(sh_coeffs, '(b v h w) ... -> v b h w ...', v=2, b=B, h=H, w=W)
        covs = rearrange(covs, '(b v h w) ... -> v b h w ...', v=2, b=B, h=H, w=W)
        means = rearrange(means, 'b (v h w) ... -> v b h w ...', v=2, b=B, h=H, w=W)
        gs_feats = rearrange(gs_feats, '(b v h w) ... -> v b h w ...', v=2, b=B, h=H, w=W)

        pred1['scales'] = scales[0]
        pred1['rotations'] = rotations[0]
        pred1['covs'] = covs[0]
        pred1['opacities'] = opacities[0]
        pred1['sh_coeffs'] = sh_coeffs[0]
        pred1['means'] = means[0]
        pred1['gs_feats'] = gs_feats[0]

        pred2['scales'] = scales[1]
        pred2['rotations'] = rotations[1]
        pred2['covs'] = covs[1]
        pred2['opacities'] = opacities[1]
        pred2['sh_coeffs'] = sh_coeffs[1]
        pred2['means'] = means[1]
        pred2['gs_feats'] = gs_feats[1]

        return pred1, pred2
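The sh_mask buffer down-weights the view-dependent spherical-harmonics bands at initialization. A quick standalone check of the values it produces, assuming sh_degree=3 as in the head above:

```python
# Sketch: the per-coefficient mask built in GaussianHead.__init__, printed per SH degree.
import torch
sh_degree = 3
d_sh = (sh_degree + 1) ** 2          # 16 coefficients
sh_mask = torch.ones(d_sh)
for degree in range(1, sh_degree + 1):
    sh_mask[degree**2:(degree + 1)**2] = 0.5 * 0.25**degree
print(sh_mask[0].item())   # 1.0 for the DC term
print(sh_mask[1].item())   # 0.125 for degree-1 terms
print(sh_mask[4].item())   # 0.03125 for degree-2 terms
print(sh_mask[9].item())   # 0.0078125 for degree-3 terms
```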
src/infer.py
ADDED
@@ -0,0 +1,23 @@
import argparse
import sys

sys.path.append('.')
from src.model import LSM_MASt3R
from src.utils.visualization_utils import render_video_from_file

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--file_list', type=str, nargs='+', required=True,
                        help='List of input image files or directories')
    parser.add_argument('--model_path', type=str, required=True)
    parser.add_argument('--output_path', type=str, required=True)
    parser.add_argument('--resolution', type=int, default=512)
    parser.add_argument('--n_interp', type=int, default=90)
    parser.add_argument('--fps', type=int, default=30)

    args = parser.parse_args()

    # 1. load model
    model = LSM_MASt3R.from_pretrained(args.model_path)
    # 2. render video
    render_video_from_file(args.file_list, model, args.output_path, resolution=args.resolution, n_interp=args.n_interp, fps=args.fps)
src/losses.py
ADDED
@@ -0,0 +1,193 @@
from submodules.mast3r.dust3r.dust3r.losses import *
from torchmetrics import PeakSignalNoiseRatio, StructuralSimilarityIndexMeasure, JaccardIndex, Accuracy
import lpips
from src.utils.gaussian_model import GaussianModel
from src.utils.cuda_splatting import render, DummyPipeline
from einops import rearrange
from src.utils.camera_utils import get_scaled_camera
from torchvision.utils import save_image
from dust3r.inference import make_batch_symmetric

class L2Loss (LLoss):
    """ Euclidean distance between 3d points """

    def distance(self, a, b):
        return torch.norm(a - b, dim=-1)  # L2 distance

class L1Loss (LLoss):
    """ Manhattan distance between 3d points """

    def distance(self, a, b):
        return torch.abs(a - b).mean()  # L1 distance

L2 = L2Loss()
L1 = L1Loss()

def merge_and_split_predictions(pred1, pred2):
    merged = {}
    for key in pred1.keys():
        merged_pred = torch.stack([pred1[key], pred2[key]], dim=1)
        merged_pred = rearrange(merged_pred, 'b v h w ... -> b (v h w) ...')
        merged[key] = merged_pred

    # Split along the batch dimension
    batch_size = next(iter(merged.values())).shape[0]
    split = [{key: value[i] for key, value in merged.items()} for i in range(batch_size)]

    return split

class GaussianLoss(MultiLoss):
    def __init__(self, ssim_weight=0.2):
        super().__init__()
        self.ssim_weight = ssim_weight
        self.ssim = StructuralSimilarityIndexMeasure(data_range=1.0).cuda()
        self.psnr = PeakSignalNoiseRatio(data_range=1.0).cuda()
        self.lpips_vgg = lpips.LPIPS(net='vgg').cuda()
        self.pipeline = DummyPipeline()
        # bg_color
        self.register_buffer('bg_color', torch.tensor([0.0, 0.0, 0.0]).cuda())

    def get_name(self):
        return f'GaussianLoss(ssim_weight={self.ssim_weight})'

    def compute_loss(self, gt1, gt2, target_view, pred1, pred2, model):
        # render images
        # 1. merge predictions
        pred = merge_and_split_predictions(pred1, pred2)

        # 2. calculate optimal scaling
        pred_pts1 = pred1['means']
        pred_pts2 = pred2['means']
        # convert to camera1 coordinates
        # everything is normalized w.r.t. camera of view1
        valid1 = gt1['valid_mask'].clone()
        valid2 = gt2['valid_mask'].clone()
        in_camera1 = inv(gt1['camera_pose'])
        gt_pts1 = geotrf(in_camera1, gt1['pts3d'].to(in_camera1.device))  # B,H,W,3
        gt_pts2 = geotrf(in_camera1, gt2['pts3d'].to(in_camera1.device))  # B,H,W,3
        scaling = find_opt_scaling(gt_pts1, gt_pts2, pred_pts1, pred_pts2, valid1=valid1, valid2=valid2)

        # 3. render images (need gaussian model, camera, pipeline)
        rendered_images = []
        rendered_feats = []
        gt_images = []

        for i in range(len(pred)):
            # get gaussian model
            gaussians = GaussianModel.from_predictions(pred[i], sh_degree=3)
            # get camera
            ref_camera_extrinsics = gt1['camera_pose'][i]
            target_view_list = [gt1, gt2, target_view]  # use gt1, gt2, and target_view
            for j in range(len(target_view_list)):
                target_extrinsics = target_view_list[j]['camera_pose'][i]
                target_intrinsics = target_view_list[j]['camera_intrinsics'][i]
                image_shape = target_view_list[j]['true_shape'][i]
                scale = scaling[i]
                camera = get_scaled_camera(ref_camera_extrinsics, target_extrinsics, target_intrinsics, scale, image_shape)
                # render (image and features)
                rendered_output = render(camera, gaussians, self.pipeline, self.bg_color)
                rendered_images.append(rendered_output['render'])
                rendered_feats.append(rendered_output['feature_map'])
                gt_images.append(target_view_list[j]['img'][i] * 0.5 + 0.5)

        rendered_images = torch.stack(rendered_images, dim=0)  # B, 3, H, W
        gt_images = torch.stack(gt_images, dim=0)
        rendered_feats = torch.stack(rendered_feats, dim=0)    # B, d_feats, H, W
        rendered_feats = model.feature_expansion(rendered_feats)               # B, 512, H//2, W//2
        gt_feats = model.lseg_feature_extractor.extract_features(gt_images)    # B, 512, H//2, W//2
        image_loss = torch.abs(rendered_images - gt_images).mean()
        feature_loss = torch.abs(rendered_feats - gt_feats).mean()
        loss = image_loss + feature_loss

        # calculate metrics
        with torch.no_grad():
            ssim = self.ssim(rendered_images, gt_images)
            psnr = self.psnr(rendered_images, gt_images)
            lpips = self.lpips_vgg(rendered_images, gt_images).mean()

        return loss, {'ssim': ssim, 'psnr': psnr, 'lpips': lpips, 'image_loss': image_loss, 'feature_loss': feature_loss}

# loss for one batch
def loss_of_one_batch(batch, model, criterion, device, symmetrize_batch=False, use_amp=False, ret=None):
    view1, view2, target_view = batch
    ignore_keys = set(['depthmap', 'dataset', 'label', 'instance', 'idx', 'true_shape', 'rng', 'pts3d'])
    for view in batch:
        for name in view.keys():  # pseudo_focal
            if name in ignore_keys:
                continue
            view[name] = view[name].to(device, non_blocking=True)

    if symmetrize_batch:
        view1, view2 = make_batch_symmetric(batch)

    # Get the actual model if it's distributed
    actual_model = model.module if hasattr(model, 'module') else model

    with torch.cuda.amp.autocast(enabled=bool(use_amp)):
        pred1, pred2 = actual_model(view1, view2)

        # loss is supposed to be symmetric
        with torch.cuda.amp.autocast(enabled=False):
            loss = criterion(view1, view2, target_view, pred1, pred2, actual_model) if criterion is not None else None

    result = dict(view1=view1, view2=view2, target_view=target_view, pred1=pred1, pred2=pred2, loss=loss)
    return result[ret] if ret else result
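A minimal sketch of how loss_of_one_batch is typically driven from a training loop. The data_loader, optimizer, and device names here are placeholders, not part of this file:

```python
# Hypothetical training step using the helpers above.
criterion = GaussianLoss()
for batch in data_loader:                      # yields (view1, view2, target_view)
    out = loss_of_one_batch(batch, model, criterion, device='cuda', use_amp=True)
    loss, metrics = out['loss']                # GaussianLoss returns (loss, metrics dict)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(metrics['psnr'].item(), metrics['ssim'].item())
```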
src/lseg.py
ADDED
@@ -0,0 +1,171 @@
import torch
import torch.nn as nn
from submodules.lang_seg.modules.models.lseg_net import LSegNet, clip

class LSegFeatureExtractor(LSegNet):
    def __init__(self, half_res=True):
        super().__init__(
            labels='',
            backbone='clip_vitl16_384',
            features=256,
            crop_size=224,
            arch_option=0,
            block_depth=0,
            activation='lrelu'
        )

        self.half_res = half_res

    @torch.no_grad()
    def extract_features(self, x):
        # each layer: (b, 1024, h//16, w//16)
        layer_1, layer_2, layer_3, layer_4 = forward_layers(self.pretrained, x)

        # dense feature
        # DPT head
        pretrained = self.pretrained
        layer_1 = pretrained.act_postprocess1[3 : len(pretrained.act_postprocess1)](layer_1)
        layer_2 = pretrained.act_postprocess2[3 : len(pretrained.act_postprocess2)](layer_2)
        layer_3 = pretrained.act_postprocess3[3 : len(pretrained.act_postprocess3)](layer_3)
        layer_4 = pretrained.act_postprocess4[3 : len(pretrained.act_postprocess4)](layer_4)

        # refinenet
        layer_1_rn = self.scratch.layer1_rn(layer_1)
        layer_2_rn = self.scratch.layer2_rn(layer_2)
        layer_3_rn = self.scratch.layer3_rn(layer_3)
        layer_4_rn = self.scratch.layer4_rn(layer_4)

        path_4 = self.scratch.refinenet4(layer_4_rn)
        path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
        path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
        path_1 = self.scratch.refinenet1(path_2, layer_1_rn)

        # (b, 512, h//2, w//2)
        image_features = self.scratch.head1(path_1)
        if self.half_res:
            return image_features

        # (b, 512, h, w)
        image_features = self.scratch.output_conv(image_features)

        return image_features

    @torch.no_grad()
    def decode_feature(self, image_features, labelset=''):
        imshape = image_features.shape

        # encode text
        if labelset == '':
            text = self.text
        else:
            text = clip.tokenize(labelset)

        self.logit_scale = self.logit_scale.to(image_features.device)
        text = text.to(image_features.device)
        text_features = self.clip_pretrained.encode_text(text)
        image_features = image_features.permute(0, 2, 3, 1).reshape(-1, self.out_c)

        # normalized features
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(dim=-1, keepdim=True)

        logits_per_image = self.logit_scale * image_features.half() @ text_features.t()
        out = logits_per_image.float().view(imshape[0], imshape[2], imshape[3], -1).permute(0, 3, 1, 2)

        if self.arch_option in [1, 2]:
            for _ in range(self.block_depth - 1):
                out = self.scratch.head_block(out)
            out = self.scratch.head_block(out, False)

        if self.half_res:
            out = self.scratch.output_conv(out)

        return out

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *args, **kwargs):
        print(f"Loading checkpoint from: {pretrained_model_name_or_path}")
        ckpt = torch.load(pretrained_model_name_or_path, map_location='cpu')
        print(f"Checkpoint loaded. Keys in checkpoint: {ckpt.keys()}")

        print("Processing state dict...")
        new_state_dict = {k[len("net."):]: v for k, v in ckpt['state_dict'].items() if k.startswith("net.")}
        print(f"Processed state dict. Number of keys: {len(new_state_dict)}")

        print("Initializing model...")
        model = cls(*args, **kwargs)

        print("Loading state dict into model...")
        model.load_state_dict(new_state_dict, strict=True)
        print("State dict loaded successfully.")

        print("Cleaning up...")
        del ckpt
        del new_state_dict

        print("Model loading complete.")
        return model

def forward_layers(pretrained, x):
    b, c, h, w = x.shape

    # encoder
    glob = pretrained.model.forward_flex(x)

    layer_1 = pretrained.activations["1"]
    layer_2 = pretrained.activations["2"]
    layer_3 = pretrained.activations["3"]
    layer_4 = pretrained.activations["4"]

    layer_1 = pretrained.act_postprocess1[0:2](layer_1)
    layer_2 = pretrained.act_postprocess2[0:2](layer_2)
    layer_3 = pretrained.act_postprocess3[0:2](layer_3)
    layer_4 = pretrained.act_postprocess4[0:2](layer_4)

    unflatten = nn.Sequential(
        nn.Unflatten(
            2,
            torch.Size(
                [
                    h // pretrained.model.patch_size[1],
                    w // pretrained.model.patch_size[0],
                ]
            ),
        )
    )

    if layer_1.ndim == 3:
        layer_1 = unflatten(layer_1)
    if layer_2.ndim == 3:
        layer_2 = unflatten(layer_2)
    if layer_3.ndim == 3:
        layer_3 = unflatten(layer_3)
    if layer_4.ndim == 3:
        layer_4 = unflatten(layer_4)

    return layer_1, layer_2, layer_3, layer_4
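A short sketch of how the extractor and decoder pair up for open-vocabulary segmentation. The checkpoint path, the images tensor, and the label set are placeholders, not values shipped with this file:

```python
# Hypothetical usage: per-pixel label logits for a batch of normalized images.
extractor = LSegFeatureExtractor.from_pretrained('checkpoints/your_lseg_checkpoint.ckpt').cuda().eval()
feats = extractor.extract_features(images)                        # dense CLIP-aligned features
logits = extractor.decode_feature(feats, ['wall', 'floor', 'chair', 'other'])
labels = logits.argmax(dim=1)                                     # per-pixel class indices
```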
src/model.py
ADDED
@@ -0,0 +1,176 @@
import torch
import torch.nn as nn
import yaml
import sys
sys.path.append(".")
sys.path.append("submodules")
sys.path.append("submodules/mast3r")
from mast3r.model import AsymmetricMASt3R
from src.ptv3 import PTV3
from src.gaussian_head import GaussianHead
from src.utils.points_process import merge_points
from src.losses import GaussianLoss
from src.lseg import LSegFeatureExtractor
import argparse

class LSM_MASt3R(nn.Module):
    def __init__(self,
                 mast3r_config,
                 point_transformer_config,
                 gaussian_head_config,
                 lseg_config,
                 ):

        super().__init__()
        # self.config
        self.config = {
            'mast3r_config': mast3r_config,
            'point_transformer_config': point_transformer_config,
            'gaussian_head_config': gaussian_head_config,
            'lseg_config': lseg_config
        }

        # Initialize AsymmetricMASt3R
        self.mast3r = AsymmetricMASt3R.from_pretrained(**mast3r_config)

        # Freeze MASt3R parameters
        for param in self.mast3r.parameters():
            param.requires_grad = False
        self.mast3r.eval()

        # Initialize PointTransformerV3
        self.point_transformer = PTV3(**point_transformer_config)

        # Initialize the gaussian head
        self.gaussian_head = GaussianHead(**gaussian_head_config)

        # Initialize the lseg feature extractor
        self.lseg_feature_extractor = LSegFeatureExtractor.from_pretrained(**lseg_config)
        for param in self.lseg_feature_extractor.parameters():
            param.requires_grad = False
        self.lseg_feature_extractor.eval()

        # Define two linear layers
        d_gs_feats = gaussian_head_config.get('d_gs_feats', 32)
        self.feature_reduction = nn.Sequential(
            nn.Conv2d(512, d_gs_feats, kernel_size=1),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        )  # (b, 512, h//2, w//2) -> (b, d_features, h, w)

        self.feature_expansion = nn.Sequential(
            nn.Conv2d(d_gs_feats, 512, kernel_size=1),
            nn.Upsample(scale_factor=0.5, mode='bilinear', align_corners=True)
        )  # (b, d_features, h, w) -> (b, 512, h//2, w//2)

    def forward(self, view1, view2):
        # AsymmetricMASt3R forward pass
        mast3r_output = self.mast3r(view1, view2)

        # merge points from two views
        data_dict = merge_points(mast3r_output, view1, view2)

        # PointTransformerV3 forward pass
        point_transformer_output = self.point_transformer(data_dict)

        # extract lseg features
        lseg_features = self.extract_lseg_features(view1, view2)

        # Gaussian head forward pass
        final_output = self.gaussian_head(point_transformer_output, lseg_features)

        return final_output

    def extract_lseg_features(self, view1, view2):
        # concat view1 and view2
        img = torch.cat([view1['img'], view2['img']], dim=0)  # (v*b, 3, h, w)
        # extract features
        lseg_features = self.lseg_feature_extractor.extract_features(img)  # (v*b, 512, h//2, w//2)
        # reduce dimensions
        lseg_features = self.feature_reduction(lseg_features)  # (v*b, d_features, h, w)

        return lseg_features

    @staticmethod
    def from_pretrained(checkpoint_path, device='cuda'):
        # Load the checkpoint
        ckpt = torch.load(checkpoint_path, map_location='cpu')

        # Extract the configuration from the checkpoint
        config = ckpt['args']

        # Create a new instance of LSM_MASt3R
        model = eval(config.model)

        # Load the state dict
        model.load_state_dict(ckpt['model'])

        # Move the model to the specified device
        model = model.to(device)

        return model

    def state_dict(self, destination=None, prefix='', keep_vars=False):
        # Collect the state dict with all parameters
        full_state_dict = super().state_dict(destination=destination, prefix=prefix, keep_vars=keep_vars)

        # Keep only the parameters that are actually trained
        trainable_state_dict = {
            k: v for k, v in full_state_dict.items()
            if not (k.startswith('mast3r.') or k.startswith('lseg_feature_extractor.'))
        }

        return trainable_state_dict

    def load_state_dict(self, state_dict, strict=True):
        # Get the full state dict of the current model
        model_state = super().state_dict()

        # Only update the trainable parameters
        for k in list(state_dict.keys()):
            if k in model_state and not (k.startswith('mast3r.') or k.startswith('lseg_feature_extractor.')):
                model_state[k] = state_dict[k]

        # Load the model with the updated state dict
        super().load_state_dict(model_state, strict=False)

if __name__ == "__main__":
    from torch.utils.data import DataLoader
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', type=str)
    args = parser.parse_args()

    # Load config
    with open("configs/model_config.yaml", "r") as f:
        config = yaml.safe_load(f)
    # Initialize model
    if args.checkpoint is not None:
        model = LSM_MASt3R.from_pretrained(args.checkpoint, device='cuda')
    else:
        model = LSM_MASt3R(**config).to('cuda')

    model.eval()

    # Print model
    print(model)
    # Load dataset
    from src.datasets.scannet import Scannet
    dataset = Scannet(split='train', ROOT="data/scannet_processed", resolution=[(512, 384)])
    # Print dataset
    print(dataset)
    # Test model
    data_loader = DataLoader(dataset, batch_size=3, shuffle=True)
    data = next(iter(data_loader))
    # move data to cuda
    for view in data:
        view['img'] = view['img'].to('cuda')
        view['depthmap'] = view['depthmap'].to('cuda')
        view['camera_pose'] = view['camera_pose'].to('cuda')
        view['camera_intrinsics'] = view['camera_intrinsics'].to('cuda')
    # Forward pass
    output = model(*data[:2])

    # Loss
    loss = GaussianLoss()
    loss_value = loss(*data, *output, model)
    print(loss_value)
src/ptv3.py
ADDED
@@ -0,0 +1,13 @@
from PointTransformerV3.model import *

class PTV3(PointTransformerV3):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def encode(self, data_dict):
        point = Point(data_dict)
        point.serialization(order=self.order, shuffle_orders=self.shuffle_orders)
        point.sparsify()
        point = self.embedding(point)
        point = self.enc(point)
        return point.feats
src/train.py
ADDED
@@ -0,0 +1,73 @@
#!/usr/bin/env python3
# Copyright (C) 2024-present Naver Corporation. All rights reserved.
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
#
# --------------------------------------------------------
# training executable for MASt3R
# --------------------------------------------------------
import sys
sys.path.append('.')
sys.path.append('submodules/mast3r')
from mast3r.model import AsymmetricMASt3R
from mast3r.losses import ConfMatchingLoss, MatchingLoss, APLoss, Regr3D, InfoNCE, Regr3D_ScaleShiftInv
from mast3r.datasets import ARKitScenes, BlendedMVS, Co3d, MegaDepth, ScanNetpp, StaticThings3D, Waymo, WildRGBD

import mast3r.utils.path_to_dust3r  # noqa
# add mast3r classes to dust3r imports
import dust3r.training
dust3r.training.AsymmetricMASt3R = AsymmetricMASt3R
dust3r.training.Regr3D = Regr3D
dust3r.training.Regr3D_ScaleShiftInv = Regr3D_ScaleShiftInv
dust3r.training.MatchingLoss = MatchingLoss
dust3r.training.ConfMatchingLoss = ConfMatchingLoss
dust3r.training.InfoNCE = InfoNCE
dust3r.training.APLoss = APLoss

import dust3r.datasets
dust3r.datasets.ARKitScenes = ARKitScenes
dust3r.datasets.BlendedMVS = BlendedMVS
dust3r.datasets.Co3d = Co3d
dust3r.datasets.MegaDepth = MegaDepth
dust3r.datasets.ScanNetpp = ScanNetpp
dust3r.datasets.StaticThings3D = StaticThings3D
dust3r.datasets.Waymo = Waymo
dust3r.datasets.WildRGBD = WildRGBD
from src.datasets.scannet import Scannet
from src.datasets.scannetpp import Scannetpp
from src.datasets.megadepth import MegaDepth
dust3r.datasets.Scannet = Scannet
dust3r.datasets.Scannetpp = Scannetpp
dust3r.datasets.MegaDepth = MegaDepth

from src.model import LSM_MASt3R
dust3r.training.LSM_MASt3R = LSM_MASt3R
from src.losses import GaussianLoss
dust3r.training.GaussianLoss = GaussianLoss

from dust3r.training import get_args_parser as dust3r_get_args_parser  # noqa
from dust3r.training import train  # noqa

import yaml


def get_args_parser():
    parser = dust3r_get_args_parser()
    parser.prog = 'LSM_MASt3R training'

    # Load the configuration
    with open("configs/model_config.yaml", "r") as f:
        config = yaml.safe_load(f)

    # Convert the config dict to a string of keyword arguments
    config_str = ", ".join(f"{k}={v}" for k, v in config.items())

    # Set the default model string with parameters
    parser.set_defaults(model=f"LSM_MASt3R({config_str})")

    return parser


if __name__ == '__main__':
    args = get_args_parser()
    args = args.parse_args()
    train(args)
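The default --model argument is built by flattening the YAML config into a constructor string that dust3r's trainer later evaluates. A tiny sketch of what that string looks like for a fabricated two-entry config (the keys and values below are made up for illustration):

```python
# Illustrative only: a fabricated config and the model string it becomes.
config = {'mast3r_config': {'pretrained_model_name_or_path': '...'}, 'lseg_config': {'half_res': True}}
config_str = ", ".join(f"{k}={v}" for k, v in config.items())
print(f"LSM_MASt3R({config_str})")
# LSM_MASt3R(mast3r_config={'pretrained_model_name_or_path': '...'}, lseg_config={'half_res': True})
```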
src/utils/camera_utils.py
ADDED
@@ -0,0 +1,60 @@
import math
import torch
from dust3r.utils.geometry import inv
from src.utils.cuda_splatting import DummyCamera

def get_scaled_camera(ref_camera_extrinsics, target_camera_extrinsics, target_camera_intrinsics, scale, image_shape):
    """
    Get a scaled camera from a reference camera to a target camera.
    """

    # get extrinsics (target_camera to ref_camera)
    target_camera_extrinsics = inv(ref_camera_extrinsics) @ target_camera_extrinsics
    # scale translation
    target_camera_extrinsics[:3, 3] = target_camera_extrinsics[:3, 3] * scale
    # invert extrinsics (ref_camera to target_camera)
    target_camera_extrinsics_inv = inv(target_camera_extrinsics)
    # calculate fov
    fovx = 2 * math.atan(image_shape[1] / (2 * target_camera_intrinsics[0, 0]))
    fovy = 2 * math.atan(image_shape[0] / (2 * target_camera_intrinsics[1, 1]))
    # return camera (numpy)
    R = target_camera_extrinsics_inv[:3, :3].cpu().numpy().transpose()  # R.transpose() : ref_camera_2_target_camera
    T = target_camera_extrinsics_inv[:3, 3].cpu().numpy()               # T : ref_camera_2_target_camera
    image_shape = image_shape.cpu().numpy()
    return DummyCamera(R, T, fovx, fovy, image_shape[1], image_shape[0])

def move_c2w_along_z(extrinsics: torch.Tensor, distance: float) -> torch.Tensor:
    """
    Move multiple Camera-to-World (C2W) matrices backwards so that each camera
    retreats from the origin along its own Z axis.

    Args:
        extrinsics (torch.Tensor): tensor of shape [N, 4, 4] containing N C2W matrices.
        distance (float): distance to move backwards.

    Returns:
        torch.Tensor: updated C2W matrices with the same shape as the input.
    """
    # Make sure the input is a batch of 4x4 matrices
    assert extrinsics.dim() == 3 and extrinsics.shape[1:] == (4, 4), \
        "extrinsics must be a tensor of shape [N, 4, 4]"

    # Work on a copy so the original matrices are not modified
    updated_extrinsics = extrinsics.clone()

    # Iterate over each C2W matrix
    for i in range(updated_extrinsics.shape[0]):
        # Extract the rotation matrix R and the translation vector t
        R = updated_extrinsics[i, :3, :3]  # shape [3, 3]
        t = updated_extrinsics[i, :3, 3]   # shape [3]

        # The camera's Z axis direction (third column of R)
        z_axis = R[:, 2]  # shape [3]

        # New translation: move backwards along the Z axis
        t_new = t - distance * z_axis

        # Update the translation part of the C2W matrix
        updated_extrinsics[i, :3, 3] = t_new

    return updated_extrinsics
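A quick sanity check of move_c2w_along_z on identity poses; this is a sketch only, the poses are synthetic:

```python
# Sketch: pushing two identity poses back by 0.5 along their own +Z axis.
import torch
poses = torch.eye(4).repeat(2, 1, 1)          # two cameras at the origin
moved = move_c2w_along_z(poses, 0.5)
print(moved[0, :3, 3])                        # tensor([ 0.0000,  0.0000, -0.5000])
```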
src/utils/cuda_splatting.py
ADDED
@@ -0,0 +1,216 @@
#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact [email protected]
#
import numpy as np
import torch
import math
from diff_gaussian_rasterization import GaussianRasterizationSettings, GaussianRasterizer
from .gaussian_model import GaussianModel
from .sh_utils import eval_sh
from .graphics_utils import getWorld2View2, getProjectionMatrix

class DummyCamera:
    def __init__(self, R, T, FoVx, FoVy, W, H):
        self.projection_matrix = getProjectionMatrix(znear=0.01, zfar=100.0, fovX=FoVx, fovY=FoVy).transpose(0, 1).cuda()
        self.R = R
        self.T = T
        self.world_view_transform = torch.tensor(getWorld2View2(R, T, np.array([0, 0, 0]), 1.0)).transpose(0, 1).cuda()
        self.full_proj_transform = (self.world_view_transform.unsqueeze(0).bmm(self.projection_matrix.unsqueeze(0))).squeeze(0)
        self.camera_center = self.world_view_transform.inverse()[3, :3]
        self.image_width = W
        self.image_height = H
        self.FoVx = FoVx
        self.FoVy = FoVy

class DummyPipeline:
    convert_SHs_python = False
    compute_cov3D_python = False
    debug = False

def calculate_fov(output_width, output_height, focal_length, aspect_ratio=1.0, invert_y=False):
    fovx = 2 * math.atan((output_width / (2 * focal_length)))
    fovy = 2 * math.atan((output_height / aspect_ratio) / (2 * focal_length))

    if invert_y:
        fovy = -fovy

    return fovx, fovy

def render(viewpoint_camera, pc : GaussianModel, pipe, bg_color : torch.Tensor, scaling_modifier = 1.0, override_color = None):
    """
    Render the scene.

    Background tensor (bg_color) must be on GPU!
    """

    # Create zero tensor. We will use it to make pytorch return gradients of the 2D (screen-space) means
    screenspace_points = torch.zeros_like(pc.get_xyz, dtype=pc.get_xyz.dtype, requires_grad=True, device="cuda") + 0
    try:
        screenspace_points.retain_grad()
    except:
        pass

    # Set up rasterization configuration
    tanfovx = math.tan(viewpoint_camera.FoVx * 0.5)
    tanfovy = math.tan(viewpoint_camera.FoVy * 0.5)

    raster_settings = GaussianRasterizationSettings(
        image_height=int(viewpoint_camera.image_height),
        image_width=int(viewpoint_camera.image_width),
        tanfovx=tanfovx,
        tanfovy=tanfovy,
        bg=bg_color,
        scale_modifier=scaling_modifier,
        viewmatrix=viewpoint_camera.world_view_transform,
        projmatrix=viewpoint_camera.full_proj_transform,
        sh_degree=pc.active_sh_degree,
        campos=viewpoint_camera.camera_center,
        prefiltered=False,
        debug=pipe.debug
    )

    rasterizer = GaussianRasterizer(raster_settings=raster_settings)

    means3D = pc.get_xyz
    means2D = screenspace_points
    opacity = pc.get_opacity

    # If precomputed 3d covariance is provided, use it. If not, then it will be computed from
    # scaling / rotation by the rasterizer.
    scales = None
    rotations = None
    cov3D_precomp = None
    if pipe.compute_cov3D_python:
        cov3D_precomp = pc.get_covariance(scaling_modifier)
    else:
        scales = pc.get_scaling
        rotations = pc.get_rotation

    # If precomputed colors are provided, use them. Otherwise, if it is desired to precompute colors
    # from SHs in Python, do it. If not, then SH -> RGB conversion will be done by rasterizer.
    shs = None
    colors_precomp = None
    if override_color is None:
        if pipe.convert_SHs_python:
            shs_view = pc.get_features.transpose(1, 2).view(-1, 3, (pc.max_sh_degree + 1) ** 2)
            dir_pp = (pc.get_xyz - viewpoint_camera.camera_center.repeat(pc.get_features.shape[0], 1))
            dir_pp_normalized = dir_pp / dir_pp.norm(dim=1, keepdim=True)
            sh2rgb = eval_sh(pc.active_sh_degree, shs_view, dir_pp_normalized)
            colors_precomp = torch.clamp_min(sh2rgb + 0.5, 0.0)
        else:
            shs = pc.get_features
    else:
        colors_precomp = override_color
    semantic_feature = pc.get_semantic_feature

    # Rasterize visible Gaussians to image, obtain their radii (on screen).
    rendered_image, feature_map, radii, depth = rasterizer(
        means3D = means3D,
        means2D = means2D,
        shs = shs,
        colors_precomp = colors_precomp,
        semantic_feature = semantic_feature,
        opacities = opacity,
        scales = scales,
        rotations = rotations,
        cov3D_precomp = cov3D_precomp)

    # Those Gaussians that were frustum culled or had a radius of 0 were not visible.
    # They will be excluded from value updates used in the splitting criteria.
    return {"render": rendered_image,
            "viewspace_points": screenspace_points,
            "visibility_filter": radii > 0,
            "radii": radii,
            "feature_map": feature_map,
            "depth": depth}
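Both calculate_fov and get_scaled_camera rely on the standard pinhole relation fov = 2 * atan(size / (2 * f)). A quick numeric check with made-up focal length and image size:

```python
# Sketch: a 640-pixel-wide image with a 320-pixel focal length gives a 90-degree horizontal FoV.
import math
fovx, fovy = calculate_fov(output_width=640, output_height=480, focal_length=320)
print(math.degrees(fovx))   # 90.0
print(math.degrees(fovy))   # ~73.7
```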
src/utils/gaussian_model.py
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
from einops import rearrange
|
4 |
+
import numpy as np
|
5 |
+
from plyfile import PlyData, PlyElement
|
6 |
+
from os import makedirs, path
|
7 |
+
from errno import EEXIST
|
8 |
+
|
9 |
+
def mkdir_p(folder_path):
|
10 |
+
# Creates a directory. equivalent to using mkdir -p on the command line
|
11 |
+
try:
|
12 |
+
        makedirs(folder_path)
    except OSError as exc:  # Python >2.5
        if exc.errno == EEXIST and path.isdir(folder_path):
            pass
        else:
            raise

def RGB2SH(rgb):
    return (rgb - 0.5) / C0

C0 = 0.28209479177387814

# https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/transforms/rotation_conversions.py
def quaternion_to_matrix(
    quaternions,
    eps=1e-8,
):
    # Order changed to match scipy format!
    i, j, k, r = torch.unbind(quaternions, dim=-1)
    two_s = 2 / ((quaternions * quaternions).sum(dim=-1) + eps)

    o = torch.stack(
        (
            1 - two_s * (j * j + k * k),
            two_s * (i * j - k * r),
            two_s * (i * k + j * r),
            two_s * (i * j + k * r),
            1 - two_s * (i * i + k * k),
            two_s * (j * k - i * r),
            two_s * (i * k - j * r),
            two_s * (j * k + i * r),
            1 - two_s * (i * i + j * j),
        ),
        -1,
    )
    return rearrange(o, "... (i j) -> ... i j", i=3, j=3)


def build_covariance(
    scale,
    rotation_xyzw,
):
    scale = scale.diag_embed()
    rotation = quaternion_to_matrix(rotation_xyzw)
    return (
        rotation
        @ scale
        @ rearrange(scale, "... i j -> ... j i")
        @ rearrange(rotation, "... i j -> ... j i")
    )

def inverse_sigmoid(x):
    return torch.log(x/(1-x))

class GaussianModel:

    def __init__(self, sh_degree : int):
        self.active_sh_degree = 0
        self.max_sh_degree = sh_degree
        self._xyz = torch.empty(0)
        self._features_dc = torch.empty(0)
        self._features_rest = torch.empty(0)
        self._scaling = torch.empty(0)
        self._rotation = torch.empty(0)
        self._opacity = torch.empty(0)
        self.max_radii2D = torch.empty(0)
        self.xyz_gradient_accum = torch.empty(0)
        self.denom = torch.empty(0)
        self.optimizer = None
        self.percent_dense = 0
        self.spatial_lr_scale = 0
        self._semantic_feature = torch.empty(0)

    @property
    def get_scaling(self):
        return self._scaling

    @property
    def get_rotation(self):
        return self._rotation

    @property
    def get_xyz(self):
        return self._xyz

    @property
    def get_features(self):
        features_dc = self._features_dc
        features_rest = self._features_rest
        return torch.cat((features_dc, features_rest), dim=1)

    @property
    def get_opacity(self):
        return self._opacity

    @property
    def get_semantic_feature(self):
        return self._semantic_feature

    def construct_list_of_attributes(self):
        l = ['x', 'y', 'z', 'nx', 'ny', 'nz']
        # All channels except the 3 DC
        for i in range(self._features_dc.shape[1]*self._features_dc.shape[2]):
            l.append('f_dc_{}'.format(i))
        for i in range(self._features_rest.shape[1]*self._features_rest.shape[2]):
            l.append('f_rest_{}'.format(i))

        l.append('opacity')
        for i in range(self._scaling.shape[1]):
            l.append('scale_{}'.format(i))
        for i in range(self._rotation.shape[1]):
            l.append('rot_{}'.format(i))
        # Add semantic features
        for i in range(self._semantic_feature.shape[1]*self._semantic_feature.shape[2]):
            l.append('semantic_{}'.format(i))
        return l

    @staticmethod
    def from_predictions(pred, sh_degree):
        gaussians = GaussianModel(sh_degree=sh_degree)
        gaussians._xyz = pred['means']
        gaussians._features_dc = pred['sh_coeffs'][:, :1]  # N, 1, d_sh
        gaussians._features_rest = pred['sh_coeffs'][:, 1:]  # N, d_sh-1, d_sh
        gaussians._opacity = pred['opacities']  # N, 1
        gaussians._scaling = pred['scales']  # N, 3, 3
        gaussians._rotation = pred['rotations']  # N, 4
        gaussians._semantic_feature = pred['gs_feats'][:, None, :]  # N, 1, d_feats
        return gaussians

    def save_ply(self, path):
        mkdir_p(os.path.dirname(path))

        xyz = self._xyz.detach().cpu().numpy()
        normals = np.zeros_like(xyz)
        f_dc = self._features_dc.detach().transpose(1, 2).flatten(start_dim=1).contiguous().cpu().numpy()
        f_rest = self._features_rest.detach().transpose(1, 2).flatten(start_dim=1).contiguous().cpu().numpy()
        opacities = inverse_sigmoid(self._opacity).detach().cpu().numpy()
        scale = torch.log(self._scaling).detach().cpu().numpy()
        rotation = self._rotation.detach().cpu().numpy()
        semantic_feature = self._semantic_feature.detach().transpose(1, 2).flatten(start_dim=1).contiguous().cpu().numpy()

        dtype_full = [(attribute, 'f4') for attribute in self.construct_list_of_attributes()]

        elements = np.empty(xyz.shape[0], dtype=dtype_full)
        attributes = np.concatenate((xyz, normals, f_dc, f_rest, opacities, scale, rotation, semantic_feature), axis=1)
        # attributes = np.concatenate((xyz, normals, f_dc, f_rest, opacities, scale, rotation), axis=1)
        elements[:] = list(map(tuple, attributes))
        el = PlyElement.describe(elements, 'vertex')
        PlyData([el]).write(path)
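A minimal usage sketch for the prediction-to-Gaussians path above (`from_predictions` followed by `save_ply`). The tensor shapes and the semantic-feature width are illustrative assumptions inferred from the inline comments and from what `save_ply` concatenates, not values fixed by the code:

```python
import torch
from src.utils.gaussian_model import GaussianModel

N = 1024  # number of Gaussians (arbitrary)
pred = {
    'means': torch.randn(N, 3),
    'sh_coeffs': torch.randn(N, 16, 3),           # (3 + 1) ** 2 = 16 coefficients for sh_degree=3
    'opacities': torch.rand(N, 1) * 0.98 + 0.01,  # in (0, 1); save_ply applies inverse_sigmoid
    'scales': torch.rand(N, 3) + 1e-3,            # positive per-axis scales, 2-D so save_ply can log/concatenate them
    'rotations': torch.randn(N, 4),               # quaternions
    'gs_feats': torch.randn(N, 512),              # per-Gaussian semantic feature; width chosen here for illustration
}

gaussians = GaussianModel.from_predictions(pred, sh_degree=3)
gaussians.save_ply('outputs/gaussians.ply')
```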
src/utils/graphics_utils.py
ADDED
@@ -0,0 +1,77 @@
#
# Copyright (C) 2023, Inria
# GRAPHDECO research group, https://team.inria.fr/graphdeco
# All rights reserved.
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact [email protected]
#

import torch
import math
import numpy as np
from typing import NamedTuple

class BasicPointCloud(NamedTuple):
    points : np.array
    colors : np.array
    normals : np.array

def geom_transform_points(points, transf_matrix):
    P, _ = points.shape
    ones = torch.ones(P, 1, dtype=points.dtype, device=points.device)
    points_hom = torch.cat([points, ones], dim=1)
    points_out = torch.matmul(points_hom, transf_matrix.unsqueeze(0))

    denom = points_out[..., 3:] + 0.0000001
    return (points_out[..., :3] / denom).squeeze(dim=0)

def getWorld2View(R, t):
    Rt = np.zeros((4, 4))
    Rt[:3, :3] = R.transpose()
    Rt[:3, 3] = t
    Rt[3, 3] = 1.0
    return np.float32(Rt)

def getWorld2View2(R, t, translate=np.array([.0, .0, .0]), scale=1.0):
    Rt = np.zeros((4, 4))
    Rt[:3, :3] = R.transpose()
    Rt[:3, 3] = t
    Rt[3, 3] = 1.0

    C2W = np.linalg.inv(Rt)
    cam_center = C2W[:3, 3]
    cam_center = (cam_center + translate) * scale
    C2W[:3, 3] = cam_center
    Rt = np.linalg.inv(C2W)
    return np.float32(Rt)

def getProjectionMatrix(znear, zfar, fovX, fovY):
    tanHalfFovY = math.tan((fovY / 2))
    tanHalfFovX = math.tan((fovX / 2))

    top = tanHalfFovY * znear
    bottom = -top
    right = tanHalfFovX * znear
    left = -right

    P = torch.zeros(4, 4)

    z_sign = 1.0

    P[0, 0] = 2.0 * znear / (right - left)
    P[1, 1] = 2.0 * znear / (top - bottom)
    P[0, 2] = (right + left) / (right - left)
    P[1, 2] = (top + bottom) / (top - bottom)
    P[3, 2] = z_sign
    P[2, 2] = z_sign * zfar / (zfar - znear)
    P[2, 3] = -(zfar * znear) / (zfar - znear)
    return P

def fov2focal(fov, pixels):
    return pixels / (2 * math.tan(fov / 2))

def focal2fov(focal, pixels):
    return 2*math.atan(pixels/(2*focal))
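A small sketch of how the focal/field-of-view helpers and `getProjectionMatrix` above fit together; the image size, focal length, and clipping planes are arbitrary illustrative values:

```python
from src.utils.graphics_utils import fov2focal, focal2fov, getProjectionMatrix

width, height = 640, 480
focal = 500.0
fov_x = focal2fov(focal, width)    # 2 * atan(width / (2 * focal)), in radians
fov_y = focal2fov(focal, height)
assert abs(fov2focal(fov_x, width) - focal) < 1e-6  # the two helpers are inverses

# 4x4 perspective projection matrix for these fields of view
P = getProjectionMatrix(znear=0.01, zfar=100.0, fovX=fov_x, fovY=fov_y)
print(P.shape)  # torch.Size([4, 4])
```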
src/utils/points_process.py
ADDED
@@ -0,0 +1,37 @@
import torch
from einops import rearrange

# merge points from two views and add color information
def merge_points(mast3r_output, view1, view2, grid_size=0.01):
    # get points from mast3r_output
    points1 = mast3r_output[0]['pts3d'].detach()  # B, H, W, 3
    points2 = mast3r_output[1]['pts3d_in_other_view'].detach()  # B, H, W, 3
    shape = points1.shape
    # add color information
    colors = torch.stack([view1['img'], view2['img']], dim=1)  # B, V, 3, H, W
    colors = rearrange(colors, 'b v c h w -> b (v h w) c')  # B, V * H * W, 3
    # merge points
    points = torch.stack([points1, points2], dim=1)  # B, V, H, W, 3
    points = rearrange(points, 'b v h w c -> b (v h w) c')  # B, V * H * W, 3
    B, N, _ = points.shape
    offset = torch.arange(1, B + 1, device=points.device) * N
    # Center and normalize points
    center = torch.mean(points, dim=1, keepdim=True)
    points = points - center
    scale = torch.max(torch.norm(points, dim=2, keepdim=True), dim=1, keepdim=True)[0]
    points = points / scale
    # concat points and colors
    feat = torch.cat([points, colors], dim=-1)  # B, V * H * W, 6

    data_dict = {
        'coord': rearrange(points, 'b n c -> (b n) c'),
        'color': rearrange(colors, 'b n c -> (b n) c'),
        'feat': rearrange(feat, 'b n c -> (b n) c'),
        'offset': offset,
        'grid_size': grid_size,
        'center': center,
        'scale': scale,
        'shape': shape,
    }

    return data_dict
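An illustrative call of `merge_points` with dummy tensors standing in for the MASt3R outputs and the two input views (shapes follow the inline comments above; in the real pipeline the model predictions are passed here):

```python
import torch
from src.utils.points_process import merge_points

B, H, W = 1, 224, 224
# Dummy stand-ins for the two-view MASt3R predictions and the input views
mast3r_output = (
    {'pts3d': torch.randn(B, H, W, 3)},
    {'pts3d_in_other_view': torch.randn(B, H, W, 3)},
)
view1 = {'img': torch.rand(B, 3, H, W)}
view2 = {'img': torch.rand(B, 3, H, W)}

data_dict = merge_points(mast3r_output, view1, view2, grid_size=0.01)
print(data_dict['coord'].shape)  # (B * 2 * H * W, 3), centered and scaled to the unit sphere
```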
src/utils/sh_utils.py
ADDED
@@ -0,0 +1,117 @@
# Copyright 2021 The PlenOctree Authors.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.


C0 = 0.28209479177387814
C1 = 0.4886025119029199
C2 = [
    1.0925484305920792,
    -1.0925484305920792,
    0.31539156525252005,
    -1.0925484305920792,
    0.5462742152960396
]
C3 = [
    -0.5900435899266435,
    2.890611442640554,
    -0.4570457994644658,
    0.3731763325901154,
    -0.4570457994644658,
    1.445305721320277,
    -0.5900435899266435
]
C4 = [
    2.5033429417967046,
    -1.7701307697799304,
    0.9461746957575601,
    -0.6690465435572892,
    0.10578554691520431,
    -0.6690465435572892,
    0.47308734787878004,
    -1.7701307697799304,
    0.6258357354491761,
]


def eval_sh(deg, sh, dirs):
    """
    Evaluate spherical harmonics at unit directions
    using hardcoded SH polynomials.
    Works with torch/np/jnp.
    ... Can be 0 or more batch dimensions.
    Args:
        deg: int SH deg. Currently, 0-3 supported
        sh: jnp.ndarray SH coeffs [..., C, (deg + 1) ** 2]
        dirs: jnp.ndarray unit directions [..., 3]
    Returns:
        [..., C]
    """
    assert deg <= 4 and deg >= 0
    coeff = (deg + 1) ** 2
    assert sh.shape[-1] >= coeff

    result = C0 * sh[..., 0]
    if deg > 0:
        x, y, z = dirs[..., 0:1], dirs[..., 1:2], dirs[..., 2:3]
        result = (result -
                  C1 * y * sh[..., 1] +
                  C1 * z * sh[..., 2] -
                  C1 * x * sh[..., 3])

        if deg > 1:
            xx, yy, zz = x * x, y * y, z * z
            xy, yz, xz = x * y, y * z, x * z
            result = (result +
                      C2[0] * xy * sh[..., 4] +
                      C2[1] * yz * sh[..., 5] +
                      C2[2] * (2.0 * zz - xx - yy) * sh[..., 6] +
                      C2[3] * xz * sh[..., 7] +
                      C2[4] * (xx - yy) * sh[..., 8])

            if deg > 2:
                result = (result +
                          C3[0] * y * (3 * xx - yy) * sh[..., 9] +
                          C3[1] * xy * z * sh[..., 10] +
                          C3[2] * y * (4 * zz - xx - yy) * sh[..., 11] +
                          C3[3] * z * (2 * zz - 3 * xx - 3 * yy) * sh[..., 12] +
                          C3[4] * x * (4 * zz - xx - yy) * sh[..., 13] +
                          C3[5] * z * (xx - yy) * sh[..., 14] +
                          C3[6] * x * (xx - 3 * yy) * sh[..., 15])

                if deg > 3:
                    result = (result + C4[0] * xy * (xx - yy) * sh[..., 16] +
                              C4[1] * yz * (3 * xx - yy) * sh[..., 17] +
                              C4[2] * xy * (7 * zz - 1) * sh[..., 18] +
                              C4[3] * yz * (7 * zz - 3) * sh[..., 19] +
                              C4[4] * (zz * (35 * zz - 30) + 3) * sh[..., 20] +
                              C4[5] * xz * (7 * zz - 3) * sh[..., 21] +
                              C4[6] * (xx - yy) * (7 * zz - 1) * sh[..., 22] +
                              C4[7] * xz * (xx - 3 * yy) * sh[..., 23] +
                              C4[8] * (xx * (xx - 3 * yy) - yy * (3 * xx - yy)) * sh[..., 24])
    return result

def RGB2SH(rgb):
    return (rgb - 0.5) / C0

def SH2RGB(sh):
    return sh * C0 + 0.5
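A quick sanity-check sketch for the helpers above: `RGB2SH` and `SH2RGB` are exact inverses, and a degree-0 `eval_sh` call reduces to scaling the DC coefficient by `C0`:

```python
import torch
from src.utils.sh_utils import RGB2SH, SH2RGB, eval_sh

rgb = torch.rand(5, 3)
sh0 = RGB2SH(rgb)                          # DC coefficients, shape (5, 3)
assert torch.allclose(SH2RGB(sh0), rgb)    # exact round trip

sh = sh0.unsqueeze(-1)                     # [..., C, (deg + 1) ** 2] layout with deg = 0
dirs = torch.nn.functional.normalize(torch.randn(5, 3), dim=-1)
out = eval_sh(0, sh, dirs)                 # degree 0: just C0 * sh[..., 0]
assert torch.allclose(out, rgb - 0.5, atol=1e-6)
```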
src/utils/visualization_utils.py
ADDED
@@ -0,0 +1,355 @@
import sys
import os
import numpy as np
import scipy.interpolate
import PIL
import torch
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import moviepy.editor as mpy

sys.path.append('submodules/mast3r/dust3r')
from dust3r.utils.image import heif_support_enabled, exif_transpose, _resize_pil_image, ImgNorm
from dust3r.image_pairs import make_pairs
from dust3r.inference import inference
from dust3r.cloud_opt import global_aligner, GlobalAlignerMode

sys.path.append('.')
from src.utils.cuda_splatting import render, DummyPipeline
from src.utils.gaussian_model import GaussianModel
from src.utils.camera_utils import get_scaled_camera
from src.losses import merge_and_split_predictions
from src.utils.camera_utils import move_c2w_along_z

from einops import rearrange
LABELS = ['wall', 'floor', 'ceiling', 'chair', 'table', 'sofa', 'bed', 'other']
NUM_LABELS = len(LABELS) + 1
PALLETE = plt.cm.get_cmap('tab10', NUM_LABELS)
COLORS_LIST = [PALLETE(i)[:3] for i in range(NUM_LABELS)]
COLORS = torch.tensor(COLORS_LIST, dtype=torch.float32)

def load_images(folder_or_list, size, square_ok=False, verbose=True, save_dir=None):
    """ open and convert all images in a list or folder to proper input format for DUSt3R
    """
    if isinstance(folder_or_list, str):
        if verbose:
            print(f'>> Loading images from {folder_or_list}')
        root, folder_content = folder_or_list, sorted(os.listdir(folder_or_list))

    elif isinstance(folder_or_list, list):
        if verbose:
            print(f'>> Loading a list of {len(folder_or_list)} images')
        root, folder_content = '', folder_or_list

    else:
        raise ValueError(f'bad {folder_or_list=} ({type(folder_or_list)})')

    supported_images_extensions = ['.jpg', '.jpeg', '.png']
    if heif_support_enabled:
        supported_images_extensions += ['.heic', '.heif']
    supported_images_extensions = tuple(supported_images_extensions)

    imgs = []
    for path in folder_content:
        if not path.lower().endswith(supported_images_extensions):
            continue
        img = exif_transpose(PIL.Image.open(os.path.join(root, path))).convert('RGB')
        W1, H1 = img.size
        if size == 224:
            # resize short side to 224 (then crop)
            img = _resize_pil_image(img, round(size * max(W1/H1, H1/W1)))
        else:
            # resize long side to 512
            img = _resize_pil_image(img, size)
        W, H = img.size
        cx, cy = W//2, H//2
        if size == 224:
            half = min(cx, cy)
            img = img.crop((cx-half, cy-half, cx+half, cy+half))
        else:
            halfw, halfh = ((2*cx)//32)*16, ((2*cy)//32)*16
            if not (square_ok) and W == H:
                halfh = 3*halfw/4
            img = img.crop((cx-halfw, cy-halfh, cx+halfw, cy+halfh))

        W2, H2 = img.size
        if verbose:
            print(f' - adding {path} with resolution {W1}x{H1} --> {W2}x{H2}')

        # Save the processed image if save_dir is provided
        if save_dir:
            os.makedirs(save_dir, exist_ok=True)
            save_path = os.path.join(save_dir, f"processed_{len(imgs):03d}.png")
            img.save(save_path)
            if verbose:
                print(f' - saved processed image to {save_path}')

        imgs.append(dict(img=ImgNorm(img)[None], true_shape=np.int32(
            [img.size[::-1]]), idx=len(imgs), instance=str(len(imgs))))

    assert imgs, 'no images found at ' + root
    if verbose:
        print(f' (Found {len(imgs)} images)')
    return imgs

def normalize(x):
    """Normalization helper function."""
    return x / np.linalg.norm(x)

def viewmatrix(lookdir, up, position):
    """Construct lookat view matrix."""
    vec2 = normalize(lookdir)
    vec0 = normalize(np.cross(up, vec2))
    vec1 = normalize(np.cross(vec2, vec0))
    m = np.stack([vec0, vec1, vec2, position], axis=1)
    return m

def poses_to_points(poses, dist):
    """Converts from pose matrices to (position, lookat, up) format."""
    pos = poses[:, :3, -1]
    lookat = poses[:, :3, -1] - dist * poses[:, :3, 2]
    up = poses[:, :3, -1] + dist * poses[:, :3, 1]
    return np.stack([pos, lookat, up], 1)

def points_to_poses(points):
    """Converts from (position, lookat, up) format to pose matrices."""
    return np.array([viewmatrix(p - l, u - p, p) for p, l, u in points])

def interp(points, n, k, s):
    """Runs multidimensional B-spline interpolation on the input points."""
    sh = points.shape
    pts = np.reshape(points, (sh[0], -1))
    k = min(k, sh[0] - 1)
    tck, _ = scipy.interpolate.splprep(pts.T, k=k, s=s)
    u = np.linspace(0, 1, n, endpoint=False)
    new_points = np.array(scipy.interpolate.splev(u, tck))
    new_points = np.reshape(new_points.T, (n, sh[1], sh[2]))
    return new_points

def generate_interpolated_path(poses, n_interp, spline_degree=5,
                               smoothness=.03, rot_weight=.1):
    """Creates a smooth spline path between input keyframe camera poses.

    Spline is calculated with poses in format (position, lookat-point, up-point).

    Args:
        poses: (n, 3, 4) array of input pose keyframes.
        n_interp: returned path will have n_interp * (n - 1) total poses.
        spline_degree: polynomial degree of B-spline.
        smoothness: parameter for spline smoothing, 0 forces exact interpolation.
        rot_weight: relative weighting of rotation/translation in spline solve.

    Returns:
        Array of new camera poses with shape (n_interp * (n - 1), 3, 4).
    """

    points = poses_to_points(poses, dist=rot_weight)
    new_points = interp(points,
                        n_interp * (points.shape[0] - 1),
                        k=spline_degree,
                        s=smoothness)
    return points_to_poses(new_points)

def batch_visualize_tensor_global_pca(tensor_batch, num_components=3):
    B, C, H, W = tensor_batch.shape

    tensor_flat_all = tensor_batch.reshape(B, C, -1).permute(1, 0, 2).reshape(C, -1).T

    tensor_flat_all_np = tensor_flat_all.cpu().numpy()

    scaler = StandardScaler()
    tensor_flat_all_np = scaler.fit_transform(tensor_flat_all_np)

    pca = PCA(n_components=num_components)
    tensor_reduced_all_np = pca.fit_transform(tensor_flat_all_np)

    tensor_reduced_all = torch.tensor(tensor_reduced_all_np, dtype=tensor_batch.dtype).T.reshape(num_components, B, H * W).permute(1, 0, 2)

    output_tensor = torch.zeros((B, 3, H, W))

    for i in range(B):
        tensor_reduced = tensor_reduced_all[i].reshape(num_components, H, W)
        tensor_reduced -= tensor_reduced.min()
        tensor_reduced /= tensor_reduced.max()
        output_tensor[i] = tensor_reduced[:3]

    return output_tensor

def depth_to_colormap(depth_tensor, colormap='jet'):
    B, _, _, _ = depth_tensor.shape

    depth_tensor = (depth_tensor - depth_tensor.min()) / (depth_tensor.max() - depth_tensor.min())

    depth_np = depth_tensor.squeeze(1).cpu().numpy()

    cmap = plt.get_cmap(colormap)
    colored_images = []

    for i in range(B):
        colored_image = cmap(depth_np[i])
        colored_images.append(colored_image[..., :3])

    colored_tensor = torch.tensor(np.array(colored_images), dtype=torch.float32).permute(0, 3, 1, 2)

    return colored_tensor

def save_video(frames, video_path, fps=24):
    clips = [mpy.ImageClip(frame).set_duration(1/fps) for frame in frames]
    video = mpy.concatenate_videoclips(clips, method="compose")
    video.write_videofile(video_path, fps=fps)

def tensors_to_videos(all_images, all_depth_vis, all_fmap_vis, all_sems_vis, video_dir='videos', fps=24):
    B, C, H, W = all_images.shape
    assert all_depth_vis.shape == (B, C, H, W)
    assert all_fmap_vis.shape == (B, C, H, W)
    assert all_sems_vis.shape == (B, C, H, W)
    os.makedirs(video_dir, exist_ok=True)

    all_images = (all_images.permute(0, 2, 3, 1).cpu().numpy() * 255).astype(np.uint8)
    all_depth_vis = (all_depth_vis.permute(0, 2, 3, 1).cpu().numpy() * 255).astype(np.uint8)
    all_fmap_vis = (all_fmap_vis.permute(0, 2, 3, 1).cpu().numpy() * 255).astype(np.uint8)
    all_sems_vis = (all_sems_vis.permute(0, 2, 3, 1).cpu().numpy() * 255).astype(np.uint8)

    save_video(all_images, os.path.join(video_dir, 'output_images_video.mp4'), fps=fps)
    save_video(all_depth_vis, os.path.join(video_dir, 'output_depth_video.mp4'), fps=fps)
    save_video(all_fmap_vis, os.path.join(video_dir, 'output_fmap_video.mp4'), fps=fps)
    # save_video(all_sems_vis, os.path.join(video_dir, 'output_sems_video.mp4'), fps=fps)

    print(f'Videos saved to {video_dir}')

def transfer_images_to_device(images, device):
    """
    Transfer the loaded images to the specified device.

    Args:
        images (list): List of dictionaries containing image data.
        device (str or torch.device): The device to transfer the data to.

    Returns:
        list: List of dictionaries with image data transferred to the specified device.
    """
    transferred_images = []
    for img_dict in images:
        transferred_dict = {
            'img': img_dict['img'].to(device),
            'true_shape': torch.tensor(img_dict['true_shape'], device=device),
            'idx': img_dict['idx'],
            'instance': img_dict['instance']
        }
        transferred_images.append(transferred_dict)
    return transferred_images

def render_camera_path(video_poses, camera_params, gaussians, model, device, pipeline, bg_color, image_shape):
    """Helper function for rendering a camera path.

    Args:
        video_poses: list of camera poses
        camera_params: camera parameters containing the extrinsics and intrinsics
        gaussians: Gaussian model
        model: feature extraction model
        device: compute device
        pipeline: rendering pipeline
        bg_color: background color
        image_shape: image size

    Returns:
        rendered_images: rendered images
        rendered_feats: rendered feature maps
        rendered_depths: rendered depth maps
        rendered_sems: rendered semantic maps
    """
    extrinsics, intrinsics = camera_params
    rendered_images = []
    rendered_feats = []
    rendered_depths = []
    rendered_sems = []

    for i in range(len(video_poses)):
        target_extrinsics = torch.zeros(4, 4).to(device)
        target_extrinsics[3, 3] = 1.0
        target_extrinsics[:3, :4] = torch.tensor(video_poses[i], device=device)
        camera = get_scaled_camera(extrinsics[0], target_extrinsics, intrinsics[0], 1.0, image_shape)

        rendered_output = render(camera, gaussians, pipeline, bg_color)
        rendered_images.append(rendered_output['render'])

        # process the feature map
        feature_map = rendered_output['feature_map']
        feature_map = model.feature_expansion(feature_map[None, ...])

        # process the semantic map
        logits = model.lseg_feature_extractor.decode_feature(feature_map, labelset=LABELS)
        semantic_map = torch.argmax(logits, dim=1) + 1
        mask = COLORS[semantic_map.cpu()]
        mask = rearrange(mask, 'b h w c -> b c h w')
        rendered_sems.append(mask.squeeze(0))

        # subsample channels and upsample the feature map
        feature_map = feature_map[:, ::16, ...]
        feature_map = torch.nn.functional.interpolate(feature_map, scale_factor=2, mode='bilinear', align_corners=True)
        rendered_feats.append(feature_map[0])
        del feature_map

        rendered_depths.append(rendered_output['depth'])

    # stack and post-process the results
    rendered_images = torch.clamp(torch.stack(rendered_images, dim=0), 0, 1)
    rendered_feats = torch.stack(rendered_feats, dim=0)
    rendered_depths = torch.stack(rendered_depths, dim=0)
    rendered_sems = torch.stack(rendered_sems, dim=0)

    return rendered_images, rendered_feats, rendered_depths, rendered_sems

@torch.no_grad()
def render_video_from_file(file_list, model, output_path, device='cuda', resolution=224, n_interp=90, fps=30, path_type='default'):
    # 1. load images
    images = load_images(file_list, resolution, save_dir=os.path.join(output_path, 'processed_images'))
    images = transfer_images_to_device(images, device)  # Transfer images to the specified device
    image_shape = images[0]['true_shape'][0]
    # 2. get camera pose
    pairs = make_pairs(images, prefilter=None, symmetrize=True)
    output = inference(pairs, model.mast3r, device, batch_size=1)
    mode = GlobalAlignerMode.PairViewer
    scene = global_aligner(output, device=device, mode=mode)
    extrinsics = scene.get_im_poses()
    intrinsics = scene.get_intrinsics()
    video_poses = generate_interpolated_path(extrinsics[:, :3, :].cpu().numpy(), n_interp=n_interp)  # extrinsics: (b, 3, 4)
    # 3. get gaussians
    pred1, pred2 = model(*images)
    pred = merge_and_split_predictions(pred1, pred2)
    gaussians = GaussianModel.from_predictions(pred[0], sh_degree=3)
    # 4. render the original camera path
    pipeline = DummyPipeline()
    bg_color = torch.tensor([0.0, 0.0, 0.0]).to(device)
    camera_params = (extrinsics, intrinsics)

    rendered_images, rendered_feats, rendered_depths, rendered_sems = render_camera_path(
        video_poses, camera_params, gaussians, model, device, pipeline, bg_color, image_shape)

    # 5. visualization
    all_fmap_vis = batch_visualize_tensor_global_pca(rendered_feats)
    all_depth_vis = depth_to_colormap(rendered_depths)
    all_sems_vis = rendered_sems

    # 6. save the videos and the Gaussian point cloud
    tensors_to_videos(rendered_images, all_depth_vis, all_fmap_vis, all_sems_vis, output_path, fps=fps)
    gaussians.save_ply(os.path.join(output_path, 'gaussians.ply'))

    # 7. render the path after moving the cameras
    moved_extrinsics = move_c2w_along_z(extrinsics, 2.0)
    moved_video_poses = generate_interpolated_path(moved_extrinsics[:, :3, :].cpu().numpy(), n_interp=n_interp)
    camera_params = (extrinsics, intrinsics)

    moved_rendered_images, moved_rendered_feats, moved_rendered_depths, moved_rendered_sems = render_camera_path(
        moved_video_poses, camera_params, gaussians, model, device, pipeline, bg_color, image_shape)

    # 8. visualize and save the moved-view results
    moved_all_fmap_vis = batch_visualize_tensor_global_pca(moved_rendered_feats)
    moved_all_depth_vis = depth_to_colormap(moved_rendered_depths)
    moved_all_sems_vis = moved_rendered_sems

    moved_output_path = os.path.join(output_path, 'moved')
    os.makedirs(moved_output_path, exist_ok=True)
    tensors_to_videos(moved_rendered_images, moved_all_depth_vis, moved_all_fmap_vis, moved_all_sems_vis,
                      moved_output_path, fps=fps)
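An illustrative call of `generate_interpolated_path` with hand-made keyframe poses (identity rotations translated along x); in the pipeline above the keyframes come from `scene.get_im_poses()`:

```python
import numpy as np
from src.utils.visualization_utils import generate_interpolated_path

# Three keyframe camera-to-world poses as (3, 4) [R | t] matrices (illustrative values)
keyframes = np.stack([
    np.concatenate([np.eye(3), np.array([[x], [0.0], [0.0]])], axis=1)
    for x in (0.0, 0.5, 1.0)
])                                                       # (3, 3, 4)

path = generate_interpolated_path(keyframes, n_interp=30)
print(path.shape)  # (60, 3, 4): n_interp * (n_keyframes - 1) smooth in-between poses
```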
submodules/PointTransformerV3/.gitmodules
ADDED
@@ -0,0 +1,3 @@
[submodule "Pointcept"]
	path = Pointcept
	url = https://github.com/Pointcept/Pointcept
submodules/PointTransformerV3/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Pointcept

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
submodules/PointTransformerV3/Pointcept/.github/workflows/formatter.yml
ADDED
@@ -0,0 +1,20 @@
name: Formatter

on:
  workflow_dispatch:
  push:
    branches:
      - main
  pull_request:
    types: [opened, reopened, synchronize]

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  formatter:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - uses: psf/black@stable
submodules/PointTransformerV3/Pointcept/.gitignore
ADDED
@@ -0,0 +1,16 @@
image/
__pycache__
**/build/
**/*.egg-info/
**/dist/
*.so
exp
weights
data
log
outputs/
.vscode
.idea
*/.DS_Store
**/*.out
Dockerfile
submodules/PointTransformerV3/Pointcept/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Pointcept

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
submodules/PointTransformerV3/Pointcept/README.md
ADDED
@@ -0,0 +1,896 @@
1 |
+
<p align="center">
|
2 |
+
<!-- pypi-strip -->
|
3 |
+
<picture>
|
4 |
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/logo_dark.png">
|
5 |
+
<source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/logo.png">
|
6 |
+
<!-- /pypi-strip -->
|
7 |
+
<img alt="pointcept" src="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/logo.png" width="400">
|
8 |
+
<!-- pypi-strip -->
|
9 |
+
</picture><br>
|
10 |
+
<!-- /pypi-strip -->
|
11 |
+
</p>
|
12 |
+
|
13 |
+
[](https://github.com/pointcept/pointcept/actions/workflows/formatter.yml)
|
14 |
+
|
15 |
+
**Pointcept** is a powerful and flexible codebase for point cloud perception research. It is also an official implementation of the following paper:
|
16 |
+
- **Point Transformer V3: Simpler, Faster, Stronger**
|
17 |
+
*Xiaoyang Wu, Li Jiang, Peng-Shuai Wang, Zhijian Liu, Xihui Liu, Yu Qiao, Wanli Ouyang, Tong He, Hengshuang Zhao*
|
18 |
+
IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024 - Oral
|
19 |
+
[ Backbone ] [PTv3] - [ [arXiv](https://arxiv.org/abs/2312.10035) ] [ [Bib](https://xywu.me/research/ptv3/bib.txt) ] [ [Project](https://github.com/Pointcept/PointTransformerV3) ] → [here](https://github.com/Pointcept/PointTransformerV3)
|
20 |
+
|
21 |
+
- **OA-CNNs: Omni-Adaptive Sparse CNNs for 3D Semantic Segmentation**
|
22 |
+
*Bohao Peng, Xiaoyang Wu, Li Jiang, Yukang Chen, Hengshuang Zhao, Zhuotao Tian, Jiaya Jia*
|
23 |
+
IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024
|
24 |
+
[ Backbone ] [ OA-CNNs ] - [ [arXiv](https://arxiv.org/abs/2403.14418) ] [ [Bib](https://xywu.me/research/oacnns/bib.txt) ] → [here](#oa-cnns)
|
25 |
+
|
26 |
+
- **PonderV2: Pave the Way for 3D Foundation Model with A Universal Pre-training Paradigm**
|
27 |
+
*Haoyi Zhu\*, Honghui Yang\*, Xiaoyang Wu\*, Di Huang\*, Sha Zhang, Xianglong He, Tong He, Hengshuang Zhao, Chunhua Shen, Yu Qiao, Wanli Ouyang*
|
28 |
+
arXiv Preprint 2023
|
29 |
+
[ Pretrain ] [PonderV2] - [ [arXiv](https://arxiv.org/abs/2310.08586) ] [ [Bib](https://xywu.me/research/ponderv2/bib.txt) ] [ [Project](https://github.com/OpenGVLab/PonderV2) ] → [here](https://github.com/OpenGVLab/PonderV2)
|
30 |
+
|
31 |
+
|
32 |
+
- **Towards Large-scale 3D Representation Learning with Multi-dataset Point Prompt Training**
|
33 |
+
*Xiaoyang Wu, Zhuotao Tian, Xin Wen, Bohao Peng, Xihui Liu, Kaicheng Yu, Hengshuang Zhao*
|
34 |
+
IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2024
|
35 |
+
[ Pretrain ] [PPT] - [ [arXiv](https://arxiv.org/abs/2308.09718) ] [ [Bib](https://xywu.me/research/ppt/bib.txt) ] → [here](#point-prompt-training-ppt)
|
36 |
+
|
37 |
+
- **Masked Scene Contrast: A Scalable Framework for Unsupervised 3D Representation Learning**
|
38 |
+
*Xiaoyang Wu, Xin Wen, Xihui Liu, Hengshuang Zhao*
|
39 |
+
IEEE Conference on Computer Vision and Pattern Recognition (**CVPR**) 2023
|
40 |
+
[ Pretrain ] [ MSC ] - [ [arXiv](https://arxiv.org/abs/2303.14191) ] [ [Bib](https://xywu.me/research/msc/bib.txt) ] → [here](#masked-scene-contrast-msc)
|
41 |
+
|
42 |
+
|
43 |
+
- **Learning Context-aware Classifier for Semantic Segmentation** (3D Part)
|
44 |
+
*Zhuotao Tian, Jiequan Cui, Li Jiang, Xiaojuan Qi, Xin Lai, Yixin Chen, Shu Liu, Jiaya Jia*
|
45 |
+
AAAI Conference on Artificial Intelligence (**AAAI**) 2023 - Oral
|
46 |
+
[ SemSeg ] [ CAC ] - [ [arXiv](https://arxiv.org/abs/2303.11633) ] [ [Bib](https://xywu.me/research/cac/bib.txt) ] [ [2D Part](https://github.com/tianzhuotao/CAC) ] → [here](#context-aware-classifier)
|
47 |
+
|
48 |
+
|
49 |
+
- **Point Transformer V2: Grouped Vector Attention and Partition-based Pooling**
|
50 |
+
*Xiaoyang Wu, Yixing Lao, Li Jiang, Xihui Liu, Hengshuang Zhao*
|
51 |
+
Conference on Neural Information Processing Systems (**NeurIPS**) 2022
|
52 |
+
[ Backbone ] [ PTv2 ] - [ [arXiv](https://arxiv.org/abs/2210.05666) ] [ [Bib](https://xywu.me/research/ptv2/bib.txt) ] → [here](#point-transformers)
|
53 |
+
|
54 |
+
|
55 |
+
- **Point Transformer**
|
56 |
+
*Hengshuang Zhao, Li Jiang, Jiaya Jia, Philip Torr, Vladlen Koltun*
|
57 |
+
IEEE International Conference on Computer Vision (**ICCV**) 2021 - Oral
|
58 |
+
[ Backbone ] [ PTv1 ] - [ [arXiv](https://arxiv.org/abs/2012.09164) ] [ [Bib](https://hszhao.github.io/papers/iccv21_pointtransformer_bib.txt) ] → [here](#point-transformers)
|
59 |
+
|
60 |
+
Additionally, **Pointcept** integrates the following excellent work (contain above):
|
61 |
+
Backbone:
|
62 |
+
[MinkUNet](https://github.com/NVIDIA/MinkowskiEngine) ([here](#sparseunet)),
|
63 |
+
[SpUNet](https://github.com/traveller59/spconv) ([here](#sparseunet)),
|
64 |
+
[SPVCNN](https://github.com/mit-han-lab/spvnas) ([here](#spvcnn)),
|
65 |
+
[OACNNs](https://arxiv.org/abs/2403.14418) ([here](#oa-cnns)),
|
66 |
+
[PTv1](https://arxiv.org/abs/2012.09164) ([here](#point-transformers)),
|
67 |
+
[PTv2](https://arxiv.org/abs/2210.05666) ([here](#point-transformers)),
|
68 |
+
[PTv3](https://arxiv.org/abs/2312.10035) ([here](#point-transformers)),
|
69 |
+
[StratifiedFormer](https://github.com/dvlab-research/Stratified-Transformer) ([here](#stratified-transformer)),
|
70 |
+
[OctFormer](https://github.com/octree-nn/octformer) ([here](#octformer)),
|
71 |
+
[Swin3D](https://github.com/microsoft/Swin3D) ([here](#swin3d));
|
72 |
+
Semantic Segmentation:
|
73 |
+
[Mix3d](https://github.com/kumuji/mix3d) ([here](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-spunet-v1m1-0-base.py#L5)),
|
74 |
+
[CAC](https://arxiv.org/abs/2303.11633) ([here](#context-aware-classifier));
|
75 |
+
Instance Segmentation:
|
76 |
+
[PointGroup](https://github.com/dvlab-research/PointGroup) ([here](#pointgroup));
|
77 |
+
Pre-training:
|
78 |
+
[PointContrast](https://github.com/facebookresearch/PointContrast) ([here](#pointcontrast)),
|
79 |
+
[Contrastive Scene Contexts](https://github.com/facebookresearch/ContrastiveSceneContexts) ([here](#contrastive-scene-contexts)),
|
80 |
+
[Masked Scene Contrast](https://arxiv.org/abs/2303.14191) ([here](#masked-scene-contrast-msc)),
|
81 |
+
[Point Prompt Training](https://arxiv.org/abs/2308.09718) ([here](#point-prompt-training-ppt));
|
82 |
+
Datasets:
|
83 |
+
[ScanNet](http://www.scan-net.org/) ([here](#scannet-v2)),
|
84 |
+
[ScanNet200](http://www.scan-net.org/) ([here](#scannet-v2)),
|
85 |
+
[ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) ([here](#scannet)),
|
86 |
+
[S3DIS](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1) ([here](#s3dis)),
|
87 |
+
[Matterport3D](https://niessner.github.io/Matterport/) ([here](#matterport3d)),
|
88 |
+
[ArkitScene](https://github.com/apple/ARKitScenes),
|
89 |
+
[Structured3D](https://structured3d-dataset.org/) ([here](#structured3d)),
|
90 |
+
[SemanticKITTI](http://www.semantic-kitti.org/) ([here](#semantickitti)),
|
91 |
+
[nuScenes](https://www.nuscenes.org/nuscenes) ([here](#nuscenes)),
|
92 |
+
[ModelNet40](https://modelnet.cs.princeton.edu/) ([here](#modelnet)),
|
93 |
+
[Waymo](https://waymo.com/open/) ([here](#waymo)).
|
94 |
+
|
95 |
+
|
96 |
+
## Highlights
|
97 |
+
- *May, 2024*: In v1.5.2, we redesigned the default structure for each dataset for better performance. Please **re-preprocess** datasets or **download** our preprocessed datasets from **[here](https://huggingface.co/Pointcept)**.
|
98 |
+
- *Apr, 2024*: **PTv3** is selected as one of the 90 **Oral** papers (3.3% accepted papers, 0.78% submissions) by CVPR'24!
|
99 |
+
- *Mar, 2024*: We release code for **OA-CNNs**, accepted by CVPR'24. Issue related to **OA-CNNs** can @Pbihao.
|
100 |
+
- *Feb, 2024*: **PTv3** and **PPT** are accepted by CVPR'24, another **two** papers by our Pointcept team have also been accepted by CVPR'24 🎉🎉🎉. We will make them publicly available soon!
|
101 |
+
- *Dec, 2023*: **PTv3** is released on arXiv, and the code is available in Pointcept. PTv3 is an efficient backbone model that achieves SOTA performances across indoor and outdoor scenarios.
|
102 |
+
- *Aug, 2023*: **PPT** is released on arXiv. PPT presents a multi-dataset pre-training framework that achieves SOTA performance in both **indoor** and **outdoor** scenarios. It is compatible with various existing pre-training frameworks and backbones. A **pre-release** version of the code is accessible; for those interested, please feel free to contact me directly for access.
|
103 |
+
- *Mar, 2023*: We released our codebase, **Pointcept**, a highly potent tool for point cloud representation learning and perception. We welcome new work to join the _Pointcept_ family and highly recommend reading [Quick Start](#quick-start) before starting your trail.
|
104 |
+
- *Feb, 2023*: **MSC** and **CeCo** accepted by CVPR 2023. _MSC_ is a highly efficient and effective pretraining framework that facilitates cross-dataset large-scale pretraining, while _CeCo_ is a segmentation method specifically designed for long-tail datasets. Both approaches are compatible with all existing backbone models in our codebase, and we will soon make the code available for public use.
|
105 |
+
- *Jan, 2023*: **CAC**, oral work of AAAI 2023, has expanded its 3D result with the incorporation of Pointcept. This addition will allow CAC to serve as a pluggable segmentor within our codebase.
|
106 |
+
- *Sep, 2022*: **PTv2** accepted by NeurIPS 2022. It is a continuation of the Point Transformer. The proposed GVA theory can apply to most existing attention mechanisms, while Grid Pooling is also a practical addition to existing pooling methods.
|
107 |
+
|
108 |
+
## Citation
|
109 |
+
If you find _Pointcept_ useful to your research, please cite our work as encouragement. (੭ˊ꒳ˋ)੭✧
|
110 |
+
```
|
111 |
+
@misc{pointcept2023,
|
112 |
+
title={Pointcept: A Codebase for Point Cloud Perception Research},
|
113 |
+
author={Pointcept Contributors},
|
114 |
+
howpublished = {\url{https://github.com/Pointcept/Pointcept}},
|
115 |
+
year={2023}
|
116 |
+
}
|
117 |
+
```
|
118 |
+
|
119 |
+
## Overview
|
120 |
+
|
121 |
+
- [Installation](#installation)
|
122 |
+
- [Data Preparation](#data-preparation)
|
123 |
+
- [Quick Start](#quick-start)
|
124 |
+
- [Model Zoo](#model-zoo)
|
125 |
+
- [Citation](#citation)
|
126 |
+
- [Acknowledgement](#acknowledgement)
|
127 |
+
|
128 |
+
## Installation
|
129 |
+
|
130 |
+
### Requirements
|
131 |
+
- Ubuntu: 18.04 and above.
|
132 |
+
- CUDA: 11.3 and above.
|
133 |
+
- PyTorch: 1.10.0 and above.
|
134 |
+
|
135 |
+
### Conda Environment
|
136 |
+
|
137 |
+
```bash
|
138 |
+
conda create -n pointcept python=3.8 -y
|
139 |
+
conda activate pointcept
|
140 |
+
conda install ninja -y
|
141 |
+
# Choose version you want here: https://pytorch.org/get-started/previous-versions/
|
142 |
+
conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch -y
|
143 |
+
conda install h5py pyyaml -c anaconda -y
|
144 |
+
conda install sharedarray tensorboard tensorboardx yapf addict einops scipy plyfile termcolor timm -c conda-forge -y
|
145 |
+
conda install pytorch-cluster pytorch-scatter pytorch-sparse -c pyg -y
|
146 |
+
pip install torch-geometric
|
147 |
+
|
148 |
+
# spconv (SparseUNet)
|
149 |
+
# refer https://github.com/traveller59/spconv
|
150 |
+
pip install spconv-cu113
|
151 |
+
|
152 |
+
# PPT (clip)
|
153 |
+
pip install ftfy regex tqdm
|
154 |
+
pip install git+https://github.com/openai/CLIP.git
|
155 |
+
|
156 |
+
# PTv1 & PTv2 or precise eval
|
157 |
+
cd libs/pointops
|
158 |
+
# usual
|
159 |
+
python setup.py install
|
160 |
+
# docker & multi GPU arch
|
161 |
+
TORCH_CUDA_ARCH_LIST="ARCH LIST" python setup.py install
|
162 |
+
# e.g. 7.5: RTX 3000; 8.0: a100 More available in: https://developer.nvidia.com/cuda-gpus
|
163 |
+
TORCH_CUDA_ARCH_LIST="7.5 8.0" python setup.py install
|
164 |
+
cd ../..
|
165 |
+
|
166 |
+
# Open3D (visualization, optional)
|
167 |
+
pip install open3d
|
168 |
+
```
|
169 |
+
|
170 |
+
## Data Preparation
|
171 |
+
|
172 |
+
### ScanNet v2
|
173 |
+
|
174 |
+
The preprocessing supports semantic and instance segmentation for both `ScanNet20`, `ScanNet200`, and `ScanNet Data Efficient`.
|
175 |
+
- Download the [ScanNet](http://www.scan-net.org/) v2 dataset.
|
176 |
+
- Run preprocessing code for raw ScanNet as follows:
|
177 |
+
|
178 |
+
```bash
|
179 |
+
# RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset.
|
180 |
+
# PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset (output dir).
|
181 |
+
python pointcept/datasets/preprocessing/scannet/preprocess_scannet.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_DIR}
|
182 |
+
```
|
183 |
+
- (Optional) Download ScanNet Data Efficient files:
|
184 |
+
```bash
|
185 |
+
# download-scannet.py is the official download script
|
186 |
+
# or follow instructions here: https://kaldir.vc.in.tum.de/scannet_benchmark/data_efficient/documentation#download
|
187 |
+
python download-scannet.py --data_efficient -o ${RAW_SCANNET_DIR}
|
188 |
+
# unzip downloads
|
189 |
+
cd ${RAW_SCANNET_DIR}/tasks
|
190 |
+
unzip limited-annotation-points.zip
|
191 |
+
unzip limited-reconstruction-scenes.zip
|
192 |
+
# copy files to processed dataset folder
|
193 |
+
mkdir ${PROCESSED_SCANNET_DIR}/tasks
|
194 |
+
cp -r ${RAW_SCANNET_DIR}/tasks/points ${PROCESSED_SCANNET_DIR}/tasks
|
195 |
+
cp -r ${RAW_SCANNET_DIR}/tasks/scenes ${PROCESSED_SCANNET_DIR}/tasks
|
196 |
+
```
|
197 |
+
- (Alternative) Our preprocess data can be directly downloaded [[here](https://huggingface.co/datasets/Pointcept/scannet-compressed)], please agree the official license before download it.
|
198 |
+
|
199 |
+
- Link processed dataset to codebase:
|
200 |
+
```bash
|
201 |
+
# PROCESSED_SCANNET_DIR: the directory of the processed ScanNet dataset.
|
202 |
+
mkdir data
|
203 |
+
ln -s ${PROCESSED_SCANNET_DIR} ${CODEBASE_DIR}/data/scannet
|
204 |
+
```
|
205 |
+
|
206 |
+
### ScanNet++
|
207 |
+
- Download the [ScanNet++](https://kaldir.vc.in.tum.de/scannetpp/) dataset.
|
208 |
+
- Run preprocessing code for raw ScanNet++ as follows:
|
209 |
+
```bash
|
210 |
+
# RAW_SCANNETPP_DIR: the directory of downloaded ScanNet++ raw dataset.
|
211 |
+
# PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir).
|
212 |
+
# NUM_WORKERS: the number of workers for parallel preprocessing.
|
213 |
+
python pointcept/datasets/preprocessing/scannetpp/preprocess_scannetpp.py --dataset_root ${RAW_SCANNETPP_DIR} --output_root ${PROCESSED_SCANNETPP_DIR} --num_workers ${NUM_WORKERS}
|
214 |
+
```
|
215 |
+
- Sampling and chunking large point cloud data in train/val split as follows (only used for training):
|
216 |
+
```bash
|
217 |
+
# PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet++ dataset (output dir).
|
218 |
+
# NUM_WORKERS: the number of workers for parallel preprocessing.
|
219 |
+
python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split train --num_workers ${NUM_WORKERS}
|
220 |
+
python pointcept/datasets/preprocessing/sampling_chunking_data.py --dataset_root ${PROCESSED_SCANNETPP_DIR} --grid_size 0.01 --chunk_range 6 6 --chunk_stride 3 3 --split val --num_workers ${NUM_WORKERS}
|
221 |
+
```
|
222 |
+
- (Alternative) Our preprocess data can be directly downloaded [[here](https://huggingface.co/datasets/Pointcept/scannetpp-compressed)], please agree the official license before download it.
|
223 |
+
- Link processed dataset to codebase:
|
224 |
+
```bash
|
225 |
+
# PROCESSED_SCANNETPP_DIR: the directory of the processed ScanNet dataset.
|
226 |
+
mkdir data
|
227 |
+
ln -s ${PROCESSED_SCANNETPP_DIR} ${CODEBASE_DIR}/data/scannetpp
|
228 |
+
```
|
229 |
+
|
230 |
+
### S3DIS
|
231 |
+
|
232 |
+
- Download S3DIS data by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLScDimvNMCGhy_rmBA2gHfDu3naktRm6A8BPwAWWDv-Uhm6Shw/viewform?c=0&w=1). Download the `Stanford3dDataset_v1.2.zip` file and unzip it.
|
233 |
+
- Fix error in `Area_5/office_19/Annotations/ceiling` Line 323474 (103.0�0000 => 103.000000).
|
234 |
+
- (Optional) Download Full 2D-3D S3DIS dataset (no XYZ) from [here](https://github.com/alexsax/2D-3D-Semantics) for parsing normal.
|
235 |
+
- Run preprocessing code for S3DIS as follows:
|
236 |
+
|
237 |
+
```bash
|
238 |
+
# S3DIS_DIR: the directory of downloaded Stanford3dDataset_v1.2 dataset.
|
239 |
+
# RAW_S3DIS_DIR: the directory of Stanford2d3dDataset_noXYZ dataset. (optional, for parsing normal)
|
240 |
+
# PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset (output dir).
|
241 |
+
|
242 |
+
# S3DIS without aligned angle
|
243 |
+
python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR}
|
244 |
+
# S3DIS with aligned angle
|
245 |
+
python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --align_angle
|
246 |
+
# S3DIS with normal vector (recommended, normal is helpful)
|
247 |
+
python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --parse_normal
|
248 |
+
python pointcept/datasets/preprocessing/s3dis/preprocess_s3dis.py --dataset_root ${S3DIS_DIR} --output_root ${PROCESSED_S3DIS_DIR} --raw_root ${RAW_S3DIS_DIR} --align_angle --parse_normal
|
249 |
+
```
|
250 |
+
|
251 |
+
- (Alternative) Our preprocess data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/s3dis-compressed
|
252 |
+
)] (with normal vector and aligned angle), please agree with the official license before downloading it.
|
253 |
+
|
254 |
+
- Link processed dataset to codebase.
|
255 |
+
```bash
|
256 |
+
# PROCESSED_S3DIS_DIR: the directory of processed S3DIS dataset.
|
257 |
+
mkdir data
|
258 |
+
ln -s ${PROCESSED_S3DIS_DIR} ${CODEBASE_DIR}/data/s3dis
|
259 |
+
```
|
260 |
+
### Structured3D
|
261 |
+
|
262 |
+
- Download Structured3D panorama related and perspective (full) related zip files by filling this [Google form](https://docs.google.com/forms/d/e/1FAIpQLSc0qtvh4vHSoZaW6UvlXYy79MbcGdZfICjh4_t4bYofQIVIdw/viewform?pli=1) (no need to unzip them).
|
263 |
+
- Organize all downloaded zip file in one folder (`${STRUCT3D_DIR}`).
|
264 |
+
- Run preprocessing code for Structured3D as follows:
|
265 |
+
```bash
|
266 |
+
# STRUCT3D_DIR: the directory of downloaded Structured3D dataset.
|
267 |
+
# PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir).
|
268 |
+
# NUM_WORKERS: Number for workers for preprocessing, default same as cpu count (might OOM).
|
269 |
+
export PYTHONPATH=./
|
270 |
+
python pointcept/datasets/preprocessing/structured3d/preprocess_structured3d.py --dataset_root ${STRUCT3D_DIR} --output_root ${PROCESSED_STRUCT3D_DIR} --num_workers ${NUM_WORKERS} --grid_size 0.01 --fuse_prsp --fuse_pano
|
271 |
+
```
|
272 |
+
Following the instruction of [Swin3D](https://arxiv.org/abs/2304.06906), we keep 25 categories with frequencies of more than 0.001, out of the original 40 categories.
|
273 |
+
|
274 |
+
[//]: # (- (Alternative) Our preprocess data can also be downloaded [[here]()], please agree the official license before download it.)
|
275 |
+
|
276 |
+
- (Alternative) Our preprocessed data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/structured3d-compressed)] (with perspective views and panorama view, 471.7G after unzipping); please agree to the official license before downloading it.
|
278 |
+
|
279 |
+
- Link processed dataset to codebase.
|
280 |
+
```bash
|
281 |
+
# PROCESSED_STRUCT3D_DIR: the directory of processed Structured3D dataset (output dir).
|
282 |
+
mkdir data
|
283 |
+
ln -s ${PROCESSED_STRUCT3D_DIR} ${CODEBASE_DIR}/data/structured3d
|
284 |
+
```
|
285 |
+
### Matterport3D
|
286 |
+
- Follow [this page](https://niessner.github.io/Matterport/#download) to request access to the dataset.
|
287 |
+
- Download the "region_segmentation" type, which represents the division of a scene into individual rooms.
|
288 |
+
```bash
|
289 |
+
# download-mp.py is the official download script
|
290 |
+
# MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
|
291 |
+
python download-mp.py -o {MATTERPORT3D_DIR} --type region_segmentations
|
292 |
+
```
|
293 |
+
- Unzip the region_segmentations data
|
294 |
+
```bash
|
295 |
+
# MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
|
296 |
+
python pointcept/datasets/preprocessing/matterport3d/unzip_matterport3d_region_segmentation.py --dataset_root {MATTERPORT3D_DIR}
|
297 |
+
```
|
298 |
+
- Run preprocessing code for Matterport3D as follows:
|
299 |
+
```bash
|
300 |
+
# MATTERPORT3D_DIR: the directory of downloaded Matterport3D dataset.
|
301 |
+
# PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir).
|
302 |
+
# NUM_WORKERS: the number of workers for this preprocessing.
|
303 |
+
python pointcept/datasets/preprocessing/matterport3d/preprocess_matterport3d_mesh.py --dataset_root ${MATTERPORT3D_DIR} --output_root ${PROCESSED_MATTERPORT3D_DIR} --num_workers ${NUM_WORKERS}
|
304 |
+
```
|
305 |
+
- Link processed dataset to codebase.
|
306 |
+
```bash
|
307 |
+
# PROCESSED_MATTERPORT3D_DIR: the directory of processed Matterport3D dataset (output dir).
|
308 |
+
mkdir data
|
309 |
+
ln -s ${PROCESSED_MATTERPORT3D_DIR} ${CODEBASE_DIR}/data/matterport3d
|
310 |
+
```
|
311 |
+
|
312 |
+
Following [OpenRooms](https://github.com/ViLab-UCSD/OpenRooms), we remap Matterport3D's categories to the 20 ScanNet semantic categories plus an additional ceiling category (a remapping sketch follows).
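
A minimal sketch of such a label remap, assuming a hypothetical `MATTERPORT_TO_TARGET` dictionary from raw Matterport3D category ids to the 21 target indices; the dictionary entries below are placeholders for illustration only, not the real mapping.

```python
import numpy as np

# Placeholder entries; the real mapping comes from the preprocessing scripts.
MATTERPORT_TO_TARGET = {1: 0, 2: 1}

def remap_labels(raw_labels: np.ndarray, ignore_index: int = -1) -> np.ndarray:
    """Remap non-negative raw per-point labels; unmapped ids -> ignore_index."""
    size = max(int(raw_labels.max()), max(MATTERPORT_TO_TARGET)) + 1
    lut = np.full(size, ignore_index, dtype=np.int64)
    for raw_id, target_id in MATTERPORT_TO_TARGET.items():
        lut[raw_id] = target_id
    return lut[raw_labels]
```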
|
313 |
+
- (Alternative) Our preprocessed data can also be downloaded [here](https://huggingface.co/datasets/Pointcept/matterport3d-compressed); please agree to the official license before downloading it.
|
314 |
+
|
315 |
+
### SemanticKITTI
|
316 |
+
- Download [SemanticKITTI](http://www.semantic-kitti.org/dataset.html#download) dataset.
|
317 |
+
- Link dataset to codebase.
|
318 |
+
```bash
|
319 |
+
# SEMANTIC_KITTI_DIR: the directory of SemanticKITTI dataset.
|
320 |
+
# |- SEMANTIC_KITTI_DIR
|
321 |
+
# |- dataset
|
322 |
+
# |- sequences
|
323 |
+
# |- 00
|
324 |
+
# |- 01
|
325 |
+
# |- ...
|
326 |
+
|
327 |
+
mkdir -p data
|
328 |
+
ln -s ${SEMANTIC_KITTI_DIR} ${CODEBASE_DIR}/data/semantic_kitti
|
329 |
+
```
|
330 |
+
|
331 |
+
### nuScenes
|
332 |
+
- Download the official [nuScenes](https://www.nuscenes.org/nuscenes#download) dataset (with Lidar Segmentation) and organize the downloaded files as follows:
|
333 |
+
```bash
|
334 |
+
NUSCENES_DIR
|
335 |
+
│── samples
|
336 |
+
│── sweeps
|
337 |
+
│── lidarseg
|
338 |
+
...
|
339 |
+
│── v1.0-trainval
|
340 |
+
│── v1.0-test
|
341 |
+
```
|
342 |
+
- Run information preprocessing code (modified from OpenPCDet) for nuScenes as follows:
|
343 |
+
```bash
|
344 |
+
# NUSCENES_DIR: the directory of downloaded nuScenes dataset.
|
345 |
+
# PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
|
346 |
+
# MAX_SWEEPS: Max number of sweeps. Default: 10.
|
347 |
+
pip install nuscenes-devkit pyquaternion
|
348 |
+
python pointcept/datasets/preprocessing/nuscenes/preprocess_nuscenes_info.py --dataset_root ${NUSCENES_DIR} --output_root ${PROCESSED_NUSCENES_DIR} --max_sweeps ${MAX_SWEEPS} --with_camera
|
349 |
+
```
|
350 |
+
- (Alternative) Our preprocessed nuScenes information data can also be downloaded [[here](https://huggingface.co/datasets/Pointcept/nuscenes-compressed)] (only the processed information; you still need to download the raw dataset and link it to the folder); please agree to the official license before downloading it.
|
352 |
+
|
353 |
+
- Link the raw dataset to the processed nuScenes dataset folder:
|
354 |
+
```bash
|
355 |
+
# NUSCENES_DIR: the directory of downloaded nuScenes dataset.
|
356 |
+
# PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
|
357 |
+
ln -s ${NUSCENES_DIR} {PROCESSED_NUSCENES_DIR}/raw
|
358 |
+
```
|
359 |
+
Then the processed nuScenes folder is organized as follows:
|
360 |
+
```bash
|
361 |
+
nuscene
|
362 |
+
|── raw
|
363 |
+
│── samples
|
364 |
+
│── sweeps
|
365 |
+
│── lidarseg
|
366 |
+
...
|
367 |
+
│── v1.0-trainval
|
368 |
+
│── v1.0-test
|
369 |
+
|── info
|
370 |
+
```
|
371 |
+
|
372 |
+
- Link processed dataset to codebase.
|
373 |
+
```bash
|
374 |
+
# PROCESSED_NUSCENES_DIR: the directory of processed nuScenes dataset (output dir).
|
375 |
+
mkdir data
|
376 |
+
ln -s ${PROCESSED_NUSCENES_DIR} ${CODEBASE_DIR}/data/nuscenes
|
377 |
+
```
|
378 |
+
|
379 |
+
### Waymo
|
380 |
+
- Download the official [Waymo](https://waymo.com/open/download/) dataset (v1.4.3) and organize the downloaded files as follows:
|
381 |
+
```bash
|
382 |
+
WAYMO_RAW_DIR
|
383 |
+
│── training
|
384 |
+
│── validation
|
385 |
+
│── testing
|
386 |
+
```
|
387 |
+
- Install the following dependencies:
|
388 |
+
```bash
|
389 |
+
# If shows "No matching distribution found", download whl directly from Pypi and install the package.
|
390 |
+
conda create -n waymo python=3.10 -y
|
391 |
+
conda activate waymo
|
392 |
+
pip install waymo-open-dataset-tf-2-12-0
|
393 |
+
```
|
394 |
+
- Run the preprocessing code as follows:
|
395 |
+
```bash
|
396 |
+
# WAYMO_DIR: the directory of the downloaded Waymo dataset.
|
397 |
+
# PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir).
|
398 |
+
# NUM_WORKERS: num workers for preprocessing
|
399 |
+
python pointcept/datasets/preprocessing/waymo/preprocess_waymo.py --dataset_root ${WAYMO_DIR} --output_root ${PROCESSED_WAYMO_DIR} --splits training validation --num_workers ${NUM_WORKERS}
|
400 |
+
```
|
401 |
+
|
402 |
+
- Link processed dataset to the codebase.
|
403 |
+
```bash
|
404 |
+
# PROCESSED_WAYMO_DIR: the directory of the processed Waymo dataset (output dir).
|
405 |
+
mkdir data
|
406 |
+
ln -s ${PROCESSED_WAYMO_DIR} ${CODEBASE_DIR}/data/waymo
|
407 |
+
```
|
408 |
+
|
409 |
+
### ModelNet
|
410 |
+
- Download [modelnet40_normal_resampled.zip](https://shapenet.cs.stanford.edu/media/modelnet40_normal_resampled.zip) and unzip
|
411 |
+
- Link dataset to the codebase.
|
412 |
+
```bash
|
413 |
+
mkdir -p data
|
414 |
+
ln -s ${MODELNET_DIR} ${CODEBASE_DIR}/data/modelnet40_normal_resampled
|
415 |
+
```
|
416 |
+
|
417 |
+
## Quick Start
|
418 |
+
|
419 |
+
### Training
|
420 |
+
**Train from scratch.** Training is driven by the configs in the `configs` folder.
The training script generates an experiment folder under `exp` and backs up essential code into it.
The training config, log, tensorboard files, and checkpoints are also saved into the experiment folder during training.
|
423 |
+
```bash
|
424 |
+
export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
|
425 |
+
# Script (Recommended)
|
426 |
+
sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME}
|
427 |
+
# Direct
|
428 |
+
export PYTHONPATH=./
|
429 |
+
python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH}
|
430 |
+
```
|
431 |
+
|
432 |
+
For example:
|
433 |
+
```bash
|
434 |
+
# By script (Recommended)
|
435 |
+
# -p is default set as python and can be ignored
|
436 |
+
sh scripts/train.sh -p python -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
437 |
+
# Direct
|
438 |
+
export PYTHONPATH=./
|
439 |
+
python tools/train.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base
|
440 |
+
```
|
441 |
+
**Resume training from checkpoint.** If the training process is interrupted by accident, the following script can resume training from a given checkpoint.
|
442 |
+
```bash
|
443 |
+
export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}
|
444 |
+
# Script (Recommended)
|
445 |
+
# simply add "-r true"
|
446 |
+
sh scripts/train.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -c ${CONFIG_NAME} -n ${EXP_NAME} -r true
|
447 |
+
# Direct
|
448 |
+
export PYTHONPATH=./
|
449 |
+
python tools/train.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} resume=True weight=${CHECKPOINT_PATH}
|
450 |
+
```
|
451 |
+
|
452 |
+
### Testing
|
453 |
+
During training, model evaluation is performed on point clouds after grid sampling (voxelization), providing an initial assessment of model performance. However, to obtain precise evaluation results, testing is **essential**. The testing process involves subsampling a dense point cloud into a sequence of voxelized point clouds, ensuring comprehensive coverage of all points. These sub-results are then predicted and collected to form a complete prediction of the entire point cloud. This approach yields higher evaluation results compared to simply mapping/interpolating the prediction. In addition, our testing code supports TTA (test time augmentation) testing, which further enhances the stability of evaluation performance.
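
To make the collection step concrete, here is a minimal sketch of how per-fragment predictions can be merged back onto the dense point cloud, assuming each fragment carries an `index` tensor mapping its points to positions in the original cloud; this is only an illustration, not the exact Pointcept tester implementation.

```python
import torch

def merge_fragment_predictions(fragments, num_points, num_classes):
    """Accumulate per-fragment logits onto the dense cloud and take the argmax.

    `fragments` is a list of (logits, index) pairs; `index` maps each fragment
    point back to its position in the original point cloud.
    """
    pred = torch.zeros(num_points, num_classes)
    for logits, index in fragments:
        pred.index_add_(0, index, torch.softmax(logits, dim=-1))
    return pred.argmax(dim=-1)  # final label for every original point
```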
|
454 |
+
|
455 |
+
```bash
|
456 |
+
# By script (Based on experiment folder created by training script)
|
457 |
+
sh scripts/test.sh -p ${INTERPRETER_PATH} -g ${NUM_GPU} -d ${DATASET_NAME} -n ${EXP_NAME} -w ${CHECKPOINT_NAME}
|
458 |
+
# Direct
|
459 |
+
export PYTHONPATH=./
|
460 |
+
python tools/test.py --config-file ${CONFIG_PATH} --num-gpus ${NUM_GPU} --options save_path=${SAVE_PATH} weight=${CHECKPOINT_PATH}
|
461 |
+
```
|
462 |
+
For example:
|
463 |
+
```bash
|
464 |
+
# By script (Based on experiment folder created by training script)
|
465 |
+
# -p is default set as python and can be ignored
|
466 |
+
# -w is default set as model_best and can be ignored
|
467 |
+
sh scripts/test.sh -p python -d scannet -n semseg-pt-v2m2-0-base -w model_best
|
468 |
+
# Direct
|
469 |
+
export PYTHONPATH=./
|
470 |
+
python tools/test.py --config-file configs/scannet/semseg-pt-v2m2-0-base.py --options save_path=exp/scannet/semseg-pt-v2m2-0-base weight=exp/scannet/semseg-pt-v2m2-0-base/model/model_best.pth
|
471 |
+
```
|
472 |
+
|
473 |
+
TTA can be disabled by replacing `data.test.test_cfg.aug_transform = [...]` with:
|
474 |
+
|
475 |
+
```python
|
476 |
+
data = dict(
|
477 |
+
train = dict(...),
|
478 |
+
val = dict(...),
|
479 |
+
test = dict(
|
480 |
+
...,
|
481 |
+
test_cfg = dict(
|
482 |
+
...,
|
483 |
+
aug_transform = [
|
484 |
+
[dict(type="RandomRotateTargetAngle", angle=[0], axis="z", center=[0, 0, 0], p=1)]
|
485 |
+
]
|
486 |
+
)
|
487 |
+
)
|
488 |
+
)
|
489 |
+
```
|
490 |
+
|
491 |
+
### Offset
|
492 |
+
`Offset` is the separator of point clouds in batch data, and it is similar to the concept of `Batch` in PyG.
|
493 |
+
A visual illustration of batch and offset is as follows:
|
494 |
+
<p align="center">
|
495 |
+
<!-- pypi-strip -->
|
496 |
+
<picture>
|
497 |
+
<source media="(prefers-color-scheme: dark)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/offset_dark.png">
|
498 |
+
<source media="(prefers-color-scheme: light)" srcset="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/offset.png">
|
499 |
+
<!-- /pypi-strip -->
|
500 |
+
<img alt="pointcept" src="https://raw.githubusercontent.com/Pointcept/Pointcept/main/docs/offset.png" width="480">
|
501 |
+
<!-- pypi-strip -->
|
502 |
+
</picture><br>
|
503 |
+
<!-- /pypi-strip -->
|
504 |
+
</p>
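
For concreteness, here is a minimal PyTorch sketch of the relationship between `offset` and the per-point batch index it encodes; the tensor values are illustrative and the snippet is not taken from the codebase.

```python
import torch

# Two point clouds with 3 and 2 points concatenated into one batch.
offset = torch.tensor([3, 5])  # cumulative point counts, as described above

# offset -> per-point batch index (the PyG-style "Batch" vector): [0, 0, 0, 1, 1]
counts = torch.diff(offset, prepend=torch.zeros(1, dtype=offset.dtype))
batch = torch.repeat_interleave(torch.arange(len(offset)), counts)

# batch index -> offset (round trip back to [3, 5])
offset_again = torch.cumsum(torch.bincount(batch), dim=0)
```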
|
505 |
+
|
506 |
+
## Model Zoo
|
507 |
+
### 1. Backbones and Semantic Segmentation
|
508 |
+
#### SparseUNet
|
509 |
+
|
510 |
+
_Pointcept_ provides `SparseUNet` implemented by `SpConv` and `MinkowskiEngine`. The SpConv version is recommended since SpConv is easy to install and faster than MinkowskiEngine. Meanwhile, SpConv is also widely applied in outdoor perception.
|
511 |
+
|
512 |
+
- **SpConv (recommended)**
|
513 |
+
|
514 |
+
The SpConv version of `SparseUNet` in the codebase was fully rewritten from the `MinkowskiEngine` version; example running scripts are as follows:
|
515 |
+
|
516 |
+
```bash
|
517 |
+
# ScanNet val
|
518 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
519 |
+
# ScanNet200
|
520 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
521 |
+
# S3DIS
|
522 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
523 |
+
# S3DIS (with normal)
|
524 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-spunet-v1m1-0-cn-base -n semseg-spunet-v1m1-0-cn-base
|
525 |
+
# SemanticKITTI
|
526 |
+
sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
527 |
+
# nuScenes
|
528 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
529 |
+
# ModelNet40
|
530 |
+
sh scripts/train.sh -g 2 -d modelnet40 -c cls-spunet-v1m1-0-base -n cls-spunet-v1m1-0-base
|
531 |
+
|
532 |
+
# ScanNet Data Efficient
|
533 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la20 -n semseg-spunet-v1m1-2-efficient-la20
|
534 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la50 -n semseg-spunet-v1m1-2-efficient-la50
|
535 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la100 -n semseg-spunet-v1m1-2-efficient-la100
|
536 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-la200 -n semseg-spunet-v1m1-2-efficient-la200
|
537 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr1 -n semseg-spunet-v1m1-2-efficient-lr1
|
538 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr5 -n semseg-spunet-v1m1-2-efficient-lr5
|
539 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr10 -n semseg-spunet-v1m1-2-efficient-lr10
|
540 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-2-efficient-lr20 -n semseg-spunet-v1m1-2-efficient-lr20
|
541 |
+
|
542 |
+
# Profile model run time
|
543 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-spunet-v1m1-0-enable-profiler -n semseg-spunet-v1m1-0-enable-profiler
|
544 |
+
```
|
545 |
+
|
546 |
+
- **MinkowskiEngine**
|
547 |
+
|
548 |
+
The MinkowskiEngine version of `SparseUNet` in the codebase was modified from the original MinkowskiEngine repo; example running scripts are as follows:
1. Install MinkowskiEngine; refer to https://github.com/NVIDIA/MinkowskiEngine
|
550 |
+
2. Training with the following example scripts:
|
551 |
+
```bash
|
552 |
+
# Uncomment "# from .sparse_unet import *" in "pointcept/models/__init__.py"
|
553 |
+
# Uncomment "# from .mink_unet import *" in "pointcept/models/sparse_unet/__init__.py"
|
554 |
+
# ScanNet
|
555 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
|
556 |
+
# ScanNet200
|
557 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
|
558 |
+
# S3DIS
|
559 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
|
560 |
+
# SemanticKITTI
|
561 |
+
sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-minkunet34c-0-base -n semseg-minkunet34c-0-base
|
562 |
+
```
|
563 |
+
|
564 |
+
#### OA-CNNs
|
565 |
+
Introducing Omni-Adaptive 3D CNNs (**OA-CNNs**), a family of networks that integrates a lightweight module to greatly enhance the adaptivity of sparse CNNs at minimal computational cost. Without any self-attention modules, **OA-CNNs** favorably surpass point transformers in terms of accuracy in both indoor and outdoor scenes, with much less latency and memory cost. Issues related to **OA-CNNs** can be directed to @Pbihao.
|
566 |
+
```bash
|
567 |
+
# ScanNet
|
568 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-oacnns-v1m1-0-base -n semseg-oacnns-v1m1-0-base
|
569 |
+
```
|
570 |
+
|
571 |
+
#### Point Transformers
|
572 |
+
- **PTv3**
|
573 |
+
|
574 |
+
[PTv3](https://arxiv.org/abs/2312.10035) is an efficient backbone model that achieves SOTA performance across indoor and outdoor scenarios. The full PTv3 relies on FlashAttention, and FlashAttention requires CUDA 11.6 or above, so make sure your local Pointcept environment satisfies the requirement.
|
575 |
+
|
576 |
+
If you cannot upgrade your local environment to satisfy the requirement (CUDA >= 11.6), you can disable FlashAttention by setting the model parameter `enable_flash` to `false` and reducing `enc_patch_size` and `dec_patch_size` to a lower value (e.g. 128).
|
577 |
+
|
578 |
+
FlashAttention force-disables RPE and restricts attention computation to fp16. If you require these features, disable `enable_flash` and adjust `enable_rpe`, `upcast_attention`, and `upcast_softmax`; a sketch of the relevant settings follows.
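
For reference, a hedged sketch of the backbone settings discussed above; the parameter names follow the PTv3 configs included in this change, while the values are illustrative rather than an official recipe.

```python
# Illustrative override of the PTv3 backbone settings; not an official recipe.
model = dict(
    backbone=dict(
        type="PT-v3m1",
        enable_flash=False,                        # no FlashAttention (e.g. CUDA < 11.6)
        enc_patch_size=(128, 128, 128, 128, 128),  # reduced from 1024
        dec_patch_size=(128, 128, 128, 128),
        enable_rpe=True,                           # RPE becomes available again
        upcast_attention=True,                     # keep attention in fp32
        upcast_softmax=True,
    ),
)
```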
|
579 |
+
|
580 |
+
Detailed instructions and experiment records (containing weights) are available on the [project repository](https://github.com/Pointcept/PointTransformerV3). Example running scripts are as follows:
|
581 |
+
```bash
|
582 |
+
# Scratched ScanNet
|
583 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
584 |
+
# PPT joint training (ScanNet + Structured3D) and evaluate in ScanNet
|
585 |
+
sh scripts/train.sh -g 8 -d scannet -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme
|
586 |
+
|
587 |
+
# Scratched ScanNet200
|
588 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
589 |
+
# Fine-tuning from PPT joint training (ScanNet + Structured3D) with ScanNet200
|
590 |
+
# PTV3_PPT_WEIGHT_PATH: Path to model weight trained by PPT multi-dataset joint training
|
591 |
+
# e.g. exp/scannet/semseg-pt-v3m1-1-ppt-extreme/model/model_best.pth
|
592 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v3m1-1-ppt-ft -n semseg-pt-v3m1-1-ppt-ft -w ${PTV3_PPT_WEIGHT_PATH}
|
593 |
+
|
594 |
+
# Scratched ScanNet++
|
595 |
+
sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
596 |
+
# Scratched ScanNet++ test
|
597 |
+
sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v3m1-1-submit -n semseg-pt-v3m1-1-submit
|
598 |
+
|
599 |
+
|
600 |
+
# Scratched S3DIS
|
601 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
602 |
+
# an example of disabling FlashAttention and enabling RPE
|
603 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v3m1-1-rpe -n semseg-pt-v3m1-0-rpe
|
604 |
+
# PPT joint training (ScanNet + S3DIS + Structured3D) and evaluate in ScanNet
|
605 |
+
sh scripts/train.sh -g 8 -d s3dis -c semseg-pt-v3m1-1-ppt-extreme -n semseg-pt-v3m1-1-ppt-extreme
|
606 |
+
# S3DIS 6-fold cross validation
|
607 |
+
# 1. The default configs are evaluated on Area_5, modify the "data.train.split", "data.val.split", and "data.test.split" to make the config evaluated on Area_1 ~ Area_6 respectively.
|
608 |
+
# 2. Train and evaluate the model on each split of areas and gather result files located in "exp/s3dis/EXP_NAME/result/Area_x.pth" in one single folder, noted as RECORD_FOLDER.
|
609 |
+
# 3. Run the following script to get S3DIS 6-fold cross validation performance:
|
610 |
+
export PYTHONPATH=./
|
611 |
+
python tools/test_s3dis_6fold.py --record_root ${RECORD_FOLDER}
|
612 |
+
|
613 |
+
# Scratched nuScenes
|
614 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
615 |
+
# Scratched Waymo
|
616 |
+
sh scripts/train.sh -g 4 -d waymo -c semseg-pt-v3m1-0-base -n semseg-pt-v3m1-0-base
|
617 |
+
|
618 |
+
# More configs and exp records for PTv3 will be available soon.
|
619 |
+
```
|
620 |
+
|
621 |
+
Indoor semantic segmentation
|
622 |
+
| Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record |
|
623 |
+
| :---: | :---: |:---------------:| :---: | :---: | :---: | :---: | :---: |
|
624 |
+
| PTv3 | ScanNet | ✗ | 4 | 77.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-0-base) |
|
625 |
+
| PTv3 + PPT | ScanNet | ✓ | 8 | 78.5% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet-semseg-pt-v3m1-1-ppt-extreme) |
|
626 |
+
| PTv3 | ScanNet200 | ✗ | 4 | 35.3% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/scannet200/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) |[link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/scannet200-semseg-pt-v3m1-0-base)|
|
627 |
+
| PTv3 + PPT | ScanNet200 | ✓ (f.t.) | 4 | | | | |
|
628 |
+
| PTv3 | S3DIS (Area5) | ✗ | 4 | 73.6% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-0-rpe.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-0-rpe) |
|
629 |
+
| PTv3 + PPT | S3DIS (Area5) | ✓ | 8 | 75.4% | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/s3dis/semseg-pt-v3m1-1-ppt-extreme.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/s3dis-semseg-pt-v3m1-1-ppt-extreme) |
|
630 |
+
|
631 |
+
Outdoor semantic segmentation
|
632 |
+
| Model | Benchmark | Additional Data | Num GPUs | Val mIoU | Config | Tensorboard | Exp Record |
|
633 |
+
| :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
|
634 |
+
| PTv3 | nuScenes | ✗ | 4 | 80.3 | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/nuscenes/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard)|[link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/nuscenes-semseg-pt-v3m1-0-base) |
|
635 |
+
| PTv3 + PPT | nuScenes | ✓ | 8 | | | | |
|
636 |
+
| PTv3 | SemanticKITTI | ✗ | 4 | | | | |
|
637 |
+
| PTv3 + PPT | SemanticKITTI | ✓ | 8 | | | | |
|
638 |
+
| PTv3 | Waymo | ✗ | 4 | 71.2 | [link](https://github.com/Pointcept/Pointcept/blob/main/configs/waymo/semseg-pt-v3m1-0-base.py) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tensorboard) | [link](https://huggingface.co/Pointcept/PointTransformerV3/tree/main/waymo-semseg-pt-v3m1-0-base) (log only) |
|
639 |
+
| PTv3 + PPT | Waymo | ✓ | 8 | | | | |
|
640 |
+
|
641 |
+
_**\*Released model weights were trained with v1.5.1; weights for v1.5.2 and later are still in progress.**_
|
642 |
+
|
643 |
+
- **PTv2 mode2**
|
644 |
+
|
645 |
+
The original PTv2 was trained on 4 * RTX a6000 (48G memory). Even enabling AMP, the memory cost of the original PTv2 is slightly larger than 24G. Considering GPUs with 24G memory are much more accessible, I tuned the PTv2 on the latest Pointcept and made it runnable on 4 * RTX 3090 machines.
|
646 |
+
|
647 |
+
`PTv2 Mode2` enables AMP and disables _Position Encoding Multiplier_ & _Grouped Linear_. During our further research, we found that precise coordinates are not necessary for point cloud understanding: replacing precise coordinates with grid coordinates does not affect performance (SparseUNet is an example; a minimal sketch of grid coordinates is given below). As for Grouped Linear, my implementation seems to cost more memory than the Linear layer provided by PyTorch. Benefiting from the codebase and better parameter tuning, we also alleviate the overfitting problem; the reproduced performance is even better than the results reported in our paper.
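
A minimal sketch of what "grid coordinates" means here, assuming a numpy array of xyz coordinates and the 0.02 grid size used in the configs of this change; this is an illustration, not the GridSample transform itself.

```python
import numpy as np

def grid_coord(coord: np.ndarray, grid_size: float = 0.02) -> np.ndarray:
    """Quantize continuous xyz coordinates into integer grid coordinates."""
    shifted = coord - coord.min(axis=0)          # shift to a non-negative origin
    return np.floor(shifted / grid_size).astype(np.int64)
```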
|
648 |
+
|
649 |
+
Example running scripts are as follows:
|
650 |
+
|
651 |
+
```bash
|
652 |
+
# ptv2m2: PTv2 mode2, disable PEM & Grouped Linear, GPU memory cost < 24G (recommend)
|
653 |
+
# ScanNet
|
654 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
655 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-3-lovasz -n semseg-pt-v2m2-3-lovasz
|
656 |
+
|
657 |
+
# ScanNet test
|
658 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit
|
659 |
+
# ScanNet200
|
660 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
661 |
+
# ScanNet++
|
662 |
+
sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
663 |
+
# ScanNet++ test
|
664 |
+
sh scripts/train.sh -g 4 -d scannetpp -c semseg-pt-v2m2-1-submit -n semseg-pt-v2m2-1-submit
|
665 |
+
# S3DIS
|
666 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
667 |
+
# SemanticKITTI
|
668 |
+
sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
669 |
+
# nuScenes
|
670 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
671 |
+
```
|
672 |
+
|
673 |
+
- **PTv2 mode1**
|
674 |
+
|
675 |
+
`PTv2 mode1` is the original PTv2 we reported in our paper, example running scripts are as follows:
|
676 |
+
|
677 |
+
```bash
|
678 |
+
# ptv2m1: PTv2 mode1, Original PTv2, GPU memory cost > 24G
|
679 |
+
# ScanNet
|
680 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
|
681 |
+
# ScanNet200
|
682 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
|
683 |
+
# S3DIS
|
684 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v2m1-0-base -n semseg-pt-v2m1-0-base
|
685 |
+
```
|
686 |
+
|
687 |
+
- **PTv1**
|
688 |
+
|
689 |
+
The original PTv1 is also available in our Pointcept codebase. I haven't run PTv1 for a long time, but I have ensured that the example running script works well.
|
690 |
+
|
691 |
+
```bash
|
692 |
+
# ScanNet
|
693 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
|
694 |
+
# ScanNet200
|
695 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
|
696 |
+
# S3DIS
|
697 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-pt-v1-0-base -n semseg-pt-v1-0-base
|
698 |
+
```
|
699 |
+
|
700 |
+
|
701 |
+
#### Stratified Transformer
|
702 |
+
1. Additional requirements:
|
703 |
+
```bash
|
704 |
+
pip install torch-points3d
|
705 |
+
# Fix dependence, caused by installing torch-points3d
|
706 |
+
pip uninstall SharedArray
|
707 |
+
pip install SharedArray==3.2.1
|
708 |
+
|
709 |
+
cd libs/pointops2
|
710 |
+
python setup.py install
|
711 |
+
cd ../..
|
712 |
+
```
|
713 |
+
2. Uncomment `# from .stratified_transformer import *` in `pointcept/models/__init__.py`.
|
714 |
+
3. Refer to [Optional Installation](installation) to install dependencies.
|
715 |
+
4. Training with the following example scripts:
|
716 |
+
```bash
|
717 |
+
# stv1m1: Stratified Transformer mode1, Modified from the original Stratified Transformer code.
|
718 |
+
# stv1m2: Stratified Transformer mode2, my rewritten version (recommended).
|
719 |
+
|
720 |
+
# ScanNet
|
721 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
|
722 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-st-v1m1-0-origin -n semseg-st-v1m1-0-origin
|
723 |
+
# ScanNet200
|
724 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
|
725 |
+
# S3DIS
|
726 |
+
sh scripts/train.sh -g 4 -d s3dis -c semseg-st-v1m2-0-refined -n semseg-st-v1m2-0-refined
|
727 |
+
```
|
728 |
+
|
729 |
+
#### SPVCNN
|
730 |
+
`SPVCNN` is the baseline model of [SPVNAS](https://github.com/mit-han-lab/spvnas); it is also a practical baseline for outdoor datasets.
|
731 |
+
1. Install torchsparse:
|
732 |
+
```bash
|
733 |
+
# refer https://github.com/mit-han-lab/torchsparse
|
734 |
+
# install method without sudo apt install
|
735 |
+
conda install google-sparsehash -c bioconda
|
736 |
+
export C_INCLUDE_PATH=${CONDA_PREFIX}/include:$C_INCLUDE_PATH
|
737 |
+
export CPLUS_INCLUDE_PATH=${CONDA_PREFIX}/include:$CPLUS_INCLUDE_PATH
|
738 |
+
pip install --upgrade git+https://github.com/mit-han-lab/torchsparse.git
|
739 |
+
```
|
740 |
+
2. Training with the following example scripts:
|
741 |
+
```bash
|
742 |
+
# SemanticKITTI
|
743 |
+
sh scripts/train.sh -g 2 -d semantic_kitti -c semseg-spvcnn-v1m1-0-base -n semseg-spvcnn-v1m1-0-base
|
744 |
+
```
|
745 |
+
|
746 |
+
#### OctFormer
|
747 |
+
OctFormer from _OctFormer: Octree-based Transformers for 3D Point Clouds_.
|
748 |
+
1. Additional requirements:
|
749 |
+
```bash
|
750 |
+
cd libs
|
751 |
+
git clone https://github.com/octree-nn/dwconv.git
|
752 |
+
pip install ./dwconv
|
753 |
+
pip install ocnn
|
754 |
+
```
|
755 |
+
2. Uncomment `# from .octformer import *` in `pointcept/models/__init__.py`.
|
756 |
+
3. Training with the following example scripts:
|
757 |
+
```bash
|
758 |
+
# ScanNet
|
759 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-octformer-v1m1-0-base -n semseg-octformer-v1m1-0-base
|
760 |
+
```
|
761 |
+
|
762 |
+
#### Swin3D
|
763 |
+
Swin3D from _Swin3D: A Pretrained Transformer Backbone for 3D Indoor Scene Understanding_.
|
764 |
+
1. Additional requirements:
|
765 |
+
```bash
|
766 |
+
# 1. Install MinkEngine v0.5.4, follow readme in https://github.com/NVIDIA/MinkowskiEngine;
|
767 |
+
# 2. Install Swin3D, mainly for cuda operation:
|
768 |
+
cd libs
|
769 |
+
git clone https://github.com/microsoft/Swin3D.git
|
770 |
+
cd Swin3D
|
771 |
+
pip install ./
|
772 |
+
```
|
773 |
+
2. Uncomment `# from .swin3d import *` in `pointcept/models/__init__.py`.
|
774 |
+
3. Pre-Training with the following example scripts (Structured3D preprocessing refer [here](#structured3d)):
|
775 |
+
```bash
|
776 |
+
# Structured3D + Swin-S
|
777 |
+
sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
|
778 |
+
# Structured3D + Swin-L
|
779 |
+
sh scripts/train.sh -g 4 -d structured3d -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
|
780 |
+
|
781 |
+
# Addition
|
782 |
+
# Structured3D + SpUNet
|
783 |
+
sh scripts/train.sh -g 4 -d structured3d -c semseg-spunet-v1m1-0-base -n semseg-spunet-v1m1-0-base
|
784 |
+
# Structured3D + PTv2
|
785 |
+
sh scripts/train.sh -g 4 -d structured3d -c semseg-pt-v2m2-0-base -n semseg-pt-v2m2-0-base
|
786 |
+
```
|
787 |
+
4. Fine-tuning with the following example scripts:
|
788 |
+
```bash
|
789 |
+
# ScanNet + Swin-S
|
790 |
+
sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
|
791 |
+
# ScanNet + Swin-L
|
792 |
+
sh scripts/train.sh -g 4 -d scannet -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
|
793 |
+
|
794 |
+
# S3DIS + Swin-S (here we provide config support S3DIS normal vector)
|
795 |
+
sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-0-small -n semseg-swin3d-v1m1-0-small
|
796 |
+
# S3DIS + Swin-L (here we provide config support S3DIS normal vector)
|
797 |
+
sh scripts/train.sh -g 4 -d s3dis -w exp/structured3d/semseg-swin3d-v1m1-1-large/model/model_last.pth -c semseg-swin3d-v1m1-1-large -n semseg-swin3d-v1m1-1-large
|
798 |
+
```
|
799 |
+
|
800 |
+
#### Context-Aware Classifier
|
801 |
+
`Context-Aware Classifier` is a segmentor that can further boost the performance of each backbone, as a replacement for `Default Segmentor`. Training with the following example scripts:
|
802 |
+
```bash
|
803 |
+
# ScanNet
|
804 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base
|
805 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz
|
806 |
+
sh scripts/train.sh -g 4 -d scannet -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz
|
807 |
+
|
808 |
+
# ScanNet200
|
809 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-0-spunet-base -n semseg-cac-v1m1-0-spunet-base
|
810 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-1-spunet-lovasz -n semseg-cac-v1m1-1-spunet-lovasz
|
811 |
+
sh scripts/train.sh -g 4 -d scannet200 -c semseg-cac-v1m1-2-ptv2-lovasz -n semseg-cac-v1m1-2-ptv2-lovasz
|
812 |
+
```
|
813 |
+
|
814 |
+
|
815 |
+
### 2. Instance Segmentation
|
816 |
+
#### PointGroup
|
817 |
+
[PointGroup](https://github.com/dvlab-research/PointGroup) is a baseline framework for point cloud instance segmentation.
|
818 |
+
1. Additional requirements:
|
819 |
+
```bash
|
820 |
+
conda install -c bioconda google-sparsehash
|
821 |
+
cd libs/pointgroup_ops
|
822 |
+
python setup.py install --include_dirs=${CONDA_PREFIX}/include
|
823 |
+
cd ../..
|
824 |
+
```
|
825 |
+
2. Uncomment `# from .point_group import *` in `pointcept/models/__init__.py`.
|
826 |
+
3. Training with the following example scripts:
|
827 |
+
```bash
|
828 |
+
# ScanNet
|
829 |
+
sh scripts/train.sh -g 4 -d scannet -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base
|
830 |
+
# S3DIS
|
831 |
+
sh scripts/train.sh -g 4 -d s3dis -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-pointgroup-v1m1-0-spunet-base
|
832 |
+
```
|
833 |
+
|
834 |
+
### 3. Pre-training
|
835 |
+
#### Masked Scene Contrast (MSC)
|
836 |
+
1. Pre-training with the following example scripts:
|
837 |
+
```bash
|
838 |
+
# ScanNet
|
839 |
+
sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-0-spunet-base -n pretrain-msc-v1m1-0-spunet-base
|
840 |
+
```
|
841 |
+
|
842 |
+
2. Fine-tuning with the following example scripts:
|
843 |
+
Enable PointGroup ([here](#pointgroup)) before fine-tuning on the instance segmentation task.
|
844 |
+
```bash
|
845 |
+
# ScanNet20 Semantic Segmentation
|
846 |
+
sh scripts/train.sh -g 8 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c semseg-spunet-v1m1-4-ft -n semseg-msc-v1m1-0f-spunet-base
|
847 |
+
# ScanNet20 Instance Segmentation (enable PointGroup before running the script)
|
848 |
+
sh scripts/train.sh -g 4 -d scannet -w exp/scannet/pretrain-msc-v1m1-0-spunet-base/model/model_last.pth -c insseg-pointgroup-v1m1-0-spunet-base -n insseg-msc-v1m1-0f-pointgroup-spunet-base
|
849 |
+
```
|
850 |
+
3. Example log and weight: [[Pretrain](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EYvNV4XUJ_5Mlk-g15RelN4BW_P8lVBfC_zhjC_BlBDARg?e=UoGFWH)] [[Semseg](https://connecthkuhk-my.sharepoint.com/:u:/g/personal/wuxy_connect_hku_hk/EQkDiv5xkOFKgCpGiGtAlLwBon7i8W6my3TIbGVxuiTttQ?e=tQFnbr)]
|
851 |
+
|
852 |
+
#### Point Prompt Training (PPT)
|
853 |
+
PPT presents a multi-dataset pre-training framework, and it is compatible with various existing pre-training frameworks and backbones.
|
854 |
+
1. PPT supervised joint training with the following example scripts:
|
855 |
+
```bash
|
856 |
+
# ScanNet + Structured3d, validate on ScanNet (S3DIS might cause long data time, w/o S3DIS for a quick validation) >= 3090 * 8
|
857 |
+
sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-0-sc-st-spunet -n semseg-ppt-v1m1-0-sc-st-spunet
|
858 |
+
sh scripts/train.sh -g 8 -d scannet -c semseg-ppt-v1m1-1-sc-st-spunet-submit -n semseg-ppt-v1m1-1-sc-st-spunet-submit
|
859 |
+
# ScanNet + S3DIS + Structured3d, validate on S3DIS (>= a100 * 8)
|
860 |
+
sh scripts/train.sh -g 8 -d s3dis -c semseg-ppt-v1m1-0-s3-sc-st-spunet -n semseg-ppt-v1m1-0-s3-sc-st-spunet
|
861 |
+
# SemanticKITTI + nuScenes + Waymo, validate on SemanticKITTI (bs12 >= 3090 * 4; bs24 >= 3090 * 8, v1m1-0 is still being tuned)
|
862 |
+
sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet
|
863 |
+
sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-0-sk-nu-wa-spunet -n semseg-ppt-v1m2-0-sk-nu-wa-spunet
|
864 |
+
sh scripts/train.sh -g 4 -d semantic_kitti -c semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit -n semseg-ppt-v1m2-1-sk-nu-wa-spunet-submit
|
865 |
+
# SemanticKITTI + nuScenes + Waymo, validate on nuScenes (bs12 >= 3090 * 4; bs24 >= 3090 * 8, v1m1-0 is still being tuned)
|
866 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m1-0-nu-sk-wa-spunet -n semseg-ppt-v1m1-0-nu-sk-wa-spunet
|
867 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-0-nu-sk-wa-spunet -n semseg-ppt-v1m2-0-nu-sk-wa-spunet
|
868 |
+
sh scripts/train.sh -g 4 -d nuscenes -c semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit -n semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit
|
869 |
+
```
|
870 |
+
|
871 |
+
#### PointContrast
|
872 |
+
1. Preprocess and link ScanNet-Pair dataset (pair-wise matching with ScanNet raw RGB-D frame, ~1.5T):
|
873 |
+
```bash
|
874 |
+
# RAW_SCANNET_DIR: the directory of downloaded ScanNet v2 raw dataset.
|
875 |
+
# PROCESSED_SCANNET_PAIR_DIR: the directory of processed ScanNet pair dataset (output dir).
|
876 |
+
python pointcept/datasets/preprocessing/scannet/scannet_pair/preprocess.py --dataset_root ${RAW_SCANNET_DIR} --output_root ${PROCESSED_SCANNET_PAIR_DIR}
|
877 |
+
ln -s ${PROCESSED_SCANNET_PAIR_DIR} ${CODEBASE_DIR}/data/scannet
|
878 |
+
```
|
879 |
+
2. Pre-training with the following example scripts:
|
880 |
+
```bash
|
881 |
+
# ScanNet
|
882 |
+
sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m1-1-spunet-pointcontrast -n pretrain-msc-v1m1-1-spunet-pointcontrast
|
883 |
+
```
|
884 |
+
3. For fine-tuning, refer to [MSC](#masked-scene-contrast-msc).
|
885 |
+
|
886 |
+
#### Contrastive Scene Contexts
|
887 |
+
1. Preprocess and link ScanNet-Pair dataset (refer [PointContrast](#pointcontrast)):
|
888 |
+
2. Pre-training with the following example scripts:
|
889 |
+
```bash
|
890 |
+
# ScanNet
|
891 |
+
sh scripts/train.sh -g 8 -d scannet -c pretrain-msc-v1m2-0-spunet-csc -n pretrain-msc-v1m2-0-spunet-csc
|
892 |
+
```
|
893 |
+
3. For fine-tuning, refer to [MSC](#masked-scene-contrast-msc).
|
894 |
+
|
895 |
+
## Acknowledgement
|
896 |
+
_Pointcept_ is designed by [Xiaoyang](https://xywu.me/), named by [Yixing](https://github.com/yxlao), and the logo is created by [Yuechen](https://julianjuaner.github.io/). It is derived from [Hengshuang](https://hszhao.github.io/)'s [Semseg](https://github.com/hszhao/semseg) and inspired by several repos, e.g., [MinkowskiEngine](https://github.com/NVIDIA/MinkowskiEngine), [pointnet2](https://github.com/charlesq34/pointnet2), [mmcv](https://github.com/open-mmlab/mmcv/tree/master/mmcv), and [Detectron2](https://github.com/facebookresearch/detectron2).
|
submodules/PointTransformerV3/Pointcept/configs/_base_/dataset/scannetpp.py
ADDED
@@ -0,0 +1,104 @@
1 |
+
data = dict(
|
2 |
+
names=[
|
3 |
+
"wall",
|
4 |
+
"ceiling",
|
5 |
+
"floor",
|
6 |
+
"table",
|
7 |
+
"door",
|
8 |
+
"ceiling lamp",
|
9 |
+
"cabinet",
|
10 |
+
"blinds",
|
11 |
+
"curtain",
|
12 |
+
"chair",
|
13 |
+
"storage cabinet",
|
14 |
+
"office chair",
|
15 |
+
"bookshelf",
|
16 |
+
"whiteboard",
|
17 |
+
"window",
|
18 |
+
"box",
|
19 |
+
"window frame",
|
20 |
+
"monitor",
|
21 |
+
"shelf",
|
22 |
+
"doorframe",
|
23 |
+
"pipe",
|
24 |
+
"heater",
|
25 |
+
"kitchen cabinet",
|
26 |
+
"sofa",
|
27 |
+
"windowsill",
|
28 |
+
"bed",
|
29 |
+
"shower wall",
|
30 |
+
"trash can",
|
31 |
+
"book",
|
32 |
+
"plant",
|
33 |
+
"blanket",
|
34 |
+
"tv",
|
35 |
+
"computer tower",
|
36 |
+
"kitchen counter",
|
37 |
+
"refrigerator",
|
38 |
+
"jacket",
|
39 |
+
"electrical duct",
|
40 |
+
"sink",
|
41 |
+
"bag",
|
42 |
+
"picture",
|
43 |
+
"pillow",
|
44 |
+
"towel",
|
45 |
+
"suitcase",
|
46 |
+
"backpack",
|
47 |
+
"crate",
|
48 |
+
"keyboard",
|
49 |
+
"rack",
|
50 |
+
"toilet",
|
51 |
+
"paper",
|
52 |
+
"printer",
|
53 |
+
"poster",
|
54 |
+
"painting",
|
55 |
+
"microwave",
|
56 |
+
"board",
|
57 |
+
"shoes",
|
58 |
+
"socket",
|
59 |
+
"bottle",
|
60 |
+
"bucket",
|
61 |
+
"cushion",
|
62 |
+
"basket",
|
63 |
+
"shoe rack",
|
64 |
+
"telephone",
|
65 |
+
"file folder",
|
66 |
+
"cloth",
|
67 |
+
"blind rail",
|
68 |
+
"laptop",
|
69 |
+
"plant pot",
|
70 |
+
"exhaust fan",
|
71 |
+
"cup",
|
72 |
+
"coat hanger",
|
73 |
+
"light switch",
|
74 |
+
"speaker",
|
75 |
+
"table lamp",
|
76 |
+
"air vent",
|
77 |
+
"clothes hanger",
|
78 |
+
"kettle",
|
79 |
+
"smoke detector",
|
80 |
+
"container",
|
81 |
+
"power strip",
|
82 |
+
"slippers",
|
83 |
+
"paper bag",
|
84 |
+
"mouse",
|
85 |
+
"cutting board",
|
86 |
+
"toilet paper",
|
87 |
+
"paper towel",
|
88 |
+
"pot",
|
89 |
+
"clock",
|
90 |
+
"pan",
|
91 |
+
"tap",
|
92 |
+
"jar",
|
93 |
+
"soap dispenser",
|
94 |
+
"binder",
|
95 |
+
"bowl",
|
96 |
+
"tissue box",
|
97 |
+
"whiteboard eraser",
|
98 |
+
"toilet brush",
|
99 |
+
"spray bottle",
|
100 |
+
"headphones",
|
101 |
+
"stapler",
|
102 |
+
"marker",
|
103 |
+
]
|
104 |
+
)
|
submodules/PointTransformerV3/Pointcept/configs/_base_/default_runtime.py
ADDED
@@ -0,0 +1,39 @@
1 |
+
weight = None # path to model weight
|
2 |
+
resume = False # whether to resume training process
|
3 |
+
evaluate = True # evaluate after each epoch training process
|
4 |
+
test_only = False # test process
|
5 |
+
|
6 |
+
seed = None # train process will init a random seed and record
|
7 |
+
save_path = "exp/default"
|
8 |
+
num_worker = 16 # total worker in all gpu
|
9 |
+
batch_size = 16 # total batch size in all gpu
|
10 |
+
batch_size_val = None # auto adapt to bs 1 for each gpu
|
11 |
+
batch_size_test = None # auto adapt to bs 1 for each gpu
|
12 |
+
epoch = 100 # total epoch, data loop = epoch // eval_epoch
|
13 |
+
eval_epoch = 100 # sche total eval & checkpoint epoch
|
14 |
+
clip_grad = None # disable with None, enable with a float
|
15 |
+
|
16 |
+
sync_bn = False
|
17 |
+
enable_amp = False
|
18 |
+
empty_cache = False
|
19 |
+
empty_cache_per_epoch = False
|
20 |
+
find_unused_parameters = False
|
21 |
+
|
22 |
+
mix_prob = 0
|
23 |
+
param_dicts = None # example: param_dicts = [dict(keyword="block", lr_scale=0.1)]
|
24 |
+
|
25 |
+
# hook
|
26 |
+
hooks = [
|
27 |
+
dict(type="CheckpointLoader"),
|
28 |
+
dict(type="IterationTimer", warmup_iter=2),
|
29 |
+
dict(type="InformationWriter"),
|
30 |
+
dict(type="SemSegEvaluator"),
|
31 |
+
dict(type="CheckpointSaver", save_freq=None),
|
32 |
+
dict(type="PreciseEvaluator", test_last=False),
|
33 |
+
]
|
34 |
+
|
35 |
+
# Trainer
|
36 |
+
train = dict(type="DefaultTrainer")
|
37 |
+
|
38 |
+
# Tester
|
39 |
+
test = dict(type="SemSegTester", verbose=True)
|
submodules/PointTransformerV3/Pointcept/configs/matterport3d/semseg-pt-v3m1-0-base.py
ADDED
@@ -0,0 +1,313 @@
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
|
10 |
+
# model settings
|
11 |
+
model = dict(
|
12 |
+
type="DefaultSegmentorV2",
|
13 |
+
num_classes=21,
|
14 |
+
backbone_out_channels=64,
|
15 |
+
backbone=dict(
|
16 |
+
type="PT-v3m1",
|
17 |
+
in_channels=6,
|
18 |
+
order=("z", "z-trans", "hilbert", "hilbert-trans"),
|
19 |
+
stride=(2, 2, 2, 2),
|
20 |
+
enc_depths=(2, 2, 2, 6, 2),
|
21 |
+
enc_channels=(32, 64, 128, 256, 512),
|
22 |
+
enc_num_head=(2, 4, 8, 16, 32),
|
23 |
+
enc_patch_size=(1024, 1024, 1024, 1024, 1024),
|
24 |
+
dec_depths=(2, 2, 2, 2),
|
25 |
+
dec_channels=(64, 64, 128, 256),
|
26 |
+
dec_num_head=(4, 4, 8, 16),
|
27 |
+
dec_patch_size=(1024, 1024, 1024, 1024),
|
28 |
+
mlp_ratio=4,
|
29 |
+
qkv_bias=True,
|
30 |
+
qk_scale=None,
|
31 |
+
attn_drop=0.0,
|
32 |
+
proj_drop=0.0,
|
33 |
+
drop_path=0.3,
|
34 |
+
shuffle_orders=True,
|
35 |
+
pre_norm=True,
|
36 |
+
enable_rpe=False,
|
37 |
+
enable_flash=True,
|
38 |
+
upcast_attention=False,
|
39 |
+
upcast_softmax=False,
|
40 |
+
cls_mode=False,
|
41 |
+
pdnorm_bn=False,
|
42 |
+
pdnorm_ln=False,
|
43 |
+
pdnorm_decouple=True,
|
44 |
+
pdnorm_adaptive=False,
|
45 |
+
pdnorm_affine=True,
|
46 |
+
pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
|
47 |
+
),
|
48 |
+
criteria=[
|
49 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
50 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
51 |
+
],
|
52 |
+
)
|
53 |
+
|
54 |
+
# scheduler settings
|
55 |
+
epoch = 800
|
56 |
+
optimizer = dict(type="AdamW", lr=0.006, weight_decay=0.05)
|
57 |
+
scheduler = dict(
|
58 |
+
type="OneCycleLR",
|
59 |
+
max_lr=[0.006, 0.0006],
|
60 |
+
pct_start=0.05,
|
61 |
+
anneal_strategy="cos",
|
62 |
+
div_factor=10.0,
|
63 |
+
final_div_factor=1000.0,
|
64 |
+
)
|
65 |
+
param_dicts = [dict(keyword="block", lr=0.0006)]
|
66 |
+
|
67 |
+
# dataset settings
|
68 |
+
dataset_type = "DefaultDataset"
|
69 |
+
data_root = "data/matterport3d"
|
70 |
+
|
71 |
+
data = dict(
|
72 |
+
num_classes=21,
|
73 |
+
ignore_index=-1,
|
74 |
+
names=(
|
75 |
+
"wall",
|
76 |
+
"floor",
|
77 |
+
"cabinet",
|
78 |
+
"bed",
|
79 |
+
"chair",
|
80 |
+
"sofa",
|
81 |
+
"table",
|
82 |
+
"door",
|
83 |
+
"window",
|
84 |
+
"bookshelf",
|
85 |
+
"picture",
|
86 |
+
"counter",
|
87 |
+
"desk",
|
88 |
+
"curtain",
|
89 |
+
"refrigerator",
|
90 |
+
"shower curtain",
|
91 |
+
"toilet",
|
92 |
+
"sink",
|
93 |
+
"bathtub",
|
94 |
+
"other",
|
95 |
+
"ceiling",
|
96 |
+
),
|
97 |
+
train=dict(
|
98 |
+
type=dataset_type,
|
99 |
+
split="train",
|
100 |
+
data_root=data_root,
|
101 |
+
transform=[
|
102 |
+
dict(type="CenterShift", apply_z=True),
|
103 |
+
dict(
|
104 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
105 |
+
),
|
106 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
107 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
108 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
109 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
110 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
111 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
112 |
+
dict(type="RandomFlip", p=0.5),
|
113 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
114 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
115 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
116 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
117 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
118 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
119 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
120 |
+
dict(
|
121 |
+
type="GridSample",
|
122 |
+
grid_size=0.02,
|
123 |
+
hash_type="fnv",
|
124 |
+
mode="train",
|
125 |
+
return_grid_coord=True,
|
126 |
+
),
|
127 |
+
dict(type="SphereCrop", point_max=102400, mode="random"),
|
128 |
+
dict(type="CenterShift", apply_z=False),
|
129 |
+
dict(type="NormalizeColor"),
|
130 |
+
# dict(type="ShufflePoint"),
|
131 |
+
dict(type="ToTensor"),
|
132 |
+
dict(
|
133 |
+
type="Collect",
|
134 |
+
keys=("coord", "grid_coord", "segment"),
|
135 |
+
feat_keys=("color", "normal"),
|
136 |
+
),
|
137 |
+
],
|
138 |
+
test_mode=False,
|
139 |
+
),
|
140 |
+
val=dict(
|
141 |
+
type=dataset_type,
|
142 |
+
split="val",
|
143 |
+
data_root=data_root,
|
144 |
+
transform=[
|
145 |
+
dict(type="CenterShift", apply_z=True),
|
146 |
+
dict(
|
147 |
+
type="GridSample",
|
148 |
+
grid_size=0.02,
|
149 |
+
hash_type="fnv",
|
150 |
+
mode="train",
|
151 |
+
return_grid_coord=True,
|
152 |
+
),
|
153 |
+
dict(type="CenterShift", apply_z=False),
|
154 |
+
dict(type="NormalizeColor"),
|
155 |
+
dict(type="ToTensor"),
|
156 |
+
dict(
|
157 |
+
type="Collect",
|
158 |
+
keys=("coord", "grid_coord", "segment"),
|
159 |
+
feat_keys=("color", "normal"),
|
160 |
+
),
|
161 |
+
],
|
162 |
+
test_mode=False,
|
163 |
+
),
|
164 |
+
test=dict(
|
165 |
+
type=dataset_type,
|
166 |
+
split="val",
|
167 |
+
data_root=data_root,
|
168 |
+
transform=[
|
169 |
+
dict(type="CenterShift", apply_z=True),
|
170 |
+
dict(type="NormalizeColor"),
|
171 |
+
],
|
172 |
+
test_mode=True,
|
173 |
+
test_cfg=dict(
|
174 |
+
voxelize=dict(
|
175 |
+
type="GridSample",
|
176 |
+
grid_size=0.02,
|
177 |
+
hash_type="fnv",
|
178 |
+
mode="test",
|
179 |
+
keys=("coord", "color", "normal"),
|
180 |
+
return_grid_coord=True,
|
181 |
+
),
|
182 |
+
crop=None,
|
183 |
+
post_transform=[
|
184 |
+
dict(type="CenterShift", apply_z=False),
|
185 |
+
dict(type="ToTensor"),
|
186 |
+
dict(
|
187 |
+
type="Collect",
|
188 |
+
keys=("coord", "grid_coord", "index"),
|
189 |
+
feat_keys=("color", "normal"),
|
190 |
+
),
|
191 |
+
],
|
192 |
+
aug_transform=[
|
193 |
+
[
|
194 |
+
dict(
|
195 |
+
type="RandomRotateTargetAngle",
|
196 |
+
angle=[0],
|
197 |
+
axis="z",
|
198 |
+
center=[0, 0, 0],
|
199 |
+
p=1,
|
200 |
+
)
|
201 |
+
],
|
202 |
+
[
|
203 |
+
dict(
|
204 |
+
type="RandomRotateTargetAngle",
|
205 |
+
angle=[1 / 2],
|
206 |
+
axis="z",
|
207 |
+
center=[0, 0, 0],
|
208 |
+
p=1,
|
209 |
+
)
|
210 |
+
],
|
211 |
+
[
|
212 |
+
dict(
|
213 |
+
type="RandomRotateTargetAngle",
|
214 |
+
angle=[1],
|
215 |
+
axis="z",
|
216 |
+
center=[0, 0, 0],
|
217 |
+
p=1,
|
218 |
+
)
|
219 |
+
],
|
220 |
+
[
|
221 |
+
dict(
|
222 |
+
type="RandomRotateTargetAngle",
|
223 |
+
angle=[3 / 2],
|
224 |
+
axis="z",
|
225 |
+
center=[0, 0, 0],
|
226 |
+
p=1,
|
227 |
+
)
|
228 |
+
],
|
229 |
+
[
|
230 |
+
dict(
|
231 |
+
type="RandomRotateTargetAngle",
|
232 |
+
angle=[0],
|
233 |
+
axis="z",
|
234 |
+
center=[0, 0, 0],
|
235 |
+
p=1,
|
236 |
+
),
|
237 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
238 |
+
],
|
239 |
+
[
|
240 |
+
dict(
|
241 |
+
type="RandomRotateTargetAngle",
|
242 |
+
angle=[1 / 2],
|
243 |
+
axis="z",
|
244 |
+
center=[0, 0, 0],
|
245 |
+
p=1,
|
246 |
+
),
|
247 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
248 |
+
],
|
249 |
+
[
|
250 |
+
dict(
|
251 |
+
type="RandomRotateTargetAngle",
|
252 |
+
angle=[1],
|
253 |
+
axis="z",
|
254 |
+
center=[0, 0, 0],
|
255 |
+
p=1,
|
256 |
+
),
|
257 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
258 |
+
],
|
259 |
+
[
|
260 |
+
dict(
|
261 |
+
type="RandomRotateTargetAngle",
|
262 |
+
angle=[3 / 2],
|
263 |
+
axis="z",
|
264 |
+
center=[0, 0, 0],
|
265 |
+
p=1,
|
266 |
+
),
|
267 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
268 |
+
],
|
269 |
+
[
|
270 |
+
dict(
|
271 |
+
type="RandomRotateTargetAngle",
|
272 |
+
angle=[0],
|
273 |
+
axis="z",
|
274 |
+
center=[0, 0, 0],
|
275 |
+
p=1,
|
276 |
+
),
|
277 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
278 |
+
],
|
279 |
+
[
|
280 |
+
dict(
|
281 |
+
type="RandomRotateTargetAngle",
|
282 |
+
angle=[1 / 2],
|
283 |
+
axis="z",
|
284 |
+
center=[0, 0, 0],
|
285 |
+
p=1,
|
286 |
+
),
|
287 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
288 |
+
],
|
289 |
+
[
|
290 |
+
dict(
|
291 |
+
type="RandomRotateTargetAngle",
|
292 |
+
angle=[1],
|
293 |
+
axis="z",
|
294 |
+
center=[0, 0, 0],
|
295 |
+
p=1,
|
296 |
+
),
|
297 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
298 |
+
],
|
299 |
+
[
|
300 |
+
dict(
|
301 |
+
type="RandomRotateTargetAngle",
|
302 |
+
angle=[3 / 2],
|
303 |
+
axis="z",
|
304 |
+
center=[0, 0, 0],
|
305 |
+
p=1,
|
306 |
+
),
|
307 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
308 |
+
],
|
309 |
+
[dict(type="RandomFlip", p=1)],
|
310 |
+
],
|
311 |
+
),
|
312 |
+
),
|
313 |
+
)
|
submodules/PointTransformerV3/Pointcept/configs/matterport3d/semseg-spunet-v1m1-0-base.py
ADDED
@@ -0,0 +1,282 @@
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
mix_prob = 0.8
|
6 |
+
empty_cache = False
|
7 |
+
enable_amp = True
|
8 |
+
|
9 |
+
# model settings
|
10 |
+
model = dict(
|
11 |
+
type="DefaultSegmentor",
|
12 |
+
backbone=dict(
|
13 |
+
type="SpUNet-v1m1",
|
14 |
+
in_channels=6,
|
15 |
+
num_classes=21,
|
16 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
17 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
18 |
+
),
|
19 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
20 |
+
)
|
21 |
+
|
22 |
+
|
23 |
+
# scheduler settings
|
24 |
+
epoch = 800
|
25 |
+
optimizer = dict(type="SGD", lr=0.05, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
26 |
+
scheduler = dict(
|
27 |
+
type="OneCycleLR",
|
28 |
+
max_lr=optimizer["lr"],
|
29 |
+
pct_start=0.05,
|
30 |
+
anneal_strategy="cos",
|
31 |
+
div_factor=10.0,
|
32 |
+
final_div_factor=10000.0,
|
33 |
+
)
|
34 |
+
|
35 |
+
# dataset settings
|
36 |
+
dataset_type = "DefaultDataset"
|
37 |
+
data_root = "data/matterport3d"
|
38 |
+
|
39 |
+
data = dict(
|
40 |
+
num_classes=21,
|
41 |
+
ignore_index=-1,
|
42 |
+
names=(
|
43 |
+
"wall",
|
44 |
+
"floor",
|
45 |
+
"cabinet",
|
46 |
+
"bed",
|
47 |
+
"chair",
|
48 |
+
"sofa",
|
49 |
+
"table",
|
50 |
+
"door",
|
51 |
+
"window",
|
52 |
+
"bookshelf",
|
53 |
+
"picture",
|
54 |
+
"counter",
|
55 |
+
"desk",
|
56 |
+
"curtain",
|
57 |
+
"refrigerator",
|
58 |
+
"shower curtain",
|
59 |
+
"toilet",
|
60 |
+
"sink",
|
61 |
+
"bathtub",
|
62 |
+
"other",
|
63 |
+
"ceiling",
|
64 |
+
),
|
65 |
+
train=dict(
|
66 |
+
type=dataset_type,
|
67 |
+
split="train",
|
68 |
+
data_root=data_root,
|
69 |
+
transform=[
|
70 |
+
dict(type="CenterShift", apply_z=True),
|
71 |
+
dict(
|
72 |
+
type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2
|
73 |
+
),
|
74 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
75 |
+
dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
76 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="x", p=0.5),
|
77 |
+
dict(type="RandomRotate", angle=[-1 / 64, 1 / 64], axis="y", p=0.5),
|
78 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
79 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
80 |
+
dict(type="RandomFlip", p=0.5),
|
81 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
82 |
+
dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
83 |
+
dict(type="ChromaticAutoContrast", p=0.2, blend_factor=None),
|
84 |
+
dict(type="ChromaticTranslation", p=0.95, ratio=0.05),
|
85 |
+
dict(type="ChromaticJitter", p=0.95, std=0.05),
|
86 |
+
# dict(type="HueSaturationTranslation", hue_max=0.2, saturation_max=0.2),
|
87 |
+
# dict(type="RandomColorDrop", p=0.2, color_augment=0.0),
|
88 |
+
dict(
|
89 |
+
type="GridSample",
|
90 |
+
grid_size=0.02,
|
91 |
+
hash_type="fnv",
|
92 |
+
mode="train",
|
93 |
+
return_grid_coord=True,
|
94 |
+
),
|
95 |
+
dict(type="SphereCrop", point_max=100000, mode="random"),
|
96 |
+
dict(type="CenterShift", apply_z=False),
|
97 |
+
dict(type="NormalizeColor"),
|
98 |
+
dict(type="ShufflePoint"),
|
99 |
+
dict(type="ToTensor"),
|
100 |
+
dict(
|
101 |
+
type="Collect",
|
102 |
+
keys=("coord", "grid_coord", "segment"),
|
103 |
+
feat_keys=("color", "normal"),
|
104 |
+
),
|
105 |
+
],
|
106 |
+
test_mode=False,
|
107 |
+
),
|
108 |
+
val=dict(
|
109 |
+
type=dataset_type,
|
110 |
+
split="val",
|
111 |
+
data_root=data_root,
|
112 |
+
transform=[
|
113 |
+
dict(type="CenterShift", apply_z=True),
|
114 |
+
dict(
|
115 |
+
type="GridSample",
|
116 |
+
grid_size=0.02,
|
117 |
+
hash_type="fnv",
|
118 |
+
mode="train",
|
119 |
+
return_grid_coord=True,
|
120 |
+
),
|
121 |
+
# dict(type="SphereCrop", point_max=1000000, mode="center"),
|
122 |
+
dict(type="CenterShift", apply_z=False),
|
123 |
+
dict(type="NormalizeColor"),
|
124 |
+
dict(type="ToTensor"),
|
125 |
+
dict(
|
126 |
+
type="Collect",
|
127 |
+
keys=("coord", "grid_coord", "segment"),
|
128 |
+
feat_keys=("color", "normal"),
|
129 |
+
),
|
130 |
+
],
|
131 |
+
test_mode=False,
|
132 |
+
),
|
133 |
+
test=dict(
|
134 |
+
type=dataset_type,
|
135 |
+
split="val",
|
136 |
+
data_root=data_root,
|
137 |
+
transform=[
|
138 |
+
dict(type="CenterShift", apply_z=True),
|
139 |
+
dict(type="NormalizeColor"),
|
140 |
+
],
|
141 |
+
test_mode=True,
|
142 |
+
test_cfg=dict(
|
143 |
+
voxelize=dict(
|
144 |
+
type="GridSample",
|
145 |
+
grid_size=0.02,
|
146 |
+
hash_type="fnv",
|
147 |
+
mode="test",
|
148 |
+
return_grid_coord=True,
|
149 |
+
keys=("coord", "color", "normal"),
|
150 |
+
),
|
151 |
+
crop=None,
|
152 |
+
post_transform=[
|
153 |
+
dict(type="CenterShift", apply_z=False),
|
154 |
+
dict(type="ToTensor"),
|
155 |
+
dict(
|
156 |
+
type="Collect",
|
157 |
+
keys=("coord", "grid_coord", "index"),
|
158 |
+
feat_keys=("color", "normal"),
|
159 |
+
),
|
160 |
+
],
|
161 |
+
aug_transform=[
|
162 |
+
[
|
163 |
+
dict(
|
164 |
+
type="RandomRotateTargetAngle",
|
165 |
+
angle=[0],
|
166 |
+
axis="z",
|
167 |
+
center=[0, 0, 0],
|
168 |
+
p=1,
|
169 |
+
)
|
170 |
+
],
|
171 |
+
[
|
172 |
+
dict(
|
173 |
+
type="RandomRotateTargetAngle",
|
174 |
+
angle=[1 / 2],
|
175 |
+
axis="z",
|
176 |
+
center=[0, 0, 0],
|
177 |
+
p=1,
|
178 |
+
)
|
179 |
+
],
|
180 |
+
[
|
181 |
+
dict(
|
182 |
+
type="RandomRotateTargetAngle",
|
183 |
+
angle=[1],
|
184 |
+
axis="z",
|
185 |
+
center=[0, 0, 0],
|
186 |
+
p=1,
|
187 |
+
)
|
188 |
+
],
|
189 |
+
[
|
190 |
+
dict(
|
191 |
+
type="RandomRotateTargetAngle",
|
192 |
+
angle=[3 / 2],
|
193 |
+
axis="z",
|
194 |
+
center=[0, 0, 0],
|
195 |
+
p=1,
|
196 |
+
)
|
197 |
+
],
|
198 |
+
[
|
199 |
+
dict(
|
200 |
+
type="RandomRotateTargetAngle",
|
201 |
+
angle=[0],
|
202 |
+
axis="z",
|
203 |
+
center=[0, 0, 0],
|
204 |
+
p=1,
|
205 |
+
),
|
206 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
207 |
+
],
|
208 |
+
[
|
209 |
+
dict(
|
210 |
+
type="RandomRotateTargetAngle",
|
211 |
+
angle=[1 / 2],
|
212 |
+
axis="z",
|
213 |
+
center=[0, 0, 0],
|
214 |
+
p=1,
|
215 |
+
),
|
216 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
217 |
+
],
|
218 |
+
[
|
219 |
+
dict(
|
220 |
+
type="RandomRotateTargetAngle",
|
221 |
+
angle=[1],
|
222 |
+
axis="z",
|
223 |
+
center=[0, 0, 0],
|
224 |
+
p=1,
|
225 |
+
),
|
226 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
227 |
+
],
|
228 |
+
[
|
229 |
+
dict(
|
230 |
+
type="RandomRotateTargetAngle",
|
231 |
+
angle=[3 / 2],
|
232 |
+
axis="z",
|
233 |
+
center=[0, 0, 0],
|
234 |
+
p=1,
|
235 |
+
),
|
236 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
237 |
+
],
|
238 |
+
[
|
239 |
+
dict(
|
240 |
+
type="RandomRotateTargetAngle",
|
241 |
+
angle=[0],
|
242 |
+
axis="z",
|
243 |
+
center=[0, 0, 0],
|
244 |
+
p=1,
|
245 |
+
),
|
246 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
247 |
+
],
|
248 |
+
[
|
249 |
+
dict(
|
250 |
+
type="RandomRotateTargetAngle",
|
251 |
+
angle=[1 / 2],
|
252 |
+
axis="z",
|
253 |
+
center=[0, 0, 0],
|
254 |
+
p=1,
|
255 |
+
),
|
256 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
257 |
+
],
|
258 |
+
[
|
259 |
+
dict(
|
260 |
+
type="RandomRotateTargetAngle",
|
261 |
+
angle=[1],
|
262 |
+
axis="z",
|
263 |
+
center=[0, 0, 0],
|
264 |
+
p=1,
|
265 |
+
),
|
266 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
267 |
+
],
|
268 |
+
[
|
269 |
+
dict(
|
270 |
+
type="RandomRotateTargetAngle",
|
271 |
+
angle=[3 / 2],
|
272 |
+
axis="z",
|
273 |
+
center=[0, 0, 0],
|
274 |
+
p=1,
|
275 |
+
),
|
276 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
277 |
+
],
|
278 |
+
[dict(type="RandomFlip", p=1)],
|
279 |
+
],
|
280 |
+
),
|
281 |
+
),
|
282 |
+
)
|
submodules/PointTransformerV3/Pointcept/configs/modelnet40/cls-ptv3-v1m1-0-base.py
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 32 # bs: total bs in all gpus
|
4 |
+
num_worker = 16
|
5 |
+
batch_size_val = 8
|
6 |
+
empty_cache = False
|
7 |
+
enable_amp = False
|
8 |
+
|
9 |
+
# model settings
|
10 |
+
model = dict(
|
11 |
+
type="DefaultClassifier",
|
12 |
+
num_classes=40,
|
13 |
+
backbone_embed_dim=512,
|
14 |
+
backbone=dict(
|
15 |
+
type="PT-v3m1",
|
16 |
+
in_channels=6,
|
17 |
+
order=("z", "z-trans", "hilbert", "hilbert-trans"),
|
18 |
+
stride=(2, 2, 2, 2),
|
19 |
+
enc_depths=(2, 2, 2, 6, 2),
|
20 |
+
enc_channels=(32, 64, 128, 256, 512),
|
21 |
+
enc_num_head=(2, 4, 8, 16, 32),
|
22 |
+
enc_patch_size=(1024, 1024, 1024, 1024, 1024),
|
23 |
+
dec_depths=(2, 2, 2, 2),
|
24 |
+
dec_channels=(64, 64, 128, 256),
|
25 |
+
dec_num_head=(4, 4, 8, 16),
|
26 |
+
dec_patch_size=(1024, 1024, 1024, 1024),
|
27 |
+
mlp_ratio=4,
|
28 |
+
qkv_bias=True,
|
29 |
+
qk_scale=None,
|
30 |
+
attn_drop=0.0,
|
31 |
+
proj_drop=0.0,
|
32 |
+
drop_path=0.3,
|
33 |
+
shuffle_orders=True,
|
34 |
+
pre_norm=True,
|
35 |
+
enable_rpe=False,
|
36 |
+
enable_flash=True,
|
37 |
+
upcast_attention=False,
|
38 |
+
upcast_softmax=False,
|
39 |
+
cls_mode=True,
|
40 |
+
pdnorm_bn=False,
|
41 |
+
pdnorm_ln=False,
|
42 |
+
pdnorm_decouple=True,
|
43 |
+
pdnorm_adaptive=False,
|
44 |
+
pdnorm_affine=True,
|
45 |
+
pdnorm_conditions=("ScanNet", "S3DIS", "Structured3D"),
|
46 |
+
),
|
47 |
+
criteria=[
|
48 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
49 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
50 |
+
],
|
51 |
+
)
|
52 |
+
|
53 |
+
# scheduler settings
|
54 |
+
epoch = 300
|
55 |
+
# optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
56 |
+
# scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
|
57 |
+
optimizer = dict(type="AdamW", lr=0.001, weight_decay=0.01)
|
58 |
+
scheduler = dict(
|
59 |
+
type="OneCycleLR",
|
60 |
+
max_lr=[0.001, 0.0001],
|
61 |
+
pct_start=0.05,
|
62 |
+
anneal_strategy="cos",
|
63 |
+
div_factor=10.0,
|
64 |
+
final_div_factor=1000.0,
|
65 |
+
)
|
66 |
+
param_dicts = [dict(keyword="block", lr=0.0001)]
|
67 |
+
|
68 |
+
# dataset settings
|
69 |
+
dataset_type = "ModelNetDataset"
|
70 |
+
data_root = "data/modelnet40_normal_resampled"
|
71 |
+
cache_data = False
|
72 |
+
class_names = [
|
73 |
+
"airplane",
|
74 |
+
"bathtub",
|
75 |
+
"bed",
|
76 |
+
"bench",
|
77 |
+
"bookshelf",
|
78 |
+
"bottle",
|
79 |
+
"bowl",
|
80 |
+
"car",
|
81 |
+
"chair",
|
82 |
+
"cone",
|
83 |
+
"cup",
|
84 |
+
"curtain",
|
85 |
+
"desk",
|
86 |
+
"door",
|
87 |
+
"dresser",
|
88 |
+
"flower_pot",
|
89 |
+
"glass_box",
|
90 |
+
"guitar",
|
91 |
+
"keyboard",
|
92 |
+
"lamp",
|
93 |
+
"laptop",
|
94 |
+
"mantel",
|
95 |
+
"monitor",
|
96 |
+
"night_stand",
|
97 |
+
"person",
|
98 |
+
"piano",
|
99 |
+
"plant",
|
100 |
+
"radio",
|
101 |
+
"range_hood",
|
102 |
+
"sink",
|
103 |
+
"sofa",
|
104 |
+
"stairs",
|
105 |
+
"stool",
|
106 |
+
"table",
|
107 |
+
"tent",
|
108 |
+
"toilet",
|
109 |
+
"tv_stand",
|
110 |
+
"vase",
|
111 |
+
"wardrobe",
|
112 |
+
"xbox",
|
113 |
+
]
|
114 |
+
|
115 |
+
data = dict(
|
116 |
+
num_classes=40,
|
117 |
+
ignore_index=-1,
|
118 |
+
names=class_names,
|
119 |
+
train=dict(
|
120 |
+
type=dataset_type,
|
121 |
+
split="train",
|
122 |
+
data_root=data_root,
|
123 |
+
class_names=class_names,
|
124 |
+
transform=[
|
125 |
+
dict(type="NormalizeCoord"),
|
126 |
+
# dict(type="CenterShift", apply_z=True),
|
127 |
+
# dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
128 |
+
# dict(type="RandomRotate", angle=[-1/24, 1/24], axis="x", p=0.5),
|
129 |
+
# dict(type="RandomRotate", angle=[-1/24, 1/24], axis="y", p=0.5),
|
130 |
+
dict(type="RandomScale", scale=[0.7, 1.5], anisotropic=True),
|
131 |
+
dict(type="RandomShift", shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
|
132 |
+
# dict(type="RandomFlip", p=0.5),
|
133 |
+
# dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
134 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
135 |
+
dict(
|
136 |
+
type="GridSample",
|
137 |
+
grid_size=0.01,
|
138 |
+
hash_type="fnv",
|
139 |
+
mode="train",
|
140 |
+
keys=("coord", "normal"),
|
141 |
+
return_grid_coord=True,
|
142 |
+
),
|
143 |
+
# dict(type="SphereCrop", point_max=10000, mode="random"),
|
144 |
+
# dict(type="CenterShift", apply_z=True),
|
145 |
+
dict(type="ShufflePoint"),
|
146 |
+
dict(type="ToTensor"),
|
147 |
+
dict(
|
148 |
+
type="Collect",
|
149 |
+
keys=("coord", "grid_coord", "category"),
|
150 |
+
feat_keys=["coord", "normal"],
|
151 |
+
),
|
152 |
+
],
|
153 |
+
test_mode=False,
|
154 |
+
),
|
155 |
+
val=dict(
|
156 |
+
type=dataset_type,
|
157 |
+
split="test",
|
158 |
+
data_root=data_root,
|
159 |
+
class_names=class_names,
|
160 |
+
transform=[
|
161 |
+
dict(type="NormalizeCoord"),
|
162 |
+
dict(
|
163 |
+
type="GridSample",
|
164 |
+
grid_size=0.01,
|
165 |
+
hash_type="fnv",
|
166 |
+
mode="train",
|
167 |
+
keys=("coord", "normal"),
|
168 |
+
return_grid_coord=True,
|
169 |
+
),
|
170 |
+
dict(type="ToTensor"),
|
171 |
+
dict(
|
172 |
+
type="Collect",
|
173 |
+
keys=("coord", "grid_coord", "category"),
|
174 |
+
feat_keys=["coord", "normal"],
|
175 |
+
),
|
176 |
+
],
|
177 |
+
test_mode=False,
|
178 |
+
),
|
179 |
+
test=dict(
|
180 |
+
type=dataset_type,
|
181 |
+
split="test",
|
182 |
+
data_root=data_root,
|
183 |
+
class_names=class_names,
|
184 |
+
transform=[
|
185 |
+
dict(type="NormalizeCoord"),
|
186 |
+
],
|
187 |
+
test_mode=True,
|
188 |
+
test_cfg=dict(
|
189 |
+
post_transform=[
|
190 |
+
dict(
|
191 |
+
type="GridSample",
|
192 |
+
grid_size=0.01,
|
193 |
+
hash_type="fnv",
|
194 |
+
mode="train",
|
195 |
+
keys=("coord", "normal"),
|
196 |
+
return_grid_coord=True,
|
197 |
+
),
|
198 |
+
dict(type="ToTensor"),
|
199 |
+
dict(
|
200 |
+
type="Collect",
|
201 |
+
keys=("coord", "grid_coord"),
|
202 |
+
feat_keys=["coord", "normal"],
|
203 |
+
),
|
204 |
+
],
|
205 |
+
aug_transform=[
|
206 |
+
[dict(type="RandomScale", scale=[1, 1], anisotropic=True)], # 1
|
207 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 2
|
208 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 3
|
209 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 4
|
210 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 5
|
211 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 5
|
212 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 6
|
213 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 7
|
214 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 8
|
215 |
+
[dict(type="RandomScale", scale=[0.8, 1.2], anisotropic=True)], # 9
|
216 |
+
],
|
217 |
+
),
|
218 |
+
),
|
219 |
+
)
|
220 |
+
|
221 |
+
# hooks
|
222 |
+
hooks = [
|
223 |
+
dict(type="CheckpointLoader"),
|
224 |
+
dict(type="IterationTimer", warmup_iter=2),
|
225 |
+
dict(type="InformationWriter"),
|
226 |
+
dict(type="ClsEvaluator"),
|
227 |
+
dict(type="CheckpointSaver", save_freq=None),
|
228 |
+
dict(type="PreciseEvaluator", test_last=False),
|
229 |
+
]
|
230 |
+
|
231 |
+
# tester
|
232 |
+
test = dict(type="ClsVotingTester", num_repeat=100)
|
submodules/PointTransformerV3/Pointcept/configs/modelnet40/cls-spunet-v1m1-0-base.py
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
# misc custom setting
|
3 |
+
batch_size = 16 # bs: total bs in all gpus
|
4 |
+
# batch_size_val = 8
|
5 |
+
empty_cache = False
|
6 |
+
enable_amp = False
|
7 |
+
|
8 |
+
# model settings
|
9 |
+
model = dict(
|
10 |
+
type="DefaultClassifier",
|
11 |
+
num_classes=40,
|
12 |
+
backbone_embed_dim=256,
|
13 |
+
backbone=dict(
|
14 |
+
type="SpUNet-v1m1",
|
15 |
+
in_channels=6,
|
16 |
+
num_classes=0,
|
17 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
18 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
19 |
+
cls_mode=True,
|
20 |
+
),
|
21 |
+
criteria=[dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1)],
|
22 |
+
)
|
23 |
+
|
24 |
+
# scheduler settings
|
25 |
+
epoch = 200
|
26 |
+
optimizer = dict(type="SGD", lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
|
27 |
+
scheduler = dict(type="MultiStepLR", milestones=[0.6, 0.8], gamma=0.1)
|
28 |
+
|
29 |
+
# dataset settings
|
30 |
+
dataset_type = "ModelNetDataset"
|
31 |
+
data_root = "data/modelnet40_normal_resampled"
|
32 |
+
cache_data = False
|
33 |
+
class_names = [
|
34 |
+
"airplane",
|
35 |
+
"bathtub",
|
36 |
+
"bed",
|
37 |
+
"bench",
|
38 |
+
"bookshelf",
|
39 |
+
"bottle",
|
40 |
+
"bowl",
|
41 |
+
"car",
|
42 |
+
"chair",
|
43 |
+
"cone",
|
44 |
+
"cup",
|
45 |
+
"curtain",
|
46 |
+
"desk",
|
47 |
+
"door",
|
48 |
+
"dresser",
|
49 |
+
"flower_pot",
|
50 |
+
"glass_box",
|
51 |
+
"guitar",
|
52 |
+
"keyboard",
|
53 |
+
"lamp",
|
54 |
+
"laptop",
|
55 |
+
"mantel",
|
56 |
+
"monitor",
|
57 |
+
"night_stand",
|
58 |
+
"person",
|
59 |
+
"piano",
|
60 |
+
"plant",
|
61 |
+
"radio",
|
62 |
+
"range_hood",
|
63 |
+
"sink",
|
64 |
+
"sofa",
|
65 |
+
"stairs",
|
66 |
+
"stool",
|
67 |
+
"table",
|
68 |
+
"tent",
|
69 |
+
"toilet",
|
70 |
+
"tv_stand",
|
71 |
+
"vase",
|
72 |
+
"wardrobe",
|
73 |
+
"xbox",
|
74 |
+
]
|
75 |
+
|
76 |
+
data = dict(
|
77 |
+
num_classes=40,
|
78 |
+
ignore_index=-1,
|
79 |
+
names=class_names,
|
80 |
+
train=dict(
|
81 |
+
type=dataset_type,
|
82 |
+
split="train",
|
83 |
+
data_root=data_root,
|
84 |
+
class_names=class_names,
|
85 |
+
transform=[
|
86 |
+
dict(type="NormalizeCoord"),
|
87 |
+
# dict(type="CenterShift", apply_z=True),
|
88 |
+
# dict(type="RandomRotate", angle=[-1, 1], axis="z", center=[0, 0, 0], p=0.5),
|
89 |
+
# dict(type="RandomRotate", angle=[-1/24, 1/24], axis="x", p=0.5),
|
90 |
+
# dict(type="RandomRotate", angle=[-1/24, 1/24], axis="y", p=0.5),
|
91 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
92 |
+
dict(type="RandomShift", shift=((-0.2, 0.2), (-0.2, 0.2), (-0.2, 0.2))),
|
93 |
+
# dict(type="RandomFlip", p=0.5),
|
94 |
+
# dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
95 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
96 |
+
dict(
|
97 |
+
type="GridSample",
|
98 |
+
grid_size=0.01,
|
99 |
+
hash_type="fnv",
|
100 |
+
mode="train",
|
101 |
+
keys=("coord", "normal"),
|
102 |
+
return_grid_coord=True,
|
103 |
+
),
|
104 |
+
# dict(type="SphereCrop", point_max=10000, mode="random"),
|
105 |
+
# dict(type="CenterShift", apply_z=True),
|
106 |
+
dict(type="ShufflePoint"),
|
107 |
+
dict(type="ToTensor"),
|
108 |
+
dict(
|
109 |
+
type="Collect",
|
110 |
+
keys=("coord", "grid_coord", "category"),
|
111 |
+
feat_keys=["coord", "normal"],
|
112 |
+
),
|
113 |
+
],
|
114 |
+
test_mode=False,
|
115 |
+
),
|
116 |
+
val=dict(
|
117 |
+
type=dataset_type,
|
118 |
+
split="test",
|
119 |
+
data_root=data_root,
|
120 |
+
class_names=class_names,
|
121 |
+
transform=[
|
122 |
+
dict(type="NormalizeCoord"),
|
123 |
+
dict(
|
124 |
+
type="GridSample",
|
125 |
+
grid_size=0.01,
|
126 |
+
hash_type="fnv",
|
127 |
+
mode="train",
|
128 |
+
keys=("coord", "normal"),
|
129 |
+
return_grid_coord=True,
|
130 |
+
),
|
131 |
+
dict(type="ToTensor"),
|
132 |
+
dict(
|
133 |
+
type="Collect",
|
134 |
+
keys=("coord", "grid_coord", "category"),
|
135 |
+
feat_keys=["coord", "normal"],
|
136 |
+
),
|
137 |
+
],
|
138 |
+
test_mode=False,
|
139 |
+
),
|
140 |
+
test=dict(
|
141 |
+
type=dataset_type,
|
142 |
+
split="test",
|
143 |
+
data_root=data_root,
|
144 |
+
class_names=class_names,
|
145 |
+
transform=[
|
146 |
+
dict(type="NormalizeCoord"),
|
147 |
+
dict(
|
148 |
+
type="GridSample",
|
149 |
+
grid_size=0.01,
|
150 |
+
hash_type="fnv",
|
151 |
+
mode="train",
|
152 |
+
keys=("coord", "normal"),
|
153 |
+
return_grid_coord=True,
|
154 |
+
),
|
155 |
+
dict(type="ToTensor"),
|
156 |
+
dict(
|
157 |
+
type="Collect",
|
158 |
+
keys=("coord", "grid_coord", "category"),
|
159 |
+
feat_keys=["coord", "normal"],
|
160 |
+
),
|
161 |
+
],
|
162 |
+
test_mode=True,
|
163 |
+
),
|
164 |
+
)
|
165 |
+
|
166 |
+
# hooks
|
167 |
+
hooks = [
|
168 |
+
dict(type="CheckpointLoader"),
|
169 |
+
dict(type="IterationTimer", warmup_iter=2),
|
170 |
+
dict(type="InformationWriter"),
|
171 |
+
dict(type="ClsEvaluator"),
|
172 |
+
dict(type="CheckpointSaver", save_freq=None),
|
173 |
+
]
|
174 |
+
|
175 |
+
# tester
|
176 |
+
test = dict(type="ClsTester")
|
submodules/PointTransformerV3/Pointcept/configs/nuscenes/semseg-ppt-v1m1-0-nu-sk-wa-spunet.py
ADDED
@@ -0,0 +1,342 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
find_unused_parameters = True
|
10 |
+
|
11 |
+
# trainer
|
12 |
+
train = dict(
|
13 |
+
type="MultiDatasetTrainer",
|
14 |
+
)
|
15 |
+
|
16 |
+
# model settings
|
17 |
+
model = dict(
|
18 |
+
type="PPT-v1m1",
|
19 |
+
backbone=dict(
|
20 |
+
type="SpUNet-v1m3",
|
21 |
+
in_channels=4,
|
22 |
+
num_classes=0,
|
23 |
+
base_channels=32,
|
24 |
+
context_channels=256,
|
25 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
26 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
27 |
+
cls_mode=False,
|
28 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
29 |
+
zero_init=False,
|
30 |
+
norm_decouple=True,
|
31 |
+
norm_adaptive=False,
|
32 |
+
norm_affine=True,
|
33 |
+
),
|
34 |
+
criteria=[
|
35 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
36 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
37 |
+
],
|
38 |
+
backbone_out_channels=96,
|
39 |
+
context_channels=256,
|
40 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
41 |
+
template="[x]",
|
42 |
+
clip_model="ViT-B/16",
|
43 |
+
# fmt: off
|
44 |
+
class_name=(
|
45 |
+
# SemanticKITTI
|
46 |
+
"car", "bicycle", "motorcycle", "truck", "other vehicle",
|
47 |
+
"person", "person who rides a bicycle", "person who rides a motorcycle", "road", "parking",
|
48 |
+
"path for pedestrians at the side of a road", "other ground", "building", "fence", "vegetation",
|
49 |
+
"trunk", "terrain", "pole", "traffic sign",
|
50 |
+
# nuScenes
|
51 |
+
"barrier", "bicycle", "bus", "car", "construction vehicle",
|
52 |
+
"motorcycle", "pedestrian", "traffic cone", "trailer", "truck",
|
53 |
+
"path suitable or safe for driving", "other flat", "sidewalk", "terrain", "man made", "vegetation",
|
54 |
+
# waymo
|
55 |
+
"car", "truck", "bus", "other vehicle", "person who rides a motorcycle",
|
56 |
+
"person who rides a bicycle", "pedestrian", "sign", "traffic light", "pole",
|
57 |
+
"construction cone", "bicycle", "motorcycle", "building", "vegetation",
|
58 |
+
"tree trunk", "curb", "road", "lane marker", "other ground", "horizontal surface that can not drive",
|
59 |
+
"surface when pedestrians most likely to walk on",
|
60 |
+
),
|
61 |
+
valid_index=(
|
62 |
+
[i for i in range(19)],
|
63 |
+
[i for i in range(19, 19 + 16)],
|
64 |
+
[i for i in range(19 + 16, 19 + 16 + 22)],
|
65 |
+
),
|
66 |
+
# fmt: on
|
67 |
+
backbone_mode=False,
|
68 |
+
)
|
69 |
+
|
70 |
+
# scheduler settings
|
71 |
+
epoch = 50
|
72 |
+
eval_epoch = 50
|
73 |
+
optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
|
74 |
+
scheduler = dict(
|
75 |
+
type="OneCycleLR",
|
76 |
+
max_lr=optimizer["lr"],
|
77 |
+
pct_start=0.04,
|
78 |
+
anneal_strategy="cos",
|
79 |
+
div_factor=10.0,
|
80 |
+
final_div_factor=100.0,
|
81 |
+
)
|
82 |
+
# param_dicts = [dict(keyword="modulation", lr=0.0002)]
|
83 |
+
|
84 |
+
# dataset settings
|
85 |
+
data = dict(
|
86 |
+
num_classes=16,
|
87 |
+
ignore_index=-1,
|
88 |
+
names=[
|
89 |
+
"barrier",
|
90 |
+
"bicycle",
|
91 |
+
"bus",
|
92 |
+
"car",
|
93 |
+
"construction_vehicle",
|
94 |
+
"motorcycle",
|
95 |
+
"pedestrian",
|
96 |
+
"traffic_cone",
|
97 |
+
"trailer",
|
98 |
+
"truck",
|
99 |
+
"driveable_surface",
|
100 |
+
"other_flat",
|
101 |
+
"sidewalk",
|
102 |
+
"terrain",
|
103 |
+
"manmade",
|
104 |
+
"vegetation",
|
105 |
+
],
|
106 |
+
train=dict(
|
107 |
+
type="ConcatDataset",
|
108 |
+
datasets=[
|
109 |
+
# nuScenes
|
110 |
+
dict(
|
111 |
+
type="NuScenesDataset",
|
112 |
+
split="train",
|
113 |
+
data_root="data/nuscenes",
|
114 |
+
transform=[
|
115 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
116 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
117 |
+
dict(
|
118 |
+
type="RandomRotate",
|
119 |
+
angle=[-1, 1],
|
120 |
+
axis="z",
|
121 |
+
center=[0, 0, 0],
|
122 |
+
p=0.5,
|
123 |
+
),
|
124 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
|
125 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
|
126 |
+
dict(
|
127 |
+
type="PointClip",
|
128 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
129 |
+
),
|
130 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
131 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
132 |
+
dict(type="RandomFlip", p=0.5),
|
133 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
134 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
135 |
+
dict(
|
136 |
+
type="GridSample",
|
137 |
+
grid_size=0.05,
|
138 |
+
hash_type="fnv",
|
139 |
+
mode="train",
|
140 |
+
keys=("coord", "strength", "segment"),
|
141 |
+
return_grid_coord=True,
|
142 |
+
),
|
143 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
144 |
+
# dict(type="CenterShift", apply_z=False),
|
145 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
146 |
+
dict(type="ToTensor"),
|
147 |
+
dict(
|
148 |
+
type="Collect",
|
149 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
150 |
+
feat_keys=("coord", "strength"),
|
151 |
+
),
|
152 |
+
],
|
153 |
+
test_mode=False,
|
154 |
+
ignore_index=-1,
|
155 |
+
loop=1,
|
156 |
+
),
|
157 |
+
# SemanticKITTI
|
158 |
+
dict(
|
159 |
+
type="SemanticKITTIDataset",
|
160 |
+
split="train",
|
161 |
+
data_root="data/semantic_kitti",
|
162 |
+
transform=[
|
163 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
164 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
165 |
+
dict(
|
166 |
+
type="RandomRotate",
|
167 |
+
angle=[-1, 1],
|
168 |
+
axis="z",
|
169 |
+
center=[0, 0, 0],
|
170 |
+
p=0.5,
|
171 |
+
),
|
172 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
173 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
174 |
+
dict(
|
175 |
+
type="PointClip",
|
176 |
+
point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
|
177 |
+
),
|
178 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
179 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
180 |
+
dict(type="RandomFlip", p=0.5),
|
181 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
182 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
183 |
+
dict(
|
184 |
+
type="GridSample",
|
185 |
+
grid_size=0.05,
|
186 |
+
hash_type="fnv",
|
187 |
+
mode="train",
|
188 |
+
keys=("coord", "strength", "segment"),
|
189 |
+
return_grid_coord=True,
|
190 |
+
),
|
191 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
192 |
+
# dict(type="CenterShift", apply_z=False),
|
193 |
+
dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
|
194 |
+
dict(type="ToTensor"),
|
195 |
+
dict(
|
196 |
+
type="Collect",
|
197 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
198 |
+
feat_keys=("coord", "strength"),
|
199 |
+
),
|
200 |
+
],
|
201 |
+
test_mode=False,
|
202 |
+
ignore_index=-1,
|
203 |
+
loop=1,
|
204 |
+
),
|
205 |
+
# Waymo
|
206 |
+
dict(
|
207 |
+
type="WaymoDataset",
|
208 |
+
split="training",
|
209 |
+
data_root="data/waymo",
|
210 |
+
transform=[
|
211 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
212 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
213 |
+
dict(
|
214 |
+
type="RandomRotate",
|
215 |
+
angle=[-1, 1],
|
216 |
+
axis="z",
|
217 |
+
center=[0, 0, 0],
|
218 |
+
p=0.5,
|
219 |
+
),
|
220 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
221 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
222 |
+
dict(
|
223 |
+
type="PointClip",
|
224 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
225 |
+
),
|
226 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
227 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
228 |
+
dict(type="RandomFlip", p=0.5),
|
229 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
230 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
231 |
+
dict(
|
232 |
+
type="GridSample",
|
233 |
+
grid_size=0.05,
|
234 |
+
hash_type="fnv",
|
235 |
+
mode="train",
|
236 |
+
keys=("coord", "strength", "segment"),
|
237 |
+
return_grid_coord=True,
|
238 |
+
),
|
239 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
240 |
+
# dict(type="CenterShift", apply_z=False),
|
241 |
+
dict(type="Add", keys_dict={"condition": "Waymo"}),
|
242 |
+
dict(type="ToTensor"),
|
243 |
+
dict(
|
244 |
+
type="Collect",
|
245 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
246 |
+
feat_keys=("coord", "strength"),
|
247 |
+
),
|
248 |
+
],
|
249 |
+
test_mode=False,
|
250 |
+
ignore_index=-1,
|
251 |
+
loop=1,
|
252 |
+
),
|
253 |
+
],
|
254 |
+
),
|
255 |
+
val=dict(
|
256 |
+
type="NuScenesDataset",
|
257 |
+
split="val",
|
258 |
+
data_root="data/nuscenes",
|
259 |
+
transform=[
|
260 |
+
dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
|
261 |
+
dict(
|
262 |
+
type="GridSample",
|
263 |
+
grid_size=0.05,
|
264 |
+
hash_type="fnv",
|
265 |
+
mode="train",
|
266 |
+
keys=("coord", "strength", "segment"),
|
267 |
+
return_grid_coord=True,
|
268 |
+
),
|
269 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
270 |
+
dict(type="ToTensor"),
|
271 |
+
dict(
|
272 |
+
type="Collect",
|
273 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
274 |
+
feat_keys=("coord", "strength"),
|
275 |
+
),
|
276 |
+
],
|
277 |
+
test_mode=False,
|
278 |
+
ignore_index=-1,
|
279 |
+
),
|
280 |
+
test=dict(
|
281 |
+
type="NuScenesDataset",
|
282 |
+
split="val",
|
283 |
+
data_root="data/nuscenes",
|
284 |
+
transform=[
|
285 |
+
dict(type="Copy", keys_dict={"segment": "origin_segment"}),
|
286 |
+
dict(
|
287 |
+
type="GridSample",
|
288 |
+
grid_size=0.025,
|
289 |
+
hash_type="fnv",
|
290 |
+
mode="train",
|
291 |
+
keys=("coord", "strength", "segment"),
|
292 |
+
return_inverse=True,
|
293 |
+
),
|
294 |
+
],
|
295 |
+
test_mode=True,
|
296 |
+
test_cfg=dict(
|
297 |
+
voxelize=dict(
|
298 |
+
type="GridSample",
|
299 |
+
grid_size=0.05,
|
300 |
+
hash_type="fnv",
|
301 |
+
mode="test",
|
302 |
+
return_grid_coord=True,
|
303 |
+
keys=("coord", "strength"),
|
304 |
+
),
|
305 |
+
crop=None,
|
306 |
+
post_transform=[
|
307 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
308 |
+
dict(type="ToTensor"),
|
309 |
+
dict(
|
310 |
+
type="Collect",
|
311 |
+
keys=("coord", "grid_coord", "index", "condition"),
|
312 |
+
feat_keys=("coord", "strength"),
|
313 |
+
),
|
314 |
+
],
|
315 |
+
aug_transform=[
|
316 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
317 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
318 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
319 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
320 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
321 |
+
[
|
322 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
323 |
+
dict(type="RandomFlip", p=1),
|
324 |
+
],
|
325 |
+
[
|
326 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
327 |
+
dict(type="RandomFlip", p=1),
|
328 |
+
],
|
329 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
330 |
+
[
|
331 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
332 |
+
dict(type="RandomFlip", p=1),
|
333 |
+
],
|
334 |
+
[
|
335 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
336 |
+
dict(type="RandomFlip", p=1),
|
337 |
+
],
|
338 |
+
],
|
339 |
+
),
|
340 |
+
ignore_index=-1,
|
341 |
+
),
|
342 |
+
)
|
submodules/PointTransformerV3/Pointcept/configs/nuscenes/semseg-ppt-v1m2-0-nu-sk-wa-spunet.py
ADDED
@@ -0,0 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
find_unused_parameters = True
|
10 |
+
|
11 |
+
# trainer
|
12 |
+
train = dict(
|
13 |
+
type="MultiDatasetTrainer",
|
14 |
+
)
|
15 |
+
|
16 |
+
# model settings
|
17 |
+
model = dict(
|
18 |
+
type="PPT-v1m2",
|
19 |
+
backbone=dict(
|
20 |
+
type="SpUNet-v1m3",
|
21 |
+
in_channels=4,
|
22 |
+
num_classes=0,
|
23 |
+
base_channels=32,
|
24 |
+
context_channels=256,
|
25 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
26 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
27 |
+
cls_mode=False,
|
28 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
29 |
+
zero_init=False,
|
30 |
+
norm_decouple=True,
|
31 |
+
norm_adaptive=False,
|
32 |
+
norm_affine=True,
|
33 |
+
),
|
34 |
+
criteria=[
|
35 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
36 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
37 |
+
],
|
38 |
+
backbone_out_channels=96,
|
39 |
+
context_channels=256,
|
40 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
41 |
+
num_classes=(19, 16, 22),
|
42 |
+
)
|
43 |
+
|
44 |
+
# scheduler settings
|
45 |
+
epoch = 50
|
46 |
+
eval_epoch = 50
|
47 |
+
optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
|
48 |
+
scheduler = dict(
|
49 |
+
type="OneCycleLR",
|
50 |
+
max_lr=optimizer["lr"],
|
51 |
+
pct_start=0.04,
|
52 |
+
anneal_strategy="cos",
|
53 |
+
div_factor=10.0,
|
54 |
+
final_div_factor=100.0,
|
55 |
+
)
|
56 |
+
# param_dicts = [dict(keyword="modulation", lr=0.0002)]
|
57 |
+
|
58 |
+
# dataset settings
|
59 |
+
data = dict(
|
60 |
+
num_classes=16,
|
61 |
+
ignore_index=-1,
|
62 |
+
names=[
|
63 |
+
"barrier",
|
64 |
+
"bicycle",
|
65 |
+
"bus",
|
66 |
+
"car",
|
67 |
+
"construction_vehicle",
|
68 |
+
"motorcycle",
|
69 |
+
"pedestrian",
|
70 |
+
"traffic_cone",
|
71 |
+
"trailer",
|
72 |
+
"truck",
|
73 |
+
"driveable_surface",
|
74 |
+
"other_flat",
|
75 |
+
"sidewalk",
|
76 |
+
"terrain",
|
77 |
+
"manmade",
|
78 |
+
"vegetation",
|
79 |
+
],
|
80 |
+
train=dict(
|
81 |
+
type="ConcatDataset",
|
82 |
+
datasets=[
|
83 |
+
# nuScenes
|
84 |
+
dict(
|
85 |
+
type="NuScenesDataset",
|
86 |
+
split="train",
|
87 |
+
data_root="data/nuscenes",
|
88 |
+
transform=[
|
89 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
90 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
91 |
+
dict(
|
92 |
+
type="RandomRotate",
|
93 |
+
angle=[-1, 1],
|
94 |
+
axis="z",
|
95 |
+
center=[0, 0, 0],
|
96 |
+
p=0.5,
|
97 |
+
),
|
98 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
|
99 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
|
100 |
+
dict(
|
101 |
+
type="PointClip",
|
102 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
103 |
+
),
|
104 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
105 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
106 |
+
dict(type="RandomFlip", p=0.5),
|
107 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
108 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
109 |
+
dict(
|
110 |
+
type="GridSample",
|
111 |
+
grid_size=0.05,
|
112 |
+
hash_type="fnv",
|
113 |
+
mode="train",
|
114 |
+
keys=("coord", "strength", "segment"),
|
115 |
+
return_grid_coord=True,
|
116 |
+
),
|
117 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
118 |
+
# dict(type="CenterShift", apply_z=False),
|
119 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
120 |
+
dict(type="ToTensor"),
|
121 |
+
dict(
|
122 |
+
type="Collect",
|
123 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
124 |
+
feat_keys=("coord", "strength"),
|
125 |
+
),
|
126 |
+
],
|
127 |
+
test_mode=False,
|
128 |
+
ignore_index=-1,
|
129 |
+
loop=1,
|
130 |
+
),
|
131 |
+
# SemanticKITTI
|
132 |
+
dict(
|
133 |
+
type="SemanticKITTIDataset",
|
134 |
+
split="train",
|
135 |
+
data_root="data/semantic_kitti",
|
136 |
+
transform=[
|
137 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
138 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
139 |
+
dict(
|
140 |
+
type="RandomRotate",
|
141 |
+
angle=[-1, 1],
|
142 |
+
axis="z",
|
143 |
+
center=[0, 0, 0],
|
144 |
+
p=0.5,
|
145 |
+
),
|
146 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
147 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
148 |
+
dict(
|
149 |
+
type="PointClip",
|
150 |
+
point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
|
151 |
+
),
|
152 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
153 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
154 |
+
dict(type="RandomFlip", p=0.5),
|
155 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
156 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
157 |
+
dict(
|
158 |
+
type="GridSample",
|
159 |
+
grid_size=0.05,
|
160 |
+
hash_type="fnv",
|
161 |
+
mode="train",
|
162 |
+
keys=("coord", "strength", "segment"),
|
163 |
+
return_grid_coord=True,
|
164 |
+
),
|
165 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
166 |
+
# dict(type="CenterShift", apply_z=False),
|
167 |
+
dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
|
168 |
+
dict(type="ToTensor"),
|
169 |
+
dict(
|
170 |
+
type="Collect",
|
171 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
172 |
+
feat_keys=("coord", "strength"),
|
173 |
+
),
|
174 |
+
],
|
175 |
+
test_mode=False,
|
176 |
+
ignore_index=-1,
|
177 |
+
loop=1,
|
178 |
+
),
|
179 |
+
# Waymo
|
180 |
+
dict(
|
181 |
+
type="WaymoDataset",
|
182 |
+
split="training",
|
183 |
+
data_root="data/waymo",
|
184 |
+
transform=[
|
185 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
186 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
187 |
+
dict(
|
188 |
+
type="RandomRotate",
|
189 |
+
angle=[-1, 1],
|
190 |
+
axis="z",
|
191 |
+
center=[0, 0, 0],
|
192 |
+
p=0.5,
|
193 |
+
),
|
194 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
195 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
196 |
+
dict(
|
197 |
+
type="PointClip",
|
198 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
199 |
+
),
|
200 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
201 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
202 |
+
dict(type="RandomFlip", p=0.5),
|
203 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
204 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
205 |
+
dict(
|
206 |
+
type="GridSample",
|
207 |
+
grid_size=0.05,
|
208 |
+
hash_type="fnv",
|
209 |
+
mode="train",
|
210 |
+
keys=("coord", "strength", "segment"),
|
211 |
+
return_grid_coord=True,
|
212 |
+
),
|
213 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
214 |
+
# dict(type="CenterShift", apply_z=False),
|
215 |
+
dict(type="Add", keys_dict={"condition": "Waymo"}),
|
216 |
+
dict(type="ToTensor"),
|
217 |
+
dict(
|
218 |
+
type="Collect",
|
219 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
220 |
+
feat_keys=("coord", "strength"),
|
221 |
+
),
|
222 |
+
],
|
223 |
+
test_mode=False,
|
224 |
+
ignore_index=-1,
|
225 |
+
loop=1,
|
226 |
+
),
|
227 |
+
],
|
228 |
+
),
|
229 |
+
val=dict(
|
230 |
+
type="NuScenesDataset",
|
231 |
+
split="val",
|
232 |
+
data_root="data/nuscenes",
|
233 |
+
transform=[
|
234 |
+
dict(type="PointClip", point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2)),
|
235 |
+
dict(
|
236 |
+
type="GridSample",
|
237 |
+
grid_size=0.05,
|
238 |
+
hash_type="fnv",
|
239 |
+
mode="train",
|
240 |
+
keys=("coord", "strength", "segment"),
|
241 |
+
return_grid_coord=True,
|
242 |
+
),
|
243 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
244 |
+
dict(type="ToTensor"),
|
245 |
+
dict(
|
246 |
+
type="Collect",
|
247 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
248 |
+
feat_keys=("coord", "strength"),
|
249 |
+
),
|
250 |
+
],
|
251 |
+
test_mode=False,
|
252 |
+
ignore_index=-1,
|
253 |
+
),
|
254 |
+
test=dict(
|
255 |
+
type="NuScenesDataset",
|
256 |
+
split="val",
|
257 |
+
data_root="data/nuscenes",
|
258 |
+
transform=[
|
259 |
+
dict(type="Copy", keys_dict={"segment": "origin_segment"}),
|
260 |
+
dict(
|
261 |
+
type="GridSample",
|
262 |
+
grid_size=0.025,
|
263 |
+
hash_type="fnv",
|
264 |
+
mode="train",
|
265 |
+
keys=("coord", "strength", "segment"),
|
266 |
+
return_inverse=True,
|
267 |
+
),
|
268 |
+
],
|
269 |
+
test_mode=True,
|
270 |
+
test_cfg=dict(
|
271 |
+
voxelize=dict(
|
272 |
+
type="GridSample",
|
273 |
+
grid_size=0.05,
|
274 |
+
hash_type="fnv",
|
275 |
+
mode="test",
|
276 |
+
return_grid_coord=True,
|
277 |
+
keys=("coord", "strength"),
|
278 |
+
),
|
279 |
+
crop=None,
|
280 |
+
post_transform=[
|
281 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
282 |
+
dict(type="ToTensor"),
|
283 |
+
dict(
|
284 |
+
type="Collect",
|
285 |
+
keys=("coord", "grid_coord", "index", "condition"),
|
286 |
+
feat_keys=("coord", "strength"),
|
287 |
+
),
|
288 |
+
],
|
289 |
+
aug_transform=[
|
290 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
291 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
292 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
293 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
294 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
295 |
+
[
|
296 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
297 |
+
dict(type="RandomFlip", p=1),
|
298 |
+
],
|
299 |
+
[
|
300 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
301 |
+
dict(type="RandomFlip", p=1),
|
302 |
+
],
|
303 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
304 |
+
[
|
305 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
306 |
+
dict(type="RandomFlip", p=1),
|
307 |
+
],
|
308 |
+
[
|
309 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
310 |
+
dict(type="RandomFlip", p=1),
|
311 |
+
],
|
312 |
+
],
|
313 |
+
),
|
314 |
+
ignore_index=-1,
|
315 |
+
),
|
316 |
+
)
|
submodules/PointTransformerV3/Pointcept/configs/nuscenes/semseg-ppt-v1m2-1-nu-sk-wa-spunet-submit.py
ADDED
@@ -0,0 +1,292 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_base_ = ["../_base_/default_runtime.py"]
|
2 |
+
|
3 |
+
# misc custom setting
|
4 |
+
batch_size = 12 # bs: total bs in all gpus
|
5 |
+
num_worker = 24
|
6 |
+
mix_prob = 0.8
|
7 |
+
empty_cache = False
|
8 |
+
enable_amp = True
|
9 |
+
find_unused_parameters = True
|
10 |
+
evaluate = False
|
11 |
+
|
12 |
+
# trainer
|
13 |
+
train = dict(
|
14 |
+
type="MultiDatasetTrainer",
|
15 |
+
)
|
16 |
+
|
17 |
+
# model settings
|
18 |
+
model = dict(
|
19 |
+
type="PPT-v1m2",
|
20 |
+
backbone=dict(
|
21 |
+
type="SpUNet-v1m3",
|
22 |
+
in_channels=4,
|
23 |
+
num_classes=0,
|
24 |
+
base_channels=32,
|
25 |
+
context_channels=256,
|
26 |
+
channels=(32, 64, 128, 256, 256, 128, 96, 96),
|
27 |
+
layers=(2, 3, 4, 6, 2, 2, 2, 2),
|
28 |
+
cls_mode=False,
|
29 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
30 |
+
zero_init=False,
|
31 |
+
norm_decouple=True,
|
32 |
+
norm_adaptive=False,
|
33 |
+
norm_affine=True,
|
34 |
+
),
|
35 |
+
criteria=[
|
36 |
+
dict(type="CrossEntropyLoss", loss_weight=1.0, ignore_index=-1),
|
37 |
+
dict(type="LovaszLoss", mode="multiclass", loss_weight=1.0, ignore_index=-1),
|
38 |
+
],
|
39 |
+
backbone_out_channels=96,
|
40 |
+
context_channels=256,
|
41 |
+
conditions=("SemanticKITTI", "nuScenes", "Waymo"),
|
42 |
+
num_classes=(19, 16, 22),
|
43 |
+
)
|
44 |
+
|
45 |
+
# scheduler settings
|
46 |
+
epoch = 50
|
47 |
+
eval_epoch = 50
|
48 |
+
optimizer = dict(type="AdamW", lr=0.002, weight_decay=0.005)
|
49 |
+
scheduler = dict(
|
50 |
+
type="OneCycleLR",
|
51 |
+
max_lr=optimizer["lr"],
|
52 |
+
pct_start=0.04,
|
53 |
+
anneal_strategy="cos",
|
54 |
+
div_factor=10.0,
|
55 |
+
final_div_factor=100.0,
|
56 |
+
)
|
57 |
+
# param_dicts = [dict(keyword="modulation", lr=0.0002)]
|
58 |
+
|
59 |
+
# dataset settings
|
60 |
+
data = dict(
|
61 |
+
num_classes=16,
|
62 |
+
ignore_index=-1,
|
63 |
+
names=[
|
64 |
+
"barrier",
|
65 |
+
"bicycle",
|
66 |
+
"bus",
|
67 |
+
"car",
|
68 |
+
"construction_vehicle",
|
69 |
+
"motorcycle",
|
70 |
+
"pedestrian",
|
71 |
+
"traffic_cone",
|
72 |
+
"trailer",
|
73 |
+
"truck",
|
74 |
+
"driveable_surface",
|
75 |
+
"other_flat",
|
76 |
+
"sidewalk",
|
77 |
+
"terrain",
|
78 |
+
"manmade",
|
79 |
+
"vegetation",
|
80 |
+
],
|
81 |
+
train=dict(
|
82 |
+
type="ConcatDataset",
|
83 |
+
datasets=[
|
84 |
+
# nuScenes
|
85 |
+
dict(
|
86 |
+
type="NuScenesDataset",
|
87 |
+
split=["train", "val"],
|
88 |
+
data_root="data/nuscenes",
|
89 |
+
transform=[
|
90 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
91 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis='z', p=0.75),
|
92 |
+
dict(
|
93 |
+
type="RandomRotate",
|
94 |
+
angle=[-1, 1],
|
95 |
+
axis="z",
|
96 |
+
center=[0, 0, 0],
|
97 |
+
p=0.5,
|
98 |
+
),
|
99 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='x', p=0.5),
|
100 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis='y', p=0.5),
|
101 |
+
dict(
|
102 |
+
type="PointClip",
|
103 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
104 |
+
),
|
105 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
106 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
107 |
+
dict(type="RandomFlip", p=0.5),
|
108 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
109 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
110 |
+
dict(
|
111 |
+
type="GridSample",
|
112 |
+
grid_size=0.05,
|
113 |
+
hash_type="fnv",
|
114 |
+
mode="train",
|
115 |
+
keys=("coord", "strength", "segment"),
|
116 |
+
return_grid_coord=True,
|
117 |
+
),
|
118 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
119 |
+
# dict(type="CenterShift", apply_z=False),
|
120 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
121 |
+
dict(type="ToTensor"),
|
122 |
+
dict(
|
123 |
+
type="Collect",
|
124 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
125 |
+
feat_keys=("coord", "strength"),
|
126 |
+
),
|
127 |
+
],
|
128 |
+
test_mode=False,
|
129 |
+
ignore_index=-1,
|
130 |
+
loop=1,
|
131 |
+
),
|
132 |
+
# SemanticKITTI
|
133 |
+
dict(
|
134 |
+
type="SemanticKITTIDataset",
|
135 |
+
split=["train", "val"],
|
136 |
+
data_root="data/semantic_kitti",
|
137 |
+
transform=[
|
138 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
139 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
140 |
+
dict(
|
141 |
+
type="RandomRotate",
|
142 |
+
angle=[-1, 1],
|
143 |
+
axis="z",
|
144 |
+
center=[0, 0, 0],
|
145 |
+
p=0.5,
|
146 |
+
),
|
147 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
148 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
149 |
+
dict(
|
150 |
+
type="PointClip",
|
151 |
+
point_cloud_range=(-75.2, -75.2, -4, 75.2, 75.2, 2),
|
152 |
+
),
|
153 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
154 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
155 |
+
dict(type="RandomFlip", p=0.5),
|
156 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
157 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
158 |
+
dict(
|
159 |
+
type="GridSample",
|
160 |
+
grid_size=0.05,
|
161 |
+
hash_type="fnv",
|
162 |
+
mode="train",
|
163 |
+
keys=("coord", "strength", "segment"),
|
164 |
+
return_grid_coord=True,
|
165 |
+
),
|
166 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
167 |
+
# dict(type="CenterShift", apply_z=False),
|
168 |
+
dict(type="Add", keys_dict={"condition": "SemanticKITTI"}),
|
169 |
+
dict(type="ToTensor"),
|
170 |
+
dict(
|
171 |
+
type="Collect",
|
172 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
173 |
+
feat_keys=("coord", "strength"),
|
174 |
+
),
|
175 |
+
],
|
176 |
+
test_mode=False,
|
177 |
+
ignore_index=-1,
|
178 |
+
loop=1,
|
179 |
+
),
|
180 |
+
# Waymo
|
181 |
+
dict(
|
182 |
+
type="WaymoDataset",
|
183 |
+
split=["training", "validation"],
|
184 |
+
data_root="data/waymo",
|
185 |
+
transform=[
|
186 |
+
# dict(type="RandomDropout", dropout_ratio=0.2, dropout_application_ratio=0.2),
|
187 |
+
# dict(type="RandomRotateTargetAngle", angle=(1/2, 1, 3/2), center=[0, 0, 0], axis="z", p=0.75),
|
188 |
+
dict(
|
189 |
+
type="RandomRotate",
|
190 |
+
angle=[-1, 1],
|
191 |
+
axis="z",
|
192 |
+
center=[0, 0, 0],
|
193 |
+
p=0.5,
|
194 |
+
),
|
195 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="x", p=0.5),
|
196 |
+
# dict(type="RandomRotate", angle=[-1/6, 1/6], axis="y", p=0.5),
|
197 |
+
dict(
|
198 |
+
type="PointClip",
|
199 |
+
point_cloud_range=(-35.2, -35.2, -4, 35.2, 35.2, 2),
|
200 |
+
),
|
201 |
+
dict(type="RandomScale", scale=[0.9, 1.1]),
|
202 |
+
# dict(type="RandomShift", shift=[0.2, 0.2, 0.2]),
|
203 |
+
dict(type="RandomFlip", p=0.5),
|
204 |
+
dict(type="RandomJitter", sigma=0.005, clip=0.02),
|
205 |
+
# dict(type="ElasticDistortion", distortion_params=[[0.2, 0.4], [0.8, 1.6]]),
|
206 |
+
dict(
|
207 |
+
type="GridSample",
|
208 |
+
grid_size=0.05,
|
209 |
+
hash_type="fnv",
|
210 |
+
mode="train",
|
211 |
+
keys=("coord", "strength", "segment"),
|
212 |
+
return_grid_coord=True,
|
213 |
+
),
|
214 |
+
# dict(type="SphereCrop", point_max=1000000, mode="random"),
|
215 |
+
# dict(type="CenterShift", apply_z=False),
|
216 |
+
dict(type="Add", keys_dict={"condition": "Waymo"}),
|
217 |
+
dict(type="ToTensor"),
|
218 |
+
dict(
|
219 |
+
type="Collect",
|
220 |
+
keys=("coord", "grid_coord", "segment", "condition"),
|
221 |
+
feat_keys=("coord", "strength"),
|
222 |
+
),
|
223 |
+
],
|
224 |
+
test_mode=False,
|
225 |
+
ignore_index=-1,
|
226 |
+
loop=1,
|
227 |
+
),
|
228 |
+
],
|
229 |
+
),
|
230 |
+
test=dict(
|
231 |
+
type="NuScenesDataset",
|
232 |
+
split="test",
|
233 |
+
data_root="data/nuscenes",
|
234 |
+
transform=[
|
235 |
+
dict(type="Copy", keys_dict={"segment": "origin_segment"}),
|
236 |
+
dict(
|
237 |
+
type="GridSample",
|
238 |
+
grid_size=0.025,
|
239 |
+
hash_type="fnv",
|
240 |
+
mode="train",
|
241 |
+
keys=("coord", "strength", "segment"),
|
242 |
+
return_inverse=True,
|
243 |
+
),
|
244 |
+
],
|
245 |
+
test_mode=True,
|
246 |
+
test_cfg=dict(
|
247 |
+
voxelize=dict(
|
248 |
+
type="GridSample",
|
249 |
+
grid_size=0.05,
|
250 |
+
hash_type="fnv",
|
251 |
+
mode="test",
|
252 |
+
return_grid_coord=True,
|
253 |
+
keys=("coord", "strength"),
|
254 |
+
),
|
255 |
+
crop=None,
|
256 |
+
post_transform=[
|
257 |
+
dict(type="Add", keys_dict={"condition": "nuScenes"}),
|
258 |
+
dict(type="ToTensor"),
|
259 |
+
dict(
|
260 |
+
type="Collect",
|
261 |
+
keys=("coord", "grid_coord", "index", "condition"),
|
262 |
+
feat_keys=("coord", "strength"),
|
263 |
+
),
|
264 |
+
],
|
265 |
+
aug_transform=[
|
266 |
+
[dict(type="RandomScale", scale=[0.9, 0.9])],
|
267 |
+
[dict(type="RandomScale", scale=[0.95, 0.95])],
|
268 |
+
[dict(type="RandomScale", scale=[1, 1])],
|
269 |
+
[dict(type="RandomScale", scale=[1.05, 1.05])],
|
270 |
+
[dict(type="RandomScale", scale=[1.1, 1.1])],
|
271 |
+
[
|
272 |
+
dict(type="RandomScale", scale=[0.9, 0.9]),
|
273 |
+
dict(type="RandomFlip", p=1),
|
274 |
+
],
|
275 |
+
[
|
276 |
+
dict(type="RandomScale", scale=[0.95, 0.95]),
|
277 |
+
dict(type="RandomFlip", p=1),
|
278 |
+
],
|
279 |
+
[dict(type="RandomScale", scale=[1, 1]), dict(type="RandomFlip", p=1)],
|
280 |
+
[
|
281 |
+
dict(type="RandomScale", scale=[1.05, 1.05]),
|
282 |
+
dict(type="RandomFlip", p=1),
|
283 |
+
],
|
284 |
+
[
|
285 |
+
dict(type="RandomScale", scale=[1.1, 1.1]),
|
286 |
+
dict(type="RandomFlip", p=1),
|
287 |
+
],
|
288 |
+
],
|
289 |
+
),
|
290 |
+
ignore_index=-1,
|
291 |
+
),
|
292 |
+
)
|