jinlinyi committed on
Commit
f73740f
·
1 Parent(s): 0a8337f

improve model

Browse files
.gitattributes CHANGED
@@ -41,3 +41,6 @@ assets/imgs/907px-Vincent_van_Gogh_-_De_slaapkamer_-_Google_Art_Project.jpg filt
41
  assets/imgs/AdobeStock_286429091.jpeg filter=lfs diff=lfs merge=lfs -text
42
  assets/imgs/AdobeStock_331358641.jpeg filter=lfs diff=lfs merge=lfs -text
43
  assets/imgs/ filter=lfs diff=lfs merge=lfs -text
 
 
 
 
41
  assets/imgs/AdobeStock_286429091.jpeg filter=lfs diff=lfs merge=lfs -text
42
  assets/imgs/AdobeStock_331358641.jpeg filter=lfs diff=lfs merge=lfs -text
43
  assets/imgs/ filter=lfs diff=lfs merge=lfs -text
44
+ assets/imgs/epic.png filter=lfs diff=lfs merge=lfs -text
45
+ models/paramnet_360cities_edina_rpfpp.pth filter=lfs diff=lfs merge=lfs -text
46
+ models/paramnet_360cities_edina_rpf.pth filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -1,8 +1,12 @@
1
 
2
  import os
 
 
3
  os.system(f"pip install git+https://github.com/jinlinyi/PerspectiveFields.git@dev#egg=perspective2d")
4
 
5
 
 
 
6
  import gradio as gr
7
  import cv2
8
  import copy
@@ -136,6 +140,21 @@ print(examples)
136
 
137
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
138
  model_zoo = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  'PersNet-360Cities': {
140
  'weights': ['https://www.dropbox.com/s/czqrepqe7x70b7y/cvpr2023.pth'],
141
  'opts': ['MODEL.WEIGHTS', 'models/cvpr2023.pth', 'MODEL.DEVICE', device,],
 
1
 
2
  import os
3
+ os.system(f"pip install -U openmim")
4
+ os.system(f"mim install mmcv")
5
  os.system(f"pip install git+https://github.com/jinlinyi/PerspectiveFields.git@dev#egg=perspective2d")
6
 
7
 
8
+
9
+
10
  import gradio as gr
11
  import cv2
12
  import copy
 
140
 
141
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
142
  model_zoo = {
143
+
144
+ 'Paramnet-360Cities-edina-centered': {
145
+ 'weights': ['https://www.dropbox.com/s/z2dja70bgy007su/paramnet_360cities_edina_rpf.pth'],
146
+ 'opts': ['MODEL.WEIGHTS', 'models/paramnet_360cities_edina_rpf.pth', 'MODEL.DEVICE', device,],
147
+ 'config_file': 'models/paramnet_360cities_edina_rpf.yaml',
148
+ 'param': True,
149
+ },
150
+
151
+ 'Paramnet-360Cities-edina-uncentered': {
152
+ 'weights': ['https://www.dropbox.com/s/nt29e1pi83mm1va/paramnet_360cities_edina_rpfpp.pth'],
153
+ 'opts': ['MODEL.WEIGHTS', 'models/paramnet_360cities_edina_rpfpp.pth', 'MODEL.DEVICE', device,],
154
+ 'config_file': 'models/paramnet_360cities_edina_rpfpp.yaml',
155
+ 'param': True,
156
+ },
157
+
158
  'PersNet-360Cities': {
159
  'weights': ['https://www.dropbox.com/s/czqrepqe7x70b7y/cvpr2023.pth'],
160
  'opts': ['MODEL.WEIGHTS', 'models/cvpr2023.pth', 'MODEL.DEVICE', device,],
assets/imgs/epic.png ADDED

Git LFS Details

  • SHA256: c2a42a05c6498aca8a92355bbe065f49865b0ab60a11e96b6e1458ac1e5d237a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.84 MB
models/paramnet_360cities_edina_rpf.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58fe0285fe5d4592aec77e9ef57ac94273deb79bcb99f27d08bae68a2d1efc4a
3
+ size 837147876
models/paramnet_360cities_edina_rpf.yaml ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ AUGMENTATION: true
5
+ AUGMENTATION_FUN: uniform_vfov_crop_resize
6
+ AUGMENTATION_TYPE: geometry
7
+ FILTER_EMPTY_ANNOTATIONS: true
8
+ NO_GEOMETRY_AUG: false
9
+ NUM_WORKERS: 8
10
+ REPEAT_THRESHOLD: 0.0
11
+ RESIZE:
12
+ - 320
13
+ - 320
14
+ SAMPLER_TRAIN: TrainingSampler
15
+ DATASETS:
16
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
17
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
18
+ PROPOSAL_FILES_TEST: []
19
+ PROPOSAL_FILES_TRAIN: []
20
+ TEST:
21
+ - edina_test_crop_vfov
22
+ TRAIN:
23
+ - edina_train
24
+ - cities360_train
25
+ DEBUG_ON: false
26
+ GLOBAL:
27
+ HACK: 1.0
28
+ INPUT:
29
+ CROP:
30
+ ENABLED: false
31
+ SIZE:
32
+ - 0.9
33
+ - 0.9
34
+ TYPE: relative_range
35
+ FORMAT: BGR
36
+ MASK_FORMAT: polygon
37
+ MAX_SIZE_TEST: 1333
38
+ MAX_SIZE_TRAIN: 1333
39
+ MIN_SIZE_TEST: 800
40
+ MIN_SIZE_TRAIN:
41
+ - 800
42
+ MIN_SIZE_TRAIN_SAMPLING: choice
43
+ ONLINE_CROP: false
44
+ RANDOM_FLIP: horizontal
45
+ MODEL:
46
+ ANCHOR_GENERATOR:
47
+ ANGLES:
48
+ - - -90
49
+ - 0
50
+ - 90
51
+ ASPECT_RATIOS:
52
+ - - 0.5
53
+ - 1.0
54
+ - 2.0
55
+ NAME: DefaultAnchorGenerator
56
+ OFFSET: 0.0
57
+ SIZES:
58
+ - - 32
59
+ - 64
60
+ - 128
61
+ - 256
62
+ - 512
63
+ BACKBONE:
64
+ FREEZE_AT: 2
65
+ NAME: build_mit_backbone
66
+ CENTER_ON: false
67
+ DEVICE: cuda
68
+ FPN:
69
+ FUSE_TYPE: sum
70
+ IN_FEATURES: []
71
+ NORM: ''
72
+ OUT_CHANNELS: 256
73
+ FPN_CENTER_HEAD:
74
+ COMMON_STRIDE: 4
75
+ CONVS_DIM: 128
76
+ IGNORE_VALUE: 360
77
+ IN_FEATURES:
78
+ - p2
79
+ - p3
80
+ - p4
81
+ - p5
82
+ LOSS_WEIGHT: 1.0
83
+ NAME: CenterFPNHead
84
+ NORM: GN
85
+ NUM_CLASSES: 30
86
+ FPN_GRAVITY_HEAD:
87
+ COMMON_STRIDE: 4
88
+ CONVS_DIM: 128
89
+ IGNORE_VALUE: 360
90
+ IN_FEATURES:
91
+ - p2
92
+ - p3
93
+ - p4
94
+ - p5
95
+ LOSS_WEIGHT: 1.0
96
+ NAME: GravityFPNHead
97
+ NORM: GN
98
+ NUM_CLASSES: 361
99
+ FPN_HEADS:
100
+ NAME: StandardFPNHeads
101
+ FPN_LATITUDE_HEAD:
102
+ COMMON_STRIDE: 4
103
+ CONVS_DIM: 128
104
+ IGNORE_VALUE: -1
105
+ IN_FEATURES:
106
+ - p2
107
+ - p3
108
+ - p4
109
+ - p5
110
+ LOSS_WEIGHT: 1.0
111
+ NAME: LatitudeFPNHead
112
+ NORM: GN
113
+ NUM_CLASSES: 9
114
+ FREEZE: []
115
+ GRAVITY_DECODER:
116
+ IGNORE_VALUE: 72
117
+ LOSS_TYPE: regression
118
+ LOSS_WEIGHT: 1.0
119
+ NAME: GravityDecoder
120
+ NUM_CLASSES: 73
121
+ GRAVITY_ON: true
122
+ HEIGHT_DECODER:
123
+ LOSS_WEIGHT: 1.0
124
+ NAME: HeightDecoder
125
+ HEIGHT_ON: false
126
+ KEYPOINT_ON: false
127
+ LATITUDE_DECODER:
128
+ IGNORE_VALUE: -1
129
+ LOSS_TYPE: regression
130
+ LOSS_WEIGHT: 1.0
131
+ NAME: LatitudeDecoder
132
+ NUM_CLASSES: 1
133
+ LATITUDE_ON: true
134
+ LOAD_PROPOSALS: false
135
+ MASK_ON: false
136
+ META_ARCHITECTURE: PersFormer
137
+ PANOPTIC_FPN:
138
+ COMBINE:
139
+ ENABLED: true
140
+ INSTANCES_CONFIDENCE_THRESH: 0.5
141
+ OVERLAP_THRESH: 0.5
142
+ STUFF_AREA_LIMIT: 4096
143
+ INSTANCE_LOSS_WEIGHT: 1.0
144
+ PARAM_DECODER:
145
+ DEBUG_LAT: false
146
+ DEBUG_UP: false
147
+ INPUT_SIZE: 64
148
+ LOSS_TYPE: regression
149
+ LOSS_WEIGHT: 1.0
150
+ NAME: ParamNet
151
+ PREDICT_PARAMS:
152
+ - roll
153
+ - pitch
154
+ - vfov
155
+ SYNTHETIC_PRETRAIN: false
156
+ PERSFORMER_HEADS:
157
+ NAME: StandardPersformerHeads
158
+ PIXEL_MEAN:
159
+ - 103.53
160
+ - 116.28
161
+ - 123.675
162
+ PIXEL_STD:
163
+ - 1.0
164
+ - 1.0
165
+ - 1.0
166
+ PROPOSAL_GENERATOR:
167
+ MIN_SIZE: 0
168
+ NAME: RPN
169
+ RECOVER_PP: false
170
+ RECOVER_RPF: true
171
+ RESNETS:
172
+ DEFORM_MODULATED: false
173
+ DEFORM_NUM_GROUPS: 1
174
+ DEFORM_ON_PER_STAGE:
175
+ - false
176
+ - false
177
+ - false
178
+ - false
179
+ DEPTH: 50
180
+ NORM: FrozenBN
181
+ NUM_GROUPS: 1
182
+ OUT_FEATURES:
183
+ - res4
184
+ RES2_OUT_CHANNELS: 256
185
+ RES5_DILATION: 1
186
+ STEM_OUT_CHANNELS: 64
187
+ STRIDE_IN_1X1: true
188
+ WIDTH_PER_GROUP: 64
189
+ RETINANET:
190
+ BBOX_REG_LOSS_TYPE: smooth_l1
191
+ BBOX_REG_WEIGHTS: &id002
192
+ - 1.0
193
+ - 1.0
194
+ - 1.0
195
+ - 1.0
196
+ FOCAL_LOSS_ALPHA: 0.25
197
+ FOCAL_LOSS_GAMMA: 2.0
198
+ IN_FEATURES:
199
+ - p3
200
+ - p4
201
+ - p5
202
+ - p6
203
+ - p7
204
+ IOU_LABELS:
205
+ - 0
206
+ - -1
207
+ - 1
208
+ IOU_THRESHOLDS:
209
+ - 0.4
210
+ - 0.5
211
+ NMS_THRESH_TEST: 0.5
212
+ NORM: ''
213
+ NUM_CLASSES: 80
214
+ NUM_CONVS: 4
215
+ PRIOR_PROB: 0.01
216
+ SCORE_THRESH_TEST: 0.05
217
+ SMOOTH_L1_LOSS_BETA: 0.1
218
+ TOPK_CANDIDATES_TEST: 1000
219
+ ROI_BOX_CASCADE_HEAD:
220
+ BBOX_REG_WEIGHTS:
221
+ - &id001
222
+ - 10.0
223
+ - 10.0
224
+ - 5.0
225
+ - 5.0
226
+ - - 20.0
227
+ - 20.0
228
+ - 10.0
229
+ - 10.0
230
+ - - 30.0
231
+ - 30.0
232
+ - 15.0
233
+ - 15.0
234
+ IOUS:
235
+ - 0.5
236
+ - 0.6
237
+ - 0.7
238
+ ROI_BOX_HEAD:
239
+ BBOX_REG_LOSS_TYPE: smooth_l1
240
+ BBOX_REG_LOSS_WEIGHT: 1.0
241
+ BBOX_REG_WEIGHTS: *id001
242
+ CLS_AGNOSTIC_BBOX_REG: false
243
+ CONV_DIM: 256
244
+ FC_DIM: 1024
245
+ FED_LOSS_FREQ_WEIGHT_POWER: 0.5
246
+ FED_LOSS_NUM_CLASSES: 50
247
+ NAME: ''
248
+ NORM: ''
249
+ NUM_CONV: 0
250
+ NUM_FC: 0
251
+ POOLER_RESOLUTION: 14
252
+ POOLER_SAMPLING_RATIO: 0
253
+ POOLER_TYPE: ROIAlignV2
254
+ SMOOTH_L1_BETA: 0.0
255
+ TRAIN_ON_PRED_BOXES: false
256
+ USE_FED_LOSS: false
257
+ USE_SIGMOID_CE: false
258
+ ROI_HEADS:
259
+ BATCH_SIZE_PER_IMAGE: 512
260
+ IN_FEATURES:
261
+ - res4
262
+ IOU_LABELS:
263
+ - 0
264
+ - 1
265
+ IOU_THRESHOLDS:
266
+ - 0.5
267
+ NAME: Res5ROIHeads
268
+ NMS_THRESH_TEST: 0.5
269
+ NUM_CLASSES: 80
270
+ POSITIVE_FRACTION: 0.25
271
+ PROPOSAL_APPEND_GT: true
272
+ SCORE_THRESH_TEST: 0.05
273
+ ROI_KEYPOINT_HEAD:
274
+ CONV_DIMS:
275
+ - 512
276
+ - 512
277
+ - 512
278
+ - 512
279
+ - 512
280
+ - 512
281
+ - 512
282
+ - 512
283
+ LOSS_WEIGHT: 1.0
284
+ MIN_KEYPOINTS_PER_IMAGE: 1
285
+ NAME: KRCNNConvDeconvUpsampleHead
286
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
287
+ NUM_KEYPOINTS: 17
288
+ POOLER_RESOLUTION: 14
289
+ POOLER_SAMPLING_RATIO: 0
290
+ POOLER_TYPE: ROIAlignV2
291
+ ROI_MASK_HEAD:
292
+ CLS_AGNOSTIC_MASK: false
293
+ CONV_DIM: 256
294
+ NAME: MaskRCNNConvUpsampleHead
295
+ NORM: ''
296
+ NUM_CONV: 0
297
+ POOLER_RESOLUTION: 14
298
+ POOLER_SAMPLING_RATIO: 0
299
+ POOLER_TYPE: ROIAlignV2
300
+ RPN:
301
+ BATCH_SIZE_PER_IMAGE: 256
302
+ BBOX_REG_LOSS_TYPE: smooth_l1
303
+ BBOX_REG_LOSS_WEIGHT: 1.0
304
+ BBOX_REG_WEIGHTS: *id002
305
+ BOUNDARY_THRESH: -1
306
+ CONV_DIMS:
307
+ - -1
308
+ HEAD_NAME: StandardRPNHead
309
+ IN_FEATURES:
310
+ - res4
311
+ IOU_LABELS:
312
+ - 0
313
+ - -1
314
+ - 1
315
+ IOU_THRESHOLDS:
316
+ - 0.3
317
+ - 0.7
318
+ LOSS_WEIGHT: 1.0
319
+ NMS_THRESH: 0.7
320
+ POSITIVE_FRACTION: 0.5
321
+ POST_NMS_TOPK_TEST: 1000
322
+ POST_NMS_TOPK_TRAIN: 2000
323
+ PRE_NMS_TOPK_TEST: 6000
324
+ PRE_NMS_TOPK_TRAIN: 12000
325
+ SMOOTH_L1_BETA: 0.0
326
+ SEM_SEG_HEAD:
327
+ COMMON_STRIDE: 4
328
+ CONVS_DIM: 128
329
+ IGNORE_VALUE: 255
330
+ IN_FEATURES:
331
+ - p2
332
+ - p3
333
+ - p4
334
+ - p5
335
+ LOSS_WEIGHT: 1.0
336
+ NAME: SemSegFPNHead
337
+ NORM: GN
338
+ NUM_CLASSES: 54
339
+ WEIGHTS: ./init_model_weights/cvpr2023.pth
340
+ OUTPUT_DIR: /home/msticha/exps/e01_edina
341
+ OVERFIT_ON: false
342
+ SEED: -1
343
+ SOLVER:
344
+ AMP:
345
+ ENABLED: false
346
+ BASE_LR: 0.01
347
+ BASE_LR_END: 0.0
348
+ BIAS_LR_FACTOR: 1.0
349
+ CHECKPOINT_PERIOD: 500
350
+ CLIP_GRADIENTS:
351
+ CLIP_TYPE: value
352
+ CLIP_VALUE: 1.0
353
+ ENABLED: false
354
+ NORM_TYPE: 2.0
355
+ GAMMA: 0.1
356
+ IMS_PER_BATCH: 32
357
+ LR_SCHEDULER_NAME: WarmupMultiStepLR
358
+ MAX_ITER: 90000
359
+ MOMENTUM: 0.9
360
+ NESTEROV: false
361
+ NUM_DECAYS: 3
362
+ REFERENCE_WORLD_SIZE: 0
363
+ RESCALE_INTERVAL: false
364
+ STEPS:
365
+ - 40000
366
+ - 60000
367
+ WARMUP_FACTOR: 0.001
368
+ WARMUP_ITERS: 1000
369
+ WARMUP_METHOD: linear
370
+ WEIGHT_DECAY: 0.0001
371
+ WEIGHT_DECAY_BIAS: null
372
+ WEIGHT_DECAY_NORM: 0.0
373
+ TEST:
374
+ AUG:
375
+ ENABLED: false
376
+ FLIP: true
377
+ MAX_SIZE: 4000
378
+ MIN_SIZES:
379
+ - 400
380
+ - 500
381
+ - 600
382
+ - 700
383
+ - 800
384
+ - 900
385
+ - 1000
386
+ - 1100
387
+ - 1200
388
+ DETECTIONS_PER_IMAGE: 100
389
+ EVAL_PERIOD: 500
390
+ EXPECTED_RESULTS: []
391
+ KEYPOINT_OKS_SIGMAS: []
392
+ PRECISE_BN:
393
+ ENABLED: false
394
+ NUM_ITER: 200
395
+ VERSION: 2
396
+ VIS_PERIOD: 500
models/paramnet_360cities_edina_rpfpp.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5220df6d9d89380d490bb9a3e4a162a8a5ca8eeb8610510173410076719fca67
3
+ size 837147876
models/paramnet_360cities_edina_rpfpp.yaml ADDED
@@ -0,0 +1,397 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDNN_BENCHMARK: false
2
+ DATALOADER:
3
+ ASPECT_RATIO_GROUPING: true
4
+ AUGMENTATION: true
5
+ AUGMENTATION_FUN: uniform_vfov_crop_resize
6
+ AUGMENTATION_TYPE: geometry
7
+ FILTER_EMPTY_ANNOTATIONS: true
8
+ NO_GEOMETRY_AUG: false
9
+ NUM_WORKERS: 8
10
+ REPEAT_THRESHOLD: 0.0
11
+ RESIZE:
12
+ - 320
13
+ - 320
14
+ SAMPLER_TRAIN: TrainingSampler
15
+ DATASETS:
16
+ PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
17
+ PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
18
+ PROPOSAL_FILES_TEST: []
19
+ PROPOSAL_FILES_TRAIN: []
20
+ TEST:
21
+ - edina_test_crop_uniform
22
+ TRAIN:
23
+ - edina_train
24
+ - cities360_train
25
+ DEBUG_ON: false
26
+ GLOBAL:
27
+ HACK: 1.0
28
+ INPUT:
29
+ CROP:
30
+ ENABLED: false
31
+ SIZE:
32
+ - 0.9
33
+ - 0.9
34
+ TYPE: relative_range
35
+ FORMAT: BGR
36
+ MASK_FORMAT: polygon
37
+ MAX_SIZE_TEST: 1333
38
+ MAX_SIZE_TRAIN: 1333
39
+ MIN_SIZE_TEST: 800
40
+ MIN_SIZE_TRAIN:
41
+ - 800
42
+ MIN_SIZE_TRAIN_SAMPLING: choice
43
+ ONLINE_CROP: false
44
+ RANDOM_FLIP: horizontal
45
+ MODEL:
46
+ ANCHOR_GENERATOR:
47
+ ANGLES:
48
+ - - -90
49
+ - 0
50
+ - 90
51
+ ASPECT_RATIOS:
52
+ - - 0.5
53
+ - 1.0
54
+ - 2.0
55
+ NAME: DefaultAnchorGenerator
56
+ OFFSET: 0.0
57
+ SIZES:
58
+ - - 32
59
+ - 64
60
+ - 128
61
+ - 256
62
+ - 512
63
+ BACKBONE:
64
+ FREEZE_AT: 2
65
+ NAME: build_mit_backbone
66
+ CENTER_ON: false
67
+ DEVICE: cuda
68
+ FPN:
69
+ FUSE_TYPE: sum
70
+ IN_FEATURES: []
71
+ NORM: ''
72
+ OUT_CHANNELS: 256
73
+ FPN_CENTER_HEAD:
74
+ COMMON_STRIDE: 4
75
+ CONVS_DIM: 128
76
+ IGNORE_VALUE: 360
77
+ IN_FEATURES:
78
+ - p2
79
+ - p3
80
+ - p4
81
+ - p5
82
+ LOSS_WEIGHT: 1.0
83
+ NAME: CenterFPNHead
84
+ NORM: GN
85
+ NUM_CLASSES: 30
86
+ FPN_GRAVITY_HEAD:
87
+ COMMON_STRIDE: 4
88
+ CONVS_DIM: 128
89
+ IGNORE_VALUE: 360
90
+ IN_FEATURES:
91
+ - p2
92
+ - p3
93
+ - p4
94
+ - p5
95
+ LOSS_WEIGHT: 1.0
96
+ NAME: GravityFPNHead
97
+ NORM: GN
98
+ NUM_CLASSES: 361
99
+ FPN_HEADS:
100
+ NAME: StandardFPNHeads
101
+ FPN_LATITUDE_HEAD:
102
+ COMMON_STRIDE: 4
103
+ CONVS_DIM: 128
104
+ IGNORE_VALUE: -1
105
+ IN_FEATURES:
106
+ - p2
107
+ - p3
108
+ - p4
109
+ - p5
110
+ LOSS_WEIGHT: 1.0
111
+ NAME: LatitudeFPNHead
112
+ NORM: GN
113
+ NUM_CLASSES: 9
114
+ FREEZE: []
115
+ GRAVITY_DECODER:
116
+ IGNORE_VALUE: 72
117
+ LOSS_TYPE: regression
118
+ LOSS_WEIGHT: 1.0
119
+ NAME: GravityDecoder
120
+ NUM_CLASSES: 73
121
+ GRAVITY_ON: true
122
+ HEIGHT_DECODER:
123
+ LOSS_WEIGHT: 1.0
124
+ NAME: HeightDecoder
125
+ HEIGHT_ON: false
126
+ KEYPOINT_ON: false
127
+ LATITUDE_DECODER:
128
+ IGNORE_VALUE: -1
129
+ LOSS_TYPE: regression
130
+ LOSS_WEIGHT: 1.0
131
+ NAME: LatitudeDecoder
132
+ NUM_CLASSES: 1
133
+ LATITUDE_ON: true
134
+ LOAD_PROPOSALS: false
135
+ MASK_ON: false
136
+ META_ARCHITECTURE: PersFormer
137
+ PANOPTIC_FPN:
138
+ COMBINE:
139
+ ENABLED: true
140
+ INSTANCES_CONFIDENCE_THRESH: 0.5
141
+ OVERLAP_THRESH: 0.5
142
+ STUFF_AREA_LIMIT: 4096
143
+ INSTANCE_LOSS_WEIGHT: 1.0
144
+ PARAM_DECODER:
145
+ DEBUG_LAT: false
146
+ DEBUG_UP: false
147
+ INPUT_SIZE: 64
148
+ LOSS_TYPE: regression
149
+ LOSS_WEIGHT: 1.0
150
+ NAME: ParamNetConvNextRegress
151
+ PREDICT_PARAMS:
152
+ - roll
153
+ - pitch
154
+ - general_vfov
155
+ - rel_cx
156
+ - rel_cy
157
+ SYNTHETIC_PRETRAIN: false
158
+ PERSFORMER_HEADS:
159
+ NAME: StandardPersformerHeads
160
+ PIXEL_MEAN:
161
+ - 103.53
162
+ - 116.28
163
+ - 123.675
164
+ PIXEL_STD:
165
+ - 1.0
166
+ - 1.0
167
+ - 1.0
168
+ PROPOSAL_GENERATOR:
169
+ MIN_SIZE: 0
170
+ NAME: RPN
171
+ RECOVER_PP: true
172
+ RECOVER_RPF: true
173
+ RESNETS:
174
+ DEFORM_MODULATED: false
175
+ DEFORM_NUM_GROUPS: 1
176
+ DEFORM_ON_PER_STAGE:
177
+ - false
178
+ - false
179
+ - false
180
+ - false
181
+ DEPTH: 50
182
+ NORM: FrozenBN
183
+ NUM_GROUPS: 1
184
+ OUT_FEATURES:
185
+ - res4
186
+ RES2_OUT_CHANNELS: 256
187
+ RES5_DILATION: 1
188
+ STEM_OUT_CHANNELS: 64
189
+ STRIDE_IN_1X1: true
190
+ WIDTH_PER_GROUP: 64
191
+ RETINANET:
192
+ BBOX_REG_LOSS_TYPE: smooth_l1
193
+ BBOX_REG_WEIGHTS: &id002
194
+ - 1.0
195
+ - 1.0
196
+ - 1.0
197
+ - 1.0
198
+ FOCAL_LOSS_ALPHA: 0.25
199
+ FOCAL_LOSS_GAMMA: 2.0
200
+ IN_FEATURES:
201
+ - p3
202
+ - p4
203
+ - p5
204
+ - p6
205
+ - p7
206
+ IOU_LABELS:
207
+ - 0
208
+ - -1
209
+ - 1
210
+ IOU_THRESHOLDS:
211
+ - 0.4
212
+ - 0.5
213
+ NMS_THRESH_TEST: 0.5
214
+ NORM: ''
215
+ NUM_CLASSES: 80
216
+ NUM_CONVS: 4
217
+ PRIOR_PROB: 0.01
218
+ SCORE_THRESH_TEST: 0.05
219
+ SMOOTH_L1_LOSS_BETA: 0.1
220
+ TOPK_CANDIDATES_TEST: 1000
221
+ ROI_BOX_CASCADE_HEAD:
222
+ BBOX_REG_WEIGHTS:
223
+ - &id001
224
+ - 10.0
225
+ - 10.0
226
+ - 5.0
227
+ - 5.0
228
+ - - 20.0
229
+ - 20.0
230
+ - 10.0
231
+ - 10.0
232
+ - - 30.0
233
+ - 30.0
234
+ - 15.0
235
+ - 15.0
236
+ IOUS:
237
+ - 0.5
238
+ - 0.6
239
+ - 0.7
240
+ ROI_BOX_HEAD:
241
+ BBOX_REG_LOSS_TYPE: smooth_l1
242
+ BBOX_REG_LOSS_WEIGHT: 1.0
243
+ BBOX_REG_WEIGHTS: *id001
244
+ CLS_AGNOSTIC_BBOX_REG: false
245
+ CONV_DIM: 256
246
+ FC_DIM: 1024
247
+ FED_LOSS_FREQ_WEIGHT_POWER: 0.5
248
+ FED_LOSS_NUM_CLASSES: 50
249
+ NAME: ''
250
+ NORM: ''
251
+ NUM_CONV: 0
252
+ NUM_FC: 0
253
+ POOLER_RESOLUTION: 14
254
+ POOLER_SAMPLING_RATIO: 0
255
+ POOLER_TYPE: ROIAlignV2
256
+ SMOOTH_L1_BETA: 0.0
257
+ TRAIN_ON_PRED_BOXES: false
258
+ USE_FED_LOSS: false
259
+ USE_SIGMOID_CE: false
260
+ ROI_HEADS:
261
+ BATCH_SIZE_PER_IMAGE: 512
262
+ IN_FEATURES:
263
+ - res4
264
+ IOU_LABELS:
265
+ - 0
266
+ - 1
267
+ IOU_THRESHOLDS:
268
+ - 0.5
269
+ NAME: Res5ROIHeads
270
+ NMS_THRESH_TEST: 0.5
271
+ NUM_CLASSES: 80
272
+ POSITIVE_FRACTION: 0.25
273
+ PROPOSAL_APPEND_GT: true
274
+ SCORE_THRESH_TEST: 0.05
275
+ ROI_KEYPOINT_HEAD:
276
+ CONV_DIMS:
277
+ - 512
278
+ - 512
279
+ - 512
280
+ - 512
281
+ - 512
282
+ - 512
283
+ - 512
284
+ - 512
285
+ LOSS_WEIGHT: 1.0
286
+ MIN_KEYPOINTS_PER_IMAGE: 1
287
+ NAME: KRCNNConvDeconvUpsampleHead
288
+ NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
289
+ NUM_KEYPOINTS: 17
290
+ POOLER_RESOLUTION: 14
291
+ POOLER_SAMPLING_RATIO: 0
292
+ POOLER_TYPE: ROIAlignV2
293
+ ROI_MASK_HEAD:
294
+ CLS_AGNOSTIC_MASK: false
295
+ CONV_DIM: 256
296
+ NAME: MaskRCNNConvUpsampleHead
297
+ NORM: ''
298
+ NUM_CONV: 0
299
+ POOLER_RESOLUTION: 14
300
+ POOLER_SAMPLING_RATIO: 0
301
+ POOLER_TYPE: ROIAlignV2
302
+ RPN:
303
+ BATCH_SIZE_PER_IMAGE: 256
304
+ BBOX_REG_LOSS_TYPE: smooth_l1
305
+ BBOX_REG_LOSS_WEIGHT: 1.0
306
+ BBOX_REG_WEIGHTS: *id002
307
+ BOUNDARY_THRESH: -1
308
+ CONV_DIMS:
309
+ - -1
310
+ HEAD_NAME: StandardRPNHead
311
+ IN_FEATURES:
312
+ - res4
313
+ IOU_LABELS:
314
+ - 0
315
+ - -1
316
+ - 1
317
+ IOU_THRESHOLDS:
318
+ - 0.3
319
+ - 0.7
320
+ LOSS_WEIGHT: 1.0
321
+ NMS_THRESH: 0.7
322
+ POSITIVE_FRACTION: 0.5
323
+ POST_NMS_TOPK_TEST: 1000
324
+ POST_NMS_TOPK_TRAIN: 2000
325
+ PRE_NMS_TOPK_TEST: 6000
326
+ PRE_NMS_TOPK_TRAIN: 12000
327
+ SMOOTH_L1_BETA: 0.0
328
+ SEM_SEG_HEAD:
329
+ COMMON_STRIDE: 4
330
+ CONVS_DIM: 128
331
+ IGNORE_VALUE: 255
332
+ IN_FEATURES:
333
+ - p2
334
+ - p3
335
+ - p4
336
+ - p5
337
+ LOSS_WEIGHT: 1.0
338
+ NAME: SemSegFPNHead
339
+ NORM: GN
340
+ NUM_CLASSES: 54
341
+ WEIGHTS: ./init_model_weights/cvpr2023.pth
342
+ OUTPUT_DIR: /home/msticha/exps/e01_edina_pp
343
+ OVERFIT_ON: false
344
+ SEED: -1
345
+ SOLVER:
346
+ AMP:
347
+ ENABLED: false
348
+ BASE_LR: 0.005
349
+ BASE_LR_END: 0.0
350
+ BIAS_LR_FACTOR: 1.0
351
+ CHECKPOINT_PERIOD: 500
352
+ CLIP_GRADIENTS:
353
+ CLIP_TYPE: value
354
+ CLIP_VALUE: 1.0
355
+ ENABLED: false
356
+ NORM_TYPE: 2.0
357
+ GAMMA: 0.1
358
+ IMS_PER_BATCH: 32
359
+ LR_SCHEDULER_NAME: WarmupCosineLR
360
+ MAX_ITER: 90000
361
+ MOMENTUM: 0.9
362
+ NESTEROV: false
363
+ NUM_DECAYS: 3
364
+ REFERENCE_WORLD_SIZE: 0
365
+ RESCALE_INTERVAL: false
366
+ STEPS:
367
+ - 30000
368
+ WARMUP_FACTOR: 0.001
369
+ WARMUP_ITERS: 1000
370
+ WARMUP_METHOD: linear
371
+ WEIGHT_DECAY: 0.0001
372
+ WEIGHT_DECAY_BIAS: null
373
+ WEIGHT_DECAY_NORM: 0.0
374
+ TEST:
375
+ AUG:
376
+ ENABLED: false
377
+ FLIP: true
378
+ MAX_SIZE: 4000
379
+ MIN_SIZES:
380
+ - 400
381
+ - 500
382
+ - 600
383
+ - 700
384
+ - 800
385
+ - 900
386
+ - 1000
387
+ - 1100
388
+ - 1200
389
+ DETECTIONS_PER_IMAGE: 100
390
+ EVAL_PERIOD: 500
391
+ EXPECTED_RESULTS: []
392
+ KEYPOINT_OKS_SIGMAS: []
393
+ PRECISE_BN:
394
+ ENABLED: false
395
+ NUM_ITER: 200
396
+ VERSION: 2
397
+ VIS_PERIOD: 500
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  --extra-index-url https://download.pytorch.org/whl/cu113
2
- torch==1.10.0+cu113
3
- torchvision==0.11.1+cu113
 
1
  --extra-index-url https://download.pytorch.org/whl/cu113
2
+ torch==1.11.0+cu113
3
+ torchvision==0.12.0+cu113