_base_ = ['./detr4seg_r50.py', '../datasets/psg.py', '../custom_runtime.py'] custom_imports = dict(imports=[ 'openpsg.models.frameworks.detr4seg', 'openpsg.models.relation_heads.detr4seg_head', 'openpsg.datasets', 'openpsg.datasets.pipelines.loading', 'openpsg.datasets.pipelines.rel_randomcrop', 'openpsg.models.relation_heads.approaches.matcher', 'openpsg.models.losses.seg_losses' ], allow_failed_imports=False) object_classes = [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush', 'banner', 'blanket', 'bridge', 'cardboard', 'counter', 'curtain', 'door-stuff', 'floor-wood', 'flower', 'fruit', 'gravel', 'house', 'light', 'mirror-stuff', 'net', 'pillow', 'platform', 'playingfield', 'railroad', 'river', 'road', 'roof', 'sand', 'sea', 'shelf', 'snow', 'stairs', 'tent', 'towel', 'wall-brick', 'wall-stone', 'wall-tile', 'wall-wood', 'water-other', 'window-blind', 'window-other', 'tree-merged', 'fence-merged', 'ceiling-merged', 'sky-other-merged', 'cabinet-merged', 'table-merged', 'floor-other-merged', 'pavement-merged', 'mountain-merged', 'grass-merged', 'dirt-merged', 'paper-merged', 'food-other-merged', 'building-other-merged', 'rock-merged', 'wall-other-merged', 'rug-merged' ] model = dict(bbox_head=dict( num_classes=len(object_classes), object_classes=object_classes, )) img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) # train_pipeline, NOTE the img_scale and the Pad's size_divisor is different # from the default setting in mmdet. train_pipeline = [ dict(type='LoadImageFromFile'), dict(type='LoadPanopticSceneGraphAnnotations', with_bbox=True, with_mask=True, with_seg=True), dict(type='RandomFlip', flip_ratio=0.5), dict( type='AutoAugment', policies=[ [ dict(type='Resize', img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), (608, 1333), (640, 1333), (672, 1333), (704, 1333), (736, 1333), (768, 1333), (800, 1333)], multiscale_mode='value', keep_ratio=True) ], [ dict(type='Resize', img_scale=[(400, 1333), (500, 1333), (600, 1333)], multiscale_mode='value', keep_ratio=True), dict(type='RandomCrop', crop_type='absolute_range', crop_size=(384, 600), allow_negative_crop=False), # no empty relations dict(type='Resize', img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333), (608, 1333), (640, 1333), (672, 1333), (704, 1333), (736, 1333), (768, 1333), (800, 1333)], multiscale_mode='value', override=True, keep_ratio=True) ] ]), dict(type='Normalize', **img_norm_cfg), dict(type='Pad', size_divisor=1), dict(type='RelsFormatBundle'), dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']) ] # test_pipeline, NOTE the Pad's size_divisor is different from the default # setting (size_divisor=32). While there is little effect on the performance # whether we use the default setting or use size_divisor=1. test_pipeline = [ dict(type='LoadImageFromFile'), dict(type='MultiScaleFlipAug', img_scale=(1333, 800), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='RandomFlip'), dict(type='Normalize', **img_norm_cfg), dict(type='Pad', size_divisor=1), dict(type='ImageToTensor', keys=['img']), dict(type='Collect', keys=['img']) ]) ] data = dict(samples_per_gpu=1, workers_per_gpu=1, train=dict(pipeline=train_pipeline), val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline)) # optimizer optimizer = dict(type='AdamW', lr=0.00001, weight_decay=0.0001, paramwise_cfg=dict( custom_keys={ 'backbone': dict(lr_mult=0.1, decay_mult=1.0), 'bbox_attention': dict(lr_mult=10.0, decay_mult=1.0), 'mask_head': dict(lr_mult=10.0, decay_mult=1.0) })) optimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2)) # learning policy lr_config = dict(policy='step', step=8) runner = dict(type='EpochBasedRunner', max_epochs=10) evaluation = dict(interval=1, metric='PQ') checkpoint_config = dict(interval=1, max_keep_ckpts=10) project_name = 'detr4seg' expt_name = 'test_detr4seg_r50_psg' work_dir = f'./work_dirs/{expt_name}' log_config = dict( interval=50, hooks=[ dict(type='TextLoggerHook'), dict(type='TensorboardLoggerHook'), dict( type='WandbLoggerHook', init_kwargs=dict( project=project_name, name=expt_name, # config=work_dir + "/cfg.yaml" )) ], ) load_from = 'detr_pan_r50.pth'