# Tzktz's picture
# Upload 7664 files
# 6fc683c verified
# Model definition: meta-architecture, input normalization, initial weights,
# ViT backbone, FPN, ROI heads, anchor generation and RPN.
# Keys are nested as MODEL.<SECTION>.<KEY> (detectron2-style config layout).
# Written flat, NAME, IN_FEATURES and POOLER_RESOLUTION would repeat as
# duplicate keys of a single mapping and silently overwrite one another.
MODEL:
  MASK_ON: True
  IMAGE_ONLY: True
  META_ARCHITECTURE: "VLGeneralizedRCNN"
  # Per-channel normalization constants (three channels, identical values).
  PIXEL_MEAN: [127.5, 127.5, 127.5]
  PIXEL_STD: [127.5, 127.5, 127.5]
  # Placeholder path: point this at the real checkpoint file before running.
  WEIGHTS: "/path/to/models/layoutlmv3/pts/layoutlmv3-base/pytorch_model.bin"
  BACKBONE:
    NAME: "build_vit_fpn_backbone"
  VIT:
    NAME: "layoutlmv3_base"
    # Backbone layers exposed as feature maps; FPN.IN_FEATURES below lists the
    # same four names.
    OUT_FEATURES: ["layer3", "layer5", "layer7", "layer11"]
    DROP_PATH: 0.1
    IMG_SIZE: [224, 224]
    POS_TYPE: "abs"
  ROI_HEADS:
    NAME: CascadeROIHeads
    IN_FEATURES: ["p2", "p3", "p4", "p5"]
    # NOTE(review): presumably the five PubLayNet layout categories - confirm.
    NUM_CLASSES: 5
  ROI_BOX_HEAD:
    CLS_AGNOSTIC_BBOX_REG: True
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
  # Mask branch settings; MASK_ON above is True.
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
  FPN:
    IN_FEATURES: ["layer3", "layer5", "layer7", "layer11"]
  ANCHOR_GENERATOR:
    # Five size entries, matching the five RPN.IN_FEATURES levels below.
    SIZES: [[32], [64], [128], [256], [512]]  # One size for each in feature map
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (same for all in feature maps)
  RPN:
    IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
    # Detectron1 uses 2000 proposals per-batch,
    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
    # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
    POST_NMS_TOPK_TRAIN: 2000
    POST_NMS_TOPK_TEST: 1000
# Dataset names used for training (TRAIN) and evaluation (TEST).
# TRAIN/TEST are children of DATASETS; left flat, this TEST key would collide
# with the top-level TEST section of the config.
# The tuple-literal values are kept verbatim as plain scalars so the config
# loader receives exactly the same text as before.
DATASETS:
  TRAIN: ("publaynet_train",)
  TEST: ("publaynet_val",)
# Optimization settings: schedule length, learning rate and warmup, batch
# size, checkpointing, mixed precision, optimizer choice, gradient clipping
# and weight decay.
# AMP and CLIP_GRADIENTS are sub-sections; nesting them keeps their ENABLED
# keys from colliding with each other.
SOLVER:
  GRADIENT_ACCUMULATION_STEPS: 1
  BASE_LR: 0.0002
  WARMUP_ITERS: 1000
  IMS_PER_BATCH: 32
  MAX_ITER: 60000
  CHECKPOINT_PERIOD: 2000
  LR_SCHEDULER_NAME: "WarmupCosineLR"
  # Mixed-precision toggle.
  AMP:
    ENABLED: True
  OPTIMIZER: "ADAMW"
  BACKBONE_MULTIPLIER: 1.0
  # Gradient clipping; CLIP_TYPE, CLIP_VALUE and NORM_TYPE belong to this
  # sub-section rather than to SOLVER itself.
  CLIP_GRADIENTS:
    ENABLED: True
    CLIP_TYPE: "full_model"
    CLIP_VALUE: 1.0
    NORM_TYPE: 2.0
  WARMUP_FACTOR: 0.01
  WEIGHT_DECAY: 0.05
# Evaluation settings. EVAL_PERIOD is a child of TEST; its value (2000) is the
# same interval as SOLVER.CHECKPOINT_PERIOD.
TEST:
  EVAL_PERIOD: 2000
# Input pipeline settings: cropping, training-time size choices and the image
# channel format.
INPUT:
  # Crop settings form their own sub-section (ENABLED/TYPE/SIZE); the
  # tuple-literal SIZE value is kept verbatim as a plain scalar.
  CROP:
    ENABLED: True
    TYPE: "absolute_range"
    SIZE: (384, 600)
  # Candidate sizes for training images, 480 to 800 in steps of 32.
  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
  FORMAT: "RGB"
# Data loading settings. FILTER_EMPTY_ANNOTATIONS is a child of DATALOADER.
# NOTE(review): False presumably keeps images that have no annotations in the
# training set - confirm against the data loader.
DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: False
# Top-level config version number.
# NOTE(review): presumably the config-schema version read by the config
# loader - confirm before changing.
VERSION: 2
# Augmentation settings. DETR is a child of AUG.
# NOTE(review): presumably selects a DETR-style augmentation pipeline - confirm
# against the dataset mapper.
AUG:
  DETR: True
# Top-level run settings: seed plus output, dataset and cache locations.
# Every "/path/to/..." value below is a placeholder and must be replaced with
# a real location before running.
SEED: 42
# Directory for this run's outputs.
OUTPUT_DIR: "/path/to/models/layoutlmv3/fts/publaynet-base/"
# PubLayNet train / val data directories.
PUBLAYNET_DATA_DIR_TRAIN: "/path/to/data/PubLayNet/publaynet/train"
PUBLAYNET_DATA_DIR_TEST: "/path/to/data/PubLayNet/publaynet/val"
# Cache directory (the path suggests a Hugging Face cache).
CACHE_DIR: "/path/to/cache/huggingface"