# Spaces: Dit-document-layout-analysis (status: Sleeping)
# File: Dit-document-layout-analysis/unilm/layoutlmv3/examples/object_detection/cascade_layoutlmv3.yaml
# Model definition: meta-architecture, ViT + FPN backbone, cascade ROI heads,
# anchor generator and RPN.
# NOTE(review): the nesting below is reconstructed from the detectron2-style
# MODEL.* key layout. The flattened text carried no indentation and repeated
# NAME / IN_FEATURES / POOLER_RESOLUTION at a single level, which cannot be
# what was intended. Confirm against the upstream file.
MODEL:
  MASK_ON: True
  IMAGE_ONLY: True
  META_ARCHITECTURE: "VLGeneralizedRCNN"
  # Per-channel normalisation constants.
  # presumably applied as (x - 127.5) / 127.5, i.e. 0..255 -> [-1, 1] -- confirm
  PIXEL_MEAN: [ 127.5, 127.5, 127.5 ]
  PIXEL_STD: [ 127.5, 127.5, 127.5 ]
  # Placeholder path: point this at the local pre-trained checkpoint.
  WEIGHTS: "/path/to/models/layoutlmv3/pts/layoutlmv3-base/pytorch_model.bin"
  BACKBONE:
    NAME: "build_vit_fpn_backbone"
  VIT:
    NAME: "layoutlmv3_base"
    # Same layer names as FPN.IN_FEATURES below; keep the two lists in sync.
    OUT_FEATURES: [ "layer3", "layer5", "layer7", "layer11" ]
    DROP_PATH: 0.1
    IMG_SIZE: [ 224, 224 ]
    POS_TYPE: "abs"
  ROI_HEADS:
    NAME: "CascadeROIHeads"
    IN_FEATURES: [ "p2", "p3", "p4", "p5" ]
    # presumably the number of layout categories in the training set -- confirm
    NUM_CLASSES: 5
  ROI_BOX_HEAD:
    CLS_AGNOSTIC_BBOX_REG: True
    NAME: "FastRCNNConvFCHead"
    NUM_FC: 2
    POOLER_RESOLUTION: 7
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 4
    POOLER_RESOLUTION: 14
  FPN:
    IN_FEATURES: [ "layer3", "layer5", "layer7", "layer11" ]
  ANCHOR_GENERATOR:
    # One size for each in feature map
    SIZES: [ [ 32 ], [ 64 ], [ 128 ], [ 256 ], [ 512 ] ]
    # Three aspect ratios (same for all in feature maps)
    ASPECT_RATIOS: [ [ 0.5, 1.0, 2.0 ] ]
  RPN:
    IN_FEATURES: [ "p2", "p3", "p4", "p5", "p6" ]
    PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
    PRE_NMS_TOPK_TEST: 1000  # Per FPN level
    # Detectron1 uses 2000 proposals per-batch,
    # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
    # which is approximately 1000 proposals per-image since the default
    # batch size for FPN is 2.
    POST_NMS_TOPK_TRAIN: 2000
    POST_NMS_TOPK_TEST: 1000
# Names of the dataset splits used for training and evaluation.
# The values are written as Python tuple literals; YAML itself reads them as
# plain strings. Assumes the config loader evaluates them (as yacs-style
# loaders do) -- confirm before quoting or reformatting them.
DATASETS:
  TRAIN: ("publaynet_train",)
  TEST: ("publaynet_val",)
# Optimiser, learning-rate schedule, checkpointing and gradient clipping.
# NOTE(review): AMP and CLIP_GRADIENTS are restored as sub-mappings so that
# their ENABLED keys no longer collide; the flattened text carried no
# indentation. Confirm against the upstream file.
SOLVER:
  GRADIENT_ACCUMULATION_STEPS: 1
  BASE_LR: 0.0002
  WARMUP_ITERS: 1000
  IMS_PER_BATCH: 32
  MAX_ITER: 60000
  CHECKPOINT_PERIOD: 2000
  LR_SCHEDULER_NAME: "WarmupCosineLR"
  AMP:
    ENABLED: True
  OPTIMIZER: "ADAMW"
  BACKBONE_MULTIPLIER: 1.0
  CLIP_GRADIENTS:
    ENABLED: True
    CLIP_TYPE: "full_model"
    CLIP_VALUE: 1.0
    NORM_TYPE: 2.0
  WARMUP_FACTOR: 0.01
  WEIGHT_DECAY: 0.05
# Evaluation settings. EVAL_PERIOD is an iteration count and has the same
# value as SOLVER.CHECKPOINT_PERIOD (2000).
TEST:
  EVAL_PERIOD: 2000
# Input pipeline: random-crop settings, training resize candidates and the
# channel order.
# NOTE(review): CROP is restored as a sub-mapping of INPUT, following the
# detectron2-style INPUT.CROP.* layout -- confirm against the upstream file.
# SIZE and MIN_SIZE_TRAIN are written as Python tuple literals; YAML reads
# them as plain strings and the loader is assumed to evaluate them -- confirm.
INPUT:
  CROP:
    ENABLED: True
    TYPE: "absolute_range"
    SIZE: (384, 600)
  MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
  FORMAT: "RGB"
# Data-loader settings.
# FILTER_EMPTY_ANNOTATIONS is off, so presumably images without annotations
# are kept in the training set -- confirm against the loader.
DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: False
# Config schema version; must stay an integer (not a string).
VERSION: 2
# Augmentation switches.
# presumably DETR selects a DETR-style augmentation recipe -- confirm
AUG:
  DETR: True
# Random seed for the run.
SEED: 42
# Placeholder paths: replace every "/path/to/..." with a real local path.
OUTPUT_DIR: "/path/to/models/layoutlmv3/fts/publaynet-base/"
PUBLAYNET_DATA_DIR_TRAIN: "/path/to/data/PubLayNet/publaynet/train"
PUBLAYNET_DATA_DIR_TEST: "/path/to/data/PubLayNet/publaynet/val"
CACHE_DIR: "/path/to/cache/huggingface"