Created
January 22, 2021 01:13
-
-
Save ppwwyyxx/8f9c4d9b1a1f60f0563a9df924f561b0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Declarative model config written with `LazyCall` (imported as `L`).
#
# `model` describes a GeneralizedRCNN made of:
#   * an FPN backbone on top of a four-stage ResNet built from BottleneckBlock,
#   * an RPN proposal generator reading the FPN levels "p2".."p6",
#   * StandardROIHeads with a box branch and a mask branch reading "p2".."p5".
#
# NOTE(review): `L(cls)(**kwargs)` presumably records the call instead of
# executing it, so nothing is constructed at import time -- confirm against
# `newconfig.LazyCall`.
from detectron2.layers import ShapeSpec
from detectron2.modeling import FPN, GeneralizedRCNN, ResNet, StandardROIHeads
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone.fpn import LastLevelMaxPool
from detectron2.modeling.backbone.resnet import BasicStem, BottleneckBlock
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator.rpn import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import FastRCNNOutputLayers, MaskRCNNConvUpsampleHead
from detectron2.modeling.roi_heads.box_head import FastRCNNConvFCHead

from newconfig import LazyCall as L

model = L(GeneralizedRCNN)(
    backbone=L(FPN)(
        bottom_up=L(ResNet)(
            stem=L(BasicStem)(in_channels=3, out_channels=64),
            # can create some specializations such as "ResNet50" to avoid writing this
            stages=[
                # One entry per stage. The zip below supplies, per stage:
                #   n = number of blocks, s = stride of the first block,
                #   i = input channels,   o = output channels.
                L(ResNet.make_stage)(
                    block_class=BottleneckBlock,
                    num_blocks=n,
                    # Only the first block of a stage uses stride `s`; the rest use 1.
                    stride_per_block=[s] + [1] * (n - 1),
                    in_channels=i,
                    bottleneck_channels=o // 4,
                    out_channels=o,
                    stride_in_1x1=True,
                )
                for (n, s, i, o) in zip(
                    [3, 4, 6, 3], [1, 2, 2, 2], [64, 256, 512, 1024], [256, 512, 1024, 2048]
                )
            ],
            out_features=["res2", "res3", "res4", "res5"],
            # freeze_at=2,
        ),
        # FPN consumes exactly the features the ResNet is asked to output above.
        in_features=["res2", "res3", "res4", "res5"],
        out_channels=256,
        top_block=L(LastLevelMaxPool)(),
    ),
    proposal_generator=L(RPN)(
        in_features=["p2", "p3", "p4", "p5", "p6"],
        # num_anchors=3 matches the three aspect ratios configured below.
        head=L(StandardRPNHead)(in_channels=256, num_anchors=3),
        anchor_generator=L(DefaultAnchorGenerator)(
            # Five size groups and five strides: one per entry of `in_features`.
            sizes=[[32], [64], [128], [256], [512]],
            aspect_ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64],
            offset=0.0,
        ),
        anchor_matcher=L(Matcher)(
            thresholds=[0.3, 0.7], labels=[0, -1, 1], allow_low_quality_matches=True
        ),
        box2box_transform=L(Box2BoxTransform)(weights=[1.0, 1.0, 1.0, 1.0]),
        batch_size_per_image=256,
        positive_fraction=0.5,
        # NOTE(review): the two-element tuples are presumably (train, test) -- confirm
        # against the RPN constructor.
        pre_nms_topk=(2000, 1000),
        post_nms_topk=(1000, 1000),
        nms_thresh=0.7,
    ),
    roi_heads=L(StandardROIHeads)(
        num_classes=80,
        batch_size_per_image=512,
        positive_fraction=0.25,
        proposal_matcher=L(Matcher)(
            thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
        ),
        # --- box branch ---
        box_in_features=["p2", "p3", "p4", "p5"],
        box_pooler=L(ROIPooler)(
            output_size=7,
            # Four scales, one per entry of `box_in_features`.
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        box_head=L(FastRCNNConvFCHead)(
            # 256 channels = FPN out_channels; 7x7 = box_pooler output_size.
            input_shape=ShapeSpec(channels=256, height=7, width=7),
            conv_dims=[],
            fc_dims=[1024, 1024],
        ),
        box_predictor=L(FastRCNNOutputLayers)(
            # 1024 channels = last entry of the box head's fc_dims.
            input_shape=ShapeSpec(channels=1024),
            test_score_thresh=0.05,
            box2box_transform=L(Box2BoxTransform)(weights=(10, 10, 5, 5)),
            # NOTE: interpolation supported by OmegaConf
            # ("${..num_classes}" is a relative reference to the parent node's
            # `num_classes`, i.e. the value set on roi_heads above.)
            num_classes="${..num_classes}",
        ),
        # --- mask branch ---
        mask_in_features=["p2", "p3", "p4", "p5"],
        mask_pooler=L(ROIPooler)(
            output_size=14,
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        mask_head=L(MaskRCNNConvUpsampleHead)(
            # 14x14 = mask_pooler output_size.
            input_shape=ShapeSpec(channels=256, width=14, height=14),
            num_classes="${..num_classes}",
            conv_dims=[256, 256, 256, 256, 256],
        ),
    ),
    # Mean is listed in the channel order named by `input_format`.
    pixel_mean=[103.530, 116.280, 123.675],
    pixel_std=[1.0, 1.0, 1.0],
    input_format="BGR",
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Declarative model config written as nested plain dicts.
#
# Each dict carries a `_target_` key holding the class or function to call;
# the remaining keys are the keyword arguments for that call.
# NOTE(review): nothing in this file consumes `_target_` -- presumably an
# instantiate-style helper elsewhere walks the tree and builds the objects.
#
# `model` describes a GeneralizedRCNN made of:
#   * an FPN backbone on top of a four-stage ResNet built from BottleneckBlock,
#   * an RPN proposal generator reading the FPN levels "p2".."p6",
#   * StandardROIHeads with a box branch and a mask branch reading "p2".."p5".
from detectron2.layers import ShapeSpec
from detectron2.modeling import FPN, GeneralizedRCNN, ResNet, StandardROIHeads
from detectron2.modeling.anchor_generator import DefaultAnchorGenerator
from detectron2.modeling.backbone.fpn import LastLevelMaxPool
from detectron2.modeling.backbone.resnet import BasicStem, BottleneckBlock
from detectron2.modeling.box_regression import Box2BoxTransform
from detectron2.modeling.matcher import Matcher
from detectron2.modeling.poolers import ROIPooler
from detectron2.modeling.proposal_generator.rpn import RPN, StandardRPNHead
from detectron2.modeling.roi_heads import FastRCNNOutputLayers, MaskRCNNConvUpsampleHead
from detectron2.modeling.roi_heads.box_head import FastRCNNConvFCHead

model = dict(
    _target_=GeneralizedRCNN,
    backbone=dict(
        _target_=FPN,
        bottom_up=dict(
            _target_=ResNet,
            stem=dict(_target_=BasicStem, in_channels=3, out_channels=64),
            # can create some specializations such as "ResNet50" to avoid writing this
            stages=[
                # One entry per stage. The zip below supplies, per stage:
                #   n = number of blocks, s = stride of the first block,
                #   i = input channels,   o = output channels.
                dict(
                    _target_=ResNet.make_stage,
                    block_class=BottleneckBlock,
                    num_blocks=n,
                    # Only the first block of a stage uses stride `s`; the rest use 1.
                    stride_per_block=[s] + [1] * (n - 1),
                    in_channels=i,
                    bottleneck_channels=o // 4,
                    out_channels=o,
                    stride_in_1x1=True,
                )
                for (n, s, i, o) in zip(
                    [3, 4, 6, 3], [1, 2, 2, 2], [64, 256, 512, 1024], [256, 512, 1024, 2048]
                )
            ],
            out_features=["res2", "res3", "res4", "res5"],
            # freeze_at=2,
        ),
        # FPN consumes exactly the features the ResNet is asked to output above.
        in_features=["res2", "res3", "res4", "res5"],
        out_channels=256,
        top_block=dict(_target_=LastLevelMaxPool),
    ),
    proposal_generator=dict(
        _target_=RPN,
        in_features=["p2", "p3", "p4", "p5", "p6"],
        # num_anchors=3 matches the three aspect ratios configured below.
        head=dict(_target_=StandardRPNHead, in_channels=256, num_anchors=3),
        anchor_generator=dict(
            _target_=DefaultAnchorGenerator,
            # Five size groups and five strides: one per entry of `in_features`.
            sizes=[[32], [64], [128], [256], [512]],
            aspect_ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64],
            offset=0.0,
        ),
        anchor_matcher=dict(
            _target_=Matcher,
            thresholds=[0.3, 0.7],
            labels=[0, -1, 1],
            allow_low_quality_matches=True,
        ),
        box2box_transform=dict(_target_=Box2BoxTransform, weights=[1.0, 1.0, 1.0, 1.0]),
        batch_size_per_image=256,
        positive_fraction=0.5,
        # NOTE(review): the two-element tuples are presumably (train, test) -- confirm
        # against the RPN constructor.
        pre_nms_topk=(2000, 1000),
        post_nms_topk=(1000, 1000),
        nms_thresh=0.7,
    ),
    roi_heads=dict(
        _target_=StandardROIHeads,
        num_classes=80,
        batch_size_per_image=512,
        positive_fraction=0.25,
        proposal_matcher=dict(
            _target_=Matcher, thresholds=[0.5], labels=[0, 1], allow_low_quality_matches=False
        ),
        # --- box branch ---
        box_in_features=["p2", "p3", "p4", "p5"],
        box_pooler=dict(
            _target_=ROIPooler,
            output_size=7,
            # Four scales, one per entry of `box_in_features`.
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        box_head=dict(
            _target_=FastRCNNConvFCHead,
            # 256 channels = FPN out_channels; 7x7 = box_pooler output_size.
            input_shape=ShapeSpec(channels=256, height=7, width=7),
            conv_dims=[],
            fc_dims=[1024, 1024],
        ),
        box_predictor=dict(
            _target_=FastRCNNOutputLayers,
            # 1024 channels = last entry of the box head's fc_dims.
            input_shape=ShapeSpec(channels=1024),
            test_score_thresh=0.05,
            box2box_transform=dict(_target_=Box2BoxTransform, weights=(10, 10, 5, 5)),
            # NOTE: interpolation supported by OmegaConf
            # ("${..num_classes}" is a relative reference to the parent node's
            # `num_classes`, i.e. the value set on roi_heads above.)
            num_classes="${..num_classes}",
        ),
        # --- mask branch ---
        mask_in_features=["p2", "p3", "p4", "p5"],
        mask_pooler=dict(
            _target_=ROIPooler,
            output_size=14,
            scales=(1.0 / 4, 1.0 / 8, 1.0 / 16, 1.0 / 32),
            sampling_ratio=0,
            pooler_type="ROIAlignV2",
        ),
        mask_head=dict(
            _target_=MaskRCNNConvUpsampleHead,
            # 14x14 = mask_pooler output_size.
            input_shape=ShapeSpec(channels=256, width=14, height=14),
            num_classes="${..num_classes}",
            conv_dims=[256, 256, 256, 256, 256],
        ),
    ),
    # Mean is listed in the channel order named by `input_format`.
    pixel_mean=[103.530, 116.280, 123.675],
    pixel_std=[1.0, 1.0, 1.0],
    input_format="BGR",
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment