Skip to content

Commit

Permalink
add Faster rcnn c5 (open-mmlab#4043)
Browse files Browse the repository at this point in the history
* add faster rcnn C5

* add trained faster rcnn C5 model url

* fix typos

* update based on 1-st comments

* update unittest for ChannelMapper

* small changes of ChannelMapper's docstring

* small changes of ChannelMapper's docstring
  • Loading branch information
GT9505 authored Nov 5, 2020
1 parent af7deff commit d3cf38d
Show file tree
Hide file tree
Showing 8 changed files with 308 additions and 3 deletions.
107 changes: 107 additions & 0 deletions configs/_base_/models/faster_rcnn_r50_caffe_dc5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained='open-mmlab://detectron2/resnet50_caffe',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
strides=(1, 2, 2, 1),
dilations=(1, 1, 1, 2),
out_indices=(3, ),
frozen_stages=1,
norm_cfg=norm_cfg,
norm_eval=True,
style='caffe'),
rpn_head=dict(
type='RPNHead',
in_channels=2048,
feat_channels=2048,
anchor_generator=dict(
type='AnchorGenerator',
scales=[2, 4, 8, 16, 32],
ratios=[0.5, 1.0, 2.0],
strides=[16]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
out_channels=2048,
featmap_strides=[16]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=2048,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=80,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=12000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=6000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100))
3 changes: 3 additions & 0 deletions configs/faster_rcnn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Download |
| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: |
| R-50-DC5 | caffe | 1x | - | - | 37.2 | [model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909-531f0f43.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909.log.json) |
| R-50-FPN | caffe | 1x | 3.8 | | 37.8 | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.378_20200504_180032-c5925ee5.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_20200504_180032.log.json) |
| R-50-FPN | pytorch | 1x | 4.0 | 21.4 | 37.4 | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130_204655.log.json) |
| R-50-FPN | pytorch | 2x | - | - | 38.4 | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_20200504_210434.log.json) |
Expand All @@ -42,5 +43,7 @@ We also train some models with longer schedules and multi-scale training. The us

| Backbone | Style | Lr schd | Mem (GB) | Inf time (fps) | box AP | Download |
| :-------------: | :-----: | :-----: | :------: | :------------: | :----: | :------: |
| [R-50-DC5](./faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py) | caffe | 1x | - | | 37.4 |[model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851-b33d21b9.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851.log.json)
| [R-50-DC5](./faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py) | caffe | 3x | - | | 38.7 |[model](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107-34a53b2c.pth) | [log](https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107.log.json)
| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py) | caffe | 2x | 4.3 | | 39.7 |[model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_bbox_mAP-0.397_20200504_231813-10b2de58.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_20200504_231813.log.json)
| [R-50-FPN](./faster_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py) | caffe | 3x | 4.3 | | 40.2 | [model](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_bbox_mAP-0.398_20200504_163323-30042637.pth) | [log](http://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20200504_163323.log.json)
37 changes: 37 additions & 0 deletions configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
_base_ = [
'../_base_/models/faster_rcnn_r50_caffe_dc5.py',
'../_base_/datasets/coco_detection.py',
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# use caffe img_norm
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
train=dict(pipeline=train_pipeline),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
42 changes: 42 additions & 0 deletions configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
_base_ = [
'../_base_/models/faster_rcnn_r50_caffe_dc5.py',
'../_base_/datasets/coco_detection.py',
'../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
]
# use caffe img_norm
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Resize',
img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
(1333, 768), (1333, 800)],
multiscale_mode='value',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
train=dict(pipeline=train_pipeline),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
_base_ = './faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py'
# learning policy
lr_config = dict(step=[28, 34])
total_epochs = 36
5 changes: 3 additions & 2 deletions mmdet/models/necks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .bfp import BFP
from .channel_mapper import ChannelMapper
from .fpn import FPN
from .fpn_carafe import FPN_CARAFE
from .hrfpn import HRFPN
Expand All @@ -9,6 +10,6 @@
from .yolo_neck import YOLOV3Neck

__all__ = [
'FPN', 'BFP', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'PAFPN', 'NASFCOS_FPN',
'RFP', 'YOLOV3Neck'
'FPN', 'BFP', 'ChannelMapper', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'PAFPN',
'NASFCOS_FPN', 'RFP', 'YOLOV3Neck'
]
74 changes: 74 additions & 0 deletions mmdet/models/necks/channel_mapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import torch.nn as nn
from mmcv.cnn import ConvModule, xavier_init

from ..builder import NECKS


@NECKS.register_module()
class ChannelMapper(nn.Module):
r"""Channel Mapper to reduce/increase channels of backbone features.
This is used to reduce/increase channels of backbone features.
Args:
in_channels (List[int]): Number of input channels per scale.
out_channels (int): Number of output channels (used at each scale).
kernel_size (int, optional): kernel_size for reducing channels (used
at each scale). Default: 3.
conv_cfg (dict, optional): Config dict for convolution layer.
Default: None.
norm_cfg (dict, optional): Config dict for normalization layer.
Default: None.
act_cfg (dict, optional): Config dict for activation layer in
ConvModule. Default: dict(type='ReLU').
Example:
>>> import torch
>>> in_channels = [2, 3, 5, 7]
>>> scales = [340, 170, 84, 43]
>>> inputs = [torch.rand(1, c, s, s)
... for c, s in zip(in_channels, scales)]
>>> self = ChannelMapper(in_channels, 11, 3).eval()
>>> outputs = self.forward(inputs)
>>> for i in range(len(outputs)):
... print(f'outputs[{i}].shape = {outputs[i].shape}')
outputs[0].shape = torch.Size([1, 11, 340, 340])
outputs[1].shape = torch.Size([1, 11, 170, 170])
outputs[2].shape = torch.Size([1, 11, 84, 84])
outputs[3].shape = torch.Size([1, 11, 43, 43])
"""

def __init__(self,
in_channels,
out_channels,
kernel_size=3,
conv_cfg=None,
norm_cfg=None,
act_cfg=dict(type='ReLU')):
super(ChannelMapper, self).__init__()
assert isinstance(in_channels, list)

self.convs = nn.ModuleList()
for in_channel in in_channels:
self.convs.append(
ConvModule(
in_channel,
out_channels,
kernel_size,
padding=(kernel_size - 1) // 2,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
act_cfg=act_cfg))

# default init_weights for conv(msra) and norm in ConvModule
def init_weights(self):
"""Initialize the weights of ChannelMapper module."""
for m in self.modules():
if isinstance(m, nn.Conv2d):
xavier_init(m, distribution='uniform')

def forward(self, inputs):
"""Forward function."""
assert len(inputs) == len(self.convs)
outs = [self.convs[i](inputs[i]) for i in range(len(inputs))]
return tuple(outs)
39 changes: 38 additions & 1 deletion tests/test_models/test_necks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import torch
from torch.nn.modules.batchnorm import _BatchNorm

from mmdet.models.necks import FPN
from mmdet.models.necks import FPN, ChannelMapper


def test_fpn():
Expand Down Expand Up @@ -199,3 +199,40 @@ def test_fpn():
for i in range(fpn_model.num_outs):
outs[i].shape[1] == out_channels
outs[i].shape[2] == outs[i].shape[3] == s // (2**i)


def test_channel_mapper():
"""Tests ChannelMapper."""
s = 64
in_channels = [8, 16, 32, 64]
feat_sizes = [s // 2**i for i in range(4)] # [64, 32, 16, 8]
out_channels = 8
kernel_size = 3
feats = [
torch.rand(1, in_channels[i], feat_sizes[i], feat_sizes[i])
for i in range(len(in_channels))
]

# in_channels must be a list
with pytest.raises(AssertionError):
channel_mapper = ChannelMapper(
in_channels=10, out_channels=out_channels, kernel_size=kernel_size)
# the length of channel_mapper's inputs must be equal to the length of
# in_channels
with pytest.raises(AssertionError):
channel_mapper = ChannelMapper(
in_channels=in_channels[:-1],
out_channels=out_channels,
kernel_size=kernel_size)
channel_mapper(feats)

channel_mapper = ChannelMapper(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size)

outs = channel_mapper(feats)
assert len(outs) == len(feats)
for i in range(len(feats)):
outs[i].shape[1] == out_channels
outs[i].shape[2] == outs[i].shape[3] == s // (2**i)

0 comments on commit d3cf38d

Please sign in to comment.