VGG16, VGG_CNN_M_1024, ResNet, ResNeXt.
"""
VGG16 from https://arxiv.org/abs/1409.1556.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from core.config import cfg
def add_VGG16_conv5_body(model):
    """Build the VGG16 convolutional body, conv1_1 through conv5_3.

    Returns a 3-tuple: (output blob, output channel dim, spatial scale of the
    output relative to the input image).
    """
    blob = 'data'
    # (stage suffix, number of 3x3 convs, input dim, output dim). Each stage
    # except conv5 is followed by a 2x2/stride-2 max pool; conv5's final ReLU
    # is the body output.
    stage_specs = [
        ('1', 2, 3, 64),
        ('2', 2, 64, 128),
        ('3', 3, 128, 256),
        ('4', 3, 256, 512),
        ('5', 3, 512, 512),
    ]
    for suffix, n_convs, dim_in, dim_out in stage_specs:
        for idx in range(1, n_convs + 1):
            name = 'conv{}_{}'.format(suffix, idx)
            model.Conv(blob, name, dim_in if idx == 1 else dim_out, dim_out,
                       3, pad=1, stride=1)
            blob = model.Relu(name, name)
        if suffix != '5':
            pool = 'pool' + suffix
            blob = model.MaxPool(blob, pool, kernel=2, pad=0, stride=2)
            if suffix == '2':
                # Freeze conv1/conv2: no gradient flows below pool2.
                model.StopGradient(pool, pool)
    return blob, 512, 1. / 16.
def add_VGG16_roi_fc_head(model, blob_in, dim_in, spatial_scale):
    """Add the VGG16 fc6/fc7 head on top of RoI-pooled conv features.

    Returns (output blob, output dim) — fc7 with 4096 channels.
    """
    roi_size = 7
    model.RoIFeatureTransform(
        blob_in,
        'pool5',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_size,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    # fc6 consumes the flattened roi_size x roi_size pooled feature map.
    model.FC('pool5', 'fc6', dim_in * roi_size * roi_size, 4096)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', 4096, 4096)
    return model.Relu('fc7', 'fc7'), 4096
"""
VGG_CNN_M_1024 from https://arxiv.org/abs/1405.3531.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from core.config import cfg
def add_VGG_CNN_M_1024_conv5_body(model):
    """Build the VGG_CNN_M_1024 convolutional body, conv1 through conv5.

    Returns (output blob, output channel dim, spatial scale vs. the input).
    """
    model.Conv('data', 'conv1', 3, 96, 7, pad=0, stride=2)
    model.Relu('conv1', 'conv1')
    model.LRN('conv1', 'norm1', size=5, alpha=0.0005, beta=0.75, bias=2.)
    model.MaxPool('norm1', 'pool1', kernel=3, pad=0, stride=2)
    # No updates at conv1 and below; norm1 and pool1 have no params, so the
    # gradient can be stopped right after pool1.
    model.StopGradient('pool1', 'pool1')
    model.Conv('pool1', 'conv2', 96, 256, 5, pad=0, stride=2)
    model.Relu('conv2', 'conv2')
    model.LRN('conv2', 'norm2', size=5, alpha=0.0005, beta=0.75, bias=2.)
    model.MaxPool('norm2', 'pool2', kernel=3, pad=0, stride=2)
    # Three 3x3/512 convs, each followed by ReLU; no pooling after conv5.
    prev = 'pool2'
    for name, d_in in (('conv3', 256), ('conv4', 512), ('conv5', 512)):
        model.Conv(prev, name, d_in, 512, 3, pad=1, stride=1)
        prev = model.Relu(name, name)
    return prev, 512, 1. / 16.
def add_VGG_CNN_M_1024_roi_fc_head(model, blob_in, dim_in, spatial_scale):
    """Add the VGG_CNN_M_1024 fc6/fc7 head on top of RoI-pooled features.

    Returns (output blob, output dim) — fc7 with 1024 channels.
    """
    roi_size = 6
    model.RoIFeatureTransform(
        blob_in,
        'pool5',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=roi_size,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    # fc6 consumes the flattened roi_size x roi_size pooled feature map.
    model.FC('pool5', 'fc6', dim_in * roi_size * roi_size, 4096)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', 4096, 1024)
    return model.Relu('fc7', 'fc7'), 1024
"""
ResNet 和 ResNeXt 的实现.
参考论文:https://arxiv.org/abs/1512.03385, https://arxiv.org/abs/1611.05431.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from core.config import cfg
# ---------------------------------------------------------------------------- #
# Bits for specific architectures (ResNet50, ResNet101, ...)
# ---------------------------------------------------------------------------- #
def add_ResNet50_conv4_body(model):
    """ResNet-50 body up through res4: (3, 4, 6) blocks in res2-res4."""
    return add_ResNet_convX_body(model, (3, 4, 6))
def add_ResNet50_conv5_body(model):
    """ResNet-50 body up through res5: (3, 4, 6, 3) blocks in res2-res5."""
    return add_ResNet_convX_body(model, (3, 4, 6, 3))
def add_ResNet101_conv4_body(model):
    """ResNet-101 body up through res4: (3, 4, 23) blocks in res2-res4."""
    return add_ResNet_convX_body(model, (3, 4, 23))
def add_ResNet101_conv5_body(model):
    """ResNet-101 body up through res5: (3, 4, 23, 3) blocks in res2-res5."""
    return add_ResNet_convX_body(model, (3, 4, 23, 3))
def add_ResNet152_conv5_body(model):
    """ResNet-152 body up through res5: (3, 8, 36, 3) blocks in res2-res5."""
    return add_ResNet_convX_body(model, (3, 8, 36, 3))
# ---------------------------------------------------------------------------- #
# Generic ResNet components
# ---------------------------------------------------------------------------- #
def add_stage(model, prefix, blob_in, n, dim_in, dim_out, dim_inner, dilation,
              stride_init=2):
    """Add a ResNet stage to the model by stacking n residual blocks.

    `prefix` names the stage (e.g. 'res2'); blocks are named prefix_0..prefix_{n-1}.
    Returns (output blob, output dim).
    """
    blob = blob_in
    for idx in range(n):
        # Every block but the last sums in place; the last block's output may
        # be fetched externally or consumed by FPN, so it gets its own blob.
        blob = add_residual_block(
            model,
            '{}_{}'.format(prefix, idx),
            blob,
            dim_in,
            dim_out,
            dim_inner,
            dilation,
            stride_init,
            inplace_sum=idx < n - 1
        )
        dim_in = dim_out
    return blob, dim_in
def add_ResNet_convX_body(model, block_counts, freeze_at=2):
    """Add a ResNet body from input data up through the res5 (aka conv5) stage.

    The final res5/conv5 stage may be omitted by passing only three block
    counts (hence convX, X = 4 or 5). Gradients are stopped after the stage
    indicated by `freeze_at` (0 disables freezing).
    """
    assert freeze_at in [0, 2, 3, 4, 5]
    p = model.Conv('data', 'conv1', 3, 64, 7, pad=3, stride=2, no_bias=1)
    p = model.AffineChannel(p, 'res_conv1_bn', inplace=True)
    p = model.Relu(p, p)
    p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2)
    dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP
    n1, n2, n3 = block_counts[:3]
    # (stage prefix, block count, output dim, bottleneck dim) for res2-res4.
    stage_specs = [
        ('res2', n1, 256, dim_bottleneck),
        ('res3', n2, 512, dim_bottleneck * 2),
        ('res4', n3, 1024, dim_bottleneck * 4),
    ]
    s, dim_in = p, 64
    for stage_idx, (prefix, n_blocks, dim_out, dim_inner) in enumerate(stage_specs, 2):
        s, dim_in = add_stage(model, prefix, s, n_blocks, dim_in, dim_out,
                              dim_inner, 1)
        if freeze_at == stage_idx:
            model.StopGradient(s, s)
    if len(block_counts) != 4:
        # No res5 stage: the body ends at res4 with stride 16.
        return s, dim_in, 1. / 16.
    s, dim_in = add_stage(model, 'res5', s, block_counts[3], dim_in, 2048,
                          dim_bottleneck * 8, cfg.RESNETS.RES5_DILATION)
    if freeze_at == 5:
        model.StopGradient(s, s)
    # Dilation in res5 keeps spatial resolution proportionally higher.
    return s, dim_in, 1. / 32. * cfg.RESNETS.RES5_DILATION
def add_ResNet_roi_conv5_head(model, blob_in, dim_in, spatial_scale):
    """Add an RoI feature transform (e.g. RoI pooling) followed by a
    res5/conv5 head applied to each RoI.

    Returns (output blob, output dim=2048).
    """
    # TODO(rbg): contains Fast R-CNN specific config options, making it
    # non-reusable; make this more generic with model-specific wrappers.
    model.RoIFeatureTransform(
        blob_in,
        'pool5',
        blob_rois='rois',
        method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
        resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION,
        sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
        spatial_scale=spatial_scale
    )
    dim_inner = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP * 8
    # Scale the res5 stride with the RoI resolution (presumably so the stage
    # output stays 7x7 ahead of the kernel-7 average pool — confirm).
    stride_init = int(cfg.FAST_RCNN.ROI_XFORM_RESOLUTION / 7)
    s, _ = add_stage(model, 'res5', 'pool5', 3, dim_in, 2048, dim_inner, 1,
                     stride_init)
    return model.AveragePool(s, 'res5_pool', kernel=7), 2048
def add_residual_block(model, prefix, blob_in, dim_in, dim_out, dim_inner,
                       dilation, stride_init=2, inplace_sum=False):
    """Add a single residual block: transformation branch + shortcut, summed,
    then ReLU.

    `prefix` = res<stage>_<sub_stage>, e.g. res2_3.
    """
    # Max pooling is performed prior to the first stage (uniquely identified
    # by dim_in == 64), so the first stage keeps stride 1; later stages stride
    # at their first block unless dilation is used instead.
    if dim_in != dim_out and dim_in != 64 and dilation == 1:
        stride = stride_init
    else:
        stride = 1
    # Transformation branch (selected by config, e.g. bottleneck_transformation).
    trans_fn = globals()[cfg.RESNETS.TRANS_FUNC]
    tr = trans_fn(model, blob_in, dim_in, dim_out, stride, prefix, dim_inner,
                  group=cfg.RESNETS.NUM_GROUPS, dilation=dilation)
    sc = add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride)
    # Sum -> ReLU; the sum reuses the transformation blob when inplace_sum.
    sum_blob = tr if inplace_sum else prefix + '_sum'
    s = model.net.Sum([tr, sc], sum_blob)
    return model.Relu(s, s)
def add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):
    """Identity shortcut when dims match; otherwise a 1x1 projection + BN."""
    if dim_in != dim_out:
        proj = model.Conv(blob_in, prefix + '_branch1', dim_in, dim_out,
                          kernel=1, stride=stride, no_bias=1)
        return model.AffineChannel(proj, prefix + '_branch1_bn')
    return blob_in
# ------------------------------------------------------------------------------
# various transformations (may expand and may consider a new helper)
# ------------------------------------------------------------------------------
def bottleneck_transformation(model, blob_in, dim_in, dim_out, stride,
                              prefix, dim_inner, dilation=1, group=1):
    """Add a bottleneck transformation: 1x1 -> 3x3 -> 1x1 conv+BN, with ReLU
    after the first two convs only.
    """
    # Original ResNet places stride 2 on the 1x1 conv; fb.torch ResNet places
    # it on the 3x3 conv. cfg.RESNETS.STRIDE_1X1 selects between the two.
    if cfg.RESNETS.STRIDE_1X1:
        str1x1, str3x3 = stride, 1
    else:
        str1x1, str3x3 = 1, stride
    # conv 1x1 -> BN -> ReLU
    cur = model.ConvAffine(blob_in, prefix + '_branch2a', dim_in, dim_inner,
                           kernel=1, stride=str1x1, pad=0, inplace=True)
    cur = model.Relu(cur, cur)
    # conv 3x3 (possibly grouped/dilated) -> BN -> ReLU
    cur = model.ConvAffine(cur, prefix + '_branch2b', dim_inner, dim_inner,
                           kernel=3, stride=str3x3, pad=1 * dilation,
                           dilation=dilation, group=group, inplace=True)
    cur = model.Relu(cur, cur)
    # conv 1x1 -> BN, no ReLU.
    # NOTE: the final AffineChannel op must NOT be in-place: Caffe2's gradient
    # computation is buggy for such graphs.
    return model.ConvAffine(cur, prefix + '_branch2c', dim_inner, dim_out,
                            kernel=1, stride=1, pad=0, inplace=False)