
Caffe2 - (29) Detectron modeling - Base Networks


The base networks (backbones) defined in Detectron's modeling module: VGG16, VGG_CNN_M_1024, ResNet, and ResNeXt.

1. VGG16.py

"""
VGG16 from https://arxiv.org/abs/1409.1556.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from core.config import cfg


def add_VGG16_conv5_body(model):
    model.Conv('data', 'conv1_1', 3, 64, 3, pad=1, stride=1)
    model.Relu('conv1_1', 'conv1_1')
    model.Conv('conv1_1', 'conv1_2', 64, 64, 3, pad=1, stride=1)
    model.Relu('conv1_2', 'conv1_2')
    model.MaxPool('conv1_2', 'pool1', kernel=2, pad=0, stride=2)
    model.Conv('pool1', 'conv2_1', 64, 128, 3, pad=1, stride=1)
    model.Relu('conv2_1', 'conv2_1')
    model.Conv('conv2_1', 'conv2_2', 128, 128, 3, pad=1, stride=1)
    model.Relu('conv2_2', 'conv2_2')
    model.MaxPool('conv2_2', 'pool2', kernel=2, pad=0, stride=2)
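    # No updates for conv1_* and conv2_*: gradients are stopped at pool2
    # (Fast R-CNN fine-tunes VGG16 from conv3_1 upward).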
    model.StopGradient('pool2', 'pool2')
    model.Conv('pool2', 'conv3_1', 128, 256, 3, pad=1, stride=1)
    model.Relu('conv3_1', 'conv3_1')
    model.Conv('conv3_1', 'conv3_2', 256, 256, 3, pad=1, stride=1)
    model.Relu('conv3_2', 'conv3_2')
    model.Conv('conv3_2', 'conv3_3', 256, 256, 3, pad=1, stride=1)
    model.Relu('conv3_3', 'conv3_3')
    model.MaxPool('conv3_3', 'pool3', kernel=2, pad=0, stride=2)
    model.Conv('pool3', 'conv4_1', 256, 512, 3, pad=1, stride=1)
    model.Relu('conv4_1', 'conv4_1')
    model.Conv('conv4_1', 'conv4_2', 512, 512, 3, pad=1, stride=1)
    model.Relu('conv4_2', 'conv4_2')
    model.Conv('conv4_2', 'conv4_3', 512, 512, 3, pad=1, stride=1)
    model.Relu('conv4_3', 'conv4_3')
    model.MaxPool('conv4_3', 'pool4', kernel=2, pad=0, stride=2)
    model.Conv('pool4', 'conv5_1', 512, 512, 3, pad=1, stride=1)
    model.Relu('conv5_1', 'conv5_1')
    model.Conv('conv5_1', 'conv5_2', 512, 512, 3, pad=1, stride=1)
    model.Relu('conv5_2', 'conv5_2')
    model.Conv('conv5_2', 'conv5_3', 512, 512, 3, pad=1, stride=1)
    blob_out = model.Relu('conv5_3', 'conv5_3')
    return blob_out, 512, 1. / 16.


def add_VGG16_roi_fc_head(model, blob_in, dim_in, spatial_scale):
    model.RoIFeatureTransform(blob_in, 
                              'pool5', 
                              blob_rois='rois',
                              method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
                              resolution=7,
                              sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
                              spatial_scale=spatial_scale )
    model.FC('pool5', 'fc6', dim_in * 7 * 7, 4096)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', 4096, 4096)
    blob_out = model.Relu('fc7', 'fc7')
    return blob_out, 4096
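
The body above returns its output blob together with the output channel count (512) and the spatial scale (1/16). A quick illustrative check, not Detectron code, of where that 1/16 comes from:

# Illustrative only: the four stride-2 max pools (pool1-pool4) each halve the
# resolution; every conv uses stride 1 with pad 1, so the conv5_3 feature map
# is 1/16 the size of the input image.
scale = 1.0
for pool_stride in [2, 2, 2, 2]:    # pool1 .. pool4
    scale /= pool_stride
assert scale == 1. / 16.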

2. VGG_CNN_M_1024.py

"""
VGG_CNN_M_1024 from https://arxiv.org/abs/1405.3531.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from core.config import cfg


def add_VGG_CNN_M_1024_conv5_body(model):
    model.Conv('data', 'conv1', 3, 96, 7, pad=0, stride=2)
    model.Relu('conv1', 'conv1')
    model.LRN('conv1', 'norm1', size=5, alpha=0.0005, beta=0.75, bias=2.)
    model.MaxPool('norm1', 'pool1', kernel=3, pad=0, stride=2)
    model.StopGradient('pool1', 'pool1')
    # No updates at conv1 and below (norm1 and pool1 have no params,
    # so we can stop gradients before them, too)
    model.Conv('pool1', 'conv2', 96, 256, 5, pad=0, stride=2)
    model.Relu('conv2', 'conv2')
    model.LRN('conv2', 'norm2', size=5, alpha=0.0005, beta=0.75, bias=2.)
    model.MaxPool('norm2', 'pool2', kernel=3, pad=0, stride=2)
    model.Conv('pool2', 'conv3', 256, 512, 3, pad=1, stride=1)
    model.Relu('conv3', 'conv3')
    model.Conv('conv3', 'conv4', 512, 512, 3, pad=1, stride=1)
    model.Relu('conv4', 'conv4')
    model.Conv('conv4', 'conv5', 512, 512, 3, pad=1, stride=1)
    blob_out = model.Relu('conv5', 'conv5')
    return blob_out, 512, 1. / 16.


def add_VGG_CNN_M_1024_roi_fc_head(model, blob_in, dim_in, spatial_scale):
    model.RoIFeatureTransform(blob_in,
                              'pool5',
                              blob_rois='rois',
                              method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
                              resolution=6,
                              sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
                              spatial_scale=spatial_scale )
    model.FC('pool5', 'fc6', dim_in * 6 * 6, 4096)
    model.Relu('fc6', 'fc6')
    model.FC('fc6', 'fc7', 4096, 1024)
    blob_out = model.Relu('fc7', 'fc7')
    return blob_out, 1024
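
Both RoI heads hard-code the pooled RoI resolution into the input size of fc6 (7x7 for VGG16, 6x6 for VGG_CNN_M_1024, both over 512 conv5 channels). A small illustrative check, not Detectron code, of that coupling:

# Illustrative only: fc6's input dimension must equal dim_in * resolution**2,
# where resolution is the value each head passes to RoIFeatureTransform.
def fc6_input_dim(dim_in, resolution):
    return dim_in * resolution * resolution

assert fc6_input_dim(512, 7) == 25088   # add_VGG16_roi_fc_head
assert fc6_input_dim(512, 6) == 18432   # add_VGG_CNN_M_1024_roi_fc_head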

3. ResNet.py

"""
ResNet 和 ResNeXt 的实现.
参考论文:https://arxiv.org/abs/1512.03385, https://arxiv.org/abs/1611.05431.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from core.config import cfg

# ---------------------------------------------------------------------------- #
# Bits for specific architectures (ResNet50, ResNet101, ...)
# ---------------------------------------------------------------------------- #
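# Block counts follow the ResNet paper: e.g. (3, 4, 6, 3) bottleneck blocks in
# res2-res5 with 3 convs each, plus conv1 and the final fc, give
# 3 * (3 + 4 + 6 + 3) + 2 = 50 weighted layers; (3, 4, 23, 3) gives 101 and
# (3, 8, 36, 3) gives 152.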


def add_ResNet50_conv4_body(model):
    return add_ResNet_convX_body(model, (3, 4, 6))


def add_ResNet50_conv5_body(model):
    return add_ResNet_convX_body(model, (3, 4, 6, 3))


def add_ResNet101_conv4_body(model):
    return add_ResNet_convX_body(model, (3, 4, 23))


def add_ResNet101_conv5_body(model):
    return add_ResNet_convX_body(model, (3, 4, 23, 3))


def add_ResNet152_conv5_body(model):
    return add_ResNet_convX_body(model, (3, 8, 36, 3))


# ---------------------------------------------------------------------------- #
# Generic ResNet components
# ---------------------------------------------------------------------------- #


def add_stage(model, prefix, blob_in, n, dim_in, dim_out, dim_inner, dilation, stride_init=2):
    """Add a ResNet stage to the model by stacking n residual blocks."""
    # e.g., prefix = res2
    for i in range(n):
        blob_in = add_residual_block(model, '{}_{}'.format(prefix, i), blob_in, dim_in, dim_out,
                                     dim_inner, dilation, stride_init,
                                     # The last block does not sum in place; its output may be
                                     # fetched externally or consumed by FPN.
                                     inplace_sum=i < n - 1)
        dim_in = dim_out
    return blob_in, dim_in


def add_ResNet_convX_body(model, block_counts, freeze_at=2):
    """
    添加 ResNet body, 从 input data 到 res5(也叫作conv5) 阶段.
    最终的  res5/conv5 阶段可以可选的排除掉(因此, convX, X = 4 or 5).
    """
    assert freeze_at in [0, 2, 3, 4, 5]
    p = model.Conv('data', 'conv1', 3, 64, 7, pad=3, stride=2, no_bias=1)
    p = model.AffineChannel(p, 'res_conv1_bn', inplace=True)
    p = model.Relu(p, p)
    p = model.MaxPool(p, 'pool1', kernel=3, pad=1, stride=2)
    dim_in = 64
    dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP
    (n1, n2, n3) = block_counts[:3]
    s, dim_in = add_stage(model, 'res2', p, n1, dim_in, 256, dim_bottleneck, 1 )
    if freeze_at == 2:
        model.StopGradient(s, s)
    s, dim_in = add_stage(model, 'res3', s, n2, dim_in, 512, dim_bottleneck * 2, 1 )
    if freeze_at == 3:
        model.StopGradient(s, s)
    s, dim_in = add_stage(model, 'res4', s, n3, dim_in, 1024, dim_bottleneck * 4, 1 )
    if freeze_at == 4:
        model.StopGradient(s, s)
    if len(block_counts) == 4:
        n4 = block_counts[3]
        s, dim_in = add_stage(model, 'res5', s, n4, dim_in, 2048, dim_bottleneck * 8, 
                              cfg.RESNETS.RES5_DILATION )
        if freeze_at == 5:
            model.StopGradient(s, s)
        return s, dim_in, 1. / 32. * cfg.RESNETS.RES5_DILATION
    else:
        return s, dim_in, 1. / 16.


def add_ResNet_roi_conv5_head(model, blob_in, dim_in, spatial_scale):
    """
    添加 RoI 特征变换(e.g., RoI pooling),其后接 res5/conv5 head 对每一个 RoI 进行处理.
    """
    # TODO(rbg): This contains Fast R-CNN specific config options making it
    # non-reusable; make this more generic with model-specific wrappers.
    model.RoIFeatureTransform(blob_in, 
                              'pool5', 
                              blob_rois='rois',
                              method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
                              resolution=cfg.FAST_RCNN.ROI_XFORM_RESOLUTION,
                              sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
                              spatial_scale=spatial_scale )
    dim_bottleneck = cfg.RESNETS.NUM_GROUPS * cfg.RESNETS.WIDTH_PER_GROUP
    stride_init = int(cfg.FAST_RCNN.ROI_XFORM_RESOLUTION / 7)
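    # With the typical ROI_XFORM_RESOLUTION of 14 this gives stride_init = 2, so
    # the first res5 block halves the 14x14 RoI feature map to 7x7 before the
    # 7x7 average pool below.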
    s, dim_in = add_stage(model, 'res5', 'pool5', 3, dim_in, 2048, dim_bottleneck * 8, 1, stride_init )
    s = model.AveragePool(s, 'res5_pool', kernel=7)
    return s, 2048


def add_residual_block(model, prefix, blob_in, dim_in, dim_out, dim_inner, dilation, 
                       stride_init=2, inplace_sum=False ):
    """
    添加一个残差模块 a residual block.
    """
    # prefix = res<stage>_<sub_stage>, e.g., res2_3

    # Max pooling is performed prior to the first stage (which is uniquely
    # distinguished by dim_in = 64), thus we keep stride = 1 for the first stage
    stride = stride_init if (dim_in != dim_out and dim_in != 64 and dilation == 1 ) else 1

    # transformation blob
    tr = globals()[cfg.RESNETS.TRANS_FUNC](model, blob_in, dim_in, dim_out, stride, prefix,
                                           dim_inner, group=cfg.RESNETS.NUM_GROUPS, dilation=dilation )

    # sum -> ReLU
    sc = add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride)
    if inplace_sum:
        s = model.net.Sum([tr, sc], tr)
    else:
        s = model.net.Sum([tr, sc], prefix + '_sum')

    return model.Relu(s, s)


def add_shortcut(model, prefix, blob_in, dim_in, dim_out, stride):
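    # Identity shortcut when input and output dims match; otherwise a strided
    # 1x1 projection (branch1) plus AffineChannel matches dimensions (the
    # ResNet paper's option B shortcut).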
    if dim_in == dim_out:
        return blob_in

    c = model.Conv(blob_in, prefix + '_branch1', dim_in, dim_out, kernel=1, stride=stride, no_bias=1 )
    return model.AffineChannel(c, prefix + '_branch1_bn')


# ------------------------------------------------------------------------------
# various transformations (may expand and may consider a new helper)
# ------------------------------------------------------------------------------

def bottleneck_transformation(model, blob_in, dim_in, dim_out, stride, 
                              prefix, dim_inner, dilation=1, group=1 ):
    """
    添加 bottleneck transformation.
    """
    # In original resnet, stride=2 is on 1x1.
    # In fb.torch resnet, stride=2 is on 3x3.
    (str1x1, str3x3) = (stride, 1) if cfg.RESNETS.STRIDE_1X1 else (1, stride)

    # conv 1x1 -> BN -> ReLU
    cur = model.ConvAffine(blob_in, prefix + '_branch2a', dim_in, dim_inner,
                           kernel=1, stride=str1x1, pad=0, inplace=True )
    cur = model.Relu(cur, cur)

    # conv 3x3 -> BN -> ReLU
    cur = model.ConvAffine(cur, prefix + '_branch2b', dim_inner, dim_inner,
                           kernel=3, stride=str3x3, pad=1 * dilation, dilation=dilation,
                           group=group, inplace=True )
    cur = model.Relu(cur, cur)

    # conv 1x1 -> BN (no ReLU)
    # Note: the final AffineChannel op cannot be in-place; Caffe2's gradient
    # computation has a bug for such graphs, hence inplace=False below.
    cur = model.ConvAffine(cur, prefix + '_branch2c', dim_inner, dim_out,
                           kernel=1, stride=1, pad=0, inplace=False )
    return cur
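
For reference, the spatial scales returned by add_ResNet_convX_body can be reproduced with a short illustrative calculation (not Detectron code): only conv1, pool1 and the first blocks of res3/res4/res5 stride, and a dilated res5 keeps stride 1.

# Illustrative only: effective output stride of the ResNet body.
def resnet_spatial_scale(include_res5=True, res5_dilation=1):
    stride = 2 * 2 * 2 * 2          # conv1, pool1, res3, res4 (res2 never strides)
    if include_res5 and res5_dilation == 1:
        stride *= 2                 # res5 strides only when it is not dilated
    return 1.0 / stride

assert resnet_spatial_scale(include_res5=False) == 1. / 16.   # conv4 body
assert resnet_spatial_scale(res5_dilation=1) == 1. / 32.      # conv5 body
assert resnet_spatial_scale(res5_dilation=2) == 1. / 16.      # dilated conv5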