前往小程序,Get更优阅读体验!
立即前往
首页
学习
活动
专区
工具
TVP
发布
社区首页 >专栏 >Caffe2 - (三十一) Detectron 之 modeling - FPN 与 optimizer

Caffe2 - (三十一) Detectron 之 modeling - FPN 与 optimizer

作者头像
AIHGF
发布2018-05-17 10:19:03
1.7K0
发布2018-05-17 10:19:03
举报
文章被收录于专栏:AIUAI

Caffe2 - (三十一) Detectron 之 modeling - FPN 与 optimizer

1. FPN.py

FPN 模块.

代码语言:python
复制
"""
Feature Pyramid Network (FPN) 使用的相关函数.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import collections
import numpy as np

from core.config import cfg
from modeling.generate_anchors import generate_anchors ## 
from utils.c2 import const_fill
from utils.c2 import gauss_fill
import modeling.ResNet as ResNet ## 
import utils.blob as blob_utils
import utils.boxes as box_utils

"""
Backbone 骨干网络的最低(lowest) 和最高(highest) 金字塔(pyramid) 层(level).
对于 FPN, 这里假设所有的网络都有 5 个空间维度的减少(spatial reductions),
每一个减少的因子都是 2.
Level 1 对应于输入图片,此时使用时是没有意义的.
"""
# Lowest and highest backbone pyramid levels that FPN is built on.
LOWEST_BACKBONE_LVL = 2   # E.g., "conv2"-like level
HIGHEST_BACKBONE_LVL = 5  # E.g., "conv5"-like level


# ---------------------------------------------------------------------------- #
# FPN with ResNet
# ---------------------------------------------------------------------------- #

def add_fpn_ResNet50_conv5_body(model):
    """Attach FPN to a ResNet-50 conv5 body and return every FPN level.

    Delegates to ``add_fpn_onto_conv_body`` with
    ``ResNet.add_ResNet50_conv5_body`` as the body builder and
    ``fpn_level_info_ResNet50_conv5`` as the level description.
    """
    body_builder = ResNet.add_ResNet50_conv5_body
    level_info_func = fpn_level_info_ResNet50_conv5
    return add_fpn_onto_conv_body(model, body_builder, level_info_func)


def add_fpn_ResNet50_conv5_P2only_body(model):
    """Attach FPN to a ResNet-50 conv5 body, returning only the finest level.

    Same as the all-levels ResNet-50 variant, but passes ``P2only=True`` to
    ``add_fpn_onto_conv_body``.
    """
    body_builder = ResNet.add_ResNet50_conv5_body
    level_info_func = fpn_level_info_ResNet50_conv5
    return add_fpn_onto_conv_body(
        model, body_builder, level_info_func, P2only=True
    )


def add_fpn_ResNet101_conv5_body(model):
    """Attach FPN to a ResNet-101 conv5 body and return every FPN level.

    Delegates to ``add_fpn_onto_conv_body`` with
    ``ResNet.add_ResNet101_conv5_body`` as the body builder and
    ``fpn_level_info_ResNet101_conv5`` as the level description.
    """
    body_builder = ResNet.add_ResNet101_conv5_body
    level_info_func = fpn_level_info_ResNet101_conv5
    return add_fpn_onto_conv_body(model, body_builder, level_info_func)


def add_fpn_ResNet101_conv5_P2only_body(model):
    """Attach FPN to a ResNet-101 conv5 body, returning only the finest level.

    Same as the all-levels ResNet-101 variant, but passes ``P2only=True`` to
    ``add_fpn_onto_conv_body``.
    """
    body_builder = ResNet.add_ResNet101_conv5_body
    level_info_func = fpn_level_info_ResNet101_conv5
    return add_fpn_onto_conv_body(
        model, body_builder, level_info_func, P2only=True
    )


def add_fpn_ResNet152_conv5_body(model):
    """Attach FPN to a ResNet-152 conv5 body and return every FPN level.

    Delegates to ``add_fpn_onto_conv_body`` with
    ``ResNet.add_ResNet152_conv5_body`` as the body builder and
    ``fpn_level_info_ResNet152_conv5`` as the level description.
    """
    body_builder = ResNet.add_ResNet152_conv5_body
    level_info_func = fpn_level_info_ResNet152_conv5
    return add_fpn_onto_conv_body(model, body_builder, level_info_func)


def add_fpn_ResNet152_conv5_P2only_body(model):
    """Attach FPN to a ResNet-152 conv5 body, returning only the finest level.

    Same as the all-levels ResNet-152 variant, but passes ``P2only=True`` to
    ``add_fpn_onto_conv_body``.
    """
    body_builder = ResNet.add_ResNet152_conv5_body
    level_info_func = fpn_level_info_ResNet152_conv5
    return add_fpn_onto_conv_body(
        model, body_builder, level_info_func, P2only=True
    )


# ---------------------------------------------------------------------------- #
# 添加 FPN 到骨干backbone 网络结构的相关函数
# ---------------------------------------------------------------------------- #

def add_fpn_onto_conv_body(model, conv_body_func, fpn_level_info_func, P2only=False):
    """Add the given conv body to ``model``, then add FPN levels on top of it.

    Args:
        model: model object handed to ``conv_body_func`` and ``add_fpn``.
        conv_body_func: callable invoked as ``conv_body_func(model)`` to build
            the backbone.
        fpn_level_info_func: zero-argument callable whose result is passed to
            ``add_fpn`` as the level description.
        P2only: when true, return only the last (finest) FPN blob and its
            spatial scale instead of the full lists.

    Returns:
        ``(blobs, dim, spatial_scales)`` as produced by ``add_fpn``. The lists
        are in reversed (coarse-to-fine) order, e.g. ``[fpn5, fpn4, fpn3,
        fpn2]`` with scales ``[1/32, 1/16, 1/8, 1/4]``. With ``P2only`` the
        first and third items are the single last entries of those lists.
    """
    conv_body_func(model)
    level_info = fpn_level_info_func()
    fpn_blobs, fpn_dim, fpn_scales = add_fpn(model, level_info)

    if not P2only:
        # Expose every pyramid level.
        return fpn_blobs, fpn_dim, fpn_scales

    # Expose the finest level only (it is stored last).
    return fpn_blobs[-1], fpn_dim, fpn_scales[-1]


def add_fpn(model, fpn_level_info):
    """Add FPN connections to ``model`` as described in the FPN paper.

    FPN levels are built starting from the highest/coarsest backbone level
    (usually "conv5"). First the function works downwards, building
    lower/finer resolution levels through top-down and lateral connections;
    then it works upwards, building levels coarser than the starting one.

    Args:
        model: model helper providing ``Conv``, ``MaxPool`` and ``Relu`` (and,
            through ``add_topdown_lateral_module``, ``net.UpsampleNearest``
            and ``net.Sum``).
        fpn_level_info: object with ``blobs``, ``dims`` and ``spatial_scales``
            sequences, indexed from the coarsest backbone stage to the finest.

    Returns:
        ``(blobs_fpn, fpn_dim, spatial_scales)`` where ``blobs_fpn`` and
        ``spatial_scales`` are parallel lists ordered coarse-to-fine and
        ``fpn_dim`` is ``cfg.FPN.DIM``.
    """
    fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()

    # Number of backbone stages that receive an FPN level, counted from the
    # coarsest stage. E.g. with 4 stages ("conv5".."conv2") and min_level=2
    # this is 4 - (2 - 2) = 4.
    num_backbone_stages = (
        len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    )

    lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
    output_blobs = ['fpn_inner_{}'.format(s) for s in lateral_input_blobs]
    fpn_dim_lateral = fpn_level_info.dims
    xavier_fill = ('XavierFill', {})

    # Coarsest backbone level: a 1x1 conv only, which seeds the recursion.
    model.Conv(lateral_input_blobs[0],
               output_blobs[0],
               dim_in=fpn_dim_lateral[0],
               dim_out=fpn_dim,
               kernel=1,
               pad=0,
               stride=1,
               weight_init=xavier_fill,
               bias_init=const_fill(0.0))

    #
    # Step 1: starting from the coarsest backbone level, build FPN levels
    # going down (towards finer resolution).
    #

    # Every other level gets a top-down connection plus a lateral connection.
    for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )

    # Post-hoc scale-specific 3x3 convs.
    blobs_fpn = []
    spatial_scales = []
    for i in range(num_backbone_stages):
        # The original line was missing the opening quote of this format
        # string, which made the whole module a SyntaxError.
        fpn_blob = model.Conv(output_blobs[i],
                              'fpn_{}'.format(fpn_level_info.blobs[i]),
                              dim_in=fpn_dim,
                              dim_out=fpn_dim,
                              kernel=3,
                              pad=1,
                              stride=1,
                              weight_init=xavier_fill,
                              bias_init=const_fill(0.0))
        blobs_fpn.append(fpn_blob)
        spatial_scales.append(fpn_level_info.spatial_scales[i])

    #
    # Step 2: starting from the coarsest backbone level, build FPN levels
    # going up (towards coarser resolution).
    #

    # Check whether a P6 feature map is needed.
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level as implemented for the CVPR'17 FPN paper.
        P6_blob_in = blobs_fpn[0]
        P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling (kernel=1) to simulate stride-2 subsampling.
        P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
        blobs_fpn.insert(0, P6_blob)
        spatial_scales.insert(0, spatial_scales[0] * 0.5)

    # Coarser FPN levels as introduced by RetinaNet.
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        fpn_blob = fpn_level_info.blobs[0]
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            fpn_blob_in = fpn_blob
            if i > HIGHEST_BACKBONE_LVL + 1:
                # Every extra level after the first is fed through a ReLU.
                fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
            fpn_blob = model.Conv(fpn_blob_in,
                                  'fpn_' + str(i),
                                  dim_in=dim_in,
                                  dim_out=fpn_dim,
                                  kernel=3,
                                  pad=1,
                                  stride=2,
                                  weight_init=xavier_fill,
                                  bias_init=const_fill(0.0))
            # Only the first extra conv reads backbone channels.
            dim_in = fpn_dim
            blobs_fpn.insert(0, fpn_blob)
            spatial_scales.insert(0, spatial_scales[0] * 0.5)

    return blobs_fpn, fpn_dim, spatial_scales


def add_topdown_lateral_module(model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral):
    """Add one top-down + lateral merge module to ``model``.

    Args:
        model: model helper providing ``Conv``, ``net.UpsampleNearest`` and
            ``net.Sum``.
        fpn_top: blob that is upsampled by a factor of 2.
        fpn_lateral: blob fed through the lateral 1x1 conv.
        fpn_bottom: name of the output blob; also the prefix for the
            intermediate ``_lateral`` and ``_topdown`` blobs.
        dim_top: output channel count of the lateral conv.
        dim_lateral: input channel count of the lateral conv.
    """
    # The lateral weights start at zero when cfg.FPN.ZERO_INIT_LATERAL is set,
    # otherwise they use Xavier initialisation.
    if cfg.FPN.ZERO_INIT_LATERAL:
        lateral_weight_init = const_fill(0.0)
    else:
        lateral_weight_init = ('XavierFill', {})

    # Lateral 1x1 conv.
    lateral = model.Conv(
        fpn_lateral,
        fpn_bottom + '_lateral',
        dim_in=dim_lateral,
        dim_out=dim_top,
        kernel=1,
        pad=0,
        stride=1,
        weight_init=lateral_weight_init,
        bias_init=const_fill(0.0),
    )

    # Top-down 2x upsampling.
    topdown = model.net.UpsampleNearest(
        fpn_top, fpn_bottom + '_topdown', scale=2
    )

    # Sum the lateral and top-down paths into the output blob.
    model.net.Sum([lateral, topdown], fpn_bottom)


def get_min_max_levels():
    """Return the ``(min_level, max_level)`` FPN levels required by the config.

    The range depends on which of ``cfg.FPN.MULTILEVEL_RPN`` and
    ``cfg.FPN.MULTILEVEL_ROIS`` are enabled:

    * both: the union of the RPN and RoI level ranges;
    * RPN only: the RPN level range;
    * RoIs only: the RoI level range;
    * neither: the backbone range
      ``(LOWEST_BACKBONE_LVL, HIGHEST_BACKBONE_LVL)``.
    """
    multilevel_rpn = cfg.FPN.MULTILEVEL_RPN
    multilevel_rois = cfg.FPN.MULTILEVEL_ROIS

    if multilevel_rpn and multilevel_rois:
        lowest = min(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.ROI_MIN_LEVEL)
        highest = max(cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.ROI_MAX_LEVEL)
        return lowest, highest
    if multilevel_rpn:
        return cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL
    if multilevel_rois:
        return cfg.FPN.ROI_MIN_LEVEL, cfg.FPN.ROI_MAX_LEVEL
    return LOWEST_BACKBONE_LVL, HIGHEST_BACKBONE_LVL


# ---------------------------------------------------------------------------- #
# RPN with an FPN backbone
# ---------------------------------------------------------------------------- #

def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
    """Add RPN outputs on top of every FPN level.

    Args:
        model: model helper providing ``Conv``, ``ConvShared``, ``Relu``,
            ``GenerateProposals`` and ``net.Sigmoid``.
        blobs_in: FPN blobs in reversed (coarse-to-fine) order; there must be
            exactly ``RPN_MAX_LEVEL - RPN_MIN_LEVEL + 1`` of them.
        dim_in: channel count of the input blobs (also used as the hidden
            RPN conv's output channel count).
        spatial_scales: per-level spatial scales, in the same reversed order
            as ``blobs_in``.
    """
    anchors_per_location = len(cfg.FPN.RPN_ASPECT_RATIOS)
    dim_out = dim_in

    k_max = cfg.FPN.RPN_MAX_LEVEL  # coarsest level of pyramid
    k_min = cfg.FPN.RPN_MIN_LEVEL  # finest level of pyramid
    assert len(blobs_in) == k_max - k_min + 1

    # Suffix of the level that owns the shared weights and biases.
    first_suffix = str(k_min)

    for lvl in range(k_min, k_max + 1):
        # Inputs are stored coarse-to-fine, hence the reversed index.
        pos = k_max - lvl
        level_blob = blobs_in[pos]
        level_scale = spatial_scales[pos]
        suffix = str(lvl)

        hidden_name = 'conv_rpn_fpn' + suffix
        cls_name = 'rpn_cls_logits_fpn' + suffix
        bbox_name = 'rpn_bbox_pred_fpn' + suffix

        if lvl != k_min:
            # Later levels reuse the weights and biases of the first level.
            # RPN hidden representation
            hidden = model.ConvShared(
                level_blob,
                hidden_name,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight='conv_rpn_fpn' + first_suffix + '_w',
                bias='conv_rpn_fpn' + first_suffix + '_b',
            )
            model.Relu(hidden, hidden)
            # Proposal classification scores
            cls_logits = model.ConvShared(
                hidden,
                cls_name,
                dim_in,
                anchors_per_location,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_cls_logits_fpn' + first_suffix + '_w',
                bias='rpn_cls_logits_fpn' + first_suffix + '_b',
            )
            # Proposal bbox regression deltas
            bbox_pred = model.ConvShared(
                hidden,
                bbox_name,
                dim_in,
                4 * anchors_per_location,
                kernel=1,
                pad=0,
                stride=1,
                weight='rpn_bbox_pred_fpn' + first_suffix + '_w',
                bias='rpn_bbox_pred_fpn' + first_suffix + '_b',
            )
        else:
            # First FPN level: create the conv ops with randomly initialised
            # weights and zeroed biases; all other levels share them.
            # RPN hidden representation
            hidden = model.Conv(
                level_blob,
                hidden_name,
                dim_in,
                dim_out,
                kernel=3,
                pad=1,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0),
            )
            model.Relu(hidden, hidden)
            # Proposal classification scores
            cls_logits = model.Conv(
                hidden,
                cls_name,
                dim_in,
                anchors_per_location,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0),
            )
            # Proposal bbox regression deltas
            bbox_pred = model.Conv(
                hidden,
                bbox_name,
                dim_in,
                4 * anchors_per_location,
                kernel=1,
                pad=0,
                stride=1,
                weight_init=gauss_fill(0.01),
                bias_init=const_fill(0.0),
            )

        # Proposals are needed
        #   1. at inference (== not model.train), for RPN-only and
        #      Faster R-CNN alike;
        #   2. at training time, for Faster R-CNN.
        # The remaining case (training an RPN only) needs no proposals.
        if model.train and not cfg.MODEL.FASTER_RCNN:
            continue

        level_anchors = generate_anchors(
            stride=2.**lvl,
            sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
            aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS,
        )
        cls_probs = model.net.Sigmoid(cls_logits, 'rpn_cls_probs_fpn' + suffix)
        model.GenerateProposals(
            [cls_probs, bbox_pred, 'im_info'],
            ['rpn_rois_fpn' + suffix, 'rpn_roi_probs_fpn' + suffix],
            anchors=level_anchors,
            spatial_scale=level_scale,
        )


def add_fpn_rpn_losses(model):
    """Add the RPN classification and bbox losses for every FPN RPN level.

    Args:
        model: model helper providing ``AddLosses`` and the ``net`` ops
            ``SpatialNarrowAs``, ``SigmoidCrossEntropyLoss`` and
            ``SmoothL1Loss``.

    Returns:
        Dict accumulated from ``blob_utils.get_loss_gradients`` over all
        levels from ``cfg.FPN.RPN_MIN_LEVEL`` to ``cfg.FPN.RPN_MAX_LEVEL``.
    """
    grads_by_loss = {}
    first_lvl = cfg.FPN.RPN_MIN_LEVEL
    last_lvl = cfg.FPN.RPN_MAX_LEVEL

    for lvl in range(first_lvl, last_lvl + 1):
        suffix = str(lvl)
        cls_logits_name = 'rpn_cls_logits_fpn' + suffix
        bbox_pred_name = 'rpn_bbox_pred_fpn' + suffix
        labels_name = 'rpn_labels_int32_fpn' + suffix
        cls_loss_name = 'loss_rpn_cls_fpn' + suffix
        bbox_loss_name = 'loss_rpn_bbox_fpn' + suffix

        # Spatially narrow the full-sized RPN label arrays so that they match
        # the shape of this level's feature map.
        model.net.SpatialNarrowAs(
            ['rpn_labels_int32_wide_fpn' + suffix, cls_logits_name],
            labels_name,
        )
        for key in ('targets', 'inside_weights', 'outside_weights'):
            model.net.SpatialNarrowAs(
                ['rpn_bbox_' + key + '_wide_fpn' + suffix, bbox_pred_name],
                'rpn_bbox_' + key + '_fpn' + suffix,
            )

        cls_loss_scale = (
            1. / cfg.NUM_GPUS / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM /
            cfg.TRAIN.IMS_PER_BATCH
        )
        cls_loss = model.net.SigmoidCrossEntropyLoss(
            [cls_logits_name, labels_name],
            cls_loss_name,
            normalize=0,
            scale=cls_loss_scale,
        )

        # Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is
        # handled by (1) setting bbox outside weights and (2) SmoothL1Loss
        # normalizes by IMS_PER_BATCH.
        bbox_loss = model.net.SmoothL1Loss(
            [
                bbox_pred_name,
                'rpn_bbox_targets_fpn' + suffix,
                'rpn_bbox_inside_weights_fpn' + suffix,
                'rpn_bbox_outside_weights_fpn' + suffix,
            ],
            bbox_loss_name,
            beta=1. / 9.,
            scale=1. / cfg.NUM_GPUS,
        )

        level_grads = blob_utils.get_loss_gradients(model, [cls_loss, bbox_loss])
        grads_by_loss.update(level_grads)
        model.AddLosses([cls_loss_name, bbox_loss_name])

    return grads_by_loss


# ---------------------------------------------------------------------------- #
# multilevel FPN RoIs 相关的辅助函数Helper functions
# ---------------------------------------------------------------------------- #

def map_rois_to_fpn_levels(rois, k_min, k_max):
    """Decide which FPN level each RoI in ``rois`` should map to.

    Uses the heuristic of Eqn. (1) in the FPN paper:
    ``floor(lvl0 + log2(sqrt(area) / s0 + 1e-6))``, clipped to
    ``[k_min, k_max]``.

    Args:
        rois: boxes accepted by ``box_utils.boxes_area``.
        k_min: lowest level that may be returned.
        k_max: highest level that may be returned.

    Returns:
        Array of target levels, one per RoI.
    """
    roi_scales = np.sqrt(box_utils.boxes_area(rois))
    canonical_scale = cfg.FPN.ROI_CANONICAL_SCALE  # default: 224
    canonical_level = cfg.FPN.ROI_CANONICAL_LEVEL  # default: 4

    # The small epsilon keeps log2 finite for zero-area boxes.
    raw_levels = np.floor(
        canonical_level + np.log2(roi_scales / canonical_scale + 1e-6)
    )
    return np.clip(raw_levels, k_min, k_max)


def add_multilevel_roi_blobs(blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max):
    """Add per-FPN-level RoI blobs to the ``blobs`` dict.

    Args:
        blobs: dict mapping blob names to numpy arrays; modified in place.
        blob_prefix: name prefix used for the FPN blobs.
        rois: source RoIs as a 2D numpy array of shape (N, 5); each row is
            one RoI encoded as (batch_idx, x1, y1, x2, y2).
        target_lvls: numpy array of shape (N, ) giving the FPN level each RoI
            is assigned to.
        lvl_min: the finest (highest resolution) FPN level (e.g., 2).
        lvl_max: the coarsest (lowest resolution) FPN level (e.g., 6).

    Side effects:
        Writes ``<prefix>_fpn<lvl>`` for every level in
        ``[lvl_min, lvl_max]`` and ``<prefix>_idx_restore_int32``, an int32
        permutation that restores the original RoI order from the
        level-by-level concatenation.
    """
    # Seed both accumulators with empty arrays so that the result dtypes and
    # shapes are well defined even when no level receives a RoI.
    order_chunks = [np.empty((0, ))]
    roi_chunks = [np.zeros((0, 5), dtype=np.float32)]  # for the assert below

    for lvl in range(lvl_min, lvl_max + 1):
        blob_name = blob_prefix + '_fpn' + str(lvl)
        picked = np.where(target_lvls == lvl)[0]
        level_rois = rois[picked, :]
        blobs[blob_name] = level_rois
        order_chunks.append(picked)
        roi_chunks.append(level_rois)

    stacked_order = np.concatenate(order_chunks)
    stacked_rois = np.vstack(roi_chunks)

    restore_idx = np.argsort(stacked_order).astype(np.int32, copy=False)
    blobs[blob_prefix + '_idx_restore_int32'] = restore_idx

    # Sanity check: the restore permutation must reproduce the input order.
    assert (stacked_rois[restore_idx] == rois).all()


# ---------------------------------------------------------------------------- #
# FPN level info for stages 5, 4, 3, 2 for select models (more can be added)
# ---------------------------------------------------------------------------- #

# Per-backbone FPN description: parallel `blobs`, `dims` and `spatial_scales`
# sequences, one entry per backbone stage.
FpnLevelInfo = collections.namedtuple('FpnLevelInfo', ['blobs', 'dims', 'spatial_scales'] )


def fpn_level_info_ResNet50_conv5():
    """Return the FPN level description for ResNet-50 (stages 5, 4, 3, 2)."""
    stage_blobs = ('res5_2_sum', 'res4_5_sum', 'res3_3_sum', 'res2_2_sum')
    stage_dims = (2048, 1024, 512, 256)
    stage_scales = (1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    return FpnLevelInfo(
        blobs=stage_blobs, dims=stage_dims, spatial_scales=stage_scales
    )


def fpn_level_info_ResNet101_conv5():
    """Return the FPN level description for ResNet-101 (stages 5, 4, 3, 2)."""
    stage_blobs = ('res5_2_sum', 'res4_22_sum', 'res3_3_sum', 'res2_2_sum')
    stage_dims = (2048, 1024, 512, 256)
    stage_scales = (1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    return FpnLevelInfo(
        blobs=stage_blobs, dims=stage_dims, spatial_scales=stage_scales
    )


def fpn_level_info_ResNet152_conv5():
    """Return the FPN level description for ResNet-152 (stages 5, 4, 3, 2)."""
    stage_blobs = ('res5_2_sum', 'res4_35_sum', 'res3_7_sum', 'res2_2_sum')
    stage_dims = (2048, 1024, 512, 256)
    stage_scales = (1. / 32., 1. / 16., 1. / 8., 1. / 4.)
    return FpnLevelInfo(
        blobs=stage_blobs, dims=stage_dims, spatial_scales=stage_scales
    )

2. optimizer.py

代码语言:python
复制
"""
优化 op 图构建.
Optimization operator graph construction.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging

from caffe2.python import muji

from core.config import cfg
import utils.c2 as c2_utils

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


def build_data_parallel_model(model, single_gpu_build_func):
    """Build a data-parallel model from a single-GPU model building function.

    Args:
        model: model object; ``model.train`` selects the training or the
            test-time construction path.
        single_gpu_build_func: callable invoked as
            ``single_gpu_build_func(model)`` to build the graph for one GPU.
    """
    if not model.train:
        # Test time: network ops are built on a single GPU only. The original
        # note says test-time parallelism is achieved via multiprocessing.
        with c2_utils.NamedCudaScope(0):
            single_gpu_build_func(model)
        return

    # Forward graph on every GPU.
    loss_gradients = _build_forward_graph(model, single_gpu_build_func)
    # Backward pass on every GPU.
    model.AddGradientOperators(loss_gradients)
    # With more than one GPU the gradients have to be all-reduced.
    if cfg.NUM_GPUS > 1:
        _add_allreduce_graph(model)
    # After the all-reduce every GPU applies its own SGD parameter update.
    for gpu_id in range(cfg.NUM_GPUS):
        _add_parameter_update_ops(model, gpu_id)


def _build_forward_graph(model, single_gpu_build_func):
    """Build the forward graph on each GPU and merge the loss gradients.

    Returns:
        Dict merged from the mappings that ``single_gpu_build_func(model)``
        returns for each of the ``cfg.NUM_GPUS`` GPUs.
    """
    merged_loss_gradients = {}
    for device_id in range(cfg.NUM_GPUS):
        # Build under the proper name and device scope for this GPU.
        with c2_utils.NamedCudaScope(device_id):
            per_gpu_gradients = single_gpu_build_func(model)
            merged_loss_gradients.update(per_gpu_gradients)
    return merged_loss_gradients


def _add_allreduce_graph(model):
    """Add the ops that all-reduce the per-GPU gradients.

    Needed when training on more than one GPU. Uses ``NCCLAllreduce`` when
    ``cfg.USE_NCCL`` is set and ``muji.Allreduce`` otherwise.
    """
    trainable = model.TrainableParams()
    assert len(trainable) % cfg.NUM_GPUS == 0
    # Parameters are replicated on every GPU, so this is the number of
    # distinct parameter blobs (i.e. blobs per GPU).
    blobs_per_gpu = int(len(trainable) / cfg.NUM_GPUS)

    with c2_utils.CudaScope(0):
        for offset in range(blobs_per_gpu):
            # Every blobs_per_gpu-th entry is the same parameter on the next
            # GPU; collect that parameter's gradient from all GPUs.
            replicas = trainable[offset::blobs_per_gpu]
            grads = [model.param_to_grad[p] for p in replicas]
            if not grads:
                continue
            if cfg.USE_NCCL:
                model.net.NCCLAllreduce(grads, grads)
            else:
                muji.Allreduce(model.net, grads, reduced_affix='')


def _add_parameter_update_ops(model, gpu_id):
    """Add the optimizer (momentum SGD) update ops for one GPU.

    Args:
        model: model helper providing ``param_init_net``, ``net``,
            ``TrainableParams``, ``param_to_grad``, ``biases``, ``Scale`` and
            ``WeightedSum``.
        gpu_id: id of the GPU whose parameters get update ops.
    """
    with c2_utils.NamedCudaScope(gpu_id):
        init_net = model.param_init_net
        # The learning rate starts at 0 as a dummy value.
        lr = init_net.ConstantFill([], 'lr', shape=[1], value=0.0)
        one = init_net.ConstantFill([], 'one', shape=[1], value=1.0)
        wd = init_net.ConstantFill(
            [], 'wd', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY
        )

        for param in model.TrainableParams(gpu_id=gpu_id):
            logger.info('param ' + str(param) + ' will be updated')
            grad = model.param_to_grad[param]
            # Momentum vector, initialised to zero.
            momentum = init_net.ConstantFill(
                [param], param + '_momentum', value=0.0
            )

            if param in model.biases:
                # Biases are special-cased (mostly for historical reasons):
                #   (1) no weight decay is applied
                #   (2) the learning rate is 2x higher
                model.Scale(grad, grad, scale=2.0)
            elif cfg.SOLVER.WEIGHT_DECAY > 0:
                # Apply weight decay to the non-bias weights.
                model.WeightedSum([grad, one, param, wd], grad)

            # Update grad, momentum and param in place.
            model.net.MomentumSGDUpdate(
                [grad, momentum, lr, param],
                [grad, momentum, param],
                momentum=cfg.SOLVER.MOMENTUM,
            )

3. generate_anchors.py

代码语言:python
复制
import numpy as np

# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
#    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
#    >> anchors
#
#    anchors =
#
#       -83   -39   100    56
#      -175   -87   192   104
#      -359  -183   376   200
#       -55   -55    72    72
#      -119  -119   136   136
#      -247  -247   264   264
#       -35   -79    52    96
#       -79  -167    96   184
#      -167  -343   184   360

# array([[ -83.,  -39.,  100.,   56.],
#        [-175.,  -87.,  192.,  104.],
#        [-359., -183.,  376.,  200.],
#        [ -55.,  -55.,   72.,   72.],
#        [-119., -119.,  136.,  136.],
#        [-247., -247.,  264.,  264.],
#        [ -35.,  -79.,   52.,   96.],
#        [ -79., -167.,   96.,  184.],
#        [-167., -343.,  184.,  360.]])


def generate_anchors(stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)):
    """Generate a matrix of anchor boxes in (x1, y1, x2, y2) format.

    Anchors share the centre of the reference window
    ``(0, 0, stride - 1, stride - 1)``, have (approximate) sqrt areas of the
    given ``sizes`` and the given ``aspect_ratios``.

    Args:
        stride: side length of the reference window.
        sizes: target anchor sizes; they are divided by ``stride`` to obtain
            the scales applied to the reference window.
        aspect_ratios: height/width ratios to enumerate.

    Returns:
        Float array of shape ``(len(aspect_ratios) * len(sizes), 4)``, grouped
        by aspect ratio and, within each ratio, ordered by size.
    """
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    return _generate_anchors(
        stride,
        np.array(sizes, dtype=np.float64) / stride,
        np.array(aspect_ratios, dtype=np.float64),
    )


def _generate_anchors(base_size, scales, aspect_ratios):
    """Enumerate aspect ratios x scales around the reference window.

    The reference window is ``(0, 0, base_size - 1, base_size - 1)``. It is
    first expanded into one anchor per aspect ratio, and each of those is
    then expanded into one anchor per scale.
    """
    anchor = np.array([1, 1, base_size, base_size], dtype=np.float64) - 1
    anchors = _ratio_enum(anchor, aspect_ratios)
    anchors = np.vstack(
        [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]
    )
    return anchors


def _whctrs(anchor):
    """Return width, height, x center and y center of an anchor window."""
    # Coordinates are inclusive, hence the +1 on the extents.
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Build anchor windows around a center from width and height vectors.

    Args:
        ws: 1D array of widths.
        hs: 1D array of heights, parallel to ``ws``.
        x_ctr: x coordinate of the shared center.
        y_ctr: y coordinate of the shared center.

    Returns:
        Array with one (x1, y1, x2, y2) row per (width, height) pair.
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack((
        x_ctr - 0.5 * (ws - 1),
        y_ctr - 0.5 * (hs - 1),
        x_ctr + 0.5 * (ws - 1),
        y_ctr + 0.5 * (hs - 1),
    ))
    return anchors


def _ratio_enum(anchor, ratios):
    """Enumerate one anchor per aspect ratio with respect to ``anchor``.

    Each result keeps the center of ``anchor`` and approximately its area;
    widths and heights are rounded to whole numbers.
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _scale_enum(anchor, scales):
    """Enumerate one anchor per scale with respect to ``anchor``.

    Each result keeps the center of ``anchor`` and multiplies its width and
    height by the scale.
    """
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors
本文参与 腾讯云自媒体同步曝光计划,分享自作者个人站点/博客。
原始发表:2018年04月09日,如有侵权请联系 cloudcommunity@tencent.com 删除

本文分享自 作者个人站点/博客 前往查看

如有侵权,请联系 cloudcommunity@tencent.com 删除。

本文参与 腾讯云自媒体同步曝光计划  ,欢迎热爱写作的你一起参与!

评论
登录后参与评论
0 条评论
热度
最新
推荐阅读
目录
  • Caffe2 - (三十一) Detectron 之 modeling - FPN 与 optimizer
    • 1. FPN.py
      • 2. optimizer.py
        • 3.generate_anchors.py
        相关产品与服务
        GPU 云服务器
        GPU 云服务器(Cloud GPU Service,GPU)是提供 GPU 算力的弹性计算服务,具有超强的并行计算能力,作为 IaaS 层的尖兵利器,服务于生成式AI,自动驾驶,深度学习训练、科学计算、图形图像处理、视频编解码等场景。腾讯云随时提供触手可得的算力,有效缓解您的计算压力,提升业务效率与竞争力。
        领券
        问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档