fast_rcnn_heads.py provides the network heads used for classification and bounding box prediction.
Network design:
... -> RoI    ----\
                   -> RoIFeatureXform -> box head -> box cls output -> cls loss
... -> Feature     /                             \-> box reg output -> reg loss
    Map
The Fast R-CNN head produces a feature representation of each RoI for use in bounding box classification and regression.
The box output module converts that feature representation into classification and regression predictions.
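These two pieces are composed by the model builder rather than inside this file; a minimal sketch of that wiring, using only the functions defined below (the helper name is hypothetical), might look like:
def build_fast_rcnn_branch(model, blob_in, dim_in, spatial_scale):
    # Hypothetical helper: the real call sites live in the model builder.
    blob_feat, dim_feat = add_roi_2mlp_head(model, blob_in, dim_in, spatial_scale)
    add_fast_rcnn_outputs(model, blob_feat, dim_feat)
    # Losses (and their loss gradients) are only attached in training mode.
    return add_fast_rcnn_losses(model) if model.train else None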
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from core.config import cfg
from utils.c2 import const_fill
from utils.c2 import gauss_fill
import utils.blob as blob_utils
# ---------------------------------------------------------------------------- #
# Fast R-CNN outputs and losses
# ---------------------------------------------------------------------------- #
def add_fast_rcnn_outputs(model, blob_in, dim):
"""
添加 RoI classification 和 bounding box regression 输出的 ops.
"""
model.FC(blob_in, 'cls_score', dim, model.num_classes,
weight_init=gauss_fill(0.01), bias_init=const_fill(0.0) )
if not model.train: # == if test
# Only add the softmax when testing; during training the softmax is
# combined with the label cross entropy loss for numerical stability.
model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
model.FC(blob_in, 'bbox_pred', dim, model.num_classes * 4,
weight_init=gauss_fill(0.001), bias_init=const_fill(0.0) )
def add_fast_rcnn_losses(model):
"""
添加用于 RoI classification 和 bounding box regression 的 losses 函数.
"""
cls_prob, loss_cls = model.net.SoftmaxWithLoss(['cls_score', 'labels_int32'],
['cls_prob', 'loss_cls'],
scale=1. / cfg.NUM_GPUS )
loss_bbox = model.net.SmoothL1Loss(
['bbox_pred', 'bbox_targets', 'bbox_inside_weights', 'bbox_outside_weights' ],
'loss_bbox',
scale=1. / cfg.NUM_GPUS )
loss_gradients = blob_utils.get_loss_gradients(model, [loss_cls, loss_bbox])
model.Accuracy(['cls_prob', 'labels_int32'], 'accuracy_cls')
model.AddLosses(['loss_cls', 'loss_bbox'])
model.AddMetrics('accuracy_cls')
return loss_gradients
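The bbox_inside_weights/bbox_outside_weights inputs gate which elements contribute to SmoothL1Loss. A rough NumPy sketch of the per-element math (a simplified reading of the op, with beta = 1.0 as used for the box loss here; the exact normalization is carried by the outside weights and the scale argument):
import numpy as np
def smooth_l1(pred, targets, inside_w, outside_w, beta=1.0):
    # Weighted residual; the kernel is quadratic below beta and linear above it.
    x = inside_w * (pred - targets)
    kernel = np.where(np.abs(x) < beta, 0.5 * x ** 2 / beta, np.abs(x) - 0.5 * beta)
    # Outside weights zero out background RoIs and carry any normalization.
    return (outside_w * kernel).sum()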
# ---------------------------------------------------------------------------- #
# Box heads
# ---------------------------------------------------------------------------- #
def add_roi_2mlp_head(model, blob_in, dim_in, spatial_scale):
"""
添加包含两个隐层的 ReLU MLP.
"""
hidden_dim = cfg.FAST_RCNN.MLP_HEAD_DIM
roi_size = cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
roi_feat = model.RoIFeatureTransform(blob_in,
'roi_feat',
blob_rois='rois',
method=cfg.FAST_RCNN.ROI_XFORM_METHOD,
resolution=roi_size,
sampling_ratio=cfg.FAST_RCNN.ROI_XFORM_SAMPLING_RATIO,
spatial_scale=spatial_scale )
model.FC(roi_feat, 'fc6', dim_in * roi_size * roi_size, hidden_dim)
model.Relu('fc6', 'fc6')
model.FC('fc6', 'fc7', hidden_dim, hidden_dim)
model.Relu('fc7', 'fc7')
return 'fc7', hidden_dim
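As a concrete shape check (the values below are assumed typical settings, not read from a config): with dim_in = 256, ROI_XFORM_RESOLUTION = 7 and MLP_HEAD_DIM = 1024, the flattened RoI feature feeding 'fc6' has 256 * 7 * 7 = 12544 inputs:
dim_in, roi_size, hidden_dim = 256, 7, 1024   # assumed example values
fc6_in = dim_in * roi_size * roi_size         # 12544, the input dim passed to FC('fc6')
print(fc6_in, '->', hidden_dim, '->', hidden_dim)  # 12544 -> 1024 -> 1024 -> cls/bbox outputs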
rpn_heads.py implements the RPN module.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from core.config import cfg
from modeling.generate_anchors import generate_anchors
from utils.c2 import const_fill
from utils.c2 import gauss_fill
import modeling.FPN as FPN
import utils.blob as blob_utils
# ---------------------------------------------------------------------------- #
# RPN and Faster R-CNN outputs and losses
# ---------------------------------------------------------------------------- #
def add_generic_rpn_outputs(model, blob_in, dim_in, spatial_scale_in):
"""
在 RPN 模型中,添加 RPN 输出 - objectness classification 和 bounding box regression.
FPN 的抽象使用.
"""
loss_gradients = None
if cfg.FPN.FPN_ON:
# Delegate to the FPN module
FPN.add_fpn_rpn_outputs(model, blob_in, dim_in, spatial_scale_in)
if cfg.MODEL.FASTER_RCNN:
# CollectAndDistributeFpnRpnProposals also labels proposals when in training mode
model.CollectAndDistributeFpnRpnProposals()
if model.train:
loss_gradients = FPN.add_fpn_rpn_losses(model)
else:
# Not using FPN; add RPN at a single scale
add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale_in)
if model.train:
loss_gradients = add_single_scale_rpn_losses(model)
return loss_gradients
def add_single_scale_rpn_outputs(model, blob_in, dim_in, spatial_scale):
"""
添加 RPN 输出到单 scale 模型. (i.e., no FPN).
"""
anchors = generate_anchors(stride=1. / spatial_scale,
sizes=cfg.RPN.SIZES,
aspect_ratios=cfg.RPN.ASPECT_RATIOS )
num_anchors = anchors.shape[0]
dim_out = dim_in
# RPN hidden representation
model.Conv(blob_in, 'conv_rpn', dim_in, dim_out, kernel=3, pad=1, stride=1,
weight_init=gauss_fill(0.01), bias_init=const_fill(0.0))
model.Relu('conv_rpn', 'conv_rpn')
# Proposal classification scores
model.Conv('conv_rpn', 'rpn_cls_logits', dim_in, num_anchors, kernel=1, pad=0, stride=1,
weight_init=gauss_fill(0.01), bias_init=const_fill(0.0) )
# Proposal bbox regression deltas
model.Conv('conv_rpn', 'rpn_bbox_pred', dim_in, 4 * num_anchors, kernel=1, pad=0, stride=1,
weight_init=gauss_fill(0.01), bias_init=const_fill(0.0) )
if not model.train or cfg.MODEL.FASTER_RCNN:
"""
需要 proposals 的情况:
1. 推断时(== not model.train), RPN Only 和 Faster R-CNN
2. 训练时,Faster R-CNN
其它情况(== training for RPN only), 不需要 proposals.
"""
model.net.Sigmoid('rpn_cls_logits', 'rpn_cls_probs')
model.GenerateProposals(['rpn_cls_probs', 'rpn_bbox_pred', 'im_info'],
['rpn_rois', 'rpn_roi_probs'],
anchors=anchors,
spatial_scale=spatial_scale )
if cfg.MODEL.FASTER_RCNN:
if model.train:
# Add op that generates training labels for in-network RPN proposals
model.GenerateProposalLabels(['rpn_rois', 'roidb', 'im_info'])
else:
# Alias rois to rpn_rois during inference
model.net.Alias('rpn_rois', 'rois')
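generate_anchors is defined in modeling/generate_anchors.py; the essential idea is one anchor per (size, aspect ratio) pair centered on a stride x stride cell, so num_anchors = len(SIZES) * len(ASPECT_RATIOS). A simplified NumPy sketch (not Detectron's exact rounding or enumeration order, and the sizes/ratios below are just example values):
import numpy as np
def simple_anchors(stride=16, sizes=(64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)):
    # One (x1, y1, x2, y2) box per (aspect_ratio, size) pair, centered on the first cell.
    ctr = (stride - 1) / 2.0
    anchors = []
    for ar in aspect_ratios:
        for size in sizes:
            w, h = size / np.sqrt(ar), size * np.sqrt(ar)   # keep area ~= size**2
            anchors.append([ctr - 0.5 * (w - 1), ctr - 0.5 * (h - 1),
                            ctr + 0.5 * (w - 1), ctr + 0.5 * (h - 1)])
    return np.array(anchors)
print(simple_anchors().shape)  # (12, 4): num_anchors = 12 in this example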
def add_single_scale_rpn_losses(model):
"""
添加单 scale RPN 模型的 losses(i.e., no FPN).
"""
# 空间地限定 full-sized RPN label arrays, 以匹配 feature map 尺寸shape.
model.net.SpatialNarrowAs(['rpn_labels_int32_wide', 'rpn_cls_logits'], 'rpn_labels_int32' )
for key in ('targets', 'inside_weights', 'outside_weights'):
model.net.SpatialNarrowAs(['rpn_bbox_' + key + '_wide', 'rpn_bbox_pred'], 'rpn_bbox_' + key )
loss_rpn_cls = model.net.SigmoidCrossEntropyLoss(['rpn_cls_logits', 'rpn_labels_int32'],
'loss_rpn_cls',
scale=1. / cfg.NUM_GPUS )
loss_rpn_bbox = model.net.SmoothL1Loss(['rpn_bbox_pred',
'rpn_bbox_targets',
'rpn_bbox_inside_weights',
'rpn_bbox_outside_weights'],
'loss_rpn_bbox',
beta=1. / 9.,
scale=1. / cfg.NUM_GPUS )
loss_gradients = blob_utils.get_loss_gradients(model,
[loss_rpn_cls, loss_rpn_bbox] )
model.AddLosses(['loss_rpn_cls', 'loss_rpn_bbox'])
return loss_gradients
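Two details worth noting: beta=1./9. shrinks the quadratic region of the smooth-L1 kernel relative to the Fast R-CNN box loss, and scale=1./cfg.NUM_GPUS makes the data-parallel gradient an average rather than a sum across GPUs. A tiny sketch of the latter with made-up numbers:
num_gpus = 2                     # stand-in for cfg.NUM_GPUS
per_gpu_losses = [0.8, 1.2]      # hypothetical per-GPU loss values
total = sum(loss * (1.0 / num_gpus) for loss in per_gpu_losses)
print(total)                     # 1.0, i.e. the mean of the per-GPU losses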
mask_rcnn_heads.py provides the mask prediction heads for Mask R-CNN.
Network design:
... -> RoI    ----\
                   -> RoIFeatureXform -> mask head -> mask output -> loss
... -> Feature     /
    Map
The mask head produces a feature representation of each RoI for mask prediction.
The mask output module converts that feature representation into real-valued (soft) masks.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from core.config import cfg
from utils.c2 import const_fill
from utils.c2 import gauss_fill
import modeling.ResNet as ResNet
import utils.blob as blob_utils
# ---------------------------------------------------------------------------- #
# Mask R-CNN outputs and losses
# ---------------------------------------------------------------------------- #
def add_mask_rcnn_outputs(model, blob_in, dim):
"""
添加 Mask R-CNN 输出: mask logits 或 probs.
"""
num_cls = cfg.MODEL.NUM_CLASSES if cfg.MRCNN.CLS_SPECIFIC_MASK else 1
if cfg.MRCNN.USE_FC_OUTPUT:
# Predict masks with a fully connected layer (ignore the 'fcn' in the blob name)
blob_out = model.FC(blob_in,
'mask_fcn_logits',
dim,
num_cls * cfg.MRCNN.RESOLUTION**2,
weight_init=gauss_fill(0.001),
bias_init=const_fill(0.0) )
else:
# Predict masks with a conv layer.
# Use GaussianFill for class-agnostic mask prediction; fills based on
# fan-in can be too large in this case and cause divergence.
fill = (cfg.MRCNN.CONV_INIT if cfg.MRCNN.CLS_SPECIFIC_MASK else 'GaussianFill')
blob_out = model.Conv(blob_in,
'mask_fcn_logits',
dim,
num_cls,
kernel=1,
pad=0,
stride=1,
weight_init=(fill, {'std': 0.001}),
bias_init=const_fill(0.0) )
if cfg.MRCNN.UPSAMPLE_RATIO > 1:
# Upsample the mask logits via bilinear interpolation.
blob_out = model.BilinearInterpolation('mask_fcn_logits',
'mask_fcn_logits_up',
num_cls,
num_cls,
cfg.MRCNN.UPSAMPLE_RATIO )
if not model.train: # == if test
blob_out = model.net.Sigmoid(blob_out, 'mask_fcn_probs')
return blob_out
def add_mask_rcnn_losses(model, blob_mask):
"""
添加 Mask R-CNN losses."""
loss_mask = model.net.SigmoidCrossEntropyLoss([blob_mask, 'masks_int32'],
'loss_mask',
scale=1. / cfg.NUM_GPUS * cfg.MRCNN.WEIGHT_LOSS_MASK )
loss_gradients = blob_utils.get_loss_gradients(model, [loss_mask])
model.AddLosses('loss_mask')
return loss_gradients
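SigmoidCrossEntropyLoss here is a per-pixel binary cross entropy on the mask logits against masks_int32. Roughly, as a NumPy sketch (assuming negative target values mark ignored pixels, and omitting the op's exact normalization):
import numpy as np
def mask_xent(logits, targets):
    # Per-pixel binary cross entropy on logits; pixels with target < 0 are skipped.
    valid = targets >= 0
    p = 1.0 / (1.0 + np.exp(-logits))
    xent = -(targets * np.log(p) + (1 - targets) * np.log(1 - p))
    return xent[valid].mean() if valid.any() else 0.0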
# ---------------------------------------------------------------------------- #
# Mask heads
# ---------------------------------------------------------------------------- #
def mask_rcnn_fcn_head_v1up4convs(model, blob_in, dim_in, spatial_scale):
"""v1up design: 4 * (conv 3x3), convT 2x2."""
return mask_rcnn_fcn_head_v1upXconvs(model, blob_in, dim_in, spatial_scale, 4)
def mask_rcnn_fcn_head_v1up(model, blob_in, dim_in, spatial_scale):
"""v1up design: 2 * (conv 3x3), convT 2x2."""
return mask_rcnn_fcn_head_v1upXconvs(model, blob_in, dim_in, spatial_scale, 2)
def mask_rcnn_fcn_head_v1upXconvs(model, blob_in, dim_in, spatial_scale, num_convs):
"""v1upXconvs design: X * (conv 3x3), convT 2x2."""
current = model.RoIFeatureTransform(blob_in,
blob_out='_[mask]_roi_feat',
blob_rois='mask_rois',
method=cfg.MRCNN.ROI_XFORM_METHOD,
resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
spatial_scale=spatial_scale )
dilation = cfg.MRCNN.DILATION
dim_inner = cfg.MRCNN.DIM_REDUCED
for i in range(num_convs):
current = model.Conv(current,
'_[mask]_fcn' + str(i + 1),
dim_in,
dim_inner,
kernel=3,
pad=1 * dilation,
stride=1,
weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
bias_init=('ConstantFill', {'value': 0.}) )
current = model.Relu(current, current)
dim_in = dim_inner
# upsample layer
model.ConvTranspose(current,
'conv5_mask',
dim_inner,
dim_inner,
kernel=2,
pad=0,
stride=2,
weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}),
bias_init=const_fill(0.0) )
blob_mask = model.Relu('conv5_mask', 'conv5_mask')
return blob_mask, dim_inner
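A shape trace with assumed example values (ROI_XFORM_RESOLUTION = 14, DIM_REDUCED = 256, num_convs = 4): the 3x3 convs keep the 14x14 grid (pad = dilation, stride 1), and the 2x2 stride-2 ConvTranspose doubles it:
R = 14                                   # assumed RoI feature resolution
deconv_out = (R - 1) * 2 + 2             # ConvTranspose: kernel 2, stride 2, pad 0
print(deconv_out)                        # 28: mask logits are predicted on a 28x28 grid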
def mask_rcnn_fcn_head_v0upshare(model, blob_in, dim_in, spatial_scale):
"""
采用 ResNet "conv5" / "stage5" head 来预测 mask.
与 conv5 box head 的权重参数和计算是共享的.
由于推断inference阶段 是级联的,只有训练阶段才共享计算.
v0upshare design: conv5, convT 2x2.
"""
# 由于 box 和 mask head 是共享的, 因此二者需要匹配.
assert cfg.MRCNN.ROI_XFORM_RESOLUTION == cfg.FAST_RCNN.ROI_XFORM_RESOLUTION
if model.train: # share computation with the bbox head at training time
dim_conv5 = 2048
blob_conv5 = model.net.SampleAs(['res5_2_sum', 'roi_has_mask_int32'],
['_[mask]_res5_2_sum_sliced'] )
else: # re-compute at test time
blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(model,
blob_in,
dim_in,
spatial_scale )
dim_reduced = cfg.MRCNN.DIM_REDUCED
blob_mask = model.ConvTranspose(blob_conv5,
'conv5_mask',
dim_conv5,
dim_reduced,
kernel=2,
pad=0,
stride=2,
weight_init=(cfg.MRCNN.CONV_INIT, {'std': 0.001}), # std only for gauss
bias_init=const_fill(0.0) )
model.Relu('conv5_mask', 'conv5_mask')
return blob_mask, dim_reduced
def mask_rcnn_fcn_head_v0up(model, blob_in, dim_in, spatial_scale):
"""v0up design: conv5, deconv 2x2 (no weight sharing with the box head)."""
blob_conv5, dim_conv5 = add_ResNet_roi_conv5_head_for_masks(model,
blob_in,
dim_in,
spatial_scale )
dim_reduced = cfg.MRCNN.DIM_REDUCED
model.ConvTranspose(blob_conv5,
'conv5_mask',
dim_conv5,
dim_reduced,
kernel=2,
pad=0,
stride=2,
weight_init=('GaussianFill', {'std': 0.001}),
bias_init=const_fill(0.0) )
blob_mask = model.Relu('conv5_mask', 'conv5_mask')
return blob_mask, dim_reduced
def add_ResNet_roi_conv5_head_for_masks(model, blob_in, dim_in, spatial_scale):
"""
添加 ResNet "conv5" / "stage5" head 以预测 masks.
"""
model.RoIFeatureTransform(blob_in,
blob_out='_[mask]_pool5',
blob_rois='mask_rois',
method=cfg.MRCNN.ROI_XFORM_METHOD,
resolution=cfg.MRCNN.ROI_XFORM_RESOLUTION,
sampling_ratio=cfg.MRCNN.ROI_XFORM_SAMPLING_RATIO,
spatial_scale=spatial_scale )
dilation = cfg.MRCNN.DILATION
stride_init = int(cfg.MRCNN.ROI_XFORM_RESOLUTION / 7) # by default: 2
s, dim_in = ResNet.add_stage(model,
'_[mask]_res5',
'_[mask]_pool5',
3,
dim_in,
2048,
512,
dilation,
stride_init=stride_init )
return s, 2048
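stride_init keeps the head's output at 7x7 regardless of the RoI resolution, assuming the standard stage5 layout in which only the first residual block strides:
for roi_xform_resolution in (7, 14):                  # example resolutions
    stride_init = int(roi_xform_resolution / 7)       # 1 or 2
    print(roi_xform_resolution, '->', roi_xform_resolution // stride_init)  # both end at 7x7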
keypoint_rcnn_heads.py provides the keypoint prediction heads for Mask R-CNN.
Network design:
... -> RoI    ----\
                   -> RoIFeatureXform -> keypoint head -> keypoint output -> loss
... -> Feature     /
    Map
The keypoint head produces a feature representation of each RoI for keypoint prediction.
The keypoint output module converts that feature representation into keypoint heatmaps.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from core.config import cfg
from utils.c2 import const_fill
from utils.c2 import gauss_fill
import modeling.ResNet as ResNet
import utils.blob as blob_utils
# ---------------------------------------------------------------------------- #
# Keypoint R-CNN outputs and losses
# ---------------------------------------------------------------------------- #
def add_keypoint_outputs(model, blob_in, dim):
"""
添加 Mask R-CNN keypoint 输出: keypoint heatmaps.
"""
# NxKxHxW
upsample_heatmap = (cfg.KRCNN.UP_SCALE > 1)
if cfg.KRCNN.USE_DECONV:
# Apply ConvTranspose to the feature representation; results in 2x upsampling
blob_in = model.ConvTranspose(blob_in,
'kps_deconv',
dim,
cfg.KRCNN.DECONV_DIM,
kernel=cfg.KRCNN.DECONV_KERNEL,
pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
stride=2,
weight_init=gauss_fill(0.01),
bias_init=const_fill(0.0) )
model.Relu('kps_deconv', 'kps_deconv')
dim = cfg.KRCNN.DECONV_DIM
if upsample_heatmap:
blob_name = 'kps_score_lowres'
else:
blob_name = 'kps_score'
if cfg.KRCNN.USE_DECONV_OUTPUT:
# Use ConvTranspose to predict heatmaps; results in 2x upsampling
blob_out = model.ConvTranspose(blob_in,
blob_name,
dim,
cfg.KRCNN.NUM_KEYPOINTS,
kernel=cfg.KRCNN.DECONV_KERNEL,
pad=int(cfg.KRCNN.DECONV_KERNEL / 2 - 1),
stride=2,
weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
bias_init=const_fill(0.0) )
else:
# Use Conv to predict heatmaps; no upsampling between conv and heatmaps
blob_out = model.Conv(blob_in,
blob_name,
dim,
cfg.KRCNN.NUM_KEYPOINTS,
kernel=1,
pad=0,
stride=1,
weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.001}),
bias_init=const_fill(0.0) )
if upsample_heatmap:
# Increase the size of the output heatmaps via bilinear upsampling
blob_out = model.BilinearInterpolation(blob_out,
'kps_score',
cfg.KRCNN.NUM_KEYPOINTS,
cfg.KRCNN.NUM_KEYPOINTS,
cfg.KRCNN.UP_SCALE )
return blob_out
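The pad = DECONV_KERNEL / 2 - 1 choice makes the stride-2 ConvTranspose exactly double the spatial size, and the optional bilinear step scales it again by UP_SCALE. A quick check with an assumed 14x14 input and kernel size 4:
def deconv_out(size_in, kernel, stride=2):
    pad = kernel // 2 - 1
    return (size_in - 1) * stride - 2 * pad + kernel
print(deconv_out(14, 4))   # 28, i.e. exactly 2x; bilinear UP_SCALE then multiplies this again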
def add_keypoint_losses(model):
"""
添加 Mask R-CNN keypoint losses.
"""
# 将输入input 从 (N, K, H, W) reshape 为 (NK, HW)
model.net.Reshape(['kps_score'],
['kps_score_reshaped', '_kps_score_old_shape'],
shape=(-1, cfg.KRCNN.HEATMAP_SIZE * cfg.KRCNN.HEATMAP_SIZE) )
# Softmax across **space** (woahh....space!)
# Note: this is not what is commonly called "spatial softmax"
# (i.e., softmax applied along the channel dimension at each spatial location);
# here softmax is applied over a set of spatial locations (i.e., each spatial location is a "class").
kps_prob, loss_kps = model.net.SoftmaxWithLoss(
['kps_score_reshaped', 'keypoint_locations_int32', 'keypoint_weights'],
['kps_prob', 'loss_kps'],
scale=cfg.KRCNN.LOSS_WEIGHT / cfg.NUM_GPUS,
spatial=0 )
if not cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
"""
讨论:
softmax loss 会根据 keypoint_weights 的和来对 loss 求平均,进行归一化,
如所有可见visible keypoints visible 的总和.
由于可见 keypoints 的数量在不同的 minibatches 变化很大,
这就对于只有少量可见 keypoints 的 minibatches 的重要性会有加权up-weighting影响.
(假设极端情况时,只有一个可见keypoint 和 N 个可见 keypoints:
N 个 keypoints时,每个对于梯度计算的作用是 1/N;
而 1 个 keypoint时,其决定着梯度方向.)
因而,如果full minibatch 内,所有的 keypoints 都是可见的,
则可以采用 keypoints 的总数来归一化 loss.
(意味着,一个可见 keypoint 与 N 个 keypoints 中的每个 keypoint 的作用效果相同.)
"""
model.StopGradient('keypoint_loss_normalizer',
'keypoint_loss_normalizer' )
loss_kps = model.net.Mul(['loss_kps', 'keypoint_loss_normalizer'],
'loss_kps_normalized' )
loss_gradients = blob_utils.get_loss_gradients(model, [loss_kps])
model.AddLosses(loss_kps)
return loss_gradients
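A NumPy sketch of the default (visible-keypoint-normalized) behavior: softmax over the H*W locations of each keypoint's heatmap, cross entropy at the ground-truth location, averaged over visible keypoints (a simplified reading of SoftmaxWithLoss with spatial=0 and per-row weights):
import numpy as np
def keypoint_softmax_loss(kps_score, gt_index, weight):
    # kps_score: (N*K, H*W) logits; gt_index: (N*K,) flattened GT locations;
    # weight: (N*K,) 1 for visible keypoints, 0 otherwise.
    shifted = kps_score - kps_score.max(axis=1, keepdims=True)
    log_prob = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    nll = -log_prob[np.arange(len(gt_index)), gt_index]
    return (weight * nll).sum() / max(weight.sum(), 1.0)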
# ---------------------------------------------------------------------------- #
# Keypoint heads
# ---------------------------------------------------------------------------- #
def add_ResNet_roi_conv5_head_for_keypoints(model, blob_in, dim_in, spatial_scale):
"""
添加 ResNet "conv5" / "stage5" head 用于 Mask R-CNN keypoint 预测.
"""
model.RoIFeatureTransform(blob_in,
'_[pose]_pool5',
blob_rois='keypoint_rois',
method=cfg.KRCNN.ROI_XFORM_METHOD,
resolution=cfg.KRCNN.ROI_XFORM_RESOLUTION,
sampling_ratio=cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO,
spatial_scale=spatial_scale )
"""
采用前缀'_[pose]_' 到 'res5',在 utils.net.initialize_from_weights_file 给定 pretrained 'res5' 参数时,可以采用其参数对 head 的参数进行初始化.
"""
s, dim_in = ResNet.add_stage(model,
'_[pose]_res5',
'_[pose]_pool5',
3,
dim_in,
2048,
512,
cfg.KRCNN.DILATION,
stride_init=int(cfg.KRCNN.ROI_XFORM_RESOLUTION / 7) )
return s, 2048
def add_roi_pose_head_v1convX(model, blob_in, dim_in, spatial_scale):
"""
添加 Mask R-CNN keypoint head.
v1convX design: X * (conv).
"""
hidden_dim = cfg.KRCNN.CONV_HEAD_DIM
kernel_size = cfg.KRCNN.CONV_HEAD_KERNEL
pad_size = kernel_size // 2
current = model.RoIFeatureTransform(
blob_in,
'_[pose]_roi_feat',
blob_rois='keypoint_rois',
method=cfg.KRCNN.ROI_XFORM_METHOD,
resolution=cfg.KRCNN.ROI_XFORM_RESOLUTION,
sampling_ratio=cfg.KRCNN.ROI_XFORM_SAMPLING_RATIO,
spatial_scale=spatial_scale )
for i in range(cfg.KRCNN.NUM_STACKED_CONVS):
current = model.Conv(current,
'conv_fcn' + str(i + 1),
dim_in,
hidden_dim,
kernel_size,
stride=1,
pad=pad_size,
weight_init=(cfg.KRCNN.CONV_INIT, {'std': 0.01}),
bias_init=('ConstantFill', {'value': 0.}) )
current = model.Relu(current, current)
dim_in = hidden_dim
return current, hidden_dim
"""
RetinaNet model heads 和 losses.
参见: https://arxiv.org/abs/1708.02002.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import numpy as np
from core.config import cfg
import utils.blob as blob_utils
def get_retinanet_bias_init(model):
"""
初始化 conv ops 的 biases,该 conv ops 输出类别概率预测结果.
在训练开始时,进行初始化,所有位置被预测为 background类别,且概率很大
(e.g., ~0.99 = 1 - cfg.RETINANET.PRIOR_PROB)
细节参考 Focal Loss 论文.
"""
prior_prob = cfg.RETINANET.PRIOR_PROB
scales_per_octave = cfg.RETINANET.SCALES_PER_OCTAVE
aspect_ratios = len(cfg.RETINANET.ASPECT_RATIOS)
if cfg.RETINANET.SOFTMAX:
# Multiclass softmax case
bias = np.zeros((model.num_classes, 1), dtype=np.float32)
bias[0] = np.log((model.num_classes - 1) * (1 - prior_prob) / (prior_prob))
bias = np.vstack([bias for _ in range(scales_per_octave * aspect_ratios)])
bias_init = ('GivenTensorFill',
{'values': bias.astype(dtype=np.float32)} )
else:
# Per-class sigmoid (binary classification) case
bias_init = ('ConstantFill',
{'value': -np.log((1 - prior_prob) / prior_prob) } )
return bias_init
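For the sigmoid case this is the standard focal-loss prior trick: with bias b = -log((1 - pi) / pi), sigmoid(b) = pi, so every anchor initially predicts foreground with probability pi. A quick check with the paper's pi = 0.01 (an assumed value of cfg.RETINANET.PRIOR_PROB):
import numpy as np
prior_prob = 0.01
b = -np.log((1 - prior_prob) / prior_prob)
print(b)                                # about -4.595
print(1.0 / (1.0 + np.exp(-b)))         # 0.01: predictions start out as confident background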
def add_fpn_retinanet_outputs(model, blobs_in, dim_in, spatial_scales):
"""
RetinaNet head.
对于 classification 和 box regression 问题,
可以选择 same conv tower 或 a separate tower.
"bl_feat_list" 存储 bbox 预测的 feature blobs 列表.
如果共享 tower,则这些 blobs 可以共享 cls feature blobs;
否则,这些 blobs 是不同的 blobs.
"""
dim_out = dim_in
k_max = cfg.FPN.RPN_MAX_LEVEL # coarsest level of pyramid
k_min = cfg.FPN.RPN_MIN_LEVEL # finest level of pyramid
A = len(cfg.RETINANET.ASPECT_RATIOS) * cfg.RETINANET.SCALES_PER_OCTAVE
# compute the bias initialization
bias_init = get_retinanet_bias_init(model)
assert len(blobs_in) == k_max - k_min + 1
bbox_feat_list = []
cls_pred_dim = (
model.num_classes if cfg.RETINANET.SOFTMAX else (model.num_classes - 1)
)
# unpacked bbox feature and add prediction layers
bbox_regr_dim = (
4 * (model.num_classes - 1) if cfg.RETINANET.CLASS_SPECIFIC_BBOX else 4
)
# ==========================================================================
# classification tower with logits and prob prediction
# ==========================================================================
for lvl in range(k_min, k_max + 1):
bl_in = blobs_in[k_max - lvl] # blobs_in is in reversed order
# classification tower stack convolution starts
for nconv in range(cfg.RETINANET.NUM_CONVS):
suffix = 'n{}_fpn{}'.format(nconv, lvl)
dim_in, dim_out = dim_in, dim_in
if lvl == k_min:
bl_out = model.Conv(bl_in,
'retnet_cls_conv_' + suffix,
dim_in,
dim_out,
3,
stride=1,
pad=1,
weight_init=('GaussianFill', {'std': 0.01}),
bias_init=('ConstantFill', {'value': 0.}) )
else:
bl_out = model.ConvShared(
bl_in,
'retnet_cls_conv_' + suffix,
dim_in,
dim_out,
3,
stride=1,
pad=1,
weight='retnet_cls_conv_n{}_fpn{}_w'.format(nconv, k_min),
bias='retnet_cls_conv_n{}_fpn{}_b'.format(nconv, k_min)
)
bl_in = model.Relu(bl_out, bl_out)
bl_feat = bl_in
# cls tower stack convolution ends. Add the logits layer now
if lvl == k_min:
retnet_cls_pred = model.Conv(bl_feat,
'retnet_cls_pred_fpn{}'.format(lvl),
dim_in,
cls_pred_dim * A,
3,
pad=1,
stride=1,
weight_init=('GaussianFill', {'std': 0.01}),
bias_init=bias_init )
else:
retnet_cls_pred = model.ConvShared(
bl_feat,
'retnet_cls_pred_fpn{}'.format(lvl),
dim_in,
cls_pred_dim * A,
3,
pad=1,
stride=1,
weight='retnet_cls_pred_fpn{}_w'.format(k_min),
bias='retnet_cls_pred_fpn{}_b'.format(k_min)
)
if not model.train:
if cfg.RETINANET.SOFTMAX:
model.net.GroupSpatialSoftmax(retnet_cls_pred,
'retnet_cls_prob_fpn{}'.format(lvl),
num_classes=cls_pred_dim )
else:
model.net.Sigmoid(retnet_cls_pred,
'retnet_cls_prob_fpn{}'.format(lvl) )
if cfg.RETINANET.SHARE_CLS_BBOX_TOWER:
bbox_feat_list.append(bl_feat)
# ==========================================================================
# bbox tower if not sharing features with the classification tower with
# logits and prob prediction
# ==========================================================================
if not cfg.RETINANET.SHARE_CLS_BBOX_TOWER:
for lvl in range(k_min, k_max + 1):
bl_in = blobs_in[k_max - lvl] # blobs_in is in reversed order
for nconv in range(cfg.RETINANET.NUM_CONVS):
suffix = 'n{}_fpn{}'.format(nconv, lvl)
dim_in, dim_out = dim_in, dim_in
if lvl == k_min:
bl_out = model.Conv(bl_in,
'retnet_bbox_conv_' + suffix,
dim_in,
dim_out,
3,
stride=1,
pad=1,
weight_init=('GaussianFill', {'std': 0.01}),
bias_init=('ConstantFill', {'value': 0. }))
else:
bl_out = model.ConvShared(
bl_in,
'retnet_bbox_conv_' + suffix,
dim_in,
dim_out,
3,
stride=1,
pad=1,
weight='retnet_bbox_conv_n{}_fpn{}_w'.format(nconv, k_min),
bias='retnet_bbox_conv_n{}_fpn{}_b'.format(nconv, k_min) )
bl_in = model.Relu(bl_out, bl_out)
# Add octave scales and aspect ratio
# At least 1 convolution for dealing different aspect ratios
bl_feat = bl_in
bbox_feat_list.append(bl_feat)
# Depending on the features [shared/separate] for bbox, add prediction layer
for i, lvl in enumerate(range(k_min, k_max + 1)):
bbox_pred = 'retnet_bbox_pred_fpn{}'.format(lvl)
bl_feat = bbox_feat_list[i]
if lvl == k_min:
model.Conv(bl_feat,
bbox_pred,
dim_in,
bbox_regr_dim * A,
3,
pad=1,
stride=1,
weight_init=('GaussianFill', {'std': 0.01}),
bias_init=('ConstantFill', {'value': 0.}))
else:
model.ConvShared(bl_feat,
bbox_pred,
dim_in,
bbox_regr_dim * A,
3,
pad=1,
stride=1,
weight='retnet_bbox_pred_fpn{}_w'.format(k_min),
bias='retnet_bbox_pred_fpn{}_b'.format(k_min) )
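Per-location output channel counts, with assumed example values (3 aspect ratios, 3 scales per octave, 81 COCO classes, sigmoid classification, class-agnostic boxes):
num_classes, num_aspect_ratios, scales_per_octave = 81, 3, 3
A = num_aspect_ratios * scales_per_octave     # 9 anchors per spatial location
cls_channels = A * (num_classes - 1)          # 720 channels in retnet_cls_pred_fpn*
bbox_channels = A * 4                         # 36 channels in retnet_bbox_pred_fpn*
print(A, cls_channels, bbox_channels)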
def add_fpn_retinanet_losses(model):
loss_gradients = {}
gradients, losses = [], []
k_max = cfg.FPN.RPN_MAX_LEVEL # coarsest level of pyramid
k_min = cfg.FPN.RPN_MIN_LEVEL # finest level of pyramid
model.AddMetrics(['retnet_fg_num', 'retnet_bg_num'])
# ==========================================================================
# bbox regression loss - SelectSmoothL1Loss for multiple anchors at a location
# ==========================================================================
for lvl in range(k_min, k_max + 1):
suffix = 'fpn{}'.format(lvl)
bbox_loss = model.net.SelectSmoothL1Loss(
['retnet_bbox_pred_' + suffix,
'retnet_roi_bbox_targets_' + suffix,
'retnet_roi_fg_bbox_locs_' + suffix, 'retnet_fg_num' ],
'retnet_loss_bbox_' + suffix,
beta=cfg.RETINANET.BBOX_REG_BETA,
scale=model.GetLossScale() * cfg.RETINANET.BBOX_REG_WEIGHT )
gradients.append(bbox_loss)
losses.append('retnet_loss_bbox_' + suffix)
# ==========================================================================
# cls loss - depends on softmax/sigmoid outputs
# ==========================================================================
for lvl in range(k_min, k_max + 1):
suffix = 'fpn{}'.format(lvl)
cls_lvl_logits = 'retnet_cls_pred_' + suffix
if not cfg.RETINANET.SOFTMAX:
cls_focal_loss = model.net.SigmoidFocalLoss(
[cls_lvl_logits,
'retnet_cls_labels_' + suffix,
'retnet_fg_num'],
['fl_{}'.format(suffix)],
gamma=cfg.RETINANET.LOSS_GAMMA,
alpha=cfg.RETINANET.LOSS_ALPHA,
scale=model.GetLossScale(),
num_classes=model.num_classes - 1 )
gradients.append(cls_focal_loss)
losses.append('fl_{}'.format(suffix))
else:
cls_focal_loss, gated_prob = model.net.SoftmaxFocalLoss(
[cls_lvl_logits,
'retnet_cls_labels_' + suffix,
'retnet_fg_num' ],
['fl_{}'.format(suffix), 'retnet_prob_{}'.format(suffix)],
gamma=cfg.RETINANET.LOSS_GAMMA,
alpha=cfg.RETINANET.LOSS_ALPHA,
scale=model.GetLossScale(),
num_classes=model.num_classes )
gradients.append(cls_focal_loss)
losses.append('fl_{}'.format(suffix))
loss_gradients.update(blob_utils.get_loss_gradients(model, gradients))
model.AddLosses(losses)
return loss_gradients
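The sigmoid branch implements the focal loss FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t), additionally normalized by the number of foreground anchors (which the sketch below omits). A per-anchor, per-class NumPy sketch with the paper's gamma = 2, alpha = 0.25:
import numpy as np
def sigmoid_focal_loss(logit, is_fg, gamma=2.0, alpha=0.25):
    p = 1.0 / (1.0 + np.exp(-logit))
    p_t = p if is_fg else 1.0 - p
    alpha_t = alpha if is_fg else 1.0 - alpha
    return -alpha_t * (1.0 - p_t) ** gamma * np.log(p_t)
print(sigmoid_focal_loss(-4.0, is_fg=False))  # easy negative: ~4e-6, heavily down-weighted
print(sigmoid_focal_loss(-4.0, is_fg=True))   # hard positive: ~0.97, keeps most of its CE loss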
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from core.config import cfg
from utils.c2 import const_fill
from utils.c2 import gauss_fill
# ---------------------------------------------------------------------------- #
# R-FCN outputs and losses
# ---------------------------------------------------------------------------- #
def add_rfcn_outputs(model, blob_in, dim_in, dim_reduce, spatial_scale):
if dim_reduce is not None:
# Optional dim reduction
# 1x1 conv
blob_in = model.Conv(blob_in,
'conv_dim_reduce',
dim_in,
dim_reduce,
kernel=1,
pad=0,
stride=1,
weight_init=gauss_fill(0.01),
bias_init=const_fill(0.0) )
blob_in = model.Relu(blob_in, blob_in)
dim_in = dim_reduce
# Classification conv
model.Conv(blob_in,
'conv_cls',
dim_in,
model.num_classes * cfg.RFCN.PS_GRID_SIZE**2,
kernel=1,
pad=0,
stride=1,
weight_init=gauss_fill(0.01),
bias_init=const_fill(0.0) )
# Bounding-box regression conv
num_bbox_reg_classes = (2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else model.num_classes )
model.Conv(blob_in,
'conv_bbox_pred',
dim_in,
4 * num_bbox_reg_classes * cfg.RFCN.PS_GRID_SIZE**2,
kernel=1,
pad=0,
stride=1,
weight_init=gauss_fill(0.01),
bias_init=const_fill(0.0) )
# Classification PS RoI pooling
model.net.PSRoIPool(['conv_cls', 'rois'],
['psroipooled_cls', '_mapping_channel_cls'],
group_size=cfg.RFCN.PS_GRID_SIZE,
output_dim=model.num_classes,
spatial_scale=spatial_scale )
model.AveragePool('psroipooled_cls',
'cls_score_4d',
kernel=cfg.RFCN.PS_GRID_SIZE )
model.net.Reshape('cls_score_4d',
['cls_score', '_cls_scores_shape'],
shape=(-1, cfg.MODEL.NUM_CLASSES) )
if not model.train:
model.Softmax('cls_score', 'cls_prob', engine='CUDNN')
# Bbox regression PS RoI pooling
model.net.PSRoIPool(['conv_bbox_pred', 'rois'],
['psroipooled_bbox', '_mapping_channel_bbox'],
group_size=cfg.RFCN.PS_GRID_SIZE,
output_dim=4 * num_bbox_reg_classes,
spatial_scale=spatial_scale )
model.AveragePool('psroipooled_bbox',
'bbox_pred',
kernel=cfg.RFCN.PS_GRID_SIZE )
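A channel/shape sketch for the position-sensitive pooling, with assumed example values (PS_GRID_SIZE k = 7, 81 classes, class-agnostic box regression so num_bbox_reg_classes = 2):
k, num_classes, num_bbox_reg_classes = 7, 81, 2
conv_cls_channels = num_classes * k * k                  # 3969 position-sensitive score maps
conv_bbox_channels = 4 * num_bbox_reg_classes * k * k    # 392
# PSRoIPool output per RoI is (num_classes, k, k); each (i, j) bin pools only from the
# channel group assigned to that grid position. AveragePool over the k x k grid then
# gives one score per class ('cls_score') and 4 * num_bbox_reg_classes box deltas.
print(conv_cls_channels, conv_bbox_channels)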