FPN module.
"""
Feature Pyramid Network (FPN) 使用的相关函数.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import collections
import numpy as np
from core.config import cfg
from modeling.generate_anchors import generate_anchors
from utils.c2 import const_fill
from utils.c2 import gauss_fill
import modeling.ResNet as ResNet
import utils.blob as blob_utils
import utils.boxes as box_utils
"""
Backbone 骨干网络的最低(lowest) 和最高(highest) 金字塔(pyramid) 层(level).
对于 FPN, 这里假设所有的网络都有 5 个空间维度的减少(spatial reductions),
每一个减少的因子都是 2.
Level 1 对应于输入图片,此时使用时是没有意义的.
"""
LOWEST_BACKBONE_LVL = 2 # E.g., "conv2"-like level
HIGHEST_BACKBONE_LVL = 5 # E.g., "conv5"-like level
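# (Illustration, not in the original source) Under this assumption, backbone
# level k has feature stride 2**k: level 2 -> stride 4 (scale 1/4), level 3 ->
# stride 8 (1/8), level 4 -> stride 16 (1/16), level 5 -> stride 32 (1/32).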
# ---------------------------------------------------------------------------- #
# FPN with ResNet
# ---------------------------------------------------------------------------- #
def add_fpn_ResNet50_conv5_body(model):
    return add_fpn_onto_conv_body(
        model, ResNet.add_ResNet50_conv5_body, fpn_level_info_ResNet50_conv5
    )
def add_fpn_ResNet50_conv5_P2only_body(model):
    return add_fpn_onto_conv_body(
        model,
        ResNet.add_ResNet50_conv5_body,
        fpn_level_info_ResNet50_conv5,
        P2only=True
    )
def add_fpn_ResNet101_conv5_body(model):
    return add_fpn_onto_conv_body(
        model, ResNet.add_ResNet101_conv5_body, fpn_level_info_ResNet101_conv5
    )
def add_fpn_ResNet101_conv5_P2only_body(model):
    return add_fpn_onto_conv_body(
        model,
        ResNet.add_ResNet101_conv5_body,
        fpn_level_info_ResNet101_conv5,
        P2only=True
    )
def add_fpn_ResNet152_conv5_body(model):
    return add_fpn_onto_conv_body(
        model, ResNet.add_ResNet152_conv5_body, fpn_level_info_ResNet152_conv5
    )
def add_fpn_ResNet152_conv5_P2only_body(model):
    return add_fpn_onto_conv_body(
        model,
        ResNet.add_ResNet152_conv5_body,
        fpn_level_info_ResNet152_conv5,
        P2only=True
    )
# ---------------------------------------------------------------------------- #
# Functions for adding FPN to a backbone network
# ---------------------------------------------------------------------------- #
def add_fpn_onto_conv_body(model, conv_body_func, fpn_level_info_func, P2only=False):
"""
添加指定的 conv body 到模型model, 然后再往模型添加 FPN levels.
"""
"""
注意事项:
blobs_conv 是倒序方式:[fpn5, fpn4, fpn3, fpn2]
类似地,dims_conv:[2048, 1024, 512, 256]
类似地,spatial_scales_fpn:[1/32, 1/16, 1/8, 1/4]
"""
conv_body_func(model)
blobs_fpn, dim_fpn, spatial_scales_fpn = add_fpn(model, fpn_level_info_func())
if P2only:
        # Use only the finest level
return blobs_fpn[-1], dim_fpn, spatial_scales_fpn[-1]
else:
        # Use all levels
return blobs_fpn, dim_fpn, spatial_scales_fpn
def add_fpn(model, fpn_level_info):
"""
基于 FPN 论文叙述的模型,添加 FPN 连接.
"""
"""
FPN levels 是从骨干backbone 网络的 highest/coarest level(通常为 conv5) 开始构建的.
首先向下,递归地(recursively)构建 lower/finer 分辨率的 FPN levels;
然后向上,构建比起始 level higher/coarser 分辨率的 FPN levels.
"""
fpn_dim = cfg.FPN.DIM
min_level, max_level = get_min_max_levels()
"""
计算从 coarest backbone 阶段stage(通常是 "conv5"-like level) 生成 FPN levels时,
backbone 阶段stages 数.
如,如果 backbone levels info 定义了 4 stages:"conv5", "conv4", "conv3", "conv2",且 min_level=2,
那么,将会添加 FPN 上的 backbone stages 数为:4 - (2 - 2) = 4
"""
num_backbone_stages = (len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL) )
lateral_input_blobs = fpn_level_info.blobs[:num_backbone_stages]
output_blobs = ['fpn_inner_{}'.format(s) for s in fpn_level_info.blobs[:num_backbone_stages] ]
fpn_dim_lateral = fpn_level_info.dims
xavier_fill = ('XavierFill', {})
    # For the coarsest backbone level: 1x1 conv only seeds recursion
model.Conv(lateral_input_blobs[0],
output_blobs[0],
dim_in=fpn_dim_lateral[0],
dim_out=fpn_dim,
kernel=1,
pad=0,
stride=1,
weight_init=xavier_fill,
bias_init=const_fill(0.0) )
#
    # Step 1: recursively build down starting from the coarsest backbone level
#
    # For other levels, add top-down and lateral connections
for i in range(num_backbone_stages - 1):
        add_topdown_lateral_module(
            model,
            output_blobs[i],             # top-down blob
            lateral_input_blobs[i + 1],  # lateral blob
            output_blobs[i + 1],         # next output blob
            fpn_dim,                     # output dimension
            fpn_dim_lateral[i + 1]       # lateral input dimension
        )
# Post-hoc scale-specific 3x3 convs
blobs_fpn = []
spatial_scales = []
for i in range(num_backbone_stages):
        fpn_blob = model.Conv(
            output_blobs[i],
            'fpn_{}'.format(fpn_level_info.blobs[i]),
            dim_in=fpn_dim,
            dim_out=fpn_dim,
            kernel=3,
            pad=1,
            stride=1,
            weight_init=xavier_fill,
            bias_init=const_fill(0.0)
        )
blobs_fpn += [fpn_blob]
spatial_scales += [fpn_level_info.spatial_scales[i]]
#
    # Step 2: build up starting from the coarsest backbone level
#
    # Check if we need the P6 feature map
if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from the CVPR'17 FPN paper
P6_blob_in = blobs_fpn[0]
P6_name = P6_blob_in + '_subsampled_2x'
        # Use max pooling to simulate stride-2 subsampling
P6_blob = model.MaxPool(P6_blob_in, P6_name, kernel=1, pad=0, stride=2)
blobs_fpn.insert(0, P6_blob)
spatial_scales.insert(0, spatial_scales[0] * 0.5)
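        # Note (illustration): per channel, a 1x1 max pool with stride 2 is
        # equivalent to taking every other row/column (x[:, :, ::2, ::2] in
        # numpy terms), which is exactly stride-2 subsampling.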
    # Coarser FPN levels introduced for RetinaNet
if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
fpn_blob = fpn_level_info.blobs[0]
dim_in = fpn_level_info.dims[0]
for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
fpn_blob_in = fpn_blob
if i > HIGHEST_BACKBONE_LVL + 1:
fpn_blob_in = model.Relu(fpn_blob, fpn_blob + '_relu')
fpn_blob = model.Conv(fpn_blob_in,
'fpn_' + str(i),
dim_in=dim_in,
dim_out=fpn_dim,
kernel=3,
pad=1,
stride=2,
weight_init=xavier_fill,
bias_init=const_fill(0.0) )
dim_in = fpn_dim
blobs_fpn.insert(0, fpn_blob)
spatial_scales.insert(0, spatial_scales[0] * 0.5)
return blobs_fpn, fpn_dim, spatial_scales
def add_topdown_lateral_module(model, fpn_top, fpn_lateral, fpn_bottom, dim_top, dim_lateral):
"""
添加 top-down 侧向(lateral)模块.
"""
# Lateral 1x1 conv
lat = model.Conv(fpn_lateral,
fpn_bottom + '_lateral',
dim_in=dim_lateral,
dim_out=dim_top,
kernel=1,
pad=0,
stride=1,
weight_init=(
const_fill(0.0) if cfg.FPN.ZERO_INIT_LATERAL else ('XavierFill', {}) ),
bias_init=const_fill(0.0) )
    # Top-down 2x upsampling
td = model.net.UpsampleNearest(fpn_top, fpn_bottom + '_topdown', scale=2)
    # Sum lateral and top-down
model.net.Sum([lat, td], fpn_bottom)
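# A rough numpy sketch (illustration only, not part of the original module) of
# what the module above computes per channel: nearest-neighbor 2x upsampling
# of the top-down feature summed with the lateral feature.
def _demo_topdown_lateral_merge():  # hypothetical helper
    top = np.arange(4, dtype=np.float32).reshape(2, 2)
    td = top.repeat(2, axis=0).repeat(2, axis=1)  # UpsampleNearest, scale=2
    lat = np.ones((4, 4), dtype=np.float32)  # stands in for the 1x1 lateral conv output
    return lat + td  # model.net.Sum([lat, td], fpn_bottom)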
def get_min_max_levels():
"""
在 multiple FPN levels 上进行 RPN 和 RoI 变换操作所需要的 min 和 max FPN levels.
"""
min_level = LOWEST_BACKBONE_LVL
max_level = HIGHEST_BACKBONE_LVL
if cfg.FPN.MULTILEVEL_RPN and not cfg.FPN.MULTILEVEL_ROIS:
max_level = cfg.FPN.RPN_MAX_LEVEL
min_level = cfg.FPN.RPN_MIN_LEVEL
if not cfg.FPN.MULTILEVEL_RPN and cfg.FPN.MULTILEVEL_ROIS:
max_level = cfg.FPN.ROI_MAX_LEVEL
min_level = cfg.FPN.ROI_MIN_LEVEL
if cfg.FPN.MULTILEVEL_RPN and cfg.FPN.MULTILEVEL_ROIS:
max_level = max(cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.ROI_MAX_LEVEL)
min_level = min(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.ROI_MIN_LEVEL)
return min_level, max_level
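# Example (assuming the typical Detectron defaults RPN_MIN_LEVEL=2,
# RPN_MAX_LEVEL=6, ROI_MIN_LEVEL=2, ROI_MAX_LEVEL=5): with both
# MULTILEVEL_RPN and MULTILEVEL_ROIS on, the union of the two ranges gives
# min_level = min(2, 2) = 2 and max_level = max(6, 5) = 6.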
# ---------------------------------------------------------------------------- #
# RPN with an FPN backbone
# ---------------------------------------------------------------------------- #
def add_fpn_rpn_outputs(model, blobs_in, dim_in, spatial_scales):
"""
在 FPN 上添加 RPN 输出.
"""
num_anchors = len(cfg.FPN.RPN_ASPECT_RATIOS)
dim_out = dim_in
k_max = cfg.FPN.RPN_MAX_LEVEL # coarsest level of pyramid
k_min = cfg.FPN.RPN_MIN_LEVEL # finest level of pyramid
assert len(blobs_in) == k_max - k_min + 1
for lvl in range(k_min, k_max + 1):
        bl_in = blobs_in[k_max - lvl]  # blobs_in is in reversed order
sc = spatial_scales[k_max - lvl] # in reversed order
slvl = str(lvl)
if lvl == k_min:
            # Create conv ops with randomly initialized weights and zeroed
            # biases for the first FPN level; these will be shared by all
            # other FPN levels.
# RPN hidden representation
conv_rpn_fpn = model.Conv(bl_in,
'conv_rpn_fpn' + slvl,
dim_in,
dim_out,
kernel=3,
pad=1,
stride=1,
weight_init=gauss_fill(0.01),
bias_init=const_fill(0.0) )
model.Relu(conv_rpn_fpn, conv_rpn_fpn)
# Proposal classification scores
rpn_cls_logits_fpn = model.Conv(conv_rpn_fpn,
'rpn_cls_logits_fpn' + slvl,
dim_in,
num_anchors,
kernel=1,
pad=0,
stride=1,
weight_init=gauss_fill(0.01),
bias_init=const_fill(0.0) )
# Proposal bbox regression deltas
rpn_bbox_pred_fpn = model.Conv(conv_rpn_fpn,
'rpn_bbox_pred_fpn' + slvl,
dim_in,
4 * num_anchors,
kernel=1,
pad=0,
stride=1,
weight_init=gauss_fill(0.01),
bias_init=const_fill(0.0) )
else:
            # Share weights and biases
sk_min = str(k_min)
# RPN hidden representation
conv_rpn_fpn = model.ConvShared(
bl_in,
'conv_rpn_fpn' + slvl,
dim_in,
dim_out,
kernel=3,
pad=1,
stride=1,
weight='conv_rpn_fpn' + sk_min + '_w',
bias='conv_rpn_fpn' + sk_min + '_b'
)
model.Relu(conv_rpn_fpn, conv_rpn_fpn)
# Proposal classification scores
rpn_cls_logits_fpn = model.ConvShared(conv_rpn_fpn,
'rpn_cls_logits_fpn' + slvl,
dim_in,
num_anchors,
kernel=1,
pad=0,
stride=1,
weight='rpn_cls_logits_fpn' + sk_min + '_w',
bias='rpn_cls_logits_fpn' + sk_min + '_b' )
# Proposal bbox regression deltas
rpn_bbox_pred_fpn = model.ConvShared(conv_rpn_fpn,
'rpn_bbox_pred_fpn' + slvl,
dim_in,
4 * num_anchors,
kernel=1,
pad=0,
stride=1,
weight='rpn_bbox_pred_fpn' + sk_min + '_w',
bias='rpn_bbox_pred_fpn' + sk_min + '_b' )
if not model.train or cfg.MODEL.FASTER_RCNN:
"""
需要 proposals 的情况:
- 1. inference (== not model.train) 时,RPN Only 和 Faster R-CNN
- 2. training 时, Faster R-CNN.
其它情况(training for RPN only),不需要 proposals.
"""
lvl_anchors = generate_anchors(
stride=2.**lvl,
sizes=(cfg.FPN.RPN_ANCHOR_START_SIZE * 2.**(lvl - k_min), ),
aspect_ratios=cfg.FPN.RPN_ASPECT_RATIOS )
rpn_cls_probs_fpn = model.net.Sigmoid(rpn_cls_logits_fpn, 'rpn_cls_probs_fpn' + slvl)
model.GenerateProposals([rpn_cls_probs_fpn, rpn_bbox_pred_fpn, 'im_info'],
['rpn_rois_fpn' + slvl, 'rpn_roi_probs_fpn' + slvl],
anchors=lvl_anchors,
spatial_scale=sc )
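# Anchor sizing sketch (assuming the Detectron defaults RPN_ANCHOR_START_SIZE
# = 32 and RPN_MIN_LEVEL = 2): the single anchor size per level doubles with
# the level, tracking the 2x change in stride:
#   P2: stride 4,  size 32;   P3: stride 8,  size 64;   P4: stride 16, size 128;
#   P5: stride 32, size 256;  P6: stride 64, size 512.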
def add_fpn_rpn_losses(model):
"""
在 FPN 添加 RPN losses.
"""
loss_gradients = {}
for lvl in range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1):
slvl = str(lvl)
        # Spatially narrow the full-sized RPN label arrays to match the feature map shape
model.net.SpatialNarrowAs(['rpn_labels_int32_wide_fpn' + slvl, 'rpn_cls_logits_fpn' + slvl],
'rpn_labels_int32_fpn' + slvl )
for key in ('targets', 'inside_weights', 'outside_weights'):
model.net.SpatialNarrowAs(['rpn_bbox_' + key + '_wide_fpn' + slvl,
'rpn_bbox_pred_fpn' + slvl ],
'rpn_bbox_' + key + '_fpn' + slvl )
loss_rpn_cls_fpn = model.net.SigmoidCrossEntropyLoss(
['rpn_cls_logits_fpn' + slvl, 'rpn_labels_int32_fpn' + slvl],
'loss_rpn_cls_fpn' + slvl,
normalize=0,
scale=(1. / cfg.NUM_GPUS / cfg.TRAIN.RPN_BATCH_SIZE_PER_IM / cfg.TRAIN.IMS_PER_BATCH )
)
"""
Normalization by (1) RPN_BATCH_SIZE_PER_IM and (2) IMS_PER_BATCH is handled by (1) setting bbox outside weights and (2) SmoothL1Loss normalizes by IMS_PER_BATCH.
"""
loss_rpn_bbox_fpn = model.net.SmoothL1Loss(
['rpn_bbox_pred_fpn' + slvl,
'rpn_bbox_targets_fpn' + slvl,
'rpn_bbox_inside_weights_fpn' + slvl,
'rpn_bbox_outside_weights_fpn' + slvl ],
'loss_rpn_bbox_fpn' + slvl,
beta=1. / 9.,
scale=1. / cfg.NUM_GPUS )
loss_gradients.update(blob_utils.
get_loss_gradients(model, [loss_rpn_cls_fpn, loss_rpn_bbox_fpn]) )
model.AddLosses(['loss_rpn_cls_fpn' + slvl, 'loss_rpn_bbox_fpn' + slvl])
return loss_gradients
# ---------------------------------------------------------------------------- #
# Helper functions for working with multilevel FPN RoIs
# ---------------------------------------------------------------------------- #
def map_rois_to_fpn_levels(rois, k_min, k_max):
"""
判断在一系列 RoIs 集合中每个 RoI 应该映射的 FPN level,
基于 FPN 论文中的启发式.
"""
    # Compute level ids
s = np.sqrt(box_utils.boxes_area(rois))
s0 = cfg.FPN.ROI_CANONICAL_SCALE # default: 224
lvl0 = cfg.FPN.ROI_CANONICAL_LEVEL # default: 4
# Eqn.(1) in FPN paper
target_lvls = np.floor(lvl0 + np.log2(s / s0 + 1e-6))
target_lvls = np.clip(target_lvls, k_min, k_max)
return target_lvls
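# Worked example of the heuristic above (a sketch using the defaults s0 = 224
# and lvl0 = 4): a 224x224 RoI maps to P4, a 112x112 RoI to P3, and a 448x448
# RoI to P5, i.e. halving/doubling the canonical scale moves the RoI down/up
# one pyramid level.
def _demo_roi_level_assignment():  # hypothetical helper
    for s in (112., 224., 448.):
        lvl = np.floor(4 + np.log2(s / 224. + 1e-6))
        print('sqrt area {:.0f} -> P{:.0f}'.format(s, lvl))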
def add_multilevel_roi_blobs(blobs, blob_prefix, rois, target_lvls, lvl_min, lvl_max):
"""
将 multiple FPN levels 的 RoI blobs 添加到 blobs dict.
blobs: blob name 到 numpy ndarray 映射的 dict.
blob_prefix: FPN blobs 使用的 name 前缀prefix.
rois: rois源,2D numpy array,shape (N, 5)
每一行是一个 roi,各列分别编码: (batch_idx, x1, y1, x2, y2)
target_lvls: numpy array,shape (N, ),表示 rois 中每个 roi 应该被分配的 FPN level.
lvl_min: the finest (highest resolution) FPN level (e.g., 2)
lvl_max: the coarest (lowest resolution) FPN level (e.g., 6)
"""
rois_idx_order = np.empty((0, ))
rois_stacked = np.zeros((0, 5), dtype=np.float32) # for assert
for lvl in range(lvl_min, lvl_max + 1):
idx_lvl = np.where(target_lvls == lvl)[0]
blobs[blob_prefix + '_fpn' + str(lvl)] = rois[idx_lvl, :]
rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))
rois_stacked = np.vstack([rois_stacked, blobs[blob_prefix + '_fpn' + str(lvl)]] )
rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False)
blobs[blob_prefix + '_idx_restore_int32'] = rois_idx_restore
    # Sanity check that restore order is correct
assert (rois_stacked[rois_idx_restore] == rois).all()
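# Minimal usage sketch (hypothetical data): three RoIs split across levels 2
# and 3; the '<prefix>_idx_restore_int32' blob recovers the original order.
def _demo_add_multilevel_roi_blobs():  # hypothetical helper
    blobs = {}
    rois = np.array([[0, 0, 0, 50, 50],
                     [0, 0, 0, 300, 300],
                     [0, 0, 0, 60, 60]], dtype=np.float32)
    target_lvls = np.array([2, 3, 2])
    add_multilevel_roi_blobs(blobs, 'rois', rois, target_lvls, 2, 3)
    print(sorted(blobs.keys()))
    # -> ['rois_fpn2', 'rois_fpn3', 'rois_idx_restore_int32']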
# ---------------------------------------------------------------------------- #
# FPN level info for stages 5, 4, 3, 2 for select models (more can be added)
# ---------------------------------------------------------------------------- #
FpnLevelInfo = collections.namedtuple('FpnLevelInfo', ['blobs', 'dims', 'spatial_scales'] )
def fpn_level_info_ResNet50_conv5():
return FpnLevelInfo(blobs=('res5_2_sum', 'res4_5_sum', 'res3_3_sum', 'res2_2_sum'),
dims=(2048, 1024, 512, 256),
spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.) )
def fpn_level_info_ResNet101_conv5():
return FpnLevelInfo(blobs=('res5_2_sum', 'res4_22_sum', 'res3_3_sum', 'res2_2_sum'),
dims=(2048, 1024, 512, 256),
spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.) )
def fpn_level_info_ResNet152_conv5():
return FpnLevelInfo(blobs=('res5_2_sum', 'res4_35_sum', 'res3_7_sum', 'res2_2_sum'),
dims=(2048, 1024, 512, 256),
spatial_scales=(1. / 32., 1. / 16., 1. / 8., 1. / 4.) )
"""
优化 op 图构建.
Optimization operator graph construction.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import logging
from caffe2.python import muji
from core.config import cfg
import utils.c2 as c2_utils
logger = logging.getLogger(__name__)
def build_data_parallel_model(model, single_gpu_build_func):
"""
给定单 GPU 模型构建函数,创建数据并行模型.
"""
if model.train:
        # Build the forward graph on each GPU
all_loss_gradients = _build_forward_graph(model, single_gpu_build_func)
        # Add backward pass on all GPUs
model.AddGradientOperators(all_loss_gradients)
if cfg.NUM_GPUS > 1:
            # Build the graph that performs Allreduce on the gradients
_add_allreduce_graph(model)
for gpu_id in range(cfg.NUM_GPUS):
            # After the allreduce, each GPU performs its SGD parameter update in parallel
_add_parameter_update_ops(model, gpu_id)
else:
        # Test-time network operates on a single GPU;
        # test-time parallelism is implemented through multiprocessing
with c2_utils.NamedCudaScope(0):
single_gpu_build_func(model)
def _build_forward_graph(model, single_gpu_build_func):
"""
在每个 GPU 上构建 forward graph.
"""
    all_loss_gradients = {}  # Will include loss gradients from all GPUs
    # Build the model on each GPU with correct name and device scoping
for gpu_id in range(cfg.NUM_GPUS):
with c2_utils.NamedCudaScope(gpu_id):
all_loss_gradients.update(single_gpu_build_func(model))
return all_loss_gradients
def _add_allreduce_graph(model):
"""
构建 graph,对 gradients 进行 Allreduce.
"""
# 如果训练时GPU 数大于1, 需要 all-reduce the per-GPU gradients.
all_params = model.TrainableParams()
assert len(all_params) % cfg.NUM_GPUS == 0
    # The model parameters are replicated on each GPU, so get the number of
    # distinct parameter blobs (i.e., the number of parameter blobs per GPU)
params_per_gpu = int(len(all_params) / cfg.NUM_GPUS)
with c2_utils.CudaScope(0):
        # Iterate over distinct parameter blobs
for i in range(params_per_gpu):
            # Gradients from all GPUs for this parameter blob
gradients = [model.param_to_grad[p] for p in all_params[i::params_per_gpu]]
if len(gradients) > 0:
if cfg.USE_NCCL:
model.net.NCCLAllreduce(gradients, gradients)
else:
muji.Allreduce(model.net, gradients, reduced_affix='')
def _add_parameter_update_ops(model, gpu_id):
"""
构建优化更新 op 图(optimizer update op graph)
"""
with c2_utils.NamedCudaScope(gpu_id):
        # Learning rate of 0 is a dummy value to be set properly at the start of training
lr = model.param_init_net.ConstantFill([], 'lr', shape=[1], value=0.0)
one = model.param_init_net.ConstantFill([], 'one', shape=[1], value=1.0)
wd = model.param_init_net.ConstantFill([], 'wd', shape=[1], value=cfg.SOLVER.WEIGHT_DECAY)
for param in model.TrainableParams(gpu_id=gpu_id):
logger.info('param ' + str(param) + ' will be updated')
param_grad = model.param_to_grad[param]
            # Initialize the momentum vector
param_momentum = model.param_init_net.ConstantFill([param], param + '_momentum', value=0.0)
if param in model.biases:
                # Special treatment for biases (mainly for historical reasons):
                # (1) Do not apply weight decay
                # (2) Use a 2x higher learning rate
model.Scale(param_grad, param_grad, scale=2.0)
elif cfg.SOLVER.WEIGHT_DECAY > 0:
                # Apply weight decay to non-bias weights
model.WeightedSum([param_grad, one, param, wd], param_grad)
            # Update param_grad and param_momentum in place
model.net.MomentumSGDUpdate([param_grad, param_momentum, lr, param],
[param_grad, param_momentum, param],
momentum=cfg.SOLVER.MOMENTUM )
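# A rough numpy sketch (illustration only) of the two gradient adjustments
# above: biases get an effective 2x learning rate by scaling their gradient,
# and non-bias weights get L2 weight decay folded into the gradient via
# WeightedSum, i.e. grad <- 1.0 * grad + wd * param.
def _demo_gradient_adjustments():  # hypothetical helper
    import numpy as np
    param = np.array([0.5, -0.2])
    grad = np.array([0.1, 0.3])
    wd = 0.0001
    bias_grad = 2.0 * grad  # model.Scale(param_grad, param_grad, scale=2.0)
    decayed_grad = 1.0 * grad + wd * param  # model.WeightedSum([param_grad, one, param, wd], ...)
    return bias_grad, decayed_grad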
import numpy as np
# Verify that we compute the same anchors as Shaoqing's matlab implementation:
#
# >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
# >> anchors
#
# anchors =
#
# -83 -39 100 56
# -175 -87 192 104
# -359 -183 376 200
# -55 -55 72 72
# -119 -119 136 136
# -247 -247 264 264
# -35 -79 52 96
# -79 -167 96 184
# -167 -343 184 360
# array([[ -83., -39., 100., 56.],
# [-175., -87., 192., 104.],
# [-359., -183., 376., 200.],
# [ -55., -55., 72., 72.],
# [-119., -119., 136., 136.],
# [-247., -247., 264., 264.],
# [ -35., -79., 52., 96.],
# [ -79., -167., 96., 184.],
# [-167., -343., 184., 360.]])
def generate_anchors(stride=16, sizes=(32, 64, 128, 256, 512), aspect_ratios=(0.5, 1, 2)):
"""
生成 anchor boxes 矩阵,其格式为 (x1, y1, x2, y2).
Anchors 是以 stride / 2 的中心,逼近指定大小的平方根面积(sqrt areas),长宽比
Anchors are centered on stride / 2, have (approximate) sqrt areas of the specified
sizes, and aspect ratios as given.
"""
return _generate_anchors(stride,
np.array(sizes, dtype=np.float) / stride,
np.array(aspect_ratios, dtype=np.float) )
def _generate_anchors(base_size, scales, aspect_ratios):
"""
通过枚举关于参考窗口window (0, 0, base_size - 1, base_size - 1) 的长宽比(aspect ratios) X scales,
来生成 anchore 窗口(参考窗口 reference windows).
"""
anchor = np.array([1, 1, base_size, base_size], dtype=np.float) - 1
anchors = _ratio_enum(anchor, aspect_ratios)
anchors = np.vstack([_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])])
return anchors
def _whctrs(anchor):
"""
返回 anchor 窗口的 width, height, x center, y center.
"""
w = anchor[2] - anchor[0] + 1
h = anchor[3] - anchor[1] + 1
x_ctr = anchor[0] + 0.5 * (w - 1)
y_ctr = anchor[1] + 0.5 * (h - 1)
return w, h, x_ctr, y_ctr
def _mkanchors(ws, hs, x_ctr, y_ctr):
"""
给定 center(x_ctr, y_ctr) 及 widths (ws),heights (hs) 向量,输出 anchors窗口window 集合.
"""
ws = ws[:, np.newaxis]
hs = hs[:, np.newaxis]
anchors = np.hstack( (x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) ) )
return anchors
def _ratio_enum(anchor, ratios):
"""
对于每个关于一个 anchor 的长宽比aspect ratio,枚举 anchors 集合.
"""
w, h, x_ctr, y_ctr = _whctrs(anchor)
size = w * h
size_ratios = size / ratios
ws = np.round(np.sqrt(size_ratios))
hs = np.round(ws * ratios)
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
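# Worked example (sketch): a 16x16 reference window (area 256) with ratios
# (0.5, 1, 2) yields rounded widths (23, 16, 11) and heights (12, 16, 22),
# keeping the area roughly 256 at every aspect ratio.
def _demo_ratio_enum():  # hypothetical helper
    print(_ratio_enum(np.array([0., 0., 15., 15.]), np.array([0.5, 1., 2.])))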
def _scale_enum(anchor, scales):
"""
对于每个关于一个 anchor 的尺度scale,枚举 anchors 集合.
Enumerate a set of anchors for each scale wrt an anchor."""
w, h, x_ctr, y_ctr = _whctrs(anchor)
ws = w * scales
hs = h * scales
anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
return anchors
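# Quick sanity check (usage sketch): the defaults generate one anchor per
# (size, aspect ratio) pair, i.e. 5 * 3 = 15 anchors.
if __name__ == '__main__':
    print(generate_anchors().shape)  # (15, 4)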