# File name : dataset.py
# Author : YunYang1994
# Study notes: re-implementing YOLOv3 in TensorFlow 2.0 to deepen understanding.
import numpy as np
import tensorflow as tf
import core.utils as utils #工具代码
import core.common as common #网络结构
import core.backbone as backbone #网络结构
from core.config import cfg #配置文件
def YOLOv3(input_layer):
    """Build the three YOLOv3 detection heads on top of a Darknet-53 backbone.

    input_layer: input image tensor, e.g. shape [416, 416, 3] after resizing.
    Returns [conv_sbbox, conv_mbbox, conv_lbbox] — the raw prediction maps for
    small, medium and large objects (grids 52x52, 26x26 and 13x13), each with
    3 * (NUM_CLASS + 5) channels and no activation / batch-norm applied.
    """
    # Backbone feature maps: route_1 [52,52,256], route_2 [26,26,512], conv [13,13,1024]
    route_1, route_2, conv = backbone.darknet53(input_layer)

    # --- large-object head (13x13 grid) ---
    for spec in [(1, 1, 1024, 512), (3, 3, 512, 1024), (1, 1, 1024, 512),
                 (3, 3, 512, 1024), (1, 1, 1024, 512)]:
        conv = common.convolutional(conv, spec)
    conv_lobj_branch = common.convolutional(conv, (3, 3, 512, 1024))
    # Raw head output: 3 anchors x (x, y, w, h, objectness, NUM_CLASS classes)
    conv_lbbox = common.convolutional(conv_lobj_branch, (1, 1, 1024, 3 * (NUM_CLASS + 5)),
                                      activate=False, bn=False)

    # Upsample and fuse with route_2 along channels -> [26,26,768]
    conv = common.convolutional(conv, (1, 1, 512, 256))
    conv = common.upsample(conv)
    conv = tf.concat([conv, route_2], axis=-1)

    # --- medium-object head (26x26 grid) ---
    for spec in [(1, 1, 768, 256), (3, 3, 256, 512), (1, 1, 512, 256),
                 (3, 3, 256, 512), (1, 1, 512, 256)]:
        conv = common.convolutional(conv, spec)
    conv_mobj_branch = common.convolutional(conv, (3, 3, 256, 512))
    conv_mbbox = common.convolutional(conv_mobj_branch, (1, 1, 512, 3 * (NUM_CLASS + 5)),
                                      activate=False, bn=False)

    # Upsample and fuse with route_1 along channels -> [52,52,384]
    conv = common.convolutional(conv, (1, 1, 256, 128))
    conv = common.upsample(conv)
    conv = tf.concat([conv, route_1], axis=-1)

    # --- small-object head (52x52 grid) ---
    for spec in [(1, 1, 384, 128), (3, 3, 128, 256), (1, 1, 256, 128),
                 (3, 3, 128, 256), (1, 1, 256, 128)]:
        conv = common.convolutional(conv, spec)
    conv_sobj_branch = common.convolutional(conv, (3, 3, 128, 256))
    conv_sbbox = common.convolutional(conv_sobj_branch, (1, 1, 256, 3 * (NUM_CLASS + 5)),
                                      activate=False, bn=False)

    return [conv_sbbox, conv_mbbox, conv_lbbox]
# YOLOv3 takes a resized, normalized image and outputs three feature maps:
# conv_sbbox [52,52,45], conv_mbbox [26,26,45], conv_lbbox [13,13,45] (here NUM_CLASS = 10).
def decode(conv_output, i=0):
    """Transform one raw YOLO head output into absolute image-space predictions.

    conv_output: raw head tensor for scale i, e.g. [None, 13, 13, 3*(5+NUM_CLASS)]
                 for i=2 (the 52x52 / 26x26 / 13x13 scales map to i = 0 / 1 / 2).
    Returns a tensor of shape [batch, size, size, 3, 5 + NUM_CLASS] containing
    (x, y, w, h, objectness, class probabilities) in input-image pixel units.
    """
    out_shape = tf.shape(conv_output)
    batch_size = out_shape[0]
    grid_size = out_shape[1]

    # Split channels into the per-anchor prediction components.
    preds = tf.reshape(conv_output, (batch_size, grid_size, grid_size, 3, 5 + NUM_CLASS))
    raw_dxdy = preds[..., 0:2]  # centre offset within a cell
    raw_dwdh = preds[..., 2:4]  # log-space size w.r.t. the anchor
    raw_conf = preds[..., 4:5]  # objectness logit
    raw_prob = preds[..., 5:]   # per-class logits

    # Build a [grid, grid, 2] tensor holding each cell's (x, y) index.
    cols, rows = tf.meshgrid(tf.range(grid_size, dtype=tf.int32),
                             tf.range(grid_size, dtype=tf.int32))
    xy_grid = tf.stack([cols, rows], axis=-1)
    xy_grid = tf.tile(xy_grid[tf.newaxis, :, :, tf.newaxis, :], [batch_size, 1, 1, 3, 1])
    xy_grid = tf.cast(xy_grid, tf.float32)

    # Decode: sigmoid offset + cell index, scaled by the stride; exp size times anchor.
    pred_xy = (tf.sigmoid(raw_dxdy) + xy_grid) * STRIDES[i]
    pred_wh = (tf.exp(raw_dwdh) * ANCHORS[i]) * STRIDES[i]
    pred_conf = tf.sigmoid(raw_conf)
    pred_prob = tf.sigmoid(raw_prob)

    return tf.concat([pred_xy, pred_wh, pred_conf, pred_prob], axis=-1)
# decode() outputs the transformed feature map. The grid-construction part is worth
# extracting and running on its own a few times to build intuition.
def bbox_iou(boxes1, boxes2):
    """Elementwise IoU (intersection over union) of two sets of boxes.

    boxes1, boxes2: tensors whose last dimension is (x, y, w, h) — box centre
    plus width/height; leading dimensions broadcast against each other.
    Returns a tensor of IoU values in [0, 1].

    Fix: use tf.math.divide_no_nan for the final division so degenerate
    zero-area boxes (e.g. the all-zero padding rows in the ground-truth
    bbox tensor passed by compute_loss) produce IoU 0 instead of NaN/Inf,
    which would otherwise poison the confidence-loss mask.
    """
    boxes1_area = boxes1[..., 2] * boxes1[..., 3]  # w * h
    boxes2_area = boxes2[..., 2] * boxes2[..., 3]

    # Convert (cx, cy, w, h) -> (x_min, y_min, x_max, y_max) corner form.
    boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
    boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

    # Intersection rectangle; clamp at 0 so disjoint boxes contribute no area.
    left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area

    # 0 / 0 -> 0 rather than NaN when both boxes are degenerate.
    return tf.math.divide_no_nan(inter_area, union_area)
def bbox_giou(boxes1, boxes2):
    """Elementwise GIoU (generalized IoU) of two sets of boxes.

    boxes1, boxes2: tensors whose last dimension is (x, y, w, h) — box centre
    plus width/height; leading dimensions broadcast. Returns GIoU in [-1, 1].
    Unlike plain IoU, GIoU penalizes the empty area of the smallest enclosing
    box, giving a useful gradient even when the boxes do not overlap.

    Fix: both divisions now use tf.math.divide_no_nan so degenerate
    zero-area box pairs yield 0 instead of NaN/Inf in the loss.
    """
    # Convert (cx, cy, w, h) -> corner form.
    boxes1 = tf.concat([boxes1[..., :2] - boxes1[..., 2:] * 0.5,
                        boxes1[..., :2] + boxes1[..., 2:] * 0.5], axis=-1)
    boxes2 = tf.concat([boxes2[..., :2] - boxes2[..., 2:] * 0.5,
                        boxes2[..., :2] + boxes2[..., 2:] * 0.5], axis=-1)

    # Re-order corners so min <= max even if a predicted w/h was negative.
    boxes1 = tf.concat([tf.minimum(boxes1[..., :2], boxes1[..., 2:]),
                        tf.maximum(boxes1[..., :2], boxes1[..., 2:])], axis=-1)
    boxes2 = tf.concat([tf.minimum(boxes2[..., :2], boxes2[..., 2:]),
                        tf.maximum(boxes2[..., :2], boxes2[..., 2:])], axis=-1)

    boxes1_area = (boxes1[..., 2] - boxes1[..., 0]) * (boxes1[..., 3] - boxes1[..., 1])
    boxes2_area = (boxes2[..., 2] - boxes2[..., 0]) * (boxes2[..., 3] - boxes2[..., 1])

    # Intersection and union.
    left_up = tf.maximum(boxes1[..., :2], boxes2[..., :2])
    right_down = tf.minimum(boxes1[..., 2:], boxes2[..., 2:])
    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    iou = tf.math.divide_no_nan(inter_area, union_area)  # 0/0 -> 0

    # Smallest axis-aligned box enclosing both inputs.
    enclose_left_up = tf.minimum(boxes1[..., :2], boxes2[..., :2])
    enclose_right_down = tf.maximum(boxes1[..., 2:], boxes2[..., 2:])
    enclose = tf.maximum(enclose_right_down - enclose_left_up, 0.0)
    enclose_area = enclose[..., 0] * enclose[..., 1]

    # GIoU = IoU - (wasted enclosing area) / (enclosing area); guarded division.
    giou = iou - tf.math.divide_no_nan(enclose_area - union_area, enclose_area)
    return giou
# Replacing IoU with GIoU in the localization loss also gives a small accuracy gain.
def compute_loss(pred, conv, label, bboxes, i=0):
    """Compute the YOLOv3 loss terms for one detection scale.

    pred:   decoded predictions from decode(), [batch, size, size, 3, 5 + NUM_CLASS].
    conv:   raw head output, [batch, size, size, 3 * (5 + NUM_CLASS)].
    label:  dense per-cell targets, [batch, size, size, 3, 5 + NUM_CLASS]
            (x, y, w, h, objectness, one-hot classes).
    bboxes: ground-truth boxes per image, [batch, max_boxes, 4], zero-padded.
    i:      scale index (0, 1 or 2) selecting the stride.
    Returns (giou_loss, conf_loss, prob_loss) — scalars, summed over each
    image's grid/anchors and averaged over the batch.
    """
    conv_dims = tf.shape(conv)
    batch_size = conv_dims[0]
    grid_size = conv_dims[1]
    # Network input resolution in pixels (e.g. 416 for the 13x13 head).
    input_size = tf.cast(STRIDES[i] * grid_size, tf.float32)

    conv = tf.reshape(conv, (batch_size, grid_size, grid_size, 3, 5 + NUM_CLASS))
    raw_conf = conv[..., 4:5]  # objectness logits
    raw_prob = conv[..., 5:]   # class logits

    pred_xywh = pred[..., 0:4]
    pred_conf = pred[..., 4:5]

    label_xywh = label[..., 0:4]
    respond_bbox = label[..., 4:5]  # 1 where a ground-truth box is assigned, else 0
    label_prob = label[..., 5:]

    # --- localization: (1 - GIoU), weighted so small boxes count more ---
    giou = tf.expand_dims(bbox_giou(pred_xywh, label_xywh), axis=-1)
    bbox_loss_scale = 2.0 - 1.0 * label_xywh[..., 2:3] * label_xywh[..., 3:4] / (input_size ** 2)
    giou_loss = respond_bbox * bbox_loss_scale * (1 - giou)

    # --- confidence: IoU of every prediction against every ground-truth box ---
    iou = bbox_iou(pred_xywh[:, :, :, :, tf.newaxis, :],
                   bboxes[:, tf.newaxis, tf.newaxis, tf.newaxis, :, :])
    max_iou = tf.expand_dims(tf.reduce_max(iou, axis=-1), axis=-1)
    # Background = no assigned box AND best overlap below the threshold.
    respond_bgd = (1.0 - respond_bbox) * tf.cast(max_iou < IOU_LOSS_THRESH, tf.float32)
    # Focal-style modulator emphasizes cells the model gets wrong.
    conf_focal = tf.pow(respond_bbox - pred_conf, 2)
    conf_loss = conf_focal * (
        respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=raw_conf)
        +
        respond_bgd * tf.nn.sigmoid_cross_entropy_with_logits(labels=respond_bbox, logits=raw_conf)
    )

    # --- classification: only on cells that own an object ---
    prob_loss = respond_bbox * tf.nn.sigmoid_cross_entropy_with_logits(labels=label_prob, logits=raw_prob)

    # Reduce each term: sum over grid / anchor / channel dims, mean over batch.
    giou_loss = tf.reduce_mean(tf.reduce_sum(giou_loss, axis=[1, 2, 3, 4]))
    conf_loss = tf.reduce_mean(tf.reduce_sum(conf_loss, axis=[1, 2, 3, 4]))
    prob_loss = tf.reduce_mean(tf.reduce_sum(prob_loss, axis=[1, 2, 3, 4]))

    return giou_loss, conf_loss, prob_loss
# YOLOv3 is trained end-to-end; the loss code is fairly direct, with separate
# localization, confidence and classification terms.
# Original-work notice: this article was published by its author to the Tencent
# Cloud Developer Community and may not be reproduced without permission.
# For infringement concerns, contact cloudcommunity@tencent.com for removal.