# File name : dataset.py
# Author : YunYang1994
学习了yolov3 在 tensorflow2.0 的实现,加深理解
class Dataset(object):
"""implement Dataset here"""
def __init__(self, dataset_type):
    """Configure the dataset for one split and load its annotation lines.

    Args:
        dataset_type: ``'train'`` selects the ``cfg.TRAIN`` settings; any
            other value selects the ``cfg.TEST`` settings.
    """
    # All split-specific options live under either cfg.TRAIN or cfg.TEST.
    split_cfg = cfg.TRAIN if dataset_type == 'train' else cfg.TEST

    # Annotation list file. Per the author's notes these are
    # ./data/dataset/yymnist_train.txt and ./data/dataset/yymnist_test.txt
    # (yymnist: a simplified digit-detection dataset).
    self.annot_path = split_cfg.ANNOT_PATH
    # Author's notes: input size 416 for TRAIN, 544 for TEST.
    self.input_sizes = split_cfg.INPUT_SIZE
    # Author's notes: batch size 4 for TRAIN, 2 for TEST.
    self.batch_size = split_cfg.BATCH_SIZE
    # Whether parse_annotation applies random flip / crop / translate.
    # Author's notes: True for TRAIN, False for TEST.
    self.data_aug = split_cfg.DATA_AUG

    # Label grids are always sized from the TRAIN input size, whatever
    # the split.
    self.train_input_sizes = cfg.TRAIN.INPUT_SIZE
    self.strides = np.array(cfg.YOLO.STRIDES)  # author's note: [8, 16, 32]

    # Author's note: the ten digit classes for yymnist.
    self.classes = utils.read_class_names(cfg.YOLO.CLASSES)
    self.num_classes = len(self.classes)

    # Author's note: shape [3, 3, 2] -- three scales with three (w, h)
    # anchors each, obtained by clustering. TODO confirm against
    # utils.get_anchors.
    self.anchors = np.array(utils.get_anchors(cfg.YOLO.ANCHORS))
    self.anchor_per_scale = cfg.YOLO.ANCHOR_PER_SCALE  # author's note: 3
    # Capacity of the per-scale ground-truth box list built for each image.
    self.max_bbox_per_scale = 150

    self.annotations = self.load_annotations(dataset_type)
    self.num_samples = len(self.annotations)
    # Number of batches per epoch. The ceil means a trailing partial batch
    # still counts, e.g. 1000 samples / batch size 4 -> 250 batches.
    self.num_batchs = int(np.ceil(self.num_samples / self.batch_size))
    # Index of the next batch that __next__ will produce.
    self.batch_count = 0
在 Python 2 中,class Dataset: 属于旧式类,而 class Dataset(object) 显式继承了 object,属于新式类,可以使用类的高级特性;在 Python 3(TensorFlow 2.0 所使用的版本)中所有类都是新式类,两种写法等价。
新式类的特点: 1. 所有类的类型都是type 2. 所有类调用的结果都是构造,返回这个类的实例 3. 所有类都是object的子类 4. 新式类不仅可以用旧式类的方式调用父类的方法,也可以用super方法
def load_annotations(self, dataset_type):
with open(self.annot_path, 'r') as f:
txt = f.readlines() #读取行内容
annotations = [line.strip() for line in txt if len(line.strip().split()[1:]) != 0]
np.random.shuffle(annotations)#打乱顺序
line.strip()用法举例如下: for line in file: line.strip().split() 当参数为空时,默认删除空白符(包括'\n', '\r', '\t', ' ') split('.')表示用'.'分割文本 len(line.strip().split()[1:]) != 0 表示非空白行
def __iter__(self):
return self
def __next__(self):
    """Build and return the next batch of images and training targets.

    Returns:
        ``(batch_image, (small_target, medium_target, large_target))``
        where ``batch_image`` is a float32 array of shape
        ``[batch_size, S, S, 3]`` (``S`` = ``self.train_input_size``) and
        each target is a ``(label, boxes)`` pair:
        ``label`` has shape
        ``[batch_size, S // stride, S // stride, anchor_per_scale,
        5 + num_classes]`` and ``boxes`` has shape
        ``[batch_size, max_bbox_per_scale, 4]``.

    Raises:
        StopIteration: once ``self.num_batchs`` batches have been served.
            Before raising, the batch counter is reset and the annotations
            are reshuffled, so the next loop over the dataset is a fresh
            epoch.
    """
    with tf.device('/cpu:0'):
        # parse_annotation and preprocess_true_boxes read these two
        # attributes, so they are refreshed before any sample is processed.
        self.train_input_size = self.train_input_sizes
        # Author's note: 416 // [8, 16, 32] = [52, 26, 13].
        self.train_output_sizes = self.train_input_size // self.strides

        # End of epoch: reset, reshuffle and stop. Checked before the
        # batch buffers are allocated so no work is thrown away.
        if self.batch_count >= self.num_batchs:
            self.batch_count = 0
            np.random.shuffle(self.annotations)
            raise StopIteration

        size = self.train_input_size
        small, medium, large = self.train_output_sizes
        cell_depth = 5 + self.num_classes  # box (4) + objectness (1) + classes

        batch_image = np.zeros(
            (self.batch_size, size, size, 3), dtype=np.float32)

        # One label grid per detection scale (small / medium / large boxes).
        batch_label_sbbox = np.zeros(
            (self.batch_size, small, small, self.anchor_per_scale, cell_depth),
            dtype=np.float32)
        batch_label_mbbox = np.zeros(
            (self.batch_size, medium, medium, self.anchor_per_scale, cell_depth),
            dtype=np.float32)
        batch_label_lbbox = np.zeros(
            (self.batch_size, large, large, self.anchor_per_scale, cell_depth),
            dtype=np.float32)

        # Flat per-scale lists of ground-truth boxes, zero padded.
        batch_sbboxes = np.zeros(
            (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
        batch_mbboxes = np.zeros(
            (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)
        batch_lbboxes = np.zeros(
            (self.batch_size, self.max_bbox_per_scale, 4), dtype=np.float32)

        first_index = self.batch_count * self.batch_size
        for num in range(self.batch_size):
            # The final batch of an epoch may run past the end of the
            # list; wrap around so it is padded with samples from the
            # start.
            index = (first_index + num) % self.num_samples
            # Annotation format: "<image path> x1,y1,x2,y2,cls x1,y1,x2,y2,cls ..."
            annotation = self.annotations[index]
            image, bboxes = self.parse_annotation(annotation)
            (label_sbbox, label_mbbox, label_lbbox,
             sbboxes, mbboxes, lbboxes) = self.preprocess_true_boxes(bboxes)

            batch_image[num, :, :, :] = image
            batch_label_sbbox[num, :, :, :, :] = label_sbbox
            batch_label_mbbox[num, :, :, :, :] = label_mbbox
            batch_label_lbbox[num, :, :, :, :] = label_lbbox
            batch_sbboxes[num, :, :] = sbboxes
            batch_mbboxes[num, :, :] = mbboxes
            batch_lbboxes[num, :, :] = lbboxes

        self.batch_count += 1
        batch_smaller_target = batch_label_sbbox, batch_sbboxes
        batch_medium_target = batch_label_mbbox, batch_mbboxes
        batch_larger_target = batch_label_lbbox, batch_lbboxes
        return batch_image, (batch_smaller_target, batch_medium_target, batch_larger_target)
def __iter__(self) 和 def __next__(self) 搭配使用,实现迭代器协议,用于数据迭代:每次调用 __next__ 读取 batch_size 张图片(训练时为 4 张)及其标签后 return;一个 epoch 结束时重置计数、打乱样本顺序并抛出 StopIteration。
def random_horizontal_flip(self, image, bboxes):
    """Mirror the image and its boxes left-to-right with probability 0.5.

    Args:
        image: Array indexed as ``[row, column, channel]``.
        bboxes: Array whose rows start with ``x1, y1, x2, y2``; the x
            columns are rewritten in place when the flip happens.

    Returns:
        ``(image, bboxes)``, flipped or untouched.
    """
    if random.random() >= 0.5:
        return image, bboxes

    width = image.shape[1]
    _, _, _ = image.shape  # same 3-D requirement as the unpacking it replaces
    image = image[:, ::-1, :]
    # The mirrored box's left edge comes from the old right edge and vice
    # versa, which keeps x1 <= x2.
    mirrored_x = width - bboxes[:, [2, 0]]
    bboxes[:, [0, 2]] = mirrored_x
    return image, bboxes
def random_crop(self, image, bboxes):
    """Randomly crop the image (probability 0.5) without cutting any box.

    The crop window always contains the smallest rectangle enclosing all
    boxes; each side is pushed outwards from that rectangle by a random
    amount, limited to the image border.

    Args:
        image: Array indexed as ``[row, column, channel]``.
        bboxes: Array whose rows start with ``x1, y1, x2, y2``; shifted in
            place into the cropped coordinate frame.

    Returns:
        ``(image, bboxes)``, cropped or untouched.
    """
    if random.random() < 0.5:
        h, w, _ = image.shape
        # Smallest rectangle enclosing every box: [xmin, ymin, xmax, ymax].
        max_bbox = np.concatenate(
            [np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)],
            axis=-1)

        # Free margin between that rectangle and each image border.
        max_l_trans = max_bbox[0]
        max_u_trans = max_bbox[1]
        max_r_trans = w - max_bbox[2]
        max_d_trans = h - max_bbox[3]

        crop_xmin = max(0, int(max_bbox[0] - random.uniform(0, max_l_trans)))
        crop_ymin = max(0, int(max_bbox[1] - random.uniform(0, max_u_trans)))
        # Clamp with min(): the original max(w, ...) / max(h, ...) always
        # evaluated to the full width/height, so the right and bottom
        # edges were never cropped.
        crop_xmax = min(w, int(max_bbox[2] + random.uniform(0, max_r_trans)))
        crop_ymax = min(h, int(max_bbox[3] + random.uniform(0, max_d_trans)))

        image = image[crop_ymin:crop_ymax, crop_xmin:crop_xmax]

        # Re-express the boxes relative to the crop's top-left corner.
        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] - crop_xmin
        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] - crop_ymin
    return image, bboxes
def random_translate(self, image, bboxes):
    """Randomly shift the image and its boxes with probability 0.5.

    The shift is drawn from a range derived from the free margin around
    the rectangle enclosing all boxes, so the boxes are not pushed out of
    the frame.

    Args:
        image: Array indexed as ``[row, column, channel]``.
        bboxes: Array whose rows start with ``x1, y1, x2, y2``; shifted in
            place by the same offset as the image.

    Returns:
        ``(image, bboxes)``, translated or untouched.
    """
    if random.random() < 0.5:
        h, w, _ = image.shape
        # Smallest rectangle enclosing every box: [xmin, ymin, xmax, ymax].
        max_bbox = np.concatenate(
            [np.min(bboxes[:, 0:2], axis=0), np.max(bboxes[:, 2:4], axis=0)],
            axis=-1)

        # Free margin between that rectangle and each image border.
        max_l_trans = max_bbox[0]
        max_u_trans = max_bbox[1]
        max_r_trans = w - max_bbox[2]
        max_d_trans = h - max_bbox[3]

        # The "- 1" keeps the shift one pixel short of the full margin.
        tx = random.uniform(-(max_l_trans - 1), (max_r_trans - 1))
        ty = random.uniform(-(max_u_trans - 1), (max_d_trans - 1))

        # 2x3 affine matrix for a pure translation by (tx, ty).
        M = np.array([[1, 0, tx], [0, 1, ty]])
        image = cv2.warpAffine(image, M, (w, h))

        bboxes[:, [0, 2]] = bboxes[:, [0, 2]] + tx
        bboxes[:, [1, 3]] = bboxes[:, [1, 3]] + ty
    # The original listing dropped this return, so the caller's tuple
    # unpacking received None.
    return image, bboxes
通过图片翻转、裁剪和仿射变换增加训练样本多样性
def parse_annotation(self, annotation):
    """Load the image and boxes described by one annotation line.

    Args:
        annotation: ``"<image path> x1,y1,x2,y2,cls x1,y1,x2,y2,cls ..."``
            with whitespace between fields.

    Returns:
        ``(image, bboxes)`` as returned by ``utils.image_preporcess`` for a
        target size of ``[self.train_input_size, self.train_input_size]``.

    Raises:
        KeyError: if the image path does not exist on disk.
    """
    fields = annotation.split()
    image_path = fields[0]
    if not os.path.exists(image_path):
        raise KeyError("%s does not exist ... " % image_path)

    image = cv2.imread(image_path)
    # One row per box: [x1, y1, x2, y2, class_id], all integers.
    bboxes = np.array(
        [[int(value) for value in box.split(',')] for box in fields[1:]])

    if self.data_aug:
        # Each augmentation works on copies, so its input is never
        # modified.
        for augment in (self.random_horizontal_flip,
                        self.random_crop,
                        self.random_translate):
            image, bboxes = augment(np.copy(image), np.copy(bboxes))

    # cv2.imread yields BGR channel order; convert to RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # Author's note: the helper resizes to the network input size without
    # a plain stretch, which would distort the image -- TODO confirm
    # against utils.image_preporcess.
    target_size = [self.train_input_size, self.train_input_size]
    image, bboxes = utils.image_preporcess(
        np.copy(image), target_size, np.copy(bboxes))
    return image, bboxes
parse_annotation(self, annotation) 解析一行 annotation,读取其中的图片和真实框(ground truth bounding boxes,而非先验框),并在 data_aug 为 True 时做数据增强。
def bbox_iou(self, boxes1, boxes2):
    """Compute intersection-over-union between boxes in centre format.

    Args:
        boxes1: Array-like whose last axis is ``(cx, cy, w, h)``.
        boxes2: Array-like in the same format; the leading axes broadcast
            against ``boxes1``.

    Returns:
        Array of IoU values with the broadcast leading shape.
    """
    first = np.array(boxes1)
    second = np.array(boxes2)

    # Areas come straight from width * height.
    first_area = first[..., 2] * first[..., 3]
    second_area = second[..., 2] * second[..., 3]

    # Corner coordinates: centre -/+ half the size.
    first_min = first[..., :2] - first[..., 2:] * 0.5
    first_max = first[..., :2] + first[..., 2:] * 0.5
    second_min = second[..., :2] - second[..., 2:] * 0.5
    second_max = second[..., :2] + second[..., 2:] * 0.5

    # Overlap rectangle; a negative extent means no overlap, clamped to 0.
    overlap_min = np.maximum(first_min, second_min)
    overlap_max = np.minimum(first_max, second_max)
    overlap_size = np.maximum(overlap_max - overlap_min, 0.0)
    overlap_area = overlap_size[..., 0] * overlap_size[..., 1]

    combined_area = first_area + second_area - overlap_area
    return overlap_area / combined_area
计算boxes1 和 boxes2的交并比
def preprocess_true_boxes(self, bboxes):
#selt.train_output_size=[52,26,13]
#self.anchor_per_scale=3
#self.num_classes=80
label = [np.zeros((self.train_output_sizes[i], self.train_output_sizes[i], self.anchor_per_scale,5 + self.num_classes)) for i in range(3)]
#shape=[52,52,3,85],[26,26,3,85],[13,13,3,85]
bboxes_xywh = [np.zeros((self.max_bbox_per_scale, 4)) for _ in range(3)]#[150,4],[150,4],[150,4]
bbox_count = np.zeros((3,))#[0,0,0]
for bbox in bboxes:
bbox_coor = bbox[:4] #coordinate 坐标
bbox_class_ind = bbox[4]
onehot = np.zeros(self.num_classes, dtype=np.float)#[0,0,0,0,0,0,0,0,0,0]
onehot[bbox_class_ind] = 1.0
uniform_distribution = np.full(self.num_classes, 1.0 / self.num_classes)
deta = 0.01
smooth_onehot = onehot * (1 - deta) + deta * uniform_distribution
bbox_xywh = np.concatenate([(bbox_coor[2:] + bbox_coor[:2]) * 0.5, bbox_coor[2:] - bbox_coor[:2]], axis=-1)
bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis]
iou = []
exist_positive = False
for i in range(3):
anchors_xywh = np.zeros((self.anchor_per_scale, 4)) #[3,4]
anchors_xywh[:, 0:2] = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32) + 0.5#坐标
#np.floor 用法举例
#n = np.array([-1.7, -2.5, -0.2, 0.6, 1.2, 2.7, 11])
#floor = np.floor(n)
#print(floor)
#输出 [ -2. -3. -1. 0. 1. 2. 11.]
anchors_xywh[:, 2:4] = self.anchors[i]#[3,3,2] bounding_box's lenth ans width
iou_scale = self.bbox_iou(bbox_xywh_scaled[i][np.newaxis, :], anchors_xywh)
iou.append(iou_scale)
iou_mask = iou_scale > 0.3
if np.any(iou_mask):
xind, yind = np.floor(bbox_xywh_scaled[i, 0:2]).astype(np.int32)
label[i][yind, xind, iou_mask, :] = 0
label[i][yind, xind, iou_mask, 0:4] = bbox_xywh
label[i][yind, xind, iou_mask, 4:5] = 1.0
label[i][yind, xind, iou_mask, 5:] = smooth_onehot
bbox_ind = int(bbox_count[i] % self.max_bbox_per_scale)
bboxes_xywh[i][bbox_ind, :4] = bbox_xywh
bbox_count[i] += 1
exist_positive = True
if not exist_positive:
best_anchor_ind = np.argmax(np.array(iou).reshape(-1), axis=-1)#取出元素最大值所对应的索引
best_detect = int(best_anchor_ind / self.anchor_per_scale)
best_anchor = int(best_anchor_ind % self.anchor_per_scale)
xind, yind = np.floor(bbox_xywh_scaled[best_detect, 0:2]).astype(np.int32)
label[best_detect][yind, xind, best_anchor, :] = 0
label[best_detect][yind, xind, best_anchor, 0:4] = bbox_xywh
label[best_detect][yind, xind, best_anchor, 4:5] = 1.0
label[best_detect][yind, xind, best_anchor, 5:] = smooth_onehot
bbox_ind = int(bbox_count[best_detect] % self.max_bbox_per_scale)
bboxes_xywh[best_detect][bbox_ind, :4] = bbox_xywh
bbox_count[best_detect] += 1
label_sbbox, label_mbbox, label_lbbox = label
sbboxes, mbboxes, lbboxes = bboxes_xywh
return label_sbbox, label_mbbox, label_lbbox, sbboxes, mbboxes, lbboxes
def __len__(self):
return self.num_batchs
举个栗子: bbox_coor: [126 34 168 76] = [x1 y1 x2 y2] 坐标 bbox_class_ind: 2 bbox_xywh: [147. 55. 42. 42.] 中心坐标+长宽
147=(126+168)/2 , 42=168-126 ,42=76-34
bbox_xywh_scaled = 1.0 * bbox_xywh[np.newaxis, :] / self.strides[:, np.newaxis]
self.strides=[8,16,32]
bbox_xywh_scaled: [[18.375 6.875 5.25 5.25 ] [ 9.1875 3.4375 2.625 2.625 ] [ 4.59375 1.71875 1.3125 1.3125 ]]
preprocess_true_boxes(self, bboxes) 的作用是将真实框(ground truth,而非先验框)转换成与三个尺度的 feature map 同 shape 的 label,便于与网络输出逐元素计算损失,实现端到端训练。
def __len__(self):
return self.num_batchs
返回batch的数量
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。