Building on Tuning Record 16, two more residual blocks were added, and the model was again tested on the CIFAR-10 dataset.
The basic principle of the adaptively parametric ReLU (APReLU) activation function is as follows: positive features pass through unchanged, while negative features are scaled by channel-wise coefficients that a small embedded subnetwork learns from global statistics of the input feature map.
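In place of a figure, the computation can be stated compactly (notation is mine, reconstructed from the Keras code below):

$$y = \max(x, 0) + \boldsymbol{\alpha} \odot \min(x, 0),$$

$$\boldsymbol{\alpha} = \sigma\Big(\mathrm{BN}\big(W_2\,\mathrm{ReLU}\big(\mathrm{BN}\big(W_1\,[\mathrm{GAP}(\min(x,0));\,\mathrm{GAP}(\max(x,0))]\big)\big)\big)\Big) \in (0,1)^{C},$$

where GAP is global average pooling over the spatial dimensions, $\sigma$ is the sigmoid, and $\boldsymbol{\alpha}$ is reshaped to $1\times1\times C$ so that each channel gets its own slope for the negative part.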
Keras program:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 14 04:17:45 2020
Implemented using TensorFlow 1.0.1 and Keras 2.2.1
Minghang Zhao, Shisheng Zhong, Xuyun Fu, Baoping Tang, Shaojiang Dong, Michael Pecht,
Deep Residual Networks with Adaptively Parametric Rectifier Linear Units for Fault Diagnosis,
IEEE Transactions on Industrial Electronics, 2020, DOI: 10.1109/TIE.2020.2972458
@author: Minghang Zhao
"""
from __future__ import print_function
import keras
import numpy as np
from keras.datasets import cifar10
from keras.layers import Dense, Conv2D, BatchNormalization, Activation, Minimum
from keras.layers import AveragePooling2D, Input, GlobalAveragePooling2D, Concatenate, Reshape
from keras.regularizers import l2
from keras import backend as K
from keras.models import Model
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
K.set_learning_phase(1)
# The data, split between train and test sets
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Scale to [0, 1] and center the data on the training-set mean
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
x_test = x_test-np.mean(x_train)
x_train = x_train-np.mean(x_train)
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)
# Schedule the learning rate: multiply by 0.1 every 1500 epochs
def scheduler(epoch):
    if epoch % 1500 == 0 and epoch != 0:
        lr = K.get_value(model.optimizer.lr)
        K.set_value(model.optimizer.lr, lr * 0.1)
        print("lr changed to {}".format(lr * 0.1))
    return K.get_value(model.optimizer.lr)
# An adaptively parametric rectifier linear unit (APReLU)
def aprelu(inputs):
    # get the number of channels
    channels = inputs.get_shape().as_list()[-1]
    # get an all-zero feature map of the same shape
    zeros_input = keras.layers.subtract([inputs, inputs])
    # get a feature map with only the positive features
    pos_input = Activation('relu')(inputs)
    # get a feature map with only the negative features
    neg_input = Minimum()([inputs, zeros_input])
    # define a small network to obtain the channel-wise scaling coefficients
    scales_p = GlobalAveragePooling2D()(pos_input)
    scales_n = GlobalAveragePooling2D()(neg_input)
    scales = Concatenate()([scales_n, scales_p])
    scales = Dense(channels//16, activation='linear', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(scales)
    scales = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(scales)
    scales = Activation('relu')(scales)
    scales = Dense(channels, activation='linear', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(scales)
    scales = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(scales)
    scales = Activation('sigmoid')(scales)
    scales = Reshape((1,1,channels))(scales)
    # apply the parametric ReLU: keep the positive part, scale the negative part
    neg_part = keras.layers.multiply([scales, neg_input])
    return keras.layers.add([pos_input, neg_part])
# Residual block
def residual_block(incoming, nb_blocks, out_channels, downsample=False,
                   downsample_strides=2):
    residual = incoming
    in_channels = incoming.get_shape().as_list()[-1]
    for i in range(nb_blocks):
        identity = residual
        if not downsample:
            downsample_strides = 1
        residual = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(residual)
        residual = aprelu(residual)
        residual = Conv2D(out_channels, 3, strides=(downsample_strides, downsample_strides),
                          padding='same', kernel_initializer='he_normal',
                          kernel_regularizer=l2(1e-4))(residual)
        residual = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(residual)
        residual = aprelu(residual)
        residual = Conv2D(out_channels, 3, padding='same', kernel_initializer='he_normal',
                          kernel_regularizer=l2(1e-4))(residual)
        # Downsample the identity branch with strided pooling
        if downsample_strides > 1:
            identity = AveragePooling2D(pool_size=(1,1), strides=(2,2))(identity)
        # Zero-pad the channels of the identity branch to match
        if in_channels != out_channels:
            zeros_identity = keras.layers.subtract([identity, identity])
            identity = keras.layers.concatenate([identity, zeros_identity])
            in_channels = out_channels
        residual = keras.layers.add([residual, identity])
    return residual
# define and train a model
inputs = Input(shape=(32, 32, 3))
net = Conv2D(16, 3, padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(inputs)
net = residual_block(net, 1, 32, downsample=False)
net = residual_block(net, 1, 32, downsample=True)
net = residual_block(net, 1, 32, downsample=False)
net = residual_block(net, 1, 64, downsample=True)
net = residual_block(net, 1, 64, downsample=False)
net = BatchNormalization(momentum=0.9, gamma_regularizer=l2(1e-4))(net)
net = aprelu(net)
net = GlobalAveragePooling2D()(net)
outputs = Dense(10, activation='softmax', kernel_initializer='he_normal', kernel_regularizer=l2(1e-4))(net)
model = Model(inputs=inputs, outputs=outputs)
sgd = optimizers.SGD(lr=0.1, decay=0., momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
# data augmentation
datagen = ImageDataGenerator(
    # randomly rotate images in the range (degrees, 0 to 30)
    rotation_range=30,
    # range for random zoom
    zoom_range=0.2,
    # shear angle in counter-clockwise direction in degrees
    shear_range=30,
    # randomly flip images horizontally
    horizontal_flip=True,
    # randomly shift images horizontally (fraction of total width)
    width_shift_range=0.125,
    # randomly shift images vertically (fraction of total height)
    height_shift_range=0.125)
reduce_lr = LearningRateScheduler(scheduler)
# fit the model on batches generated by datagen.flow()
model.fit_generator(datagen.flow(x_train, y_train, batch_size=100),
                    validation_data=(x_test, y_test), epochs=5000,
                    verbose=1, callbacks=[reduce_lr], workers=4)
# get results
K.set_learning_phase(0)
DRSN_train_score = model.evaluate(x_train, y_train, batch_size=100, verbose=0)
print('Train loss:', DRSN_train_score[0])
print('Train accuracy:', DRSN_train_score[1])
DRSN_test_score = model.evaluate(x_test, y_test, batch_size=100, verbose=0)
print('Test loss:', DRSN_test_score[0])
print('Test accuracy:', DRSN_test_score[1])
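For reference, the scheduler above yields a piecewise-constant learning-rate schedule; the epochs logged below still precede the first drop at epoch 1500. A minimal standalone sketch of that schedule (my illustration, reusing only the interval and factor from the code):

# The learning rate starts at 0.1 and is divided by 10 at epochs 1500, 3000 and 4500,
# so a full 5000-epoch run sees 0.1 -> 0.01 -> 0.001 -> 0.0001.
lr = 0.1
for epoch in range(5000):
    if epoch % 1500 == 0 and epoch != 0:
        lr *= 0.1
        print("epoch {}: lr -> {:.4g}".format(epoch, lr))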
The experimental results are as follows (some of the equal signs in the progress bars were removed for readability):
Epoch 1158/5000
500/500 [=] - 14s 28ms/step - loss: 0.7799 - acc: 0.8241 - val_loss: 0.7069 - val_acc: 0.8525
Epoch 1159/5000
500/500 [=] - 14s 28ms/step - loss: 0.7801 - acc: 0.8253 - val_loss: 0.7056 - val_acc: 0.8543
Epoch 1160/5000
500/500 [=] - 14s 28ms/step - loss: 0.7777 - acc: 0.8258 - val_loss: 0.7226 - val_acc: 0.8501
Epoch 1161/5000
500/500 [=] - 14s 28ms/step - loss: 0.7806 - acc: 0.8246 - val_loss: 0.7263 - val_acc: 0.8458
Epoch 1162/5000
500/500 [=] - 14s 28ms/step - loss: 0.7793 - acc: 0.8243 - val_loss: 0.7063 - val_acc: 0.8522
Epoch 1163/5000
500/500 [=] - 14s 28ms/step - loss: 0.7783 - acc: 0.8263 - val_loss: 0.7391 - val_acc: 0.8430
Epoch 1164/5000
500/500 [=] - 14s 28ms/step - loss: 0.7753 - acc: 0.8263 - val_loss: 0.7254 - val_acc: 0.8465
Epoch 1165/5000
500/500 [=] - 14s 28ms/step - loss: 0.7838 - acc: 0.8232 - val_loss: 0.7287 - val_acc: 0.8479
Epoch 1166/5000
500/500 [=] - 14s 28ms/step - loss: 0.7827 - acc: 0.8229 - val_loss: 0.7269 - val_acc: 0.8464
Epoch 1167/5000
500/500 [=] - 14s 28ms/step - loss: 0.7782 - acc: 0.8262 - val_loss: 0.7305 - val_acc: 0.8438
Epoch 1168/5000
500/500 [=] - 14s 28ms/step - loss: 0.7778 - acc: 0.8251 - val_loss: 0.7065 - val_acc: 0.8501
Epoch 1169/5000
500/500 [=] - 14s 28ms/step - loss: 0.7768 - acc: 0.8249 - val_loss: 0.7039 - val_acc: 0.8540
Epoch 1170/5000
500/500 [=] - 14s 28ms/step - loss: 0.7797 - acc: 0.8261 - val_loss: 0.7052 - val_acc: 0.8547
Epoch 1171/5000
500/500 [=] - 14s 29ms/step - loss: 0.7799 - acc: 0.8245 - val_loss: 0.6993 - val_acc: 0.8564
Epoch 1172/5000
500/500 [=] - 14s 29ms/step - loss: 0.7768 - acc: 0.8253 - val_loss: 0.7237 - val_acc: 0.8473
So far, there is no sign of overfitting.
It seems that setting the number of neurons in the first fully-connected layer of the APReLU activation function to 1/16 of the channel count is a very effective way to avoid overfitting. As I recall, the Squeeze-and-Excitation network does the same thing.
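To make the saving concrete, here is a rough weight count (my arithmetic, ignoring biases and BatchNormalization) for the two Dense layers inside aprelu, which take the concatenated 2C-dimensional pooled vector, bottleneck it to C/16, and expand back to C:

# Compare the C/16 bottleneck used above with a full-width (no reduction) variant.
def aprelu_dense_weights(channels, reduction):
    hidden = channels // reduction
    # first Dense: 2*C inputs (concatenated pooled vectors) -> hidden units
    # second Dense: hidden units -> C scaling coefficients
    return 2 * channels * hidden + hidden * channels

for c in (16, 32, 64):
    print("C={:3d}: r=16 -> {:5d} weights, r=1 -> {:6d} weights".format(
        c, aprelu_dense_weights(c, 16), aprelu_dense_weights(c, 1)))
# At C=64 this is 768 weights versus 12288, a 16x reduction in the attention-style branch.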
Minghang Zhao, Shisheng Zhong, Xuyun Fu, Baoping Tang, Shaojiang Dong, Michael Pecht, Deep Residual Networks with Adaptively Parametric Rectifier Linear Units for Fault Diagnosis, IEEE Transactions on Industrial Electronics, 2020, DOI: 10.1109/TIE.2020.2972458
https://ieeexplore.ieee.org/document/8998530
————————————————
Copyright notice: this is an original article by CSDN blogger "dangqing1988", released under the CC 4.0 BY-SA license; reposts must include the original source link and this notice.
Original link: https://blog.csdn.net/dangqing1988/article/details/105853603