This note investigates the different roles that the decay parameter of batch normalization plays during training and testing.
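For reference, the moving statistics kept by tf.contrib.layers.batch_norm are exponential moving averages of the per-batch statistics, so decay controls how much of the old value is retained at each training step. A minimal sketch of the update rule (not the library internals):

    # applied once per training step, to both moving_mean and moving_variance:
    #   new_moving = decay * old_moving + (1 - decay) * batch_statistic
    def ema_update(old_moving, batch_statistic, decay):
        return decay * old_moving + (1 - decay) * batch_statistic

So decay=0 copies the latest batch statistics straight into the moving statistics, while decay=1 keeps them frozen at their initial values (mean 0, variance 1).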
With decay=0 (i.e., the moving statistics are fully overwritten by the batch statistics at every update):
import tensorflow as tf
import numpy as np

def func(in_put, layer_name, is_training=True):
    with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE):
        # updates_collections=None makes the moving statistics be updated
        # in place as part of the forward pass when is_training=True
        bn = tf.contrib.layers.batch_norm(inputs=in_put,
                                          decay=0,
                                          is_training=is_training,
                                          updates_collections=None)
    return bn

def main():
    with tf.Graph().as_default():
        # input_x: a single shared placeholder, so both runs see the same
        # input and the comparison is controlled
        input_x = tf.placeholder(dtype=tf.float32, shape=[1, 4, 4, 1])
        i_p = np.random.uniform(low=0, high=255, size=[1, 4, 4, 1])
        # outputs: build the train-mode and the test-mode graph,
        # stored in outputs[0] and outputs[1] respectively
        outputs = [0, 0]
        for i, is_training in enumerate([True, False]):
            outputs[i] = func(input_x, 'my', is_training=is_training)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # run train first, then run test with the 'my/BatchNorm/moving_mean:0'
            # and 'my/BatchNorm/moving_variance:0' produced by train
            for i in xrange(2):
                # variables before the train/test run
                print '\n\n-------------\n\n'
                for (x, y) in zip(tf.global_variables(), sess.run(tf.global_variables())):
                    print '\n', x, ':\n', y
                # run train/test
                t = sess.run(outputs[i], feed_dict={input_x: i_p})
                print t
                # variables after the train/test run
                for (x, y) in zip(tf.global_variables(), sess.run(tf.global_variables())):
                    print '\n', x, ':\n', y

if __name__ == "__main__":
    main()
Observation: as the output printed below shows, after the train run 'my/BatchNorm/moving_mean:0' has become [ 114.78817749] and 'my/BatchNorm/moving_variance:0' has become [ 5496.60107422].
Conclusions: 1. The train run updates these parameters, while the test run leaves them untouched; 2. the test run computes its output directly with the parameters produced by training.
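Both points follow from how the layer switches between batch statistics and the stored moving statistics depending on is_training. A minimal sketch of that behaviour (assuming beta=0 and gamma=1, as in this experiment; not the library internals):

    import numpy as np

    def batch_norm_sketch(x, state, decay, is_training, eps=0.001):
        # state persists across calls and plays the role of
        # moving_mean / moving_variance
        if is_training:
            mean, var = x.mean(), x.var()        # batch statistics
            # only the training path writes the stored statistics
            state['mean'] = decay * state['mean'] + (1 - decay) * mean
            state['var'] = decay * state['var'] + (1 - decay) * var
        else:
            # the test path reads the stored statistics and never writes them
            mean, var = state['mean'], state['var']
        return (x - mean) / np.sqrt(var + eps)

With decay=0 and the same input fed to both runs, the stored statistics end up equal to the batch statistics, which is why the train-mode and test-mode outputs in the log below come out identical.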
2017-09-29 09:08:27.739093: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
------------- # train
<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]
<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]
<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
[[[[ 0.27311635]
[-0.7590425 ]
[-0.4103117 ]
[-1.40655911]]
[[-0.25747275]
[ 0.40145767]
[ 0.46040225]
[ 1.17546725]]
[[-1.32714963]
[-1.20369065]
[-0.08007884]
[-0.7148838 ]]
[[ 1.18637156]
[ 1.85863471]
[-0.70913869]
[ 1.51287651]]]]
<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]
# After the train run, 'my/BatchNorm/moving_mean:0' has become [ 114.78817749] and
# 'my/BatchNorm/moving_variance:0' has become [ 5496.60107422].
<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 114.78817749]
<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 5496.60107422]
------------- # test
# The test run directly reuses the parameters produced by train.
<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]
<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 114.78817749]
<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 5496.60107422]
[[[[ 0.27311635]
[-0.7590425 ]
[-0.4103117 ]
[-1.40655911]]
[[-0.25747275]
[ 0.40145767]
[ 0.46040225]
[ 1.17546725]]
[[-1.32714963]
[-1.20369065]
[-0.08007884]
[-0.7148838 ]]
[[ 1.18637156]
[ 1.85863471]
[-0.70913869]
[ 1.51287651]]]]
<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]
# During the test run, 'my/BatchNorm/moving_mean:0' and
# 'my/BatchNorm/moving_variance:0' do not change.
<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 114.78817749]
<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 5496.60107422]
Process finished with exit code 0
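As a sanity check, the values the moving statistics take on after the decay=0 train run are exactly the mean and (biased) variance of the batch that was fed in, and the test output is that batch normalized by them. A minimal NumPy reproduction (assuming the layer's default epsilon=0.001 and the biased variance computed by tf.nn.moments):

    import numpy as np

    # a fresh random input; the concrete numbers will differ from the log above
    i_p = np.random.uniform(low=0, high=255, size=[1, 4, 4, 1])

    batch_mean = i_p.mean()   # what moving_mean becomes when decay=0
    batch_var = i_p.var()     # what moving_variance becomes when decay=0
    eps = 0.001

    # expected test-mode output: the input normalized with the stored moving statistics
    expected = (i_p - batch_mean) / np.sqrt(batch_var + eps)
    print batch_mean, batch_var
    print expected

For the input drawn in the run above, these statistics are the [ 114.78817749] and [ 5496.60107422] seen in the log.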
With decay=1 (i.e., the batch statistics are completely discarded and the moving statistics never change):
import tensorflow as tf
import numpy as np

def func(in_put, layer_name, is_training=True):
    with tf.variable_scope(layer_name, reuse=tf.AUTO_REUSE):
        bn = tf.contrib.layers.batch_norm(inputs=in_put,
                                          decay=1,
                                          is_training=is_training,
                                          updates_collections=None)
    return bn

def main():
    with tf.Graph().as_default():
        # input_x
        input_x = tf.placeholder(dtype=tf.float32, shape=[1, 4, 4, 1])
        i_p = np.random.uniform(low=0, high=255, size=[1, 4, 4, 1])
        # outputs
        outputs = [0, 0]
        for i, is_training in enumerate([True, False]):
            outputs[i] = func(input_x, 'my', is_training=is_training)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in xrange(2):
                print '\n\n-------------\n\n'
                for (x, y) in zip(tf.global_variables(), sess.run(tf.global_variables())):
                    print '\n', x, ':\n', y
                t = sess.run(outputs[i], feed_dict={input_x: i_p})
                print t
                for (x, y) in zip(tf.global_variables(), sess.run(tf.global_variables())):
                    print '\n', x, ':\n', y
                # print (sess.run(tf.get_variable('my/BatchNorm/beta:0')))

if __name__ == "__main__":
    main()
Observation: as the output printed below shows, with decay=1 the train run leaves 'my/BatchNorm/moving_mean:0' at [ 0.] and 'my/BatchNorm/moving_variance:0' at [ 1.]. The test run reuses these unchanged values, and as a result its output is not normalized.
Conclusions: 1. Although decay appears in both the train and the test graph, it has no effect on the train-mode output; it only controls how far the moving statistics shift when a training step finishes. 2. With decay=1 the train output is still correct (it is normalized with the batch statistics), but the stored moving statistics never move away from their initial values, so the test run that relies on them cannot perform any real normalization.
2017-09-29 09:10:34.590984: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1052] Creating TensorFlow device (/device:GPU:0) -> (device: 0, name: GeForce GTX 1070, pci bus id: 0000:01:00.0, compute capability: 6.1)
------------- # train
<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]
<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]
<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
# decay does not affect the computed output of the train run
[[[[ 1.58194923]
[ 1.092067 ]
[ 0.90377307]
[-0.54407573]]
[[-1.40420842]
[-1.2678678 ]
[-2.07936239]
[-0.7029525 ]]
[[ 0.66369891]
[ 0.8902123 ]
[-0.08132553]
[-0.80555594]]
[[ 0.52472615]
[ 0.6974256 ]
[ 0.22497487]
[ 0.30652118]]]]
<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]
# decay does affect how the parameter values change during the train run
<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]
<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
------------- # test
<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]
<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]
<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
# Because of the parameter values inherited from train, the test run's
# computation is effectively un-normalized
[[[[ 223.02729797]
[ 193.41201782]
[ 182.02893066]
[ 94.50087738]]
[[ 42.50252914]
[ 50.74484634]
[ 1.68685985]
[ 84.89615631]]
[[ 167.51550293]
[ 181.2091217 ]
[ 122.47590637]
[ 78.69338226]]
[[ 159.11407471]
[ 169.5544281 ]
[ 140.99295044]
[ 145.92274475]]]]
<tf.Variable 'my/BatchNorm/beta:0' shape=(1,) dtype=float32_ref> :
[ 0.]
<tf.Variable 'my/BatchNorm/moving_mean:0' shape=(1,) dtype=float32_ref> :
[ 0.]
<tf.Variable 'my/BatchNorm/moving_variance:0' shape=(1,) dtype=float32_ref> :
[ 1.]
Process finished with exit code 0
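To make the decay=1 failure concrete: in test mode the layer computes (x - moving_mean) / sqrt(moving_variance + eps), and since the moving statistics are still at their initial values of 0 and 1, the test output is just the raw input scaled by roughly 1/sqrt(1.001). A minimal NumPy sketch (assuming the default epsilon=0.001, with beta=0 and gamma=1 as in this experiment):

    import numpy as np

    i_p = np.random.uniform(low=0, high=255, size=[1, 4, 4, 1])

    moving_mean, moving_variance, eps = 0.0, 1.0, 0.001   # what decay=1 leaves behind
    test_out = (i_p - moving_mean) / np.sqrt(moving_variance + eps)

    # test_out is ~0.9995 * i_p: the values stay in the 0-255 range instead of being
    # zero-mean / unit-variance, matching the un-normalized test output in the log above
    print np.allclose(test_out, i_p / np.sqrt(1.001))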