In [1]:
import cifar10_input
import tensorflow as tf
import numpy as np
import time
import math
import os
In [2]:
max_steps = 3000
batch_size = 128
data_dir = 'data/cifar10/cifar-10-batches-bin/'
model_dir = 'model/_cifar10_v2/'
We use cifar10_input to load the data. This file comes from the TensorFlow GitHub repository and can be downloaded and used directly. If the distorted_inputs method is used, the returned data has been augmented: images are randomly cropped, flipped, and have their brightness and contrast adjusted, which diversifies the training data.
We get a tensor holding one batch of batch_size examples, and successive batches can be read iteratively (see the shape check after the next cell).
In [3]:
X_train, y_train = cifar10_input.distorted_inputs(data_dir, batch_size)
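A quick sanity check (a sketch, not part of the original notebook; cifar10_input crops CIFAR-10's 32x32 images to 24x24, matching the placeholder shape defined below):
In [ ]:
# X_train: [batch_size, 24, 24, 3] float32 images; y_train: [batch_size] int32 labels.
print(X_train.get_shape())  # (128, 24, 24, 3)
print(y_train.get_shape())  # (128,)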
In [4]:
X_test, y_test = cifar10_input.inputs(eval_data=True, data_dir=data_dir, batch_size=batch_size)
In [6]:
image_holder = tf.placeholder(tf.float32, [batch_size, 24, 24, 3])
label_holder = tf.placeholder(tf.int32, [batch_size])
As before, we use a 5x5 convolution kernel with 3 input channels and 64 output channels. We do not regularize the first layer's parameters, so lambda_value is set to 0. One small trick is used in the pooling layer: a 3x3 ksize with a 2x2 stride, so the pooling windows overlap, which increases the richness of the data. Finally, LRN is applied. LRN first appeared in Alex Krizhevsky's CNN paper for the ImageNet competition, where he explained that the LRN layer mimics the "lateral inhibition" mechanism of biological neural systems: it creates competition among the activities of local neurons, making relatively large responses even larger while suppressing neurons with smaller responses, which improves the model's generalization. However, the later VGGNet paper compared models with and without LRN and found that LRN does not improve performance. We still include it here, following the AlexNet design.
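The helper variable_with_weight_loss is used in the cells below but is never defined in this notebook. A minimal sketch, assuming the conventional definition from the TensorFlow CIFAR-10 tutorial: it creates a truncated-normal variable and, when lambda_value is non-zero, adds an L2 penalty on it to the 'losses' collection, which the loss function defined later sums into the total loss.
In [ ]:
def variable_with_weight_loss(shape, stddev, lambda_value):
    # Variable initialized from a truncated normal distribution.
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    if lambda_value is not None and lambda_value != 0:
        # L2 weight-decay term, scaled by lambda_value and collected
        # into 'losses' so loss() can add it to the cross-entropy.
        weight_loss = tf.multiply(tf.nn.l2_loss(var), lambda_value,
                                  name='weight_loss')
        tf.add_to_collection('losses', weight_loss)
    return var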
In [7]:
weight1 = variable_with_weight_loss([5, 5, 3, 64], stddev=0.05, lambda_value=0)
kernel1 = tf.nn.conv2d(image_holder, weight1, [1, 1, 1, 1], padding='SAME')
bias1 = tf.Variable(tf.constant(0.0, shape=[64]))
conv1 = tf.nn.relu(tf.nn.bias_add(kernel1, bias1))
pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
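For reference, tf.nn.lrn normalizes each activation across neighboring channels (a brief note; this matches the formulation in the TensorFlow documentation):

$$b^{i}_{x,y} = a^{i}_{x,y} \Big/ \Big(k + \alpha \sum_{j=\max(0,\,i-r)}^{\min(N-1,\,i+r)} \big(a^{j}_{x,y}\big)^2\Big)^{\beta}$$

where r is depth_radius (4 here), k is bias (1.0), alpha is 0.001/9.0, beta is 0.75, and N is the number of channels.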
In [8]:
weight2 = variable_with_weight_loss(shape=[5, 5, 64, 64], stddev=5e-2, lambda_value=0.0)
kernel2 = tf.nn.conv2d(norm1, weight2, strides=[1, 1, 1, 1], padding='SAME')
bias2 = tf.Variable(tf.constant(0.1, shape=[64]))
conv2 = tf.nn.relu(tf.nn.bias_add(kernel2, bias2))
# Unlike the first layer, LRN is applied before pooling here.
norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001/9.0, beta=0.75)
pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
In [9]:
# Flatten pool2's feature maps into [batch_size, dim] for the fully connected layers.
flattened = tf.reshape(pool2, [batch_size, -1])
dim = flattened.get_shape()[1].value
weight3 = variable_with_weight_loss(shape=[dim, 384], stddev=0.04, lambda_value=0.04)
bias3 = tf.Variable(tf.constant(0.1, shape=[384]))
local3 = tf.nn.relu(tf.matmul(flattened, weight3) + bias3)
In [10]:
weight4 = variable_with_weight_loss(shape=[384, 192], stddev=0.04, lambda_value=0.04)
bias4 = tf.Variable(tf.constant(0.1, shape=[192]))
local4 = tf.nn.relu(tf.matmul(local3, weight4) + bias4)
In [11]:
weight5 = variable_with_weight_loss(shape=[192, 10], stddev=1/192.0, lambda_value=0.0)
bias5 = tf.Variable(tf.constant(0.0, shape=[10]))
logits = tf.add(tf.matmul(local4, weight5), bias5)
In [12]:
def loss(logits, labels):
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels,
        name='cross_entropy_per_example')
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    # Sum the cross-entropy with any L2 weight losses collected in 'losses'.
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
In [13]:
# Note: this rebinds the name `loss` from the function to the total-loss tensor.
loss = loss(logits, label_holder)
In [14]:
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
We use in_top_k to output the top-k accuracy; top-1 is used here. Top-5 is also common.
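A toy illustration of the in_top_k semantics (not part of the original notebook): the op returns one boolean per example, indicating whether the true label is among the k highest logits.
In [ ]:
# Two examples, three classes.
toy_logits = tf.constant([[0.1, 0.8, 0.1],
                          [0.5, 0.2, 0.3]])
toy_labels = tf.constant([1, 2])
tf.nn.in_top_k(toy_logits, toy_labels, 1)  # evaluates to [True, False]
tf.nn.in_top_k(toy_logits, toy_labels, 2)  # evaluates to [True, True]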
In [15]:
top_k_op = tf.nn.in_top_k(logits, label_holder, 1)
In [16]:
sess = tf.InteractiveSession()
In [ ]:
saver = tf.train.Saver()
In [17]:
tf.global_variables_initializer().run()
Start the thread queues needed by cifar10_input. Their main purpose is image data augmentation; a total of 16 threads are used to process the images.
In [18]:
tf.train.start_queue_runners()
Before each step, we run X_train and y_train to fetch a batch of batch_size training examples, then feed them into train_op and loss to train on that batch. Every 10 steps, some useful statistics are printed.
In [ ]:
for step in range(max_steps):
    start_time = time.time()
    image_batch, label_batch = sess.run([X_train, y_train])
    _, loss_value = sess.run([train_op, loss],
                             feed_dict={image_holder: image_batch, label_holder: label_batch})
    duration = time.time() - start_time
    if step % 10 == 0:
        examples_per_sec = batch_size / duration
        sec_this_batch = float(duration)
        format_str = ('step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
        print(format_str % (step, loss_value, examples_per_sec, sec_this_batch))
In [21]:
saver.save(sess, save_path=os.path.join(model_dir, 'model.ckpt'), global_step=max_steps)
In [ ]:
num_examples = 10000
num_iter = int(math.ceil(num_examples / batch_size))
true_count = 0
total_sample_count = num_iter * batch_size
step = 0
while step < num_iter:
    image_batch, label_batch = sess.run([X_test, y_test])
    predictions = sess.run([top_k_op],
                           feed_dict={image_holder: image_batch, label_holder: label_batch})
    true_count += np.sum(predictions)
    step += 1
In [ ]:
precision = true_count / total_sample_count
print("Precision @ 1 = %.3f" % precision)
In [ ]:
sess.close()