In [1]:
import cifar10_input
import tensorflow as tf
import numpy as np
import time
import math
import os

Global parameters


In [2]:
max_steps = 3000
batch_size = 128
data_dir = 'data/cifar10/cifar-10-batches-bin/'
model_dir = 'model/_cifar10_v2/'

Initializing the weights

If needed, an L2 loss is attached to the weights. So that these terms can be included later when the network's total loss is computed, they are all stored in a single shared collection.
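The cell defining this helper is missing from the notebook, so here is a minimal sketch of variable_with_weight_loss consistent with how it is called in the cells below. The truncated-normal initializer and the 'losses' collection name follow the standard TensorFlow CIFAR-10 tutorial; treat the details as assumptions.

In [ ]:
def variable_with_weight_loss(shape, stddev, lambda_value):
    # Initialize the weights from a truncated normal distribution.
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    if lambda_value is not None:
        # L2 penalty scaled by lambda_value, stored in the shared 'losses'
        # collection so loss() can tf.add_n() all terms together later.
        weight_loss = tf.multiply(tf.nn.l2_loss(var), lambda_value, name='weight_loss')
        tf.add_to_collection('losses', weight_loss)
    return var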

Loading the data

We use cifar10_input to fetch the data. This file comes from the TensorFlow GitHub repository and can be downloaded and used directly. If you use the distorted_inputs method, the data you get back has been augmented: images are randomly cropped, flipped, and have their brightness and contrast adjusted. This diversifies our training data.

We get back tensors representing one batch of batch_size examples, and running them repeatedly reads successive batches.


In [3]:
X_train, y_train = cifar10_input.distorted_inputs(data_dir, batch_size)


Filling queue with 20000 CIFAR images before starting to train. This will take a few minutes.

In [4]:
X_test, y_test = cifar10_input.inputs(eval_data=True, data_dir=data_dir, batch_size=batch_size)

In [6]:
image_holder = tf.placeholder(tf.float32, [batch_size, 24, 24, 3])  # cifar10_input yields 24x24 RGB crops
label_holder = tf.placeholder(tf.int32, [batch_size])

The first convolutional layer

As before, we use a 5x5 kernel with 3 input channels (input_channel) and 64 output channels (output_channel). We do not regularize the first layer's parameters, so lambda_value is set to 0. There is a small trick here: the pooling layer uses a 3x3 ksize but a 2x2 stride, and this overlapping pooling increases the richness of the data. Finally we apply LRN. LRN first appeared in the CNN paper Alex Krizhevsky wrote for the ImageNet competition (AlexNet), where he explained that the LRN layer mimics the "lateral inhibition" mechanism of biological neural systems: it creates competition among the activities of local neurons, so that the relatively large responses become relatively larger while neurons with smaller feedback are suppressed, improving the model's ability to generalize. However, the later VGGNet paper compared models with and without LRN and found that LRN does not improve performance. We still include it here, following the AlexNet design.
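For reference, tf.nn.lrn(input, depth_radius, bias, alpha, beta) divides each activation by a function of the squared activations in neighboring channels. A rough numpy sketch of the formula follows; lrn_sketch is an illustrative name, not part of the notebook.

In [ ]:
def lrn_sketch(a, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75):
    # a has shape [batch, height, width, channels]; each value is divided by
    # (bias + alpha * sum of squares over nearby channels) ** beta.
    out = np.empty_like(a)
    channels = a.shape[-1]
    for i in range(channels):
        lo, hi = max(0, i - depth_radius), min(channels, i + depth_radius + 1)
        denom = (bias + alpha * np.sum(np.square(a[..., lo:hi]), axis=-1)) ** beta
        out[..., i] = a[..., i] / denom
    return out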


In [7]:
weight1 = variable_with_weight_loss([5, 5, 3, 64], stddev=0.05, lambda_value=0)
kernel1 = tf.nn.conv2d(image_holder, weight1, [1, 1, 1, 1], padding='SAME')
bias1 = tf.Variable(tf.constant(0.0, shape=[64]))
conv1 = tf.nn.relu(tf.nn.bias_add(kernel1, bias1))
pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

The second convolutional layer

  1. The input is 64 channels, and the output is again 64 channels
  2. The bias is initialized to 0.1
  3. The order of max pooling and LRN is swapped: LRN comes first, then max pooling

But why this is done, I honestly have no idea.

Need to read more papers.


In [8]:
weight2 = variable_with_weight_loss(shape=[5, 5, 64, 64], stddev=5e-2, lambda_value=0.0)
kernel2 = tf.nn.conv2d(norm1, weight2, strides=[1, 1, 1, 1], padding='SAME')
bias2 = tf.Variable(tf.constant(0.1, shape=[64]))
conv2 = tf.nn.relu(tf.nn.bias_add(kernel2, bias2))
norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001/9.0, beta=0.75)
pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

The first fully connected layer

  1. Flatten the output of the convolutional layers
  2. Fully connect it to a new hidden layer with 384 nodes
  3. The stddev of the normal distribution is set to 0.04, and the bias to 0.1
  4. Importantly, here we also set the weight-loss lambda_value to 0.04

In [9]:
flatten = tf.reshape(pool2, [batch_size, -1])
dim = flatten.get_shape()[1].value
weight3 = variable_with_weight_loss(shape=[dim, 384], stddev=0.04, lambda_value=0.04)
bias3 = tf.Variable(tf.constant(0.1, shape=[384]))
local3 = tf.nn.relu(tf.matmul(flatten, weight3) + bias3)

The second fully connected layer

  1. Reduce to 192 nodes, half as many

In [10]:
weight4 = variable_with_weight_loss(shape=[384, 192], stddev=0.04, lambda_value=0.04)
bias4 = tf.Variable(tf.constant(0.1, shape=[192]))
local4 = tf.nn.relu(tf.matmul(local3, weight4) + bias4)

The output layer

  1. There are 10 classes at the end

In [11]:
weight5 = variable_with_weight_loss(shape=[192, 10], stddev=1/192.0, lambda_value=0.0)
bias5 = tf.Variable(tf.constant(0.0, shape=[10]))
logits = tf.add(tf.matmul(local4, weight5), bias5)

In [12]:
def loss(logits, labels):
    labels = tf.cast(labels, tf.int64)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels,
        name='cross_entropy_per_example'
    )
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)

    # Total loss = cross-entropy plus the L2 weight losses collected in 'losses'.
    return tf.add_n(tf.get_collection('losses'), name='total_loss')

In [13]:
loss = loss(logits, label_holder)  # note: rebinds the name 'loss' from the function to the total-loss tensor

In [14]:
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

We use in_top_k to report whether each prediction falls in the top k; here k is 1. Top 5 is also commonly used.


In [15]:
top_k_op = tf.nn.in_top_k(logits, label_holder, 1)

In [16]:
sess = tf.InteractiveSession()

In [ ]:
saver = tf.train.Saver()

In [17]:
tf.global_variables_initializer().run()

Start the thread queues that cifar10_input needs; their main purpose is image augmentation. A total of 16 threads are used here to process the images.


In [18]:
tf.train.start_queue_runners()


Out[18]:
[<Thread(Thread-4, started daemon 123145336750080)>,
 <Thread(Thread-5, started daemon 123145340956672)>,
 <Thread(Thread-6, started daemon 123145345163264)>,
 <Thread(Thread-7, started daemon 123145349369856)>,
 <Thread(Thread-8, started daemon 123145353576448)>,
 <Thread(Thread-9, started daemon 123145357783040)>,
 <Thread(Thread-10, started daemon 123145361989632)>,
 <Thread(Thread-11, started daemon 123145366196224)>,
 <Thread(Thread-12, started daemon 123145370402816)>,
 <Thread(Thread-13, started daemon 123145374609408)>,
 <Thread(Thread-14, started daemon 123145378816000)>,
 <Thread(Thread-15, started daemon 123145383022592)>,
 <Thread(Thread-16, started daemon 123145387229184)>,
 <Thread(Thread-17, started daemon 123145391435776)>,
 <Thread(Thread-18, started daemon 123145395642368)>,
 <Thread(Thread-19, started daemon 123145399848960)>,
 <Thread(Thread-20, started daemon 123145404055552)>,
 <Thread(Thread-21, started daemon 123145408262144)>,
 <Thread(Thread-22, started daemon 123145412468736)>,
 <Thread(Thread-23, started daemon 123145416675328)>,
 <Thread(Thread-24, started daemon 123145420881920)>,
 <Thread(Thread-25, started daemon 123145425088512)>,
 <Thread(Thread-26, started daemon 123145429295104)>,
 <Thread(Thread-27, started daemon 123145433501696)>,
 <Thread(Thread-28, started daemon 123145437708288)>,
 <Thread(Thread-29, started daemon 123145441914880)>,
 <Thread(Thread-30, started daemon 123145446121472)>,
 <Thread(Thread-31, started daemon 123145450328064)>,
 <Thread(Thread-32, started daemon 123145454534656)>,
 <Thread(Thread-33, started daemon 123145458741248)>,
 <Thread(Thread-34, started daemon 123145462947840)>,
 <Thread(Thread-35, started daemon 123145467154432)>,
 <Thread(Thread-36, started daemon 123145471361024)>,
 <Thread(Thread-37, started daemon 123145475567616)>]

Before each step, we first run X_train and y_train to fetch a batch of batch_size training examples, then feed them into train_op and loss to train on those samples. Every 10 iterations, some useful statistics are printed.


In [ ]:
for step in range(max_steps):
    start_time = time.time()
    image_batch, label_batch = sess.run([X_train, y_train])
    _, loss_value = sess.run([train_op, loss], 
                             feed_dict={image_holder: image_batch, label_holder: label_batch})
    duration = time.time() - start_time
    if step % 10 == 0:
        examples_per_sec = batch_size / duration
        sec_this_batch = float(duration)
        
        format_str = ('step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
        print(format_str % (step, loss_value, examples_per_sec, sec_this_batch))

In [21]:
saver.save(sess, save_path=os.path.join(model_dir, 'model.chpt'), global_step=max_steps)


Out[21]:
'model.chpt-3000'
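To resume training or evaluate from disk later, the checkpoint can be restored; a minimal sketch, assuming the graph above has been rebuilt in the current session:

In [ ]:
# Restore the most recent checkpoint saved under model_dir (sketch).
ckpt = tf.train.latest_checkpoint(model_dir)
saver.restore(sess, ckpt)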

In [ ]:
num_examples = 10000
num_iter = int(math.ceil(num_examples / batch_size))
true_count = 0
total_sample_count = num_iter * batch_size
step = 0
while step < num_iter:
    image_batch, label_batch = sess.run([X_test, y_test])
    predictions = sess.run([top_k_op], 
                           feed_dict={image_holder: image_batch, label_holder: label_batch})
    true_count += np.sum(predictions)
    step += 1

In [ ]:
precision = true_count / total_sample_count
print("Precision @ 1 = %.3f" % precision)

In [ ]:
sess.close()
