In [1]:
import time
import tensorflow as tf
import tensorflow.contrib.layers as layers
from tensorflow.examples.tutorials.mnist import input_data

In [2]:
# 10 个数字 0 - 9
N_classes = 10

mnist = input_data.read_data_sets("../data/mnist", one_hot=True)


Extracting ../data/mnist/train-images-idx3-ubyte.gz
Extracting ../data/mnist/train-labels-idx1-ubyte.gz
Extracting ../data/mnist/t10k-images-idx3-ubyte.gz
Extracting ../data/mnist/t10k-labels-idx1-ubyte.gz

In [3]:
# 参数
lr = .001
BATCH_SIZE = 128
SKIP_STEP = 10
DROPOUT = 0.75
N_EPOCHS = 1

In [4]:
# 定义图

with tf.name_scope('data'):
    X = tf.placeholder(tf.float32, [None, 784], name='X')
    Y = tf.placeholder(tf.float32, [None, 10], name='Y')  
    
dropout = tf.placeholder(tf.float32, name='dropout')

# model1: conv -> relu -> pool -> conv -> relu -> pool -> fully connected ->  softmax

# ???
global_step = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
alpha = .2

with tf.variable_scope('conv1'):
    # batch_size 个图片 28 * 28 * 1
    images = tf.reshape(X, shape=[-1, 28, 28, 1])
    # 32 个 filters 5 * 5 * 1
    conv1 = tf.layers.conv2d(
        images, 
        32, # kernel_number
        5, # kernel_size
        strides=1, 
        padding='same',
        kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
        bias_initializer=tf.random_normal_initializer()
    )
    # => batch_size * 28 * 28 * 32
    leaky_relu1 = tf.maximum(alpha * conv1, conv1)
    # drop_out1 = tf.nn.dropout(leaky_relu1, drop_out)
    
    pool1 = tf.layers.max_pooling2d(
        leaky_relu1,
        2, # pooling_size
        strides=2,
        padding='same'
    )
    # => batch_size * 14 * 14 * 32
    
with tf.variable_scope('conv2'):
    conv2 = tf.layers.conv2d(
        pool1,
        64,
        5,
        strides=1,
        padding='same',
        kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
        bias_initializer=tf.random_normal_initializer()
    )
    leaky_relu2 = tf.maximum(alpha * conv2, conv2)
    # => batch_size 14 * 14 * 64
    pool2 = tf.layers.max_pooling2d(
        leaky_relu2,
        2, # pooling_size
        strides=2,
        padding='same'
    )
    # => batch_size 7 * 7 * 64

with tf.variable_scope('fc'):
    # flatten
    flat = tf.reshape(pool2, (-1, 7 * 7 * 64))
    fc = tf.layers.dense(
        flat,
        1024
    )
    logits = tf.layers.dense(
        fc,
        10
    )

with tf.name_scope('loss'):
    entropy = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits)
    loss = tf.reduce_mean(entropy, name='loss')

with tf.name_scope('summaries'):
    tf.summary.scalar('loss', loss)
    tf.summary.histogram('histogram_loss', loss)
    summary_op = tf.summary.merge_all()

optimizer = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step)

In [5]:
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
#     saver = tf.train.Saver()
    # to visualize using TensorBoard
    writer = tf.summary.FileWriter('./graphs/convnet', sess.graph)
#     ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/convnet_mnist/checkpoint'))
    # if that checkpoint exists, restore from checkpoint
#     if ckpt and ckpt.model_checkpoint_path:
#         saver.restore(sess, ckpt.model_checkpoint_path)
    
    initial_step = global_step.eval()

    start_time = time.time()
    n_batches = int(mnist.train.num_examples / BATCH_SIZE)

    total_loss = 0.0
    for index in range(initial_step, n_batches * N_EPOCHS): # train the model n_epochs times
        X_batch, Y_batch = mnist.train.next_batch(BATCH_SIZE)
        _, loss_batch, summary = sess.run([optimizer, loss, summary_op], 
                                feed_dict={X: X_batch, Y:Y_batch, dropout: DROPOUT}) 
        writer.add_summary(summary, global_step=index)
        total_loss += loss_batch
        if (index + 1) % SKIP_STEP == 0:
            print('Average loss at step {}: {:5.1f}'.format(index + 1, total_loss / SKIP_STEP))
            total_loss = 0.0
#             saver.save(sess, 'checkpoints/convnet_mnist/mnist-convnet', index)
    
    print("Optimization Finished!") # should be around 0.35 after 25 epochs
    print("Total time: {0} seconds".format(time.time() - start_time))
    
    # test the model
    n_batches = int(mnist.test.num_examples/BATCH_SIZE)
    total_correct_preds = 0
    for i in range(n_batches):
        X_batch, Y_batch = mnist.test.next_batch(BATCH_SIZE)
        _, loss_batch, logits_batch = sess.run([optimizer, loss, logits], 
                                        feed_dict={X: X_batch, Y:Y_batch, dropout: 1.0}) 
        preds = tf.nn.softmax(logits_batch)
        correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(Y_batch, 1))
        accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))
        total_correct_preds += sess.run(accuracy)   
    
    print("Accuracy {0}".format(total_correct_preds/mnist.test.num_examples))


Average loss at step 10:  12.0
Average loss at step 20:   4.5
Average loss at step 30:   2.8
Average loss at step 40:   2.5
Average loss at step 50:   2.4
Average loss at step 60:   2.3
Average loss at step 70:   2.2
Average loss at step 80:   2.0
Average loss at step 90:   1.5
Average loss at step 100:   0.9
Average loss at step 110:   0.6
Average loss at step 120:   0.5
Average loss at step 130:   0.4
Average loss at step 140:   0.5
Average loss at step 150:   0.4
Average loss at step 160:   0.4
Average loss at step 170:   0.3
Average loss at step 180:   0.4
Average loss at step 190:   0.4
Average loss at step 200:   0.3
Average loss at step 210:   0.3
Average loss at step 220:   0.3
Average loss at step 230:   0.3
Average loss at step 240:   0.3
Average loss at step 250:   0.2
Average loss at step 260:   0.3
Average loss at step 270:   0.2
Average loss at step 280:   0.2
Average loss at step 290:   0.2
Average loss at step 300:   0.2
Average loss at step 310:   0.2
Average loss at step 320:   0.2
Average loss at step 330:   0.2
Average loss at step 340:   0.2
Average loss at step 350:   0.2
Average loss at step 360:   0.2
Average loss at step 370:   0.2
Average loss at step 380:   0.2
Average loss at step 390:   0.2
Average loss at step 400:   0.2
Average loss at step 410:   0.1
Average loss at step 420:   0.2
Optimization Finished!
Total time: 126.58293843269348 seconds
Accuracy 0.9622

In [ ]: