In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data
%matplotlib inline

In [2]:
# Get data
mnist = input_data.read_data_sets("/tmp/data")


Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz
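
Note that `read_data_sets` is called with its default `one_hot=False`, so the labels come back as integer class IDs rather than one-hot vectors, which is exactly what the `sparse_softmax_cross_entropy_with_logits` loss below expects. A quick sanity check (an illustrative cell, not part of the original run):

In [ ]:
# Images are flattened 28x28 floats; labels are integer class IDs.
print(mnist.train.images.shape)  # (55000, 784)
print(mnist.train.labels.shape)  # (55000,)
print(mnist.train.labels[:5])    # integer class IDs, not one-hot vectors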

In [3]:
h = 28
w = 28
channels = 1
n_inputs = h * w

conv1_fmaps = 32
conv1_ksize = 3
conv1_stride = 1
conv1_pad = 'SAME'

conv2_fmaps = 64
conv2_ksize = 3
conv2_stride = 2
conv2_pad = 'SAME'

pool3_fmaps = conv2_fmaps

n_fc1 = 64
n_outputs = 10
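
With `'SAME'` padding the output side length is `ceil(input / stride)`, so the 28x28 inputs stay 28x28 after `conv1` (stride 1), shrink to 14x14 after `conv2` (stride 2), and to 7x7 after the 2x2 max-pooling layer in the next cell; that is where the `7 * 7 * pool3_fmaps` flatten size comes from. A quick check of the arithmetic (illustrative cell, not part of the original run):

In [ ]:
import math

def same_out(size, stride):
    # Output side length for 'SAME' padding: ceil(size / stride).
    return math.ceil(size / stride)

side = same_out(h, conv1_stride)        # 28 -> 28 (conv1, stride 1)
side = same_out(side, conv2_stride)     # 28 -> 14 (conv2, stride 2)
side = side // 2                        # 14 -> 7  (2x2 'VALID' max pool, stride 2)
print(side, side * side * pool3_fmaps)  # 7 3136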

In [4]:
graph = tf.Graph()
with graph.as_default():
    # Pin every op in the graph to the CPU.
    with tf.device("/cpu:0"):
        with tf.name_scope('inputs'):
            X = tf.placeholder(tf.float32, shape=[None, n_inputs], name='X')
            X_reshaped = tf.reshape(X, shape=[-1, h, w, channels])
            y = tf.placeholder(tf.int32, shape=[None], name='y')

        conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize,
                                 strides=conv1_stride, padding=conv1_pad,
                                 activation=tf.nn.relu, name='conv1')
        conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize,
                                 strides=conv2_stride, padding=conv2_pad,
                                 activation=tf.nn.relu, name='conv2')

        with tf.name_scope('pool3'):
            # conv2 is [?, 14, 14, 64]; 2x2 max pooling with stride 2 halves the
            # spatial dimensions, so pool3 is [?, 7, 7, 64].
            pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
            # Flatten the 7x7x64 feature maps into one vector per example
            # so they can be fed to the fully connected layer.
            pool3_flat = tf.reshape(pool3, shape=[-1, 7 * 7 * pool3_fmaps])

        with tf.name_scope('fc1'):
            fc1 = tf.layers.dense(pool3_flat, n_fc1, activation=tf.nn.relu, name='fc1')

        with tf.name_scope('output'):
            logits = tf.layers.dense(fc1, n_outputs, name='output')
            Y_proba = tf.nn.softmax(logits, name='Y_proba')

        with tf.name_scope('train'):
            # The sparse version expects integer class labels, not one-hot vectors.
            xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
            loss = tf.reduce_mean(xentropy)
            optimizer = tf.train.AdamOptimizer()
            training_op = optimizer.minimize(loss)

        with tf.name_scope('eval'):
            correct = tf.nn.in_top_k(logits, y, 1)
            accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

        with tf.name_scope('init_and_save'):
            init = tf.global_variables_initializer()
            saver = tf.train.Saver()
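
Since `tf.layers` records static shapes at graph-construction time, the layer dimensions above can be verified directly on the tensors (illustrative cell; the shapes in the comments follow from the strides):

In [ ]:
# Static shapes recorded at graph-construction time (? is the batch dimension).
print(conv1.shape)       # (?, 28, 28, 32)
print(conv2.shape)       # (?, 14, 14, 64)
print(pool3.shape)       # (?, 7, 7, 64)
print(pool3_flat.shape)  # (?, 3136)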

In [5]:
%%time

n_epochs = 10
batch_size = 10
with tf.Session(graph=graph) as sess:
    # Device placement was already pinned to the CPU when the graph was built.
    init.run()
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
        # Training accuracy is measured on the last mini-batch only (10 examples),
        # which is why it jumps around in steps of 0.1.
        acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
        acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
        print(epoch, 'Train acc: ', acc_train, 'Test acc: ', acc_test)
    #save_path = saver.save(sess, "./my_conv_mnist_model")


0 Train acc:  1.0 Test acc:  0.9801
1 Train acc:  0.9 Test acc:  0.986
2 Train acc:  1.0 Test acc:  0.9845
3 Train acc:  1.0 Test acc:  0.9901
4 Train acc:  1.0 Test acc:  0.9889
5 Train acc:  1.0 Test acc:  0.988
6 Train acc:  1.0 Test acc:  0.9874
7 Train acc:  1.0 Test acc:  0.9904
8 Train acc:  1.0 Test acc:  0.9866
9 Train acc:  1.0 Test acc:  0.9875
CPU times: user 19min 24s, sys: 4min 37s, total: 24min 1s
Wall time: 8min 40s
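
If the `saver.save` line above is uncommented, the trained parameters can be brought back into a fresh session later. A minimal restore sketch (assuming a checkpoint was written to `./my_conv_mnist_model`):

In [ ]:
# Restore the trained weights and re-check test accuracy
# (assumes the saver.save(...) line above was uncommented).
with tf.Session(graph=graph) as sess:
    saver.restore(sess, "./my_conv_mnist_model")
    acc_test = accuracy.eval(feed_dict={X: mnist.test.images, y: mnist.test.labels})
    print('Test acc:', acc_test)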

Strangely, TensorFlow still appears to use the GPU even though the graph above is pinned to `/cpu:0`. This is at least partly expected: by default a `tf.Session` initializes, and reserves memory on, every visible GPU regardless of where the ops are actually placed.
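
One way to actually keep TensorFlow off the GPU is to hide the devices at the session (or process) level rather than relying on `tf.device` alone. A hedged sketch of both standard approaches (neither was used in the run above):

In [ ]:
import os

# Option 1: hide all GPUs from CUDA before TensorFlow initializes
# (must be set before the first Session is created).
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Option 2: tell the Session not to create any GPU devices.
config = tf.ConfigProto(device_count={'GPU': 0})
with tf.Session(graph=graph, config=config) as sess:
    init.run()
    # ... training loop as above ...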