MNIST Digit Recognition - Hybrid CNN w/Keras in TF

MNIST Digit Recognition built using Keras built into Tensorflow.


In [1]:
from __future__ import division, print_function
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil
import tensorflow as tf
%matplotlib inline

In [2]:
DATA_DIR = "../../data"
TRAIN_FILE = os.path.join(DATA_DIR, "mnist_train.csv")
TEST_FILE = os.path.join(DATA_DIR, "mnist_test.csv")

OUTPUT_DATA_DIR = os.path.join(DATA_DIR, "01-mnist-cnn")
LOG_DIR = os.path.join(OUTPUT_DATA_DIR, "logs")
MODEL_FILE = os.path.join(OUTPUT_DATA_DIR, "model")

IMG_SIZE = 28
LEARNING_RATE = 0.001
BATCH_SIZE = 128
NUM_CLASSES = 10
NUM_EPOCHS = 5

Prepare Data


In [3]:
def parse_file(filename):
    xdata, ydata = [], []
    fin = open(filename, "rb")
    i = 0
    for line in fin:
        if i % 10000 == 0:
            print("{:s}: {:d} lines read".format(
                os.path.basename(filename), i))
        cols = line.strip().split(",")
        ydata.append(int(cols[0]))
        xdata.append(np.reshape(np.array([float(x) / 255. 
            for x in cols[1:]]), (IMG_SIZE, IMG_SIZE, 1)))
        i += 1
    fin.close()
    print("{:s}: {:d} lines read".format(os.path.basename(filename), i))
    y = np.array(ydata)
    X = np.array(xdata)
    return X, y

Xtrain, ytrain = parse_file(TRAIN_FILE)
Xtest, ytest = parse_file(TEST_FILE)
print(Xtrain.shape, ytrain.shape, Xtest.shape, ytest.shape)


mnist_train.csv: 0 lines read
mnist_train.csv: 10000 lines read
mnist_train.csv: 20000 lines read
mnist_train.csv: 30000 lines read
mnist_train.csv: 40000 lines read
mnist_train.csv: 50000 lines read
mnist_train.csv: 60000 lines read
mnist_test.csv: 0 lines read
mnist_test.csv: 10000 lines read
(60000, 28, 28, 1) (60000,) (10000, 28, 28, 1) (10000,)

In [4]:
def datagen(X, y, batch_size=BATCH_SIZE, num_classes=NUM_CLASSES):
    ohe = OneHotEncoder(n_values=num_classes)
    while True:
        shuffled_indices = np.random.permutation(np.arange(len(y)))
        num_batches = len(y) // batch_size
        for bid in range(num_batches):
            batch_indices = shuffled_indices[bid*batch_size:(bid+1)*batch_size]
            Xbatch = np.zeros((batch_size, X.shape[1], X.shape[2], X.shape[3]))
            Ybatch = np.zeros((batch_size, num_classes))
            for i in range(batch_size):
                Xbatch[i] = X[batch_indices[i]]
                Ybatch[i] = ohe.fit_transform(y[batch_indices[i]]).todense()
            yield Xbatch, Ybatch

self_test_gen = datagen(Xtrain, ytrain)
Xbatch, Ybatch = self_test_gen.next()
print(Xbatch.shape, Ybatch.shape)


(128, 28, 28, 1) (128, 10)

Define Network

The network is defined using Keras. The loss and accuracy also use Keras functions. However, we use a Tensorflow optimizer, as well as execute the whole thing in the context of a Tensorflow session. Note that we need to set the Keras session and pass in the value of learning_phase during training and evaluation.

We also use the SummaryWriter to log the loss and accuracy at each step so they can be viewed using Tensorboard.

Finally, and most importantly for our Tensorflow Serving experiment, we use the Tensorflow Saver to save the model in Tensorflow format.


In [5]:
sess = tf.Session()
tf.contrib.keras.backend.set_session(sess)

In [6]:
with tf.name_scope("data"):
    X = tf.placeholder(tf.float32, [None, IMG_SIZE, IMG_SIZE, 1], name="X")
    Y = tf.placeholder(tf.float32, [None, NUM_CLASSES], name="Y")

In [7]:
model = tf.contrib.keras.models.Sequential()
model.add(tf.contrib.keras.layers.Conv2D(32, (3, 3), activation="relu", 
                                         input_shape=(IMG_SIZE, IMG_SIZE, 1)))
model.add(tf.contrib.keras.layers.Conv2D(64, (3, 3), activation="relu"))
model.add(tf.contrib.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.contrib.keras.layers.Dropout(0.25))
model.add(tf.contrib.keras.layers.Flatten())
model.add(tf.contrib.keras.layers.Dense(128, activation="relu"))
model.add(tf.contrib.keras.layers.Dropout(0.5))
model.add(tf.contrib.keras.layers.Dense(NUM_CLASSES, activation="softmax"))

Y_ = model(X)

In [8]:
loss = tf.reduce_mean(tf.contrib.keras.losses.categorical_crossentropy(Y, Y_))
accuracy = tf.reduce_mean(tf.contrib.keras.metrics.categorical_accuracy(Y, Y_))

In [9]:
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(loss)

In [10]:
init_op = tf.global_variables_initializer()
sess.run(init_op)

In [11]:
shutil.rmtree(OUTPUT_DATA_DIR)

In [12]:
tf.summary.scalar("loss", loss)
tf.summary.scalar("accuracy", accuracy)
# Merge all summaries into a single op
summary = tf.summary.merge_all()

Train Network


In [13]:
with sess.as_default():

    saver = tf.train.Saver()
    
    logger = tf.summary.FileWriter(LOG_DIR, sess.graph)
    
    train_gen = datagen(Xtrain, ytrain, BATCH_SIZE)
    num_batches = len(Xtrain) // BATCH_SIZE
    
    for epoch in range(NUM_EPOCHS):
        total_loss, total_acc = 0, 0
        for bid in range(num_batches):
            Xbatch, Ybatch = train_gen.next()
            _, batch_loss, batch_acc, batch_summary = sess.run(
                [optimizer, loss, accuracy, summary], 
                feed_dict={X: Xbatch, Y: Ybatch, tf.contrib.keras.backend.learning_phase(): 1})
            
            # write to tensorboard
            logger.add_summary(batch_summary, epoch * num_batches + bid)
            # accumulate to print once per epoch
            total_acc += batch_acc
            total_loss += batch_loss
            
        total_acc /= num_batches
        total_loss /= num_batches
        print("Epoch {:d}/{:d}: loss={:.3f}, accuracy={:.3f}".format(
            (epoch + 1), NUM_EPOCHS, total_loss, total_acc))
        saver.save(sess, MODEL_FILE, (epoch + 1))
        
logger.close()


Epoch 1/5: loss=0.245, accuracy=0.926
Epoch 2/5: loss=0.086, accuracy=0.975
Epoch 3/5: loss=0.060, accuracy=0.982
Epoch 4/5: loss=0.051, accuracy=0.984
Epoch 5/5: loss=0.044, accuracy=0.986

Visualize Training logs via Tensorboard

On the command line, run following commands:

cd ../../data/01-tf-serving
tensorboard --logdir=logs

Control-Click on http://localhost:6006 to see loss and accuracy plots on the browser.

Here are (representative) images from tensorboard for the accuracy and loss.

Evaluate Network


In [14]:
BEST_MODEL = os.path.join(OUTPUT_DATA_DIR, "model-5")
saver = tf.train.Saver()
ys, ys_ = [], []
with sess.as_default():

    sess.run(tf.global_variables_initializer())
    saver.restore(sess, BEST_MODEL)
    
    test_gen = datagen(Xtest, ytest, BATCH_SIZE)
    val_loss, val_acc = 0., 0.
    num_batches = len(Xtrain) // BATCH_SIZE
    for _ in range(num_batches):
        Xbatch, Ybatch = test_gen.next()
        Ybatch_ = sess.run(Y_, feed_dict={X: Xbatch, 
            tf.contrib.keras.backend.learning_phase(): 0})
        ys.extend(np.argmax(Ybatch, axis=1))
        ys_.extend(np.argmax(Ybatch_, axis=1))

acc = accuracy_score(ys_, ys)
cm = confusion_matrix(ys_, ys)
print("Accuracy: {:.4f}".format(acc))
print("Confusion Matrix")
print(cm)


INFO:tensorflow:Restoring parameters from ../../data/01-tf-serving/model-5
Accuracy: 0.9898
Confusion Matrix
[[5848    0    6    0    0   12   36    6   30    6]
 [   6 6773   12    0    0    0   12    6    6   18]
 [   0    6 6127   12    0    0    0   18   12    0]
 [   0    6    0 6017    0   48    0    6    6    0]
 [   0    0    6    0 5827    0    6    0    0   24]
 [   0    6    0   12    0 5270   30    0    0   30]
 [   6    6    0    0   17    6 5648    0    0    0]
 [   0    0   30    6    0    0    0 6117   30   18]
 [   0    0    0    6    6    6    6    6 5724   12]
 [   6    0    0    0   36    0    0    0   24 5942]]

In [ ]: