In [1]:
from __future__ import division, print_function
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import os
import shutil
import tensorflow as tf
%matplotlib inline
In [2]:
DATA_DIR = "../../data"
TRAIN_FILE = os.path.join(DATA_DIR, "mnist_train.csv")
TEST_FILE = os.path.join(DATA_DIR, "mnist_test.csv")
OUTPUT_DATA_DIR = os.path.join(DATA_DIR, "01-mnist-cnn")
LOG_DIR = os.path.join(OUTPUT_DATA_DIR, "logs")
MODEL_FILE = os.path.join(OUTPUT_DATA_DIR, "model")
IMG_SIZE = 28
LEARNING_RATE = 0.001
BATCH_SIZE = 128
NUM_CLASSES = 10
NUM_EPOCHS = 5
In [3]:
def parse_file(filename):
    xdata, ydata = [], []
    fin = open(filename, "r")
    i = 0
    for line in fin:
        if i % 10000 == 0:
            print("{:s}: {:d} lines read".format(
                os.path.basename(filename), i))
        cols = line.strip().split(",")
        ydata.append(int(cols[0]))
        xdata.append(np.reshape(np.array([float(x) / 255.
            for x in cols[1:]]), (IMG_SIZE, IMG_SIZE, 1)))
        i += 1
    fin.close()
    print("{:s}: {:d} lines read".format(os.path.basename(filename), i))
    y = np.array(ydata)
    X = np.array(xdata)
    return X, y

Xtrain, ytrain = parse_file(TRAIN_FILE)
Xtest, ytest = parse_file(TEST_FILE)
print(Xtrain.shape, ytrain.shape, Xtest.shape, ytest.shape)
In [4]:
def datagen(X, y, batch_size=BATCH_SIZE, num_classes=NUM_CLASSES):
    ohe = OneHotEncoder(n_values=num_classes)
    while True:
        shuffled_indices = np.random.permutation(np.arange(len(y)))
        num_batches = len(y) // batch_size
        for bid in range(num_batches):
            batch_indices = shuffled_indices[bid*batch_size:(bid+1)*batch_size]
            Xbatch = np.zeros((batch_size, X.shape[1], X.shape[2], X.shape[3]))
            Ybatch = np.zeros((batch_size, num_classes))
            for i in range(batch_size):
                Xbatch[i] = X[batch_indices[i]]
                # OneHotEncoder expects a 2D array, so wrap the scalar label
                Ybatch[i] = ohe.fit_transform([[y[batch_indices[i]]]]).todense()
            yield Xbatch, Ybatch

self_test_gen = datagen(Xtrain, ytrain)
Xbatch, Ybatch = next(self_test_gen)
print(Xbatch.shape, Ybatch.shape)
The network is defined using Keras, and the loss and accuracy are also computed with Keras functions. However, we use a TensorFlow optimizer, and we execute the whole thing in the context of a TensorFlow session. Note that we need to register the session with the Keras backend and feed in the value of learning_phase during training and evaluation.
We also use a tf.summary.FileWriter to log the loss and accuracy at each step so they can be viewed using TensorBoard.
Finally, and most importantly for our TensorFlow Serving experiment, we use the TensorFlow Saver to save the model in TensorFlow checkpoint format.
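For reference, a checkpoint saved this way could later be re-exported in the SavedModel format that TensorFlow Serving loads. The following is only a minimal sketch using the TF 1.x tf.saved_model.builder API; it assumes the sess, X, and Y_ defined in the cells below, and the "export" subdirectory name is an arbitrary choice, not part of this notebook.
# Sketch (assumption): export the trained graph for TensorFlow Serving.
# `sess`, `X` (input placeholder) and `Y_` (softmax output) are defined
# in the cells below; "export" is a hypothetical directory name.
export_dir = os.path.join(OUTPUT_DATA_DIR, "export")
builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
signature = tf.saved_model.signature_def_utils.predict_signature_def(
    inputs={"images": X}, outputs={"scores": Y_})
builder.add_meta_graph_and_variables(
    sess, [tf.saved_model.tag_constants.SERVING],
    signature_def_map={"predict": signature})
builder.save()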
In [5]:
sess = tf.Session()
tf.contrib.keras.backend.set_session(sess)
In [6]:
with tf.name_scope("data"):
    X = tf.placeholder(tf.float32, [None, IMG_SIZE, IMG_SIZE, 1], name="X")
    Y = tf.placeholder(tf.float32, [None, NUM_CLASSES], name="Y")
In [7]:
model = tf.contrib.keras.models.Sequential()
model.add(tf.contrib.keras.layers.Conv2D(32, (3, 3), activation="relu",
                                         input_shape=(IMG_SIZE, IMG_SIZE, 1)))
model.add(tf.contrib.keras.layers.Conv2D(64, (3, 3), activation="relu"))
model.add(tf.contrib.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.contrib.keras.layers.Dropout(0.25))
model.add(tf.contrib.keras.layers.Flatten())
model.add(tf.contrib.keras.layers.Dense(128, activation="relu"))
model.add(tf.contrib.keras.layers.Dropout(0.5))
model.add(tf.contrib.keras.layers.Dense(NUM_CLASSES, activation="softmax"))
Y_ = model(X)
In [8]:
loss = tf.reduce_mean(tf.contrib.keras.losses.categorical_crossentropy(Y, Y_))
accuracy = tf.reduce_mean(tf.contrib.keras.metrics.categorical_accuracy(Y, Y_))
In [9]:
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(loss)
In [10]:
init_op = tf.global_variables_initializer()
sess.run(init_op)
In [11]:
# remove any output from a previous run
if os.path.exists(OUTPUT_DATA_DIR):
    shutil.rmtree(OUTPUT_DATA_DIR)
In [12]:
tf.summary.scalar("loss", loss)
tf.summary.scalar("accuracy", accuracy)
# Merge all summaries into a single op
summary = tf.summary.merge_all()
In [13]:
with sess.as_default():
    saver = tf.train.Saver()
    logger = tf.summary.FileWriter(LOG_DIR, sess.graph)
    train_gen = datagen(Xtrain, ytrain, BATCH_SIZE)
    num_batches = len(Xtrain) // BATCH_SIZE
    for epoch in range(NUM_EPOCHS):
        total_loss, total_acc = 0, 0
        for bid in range(num_batches):
            Xbatch, Ybatch = next(train_gen)
            _, batch_loss, batch_acc, batch_summary = sess.run(
                [optimizer, loss, accuracy, summary],
                feed_dict={X: Xbatch, Y: Ybatch,
                           tf.contrib.keras.backend.learning_phase(): 1})
            # write to tensorboard
            logger.add_summary(batch_summary, epoch * num_batches + bid)
            # accumulate to print once per epoch
            total_acc += batch_acc
            total_loss += batch_loss
        total_acc /= num_batches
        total_loss /= num_batches
        print("Epoch {:d}/{:d}: loss={:.3f}, accuracy={:.3f}".format(
            (epoch + 1), NUM_EPOCHS, total_loss, total_acc))
        saver.save(sess, MODEL_FILE, (epoch + 1))
    logger.close()
On the command line, run the following commands:
cd ../../data/01-mnist-cnn
tensorboard --logdir=logs
Then Control-Click http://localhost:6006 to see the loss and accuracy plots in the browser.
Here are (representative) TensorBoard charts for the accuracy and loss.
In [14]:
BEST_MODEL = os.path.join(OUTPUT_DATA_DIR, "model-5")
saver = tf.train.Saver()
ys, ys_ = [], []
with sess.as_default():
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, BEST_MODEL)
    test_gen = datagen(Xtest, ytest, BATCH_SIZE)
    val_loss, val_acc = 0., 0.
    num_batches = len(Xtest) // BATCH_SIZE
    for _ in range(num_batches):
        Xbatch, Ybatch = next(test_gen)
        Ybatch_ = sess.run(Y_, feed_dict={X: Xbatch,
            tf.contrib.keras.backend.learning_phase(): 0})
        ys.extend(np.argmax(Ybatch, axis=1))
        ys_.extend(np.argmax(Ybatch_, axis=1))

# sklearn convention is (y_true, y_pred)
acc = accuracy_score(ys, ys_)
cm = confusion_matrix(ys, ys_)
print("Accuracy: {:.4f}".format(acc))
print("Confusion Matrix")
print(cm)
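Since the confusion matrix is just a 10x10 integer array, it can also be rendered as a heatmap using the matplotlib import at the top of the notebook. This plotting snippet is a sketch, not part of the original run:
# Sketch: render the confusion matrix computed above as a heatmap.
plt.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
plt.title("Confusion Matrix")
plt.colorbar()
plt.xticks(np.arange(NUM_CLASSES))
plt.yticks(np.arange(NUM_CLASSES))
plt.xlabel("predicted")
plt.ylabel("actual")
plt.show()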
In [ ]: