In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
%matplotlib inline
plt.rcParams['figure.figsize'] = (15,10)
# auto reloading of modules
%load_ext autoreload
%autoreload 2
In [2]:
# Settings
batch_size = 1000
epochs = 10 # number of epochs to train for
learning_rate = .1
momentum = .9
print_every = 25 # print training info every this many iterations
test_every = 50 # run one epoch of testing every this many iterations
reg = 0.01 # regularization strength
num_train = 60000 # 60000 max
num_test = 5000 # 10000 max
layer_sizes = [512,256] # these are the sizes of the hidden layers
checkpoint_file = 'models/MNIST_FC.ckpt'
In [3]:
# these are updates/modifications/utilities derived from the settings; do not change
# fixed seed for reproducibility
np.random.seed(0)
# set up the number of batches; make sure the batch size divides evenly
num_batches_train = num_train // batch_size
num_batches_test = num_test // batch_size
assert num_train%batch_size == 0, 'The number of training samples must be divisible by the batch size'
assert num_test%batch_size == 0, 'The number of test samples must be divisible by the batch size'
# MNIST is 28x28x1 images
img_shape = (28,28,1)
img_size = 28*28*1
# modify the layer_sizes to account for input and output
layer_sizes.insert(0, img_size) # input layer
layer_sizes.append(10) # output layer
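With the defaults above, layer_sizes now describes the full architecture: 784 inputs, two hidden layers, and a 10-class output. A quick sanity check (a sketch, assuming the default settings):

print(layer_sizes)  # expected: [784, 512, 256, 10]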
In [4]:
import getMNIST
fNames = getMNIST.downloadMNIST()
train_data = getMNIST.extract_data(fNames[0],num_train)
test_data = getMNIST.extract_data(fNames[2],num_test)
train_labels = getMNIST.extract_labels(fNames[1], num_train)
test_labels = getMNIST.extract_labels(fNames[3], num_test)
In [5]:
train_data = train_data.reshape(num_train, img_size)
test_data = test_data.reshape(num_test, img_size)
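The reshape flattens each 28x28 image into a 784-long row. A minimal shape check (a sketch, assuming getMNIST returns one image per row after reshaping):

# both matrices should now be (num_samples, 784)
assert train_data.shape == (num_train, img_size)
assert test_data.shape == (num_test, img_size)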
In [6]:
# Set up inputs and outputs
X = tf.placeholder(tf.float32, shape = [batch_size,img_size])
Y = tf.placeholder(tf.float32, shape = [batch_size,10]) # 10 labels
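These placeholders are pinned to batch_size, so every feed_dict must contain exactly one full batch. TensorFlow 1.x also accepts None for a variable leading dimension; a sketch of that alternative (not used here, since the asserts above already guarantee full batches):

# variable batch size; would let train and test use different batch sizes
X_var = tf.placeholder(tf.float32, shape=[None, img_size])
Y_var = tf.placeholder(tf.float32, shape=[None, 10])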
In [7]:
# setup model weights
weights = {}
bias = {}
for layer in range(1, len(layer_sizes)):
    layer_name = 'FC' + str(layer)
    # small random weights (scaled normal) and a small positive bias
    weights[layer_name] = tf.Variable(tf.random_normal([layer_sizes[layer-1], layer_sizes[layer]]) * .01, name=layer_name)
    bias[layer_name] = tf.Variable(tf.constant(0.1, shape=[layer_sizes[layer]]))
print(weights)
print(bias)
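Each weight matrix connects consecutive entries of layer_sizes, so with the defaults the shapes should come out as FC1: (784, 512), FC2: (512, 256), FC3: (256, 10). A quick check (a sketch):

for name in sorted(weights):
    print(name, weights[name].shape, bias[name].shape)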
In [8]:
# build the model
# dicts to store intermediate values
matMuls = {}
relu = {}
previous_layer = X
for layer in range(1, len(layer_sizes)):
    layer_name = 'FC' + str(layer)
    matMuls[layer_name] = tf.matmul(previous_layer, weights[layer_name]) + bias[layer_name]
    # print the shapes: input * weights = output
    print('{} * {} = {}'.format(previous_layer.shape, weights[layer_name].shape, matMuls[layer_name].shape))
    # apply relu
    relu[layer_name] = tf.nn.relu(matMuls[layer_name])
    previous_layer = relu[layer_name]
# the loss is computed from the last layer's pre-relu logits
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=matMuls[layer_name]))
# regularization loss
for ii in weights:
    loss += reg * tf.nn.l2_loss(weights[ii])
predictions = tf.nn.softmax(logits=matMuls[layer_name])
num_correct = tf.reduce_sum(tf.cast(tf.equal(tf.argmax(predictions, 1), tf.argmax(Y, 1)), tf.float32))
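For reference, softmax_cross_entropy_with_logits computes -sum_k y_k * log(softmax(logits)_k) per example, and reduce_mean averages that over the batch. A NumPy sketch of the same computation (logits_np and labels_np are hypothetical arrays, for illustration only):

def softmax_xent(logits_np, labels_np):
    # subtract the row max for numerical stability
    shifted = logits_np - logits_np.max(axis=1, keepdims=True)
    # log softmax: shifted logits minus the log of the normalizer
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    # mean over the batch of the per-example cross-entropy
    return -(labels_np * log_probs).sum(axis=1).mean()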
In [9]:
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum).minimize(loss)
init = tf.global_variables_initializer()
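tf.train.MomentumOptimizer keeps a velocity accumulator per variable; its documented update is accumulation = momentum * accumulation + gradient, followed by variable -= learning_rate * accumulation. A NumPy sketch of one step (w and grad are hypothetical dummies, for illustration only):

# hypothetical parameter vector and gradient
w = np.zeros(5)
grad = np.ones(5)
v = np.zeros_like(w)           # velocity accumulator, one per parameter
v = momentum * v + grad        # accumulate the gradient
w = w - learning_rate * v      # step along the accumulated direction
print(w)                       # first step moves each entry by -learning_rate * grad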
In [10]:
# loss history
losses_train = np.array([])
losses_test = np.array([])
# accuracy history
acc_train = np.array([])
acc_test = np.array([])
saver = tf.train.Saver()
iteration = -1 # counter
with tf.Session() as sess:
    sess.run(init)
    for epoch in range(epochs):
        for batch in range(num_batches_train):
            iteration += 1
            batch_start = batch * batch_size
            batch_end = batch_start + batch_size
            # We want different info based on the iteration.
            # We don't actually need to compute everything on every iteration.
            if iteration % print_every == 0:
                solveFor = [optimizer, loss, predictions, num_correct]
            else:
                solveFor = [optimizer, loss]
            # run it
            out = sess.run(solveFor,
                           feed_dict={X: train_data[batch_start:batch_end, :],
                                      Y: train_labels[batch_start:batch_end]})
            if iteration % print_every == 0:
                _, l, pred, correct = out
                acc_train = np.append(acc_train, correct / float(batch_size))
            else:
                _, l = out
            losses_train = np.append(losses_train, l)
            if iteration % print_every == 0:
                print('Iteration: {}/{}, batch {} on epoch {}, \tLoss: {:.2e}, Train Acc: {:.3f}'.format(
                    iteration, epochs * num_batches_train, batch, epoch, l, acc_train[-1]))
            if iteration % test_every == 0:
                l_test = 0.
                correct_test = 0.
                for test_it in range(num_batches_test):
                    batch_start = test_it * batch_size
                    batch_end = batch_start + batch_size
                    l, correct = sess.run([loss, num_correct],
                                          feed_dict={X: test_data[batch_start:batch_end, :],
                                                     Y: test_labels[batch_start:batch_end]})
                    correct_test += correct
                    l_test += l
                # average over the test batches; record in the *test* histories
                l_test /= num_batches_test
                losses_test = np.append(losses_test, l_test)
                acc_test = np.append(acc_test, correct_test / float(num_test))
                print('Testing Loss: {:.2e}\tTesting Accuracy: {:.3f}\n'.format(l_test, acc_test[-1]))
    save_path = saver.save(sess, checkpoint_file)
    print("Model saved in file: %s" % save_path)
In [11]:
# Visualize the cost and log cost from training
plt.rcParams['figure.figsize'] = (15,6)
plt.subplot(121)
plt.plot(np.linspace(0, iteration, losses_train.shape[0]), losses_train, label='Training Loss')
plt.plot(np.linspace(0, iteration, losses_test.shape[0]), losses_test, label='Testing Loss')
plt.title('Cost')
plt.xlabel('Iteration Number')
plt.ylabel('Cost')
plt.legend()
plt.subplot(122)
plt.plot(np.log(np.linspace(1, iteration, losses_train.shape[0])), np.log(losses_train), label='Training Loss')
plt.plot(np.log(np.linspace(1, iteration, losses_test.shape[0])), np.log(losses_test), label='Testing Loss')
plt.title('Log Cost')
plt.xlabel('log(Iteration Number)')
plt.ylabel('log(Cost)')
plt.legend()
Out[11]:
In [12]:
# See how the accuracy plays out between the test and training set
plt.rcParams['figure.figsize'] = (15,4)
plt.figure()
plt.plot(np.linspace(0,iteration,acc_train.shape[0]),acc_train,label = 'Train Accuracy')
plt.plot(np.linspace(0,iteration,acc_test.shape[0]),acc_test,label = 'Test Accuracy')
plt.title('Accuracy')
plt.xlabel('Iteration Number')
plt.ylabel('Accuracy')
plt.legend()
Out[12]:
In [13]:
# Generate predictions for the test and train set
saver = tf.train.Saver()
pred_train = np.zeros((num_train,10))
pred_test = np.zeros((num_test,10))
with tf.Session() as sess:
    # load the model from before
    saver.restore(sess, checkpoint_file)
    print('Finished loading the model')
    # batch over the entire training set and record predictions
    for it in range(num_batches_train):
        batch_start = it * batch_size
        batch_end = batch_start + batch_size
        pred_train[batch_start:batch_end, :] = sess.run(predictions,
                                                        feed_dict={X: train_data[batch_start:batch_end, :],
                                                                   Y: train_labels[batch_start:batch_end]})
    print('Finished predictions for training data')
    # batch over the entire testing set and record predictions
    for it in range(num_batches_test):
        batch_start = it * batch_size
        batch_end = batch_start + batch_size
        pred_test[batch_start:batch_end, :] = sess.run(predictions,
                                                       feed_dict={X: test_data[batch_start:batch_end, :],
                                                                  Y: test_labels[batch_start:batch_end]})
    print('Finished predictions for testing data')
print('Done!')
In [14]:
def error_rate(predictions, labels):
    """Return the error rate (percent) and the confusion matrix."""
    actual = np.argmax(labels, 1)
    pred = np.argmax(predictions, 1)
    correct = np.sum(pred == actual)
    total = pred.shape[0]
    error = 100.0 - (100 * float(correct) / float(total))
    # confusions[i, j] counts samples predicted as class i with actual class j
    confusions = np.zeros([10, 10], np.float32)
    for ii in range(predictions.shape[0]):
        confusions[pred[ii], actual[ii]] += 1
    return error, confusions
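A tiny usage example on made-up data (illustrative only): two samples, the first predicted correctly as class 1, the second predicted as 0 when the actual class is 2, giving a 50% error rate.

toy_pred = np.zeros((2, 10))
toy_pred[0, 1] = 1.               # predict class 1
toy_pred[1, 0] = 1.               # predict class 0
toy_labels = np.eye(10)[[1, 2]]   # actual classes 1 and 2, one-hot
err, conf = error_rate(toy_pred, toy_labels)
print(err)          # 50.0
print(conf[0, 2])   # 1.0: one sample predicted 0 but actually 2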
In [15]:
# Make confusion plot of the training set
plt.rcParams['figure.figsize'] = (20, 12)
error_train, confusions_train = error_rate(pred_train, train_labels)
print('Train error: %.1f%%' % error_train)
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.grid(False)
plt.xticks(np.arange(10))
plt.yticks(np.arange(10))
plt.imshow(confusions_train, cmap=plt.cm.jet, interpolation='nearest');
for i, cas in enumerate(confusions_train):
    for j, count in enumerate(cas):
        if count > 0:
            xoff = .07 * len(str(count))
            plt.text(j - xoff, i + .2, int(count), fontsize=12, color='white')
In [16]:
# Make confusion plot of the test set
plt.rcParams['figure.figsize'] = (20, 12)
error_test, confusions_test = error_rate(pred_test, test_labels)
print('Test error: %.1f%%' % error_test)
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.grid(False)
plt.xticks(np.arange(10))
plt.yticks(np.arange(10))
plt.imshow(confusions_test, cmap=plt.cm.jet, interpolation='nearest');
for i, cas in enumerate(confusions_test):
    for j, count in enumerate(cas):
        if count > 0:
            xoff = .07 * len(str(count))
            plt.text(j - xoff, i + .2, int(count), fontsize=12, color='white')
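The confusion matrices also give per-class precision and recall directly. Since rows index the predicted class and columns the actual class, a sketch:

# recall: fraction of each actual class (column) that was predicted correctly
recall = confusions_test.diagonal() / confusions_test.sum(axis=0)
# precision: fraction of each predicted class (row) that was actually correct
precision = confusions_test.diagonal() / confusions_test.sum(axis=1)
print(np.round(recall, 3))
print(np.round(precision, 3))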