In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
In [2]:
trainx = pd.read_csv("usps_trainx.data", header=None, delimiter=r"\s+")
trainy = pd.read_csv("usps_trainy.data", header=None, delimiter=r"\s+")
testx = pd.read_csv("usps_testx.data", header=None, delimiter=r"\s+")
testy = pd.read_csv("usps_testy.data", header=None, delimiter=r"\s+")
im_height = 16 #image height
im_width = 16 #image width
num_classes = 10 #number of classes for classification
#normalise the images
trainx = trainx / 256
testx = testx / 256
#transform the pandas DataFrames to numpy arrays (.values works across pandas versions; .as_matrix was removed)
trainx = trainx.values
trainy = trainy.values
testx = testx.values
testy = testy.values
#remove the singleton second dimension from the labels:
trainy = trainy[:,0]
testy = testy[:,0]
#randomly shuffle the training data
random_idx = np.random.permutation(len(trainx))
trainx = trainx[random_idx]
trainy = trainy[random_idx]
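In [ ]:
# Quick sanity check (a sketch): confirm the expected shapes (one 256-pixel row per image)
# and inspect the pixel and label ranges after loading, scaling and shuffling.
print("trainx:", trainx.shape, "trainy:", trainy.shape)
print("testx:", testx.shape, "testy:", testy.shape)
print("pixel range:", trainx.min(), "-", trainx.max())
print("labels:", np.unique(trainy))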
In [3]:
# Transform the images in a 16x16 form; the resulting tensor would be 2000x16x16 both for training and for test data.
# Simplified by Dr Seth Flaxman
trainx = trainx.reshape(len(trainx),16,16)
testx = testx.reshape(len(testx),16,16)
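In [ ]:
# Optional visual check (a sketch; assumes matplotlib is available in this environment):
# display one reshaped 16x16 image to confirm the reshape produced a readable digit.
import matplotlib.pyplot as plt
plt.imshow(trainx[0], cmap="gray")
plt.title("label: %d" % trainy[0])
plt.show()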
In [4]:
# 1-hot encode the labels trainy and testy
# Simplified by Dr Seth Flaxman
trainy = pd.get_dummies(trainy).values
testy = pd.get_dummies(testy).values
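In [ ]:
# Small illustration of the 1-hot encoding above (self-contained example):
# each integer label becomes a row with a single 1 in the column for that class.
print(pd.get_dummies(pd.Series([0, 2, 1, 2])))
# Note: get_dummies only creates columns for classes that actually occur, so this relies on
# all 10 digits appearing in both trainy and testy (which should be the case for these USPS sets).
print(trainy.shape, testy.shape)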
In [5]:
# Some notation for convenience
train_size = len(trainx)
test_size = len(testx)
In [6]:
# Batch extraction function: returns a tuple of images and labels at the given indices, ready to be fed to the model
def next_batch(data_x,data_y,indices):
    return data_x[indices,:,:] , data_y[indices,:]
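In [ ]:
# Usage sketch for next_batch: with indices 0..19 it returns the first 20 images
# (20 x 16 x 16) and their 1-hot labels (20 x 10) as a tuple.
batch_X_demo, batch_y_demo = next_batch(trainx, trainy, list(range(20)))
print(batch_X_demo.shape, batch_y_demo.shape)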
In [7]:
# Defining the convolution operation (with relu activation), and the maxpool operation
# https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/convolutional_network.ipynb
def conv2d(x, W, b, strides=1):
# Conv2D wrapper, with bias and relu activation
x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
x = tf.nn.bias_add(x, b)
return tf.nn.relu(x)
def maxpool2d(x, k=2, s=2):
# MaxPool2D wrapper
return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, s, s, 1],
padding='SAME')
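In [ ]:
# Shape check for the wrappers (a sketch using TensorFlow's static shape inference on a
# throw-away graph; the 5x5 window and 8 filters here are illustrative values only).
check_graph = tf.Graph()
with check_graph.as_default():
    dummy_x = tf.zeros([1, 16, 16, 1])
    dummy_W = tf.Variable(tf.random_normal([5, 5, 1, 8]))
    dummy_b = tf.Variable(tf.random_normal([8]))
    out = maxpool2d(conv2d(dummy_x, dummy_W, dummy_b, strides=2), k=2, s=1)
    # a stride-2 convolution with SAME padding halves each spatial dimension (16 -> 8),
    # and the k=2, s=1 max-pool with SAME padding keeps the spatial size unchanged
    print(out.get_shape())  # expected (1, 8, 8, 8)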
In [11]:
# CONSTRUCT THE COMPUTATIONAL GRAPH
# Introduce some hyperparameters
batch_size = 20
# an epoch is a single run over all the training data
num_epochs = 150
learning_rate = 0.001
# dropout makes sure the model doesn't rely too heavily on any single unit, which makes it more robust and helps prevent overfitting
dropout = 1 # probability of keeping a unit (1.0 disables dropout)
# l2 - regularisation parameter (0 disables regularisation)
alpha = 0
c = 5 #convolution window size
stride = 2 #stride of the convolution #DON'T CHANGE
conv1_size = 128 #number of outputs from the conv layer
c2 = 5 #2nd convolution window size
stride_2 = 2 #stride of the 2nd convolution #DON'T CHANGE
conv2_size = 128 #number of outputs from the 2nd conv layer
c3 = 5 #3rd convolution window size
stride_3 = 2 #stride of the 3rd convolution #DON'T CHANGE
conv3_size = 128 #number of outputs from the 3rd conv layer
full_conn_size = 2048 #number of nodes in the fully connected layer
# Construct the graph
compute_graph = tf.Graph()
with compute_graph.as_default():
#introduce placeholders for the data(images and labels) to later feed batches into
X = tf.placeholder(tf.float32, shape=(batch_size,im_height,im_width)) #images
y = tf.placeholder(tf.int32, shape=(batch_size,num_classes)) #labels
    #transform the input into a 4d tensor with an explicit (single) colour channel, since tf.nn.conv2d expects NHWC input
X_prime = tf.reshape(X, shape=[-1, im_height, im_width, 1])
#define the parameters(variables) of the model
#for the convolution layer:
W1 = tf.Variable(tf.random_normal([c, c, 1, conv1_size]))
b1 = tf.Variable(tf.random_normal([conv1_size]))
#for the second convolution layer:
W2 = tf.Variable(tf.random_normal([c2, c2, conv1_size, conv2_size]))
b2 = tf.Variable(tf.random_normal([conv2_size]))
#for the third convolution layer
W3 = tf.Variable(tf.random_normal([c3, c3, conv2_size, conv3_size]))
b3 = tf.Variable(tf.random_normal([conv3_size]))
#for the fully connected layer:
W4 = tf.Variable(tf.random_normal([2*2*conv3_size, full_conn_size]))
b4 = tf.Variable(tf.random_normal([full_conn_size]))
#for the output:
WO = tf.Variable(tf.random_normal([full_conn_size, num_classes]))
bO = tf.Variable(tf.random_normal([num_classes]))
#define the model structure
# c x c convolution, 1 input, conv1_size outputs
convolution = conv2d(X_prime, W1, b1, stride)
# then apply pooling
pool = maxpool2d(convolution,2,1)
#one more convolution+pooling layer:
convolution2 = conv2d(pool, W2, b2, stride_2)
# then apply pooling
pool2 = maxpool2d(convolution2,2,1)
#one more convolution+pooling layer:
convolution3 = conv2d(pool2, W3, b3, stride_3)
# then apply pooling
pool3 = maxpool2d(convolution3,2,1)
#add a fully connected layer
# Reshape convolution output to fit the fully connected layer input
full_conn = tf.reshape(pool3, [-1, 2*2*conv3_size])
full_conn = tf.add(tf.matmul(full_conn, W4), b4)
full_conn = tf.nn.relu(full_conn)
# The dropout is applied (almost) at the end of the neural network
full_conn = tf.nn.dropout(full_conn, dropout)
# Output, class prediction
predictions = tf.add(tf.matmul(full_conn, WO), bO)
# Define loss and optimizer
    # To the cost add the l2_loss of all the weights (but not the biases); the labels are cast to float to match the logits
    cost = (tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=tf.cast(y, tf.float32))) + \
            alpha * (tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W3) + tf.nn.l2_loss(W4) + tf.nn.l2_loss(WO)))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
# https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/convolutional_network.ipynb
correct_pred = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
""""
# Feed Dict and RUN SESSION
num_batches = int(len(trainx)/batch_size)
sess = tf.Session()
tf.global_variables_initializer().run(session=sess)
# Feed the batches one by one
for t in range(num_epochs):
for i in range(num_batches):
batch_X, batch_y = next_batch(trainx,trainy,[x for x in range(len(trainx))][i*batch_size:(i+1)*batch_size])
sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})
"""
Out[11]:
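In [ ]:
# Worked check of why the fully connected layer expects 2*2*conv3_size inputs:
# with SAME padding, each stride-2 convolution maps a side of length n to ceil(n/2),
# and the k=2, s=1 max-pools leave the spatial size unchanged.
from math import ceil
side = im_height  # 16
for s in (stride, stride_2, stride_3):
    side = ceil(side / s)  # 16 -> 8 -> 4 -> 2
print("final feature map:", side, "x", side, "x", conv3_size,
      "->", side * side * conv3_size, "inputs to the fully connected layer")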
In [ ]:
# TUNE HYPERPARAMETERS
# Note: this can take a while to run
# Hold out (from the training set) a small validation set to track accuracy while tuning the hyperparameters
valid_size = 200
trainx_ = trainx[0:train_size-valid_size]
trainy_ = trainy[0:train_size-valid_size]
validx = trainx[train_size-valid_size:]
validy = trainy[train_size-valid_size:]
num_train_batches = int(len(trainx_)/batch_size)
train_accuracy = np.zeros(num_train_batches)
num_valid_batches = int(len(validx)/batch_size)
valid_accuracy = np.zeros(num_valid_batches)
# Initialise all variables and run the session:
with tf.Session(graph=compute_graph) as sess:
    sess.run(tf.global_variables_initializer())
# Feed the batches one by one
for t in range(num_epochs):
for i in range(num_train_batches):
batch_X, batch_y = next_batch(trainx_,trainy_,[x for x in range(train_size-valid_size)][i*batch_size:(i+1)*batch_size])
sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})
train_accuracy[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
        # report train/valid accuracy regularly in order to allow early stopping
if((t+1) % 10 == 0 or ((t+1) % 5 == 0 and (t+1) <= 200) or (t+1) <= 10):
print("Train accuracy at epoch", t+1, "is:", sum(train_accuracy)/num_train_batches)
# validation accuracy
for i in range(num_valid_batches):
batch_X, batch_y = next_batch(validx,validy,[x for x in range(valid_size)][i*batch_size:(i+1)*batch_size])
valid_accuracy[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
print("Valid accuracy at epoch", t+1, "is:", sum(valid_accuracy)/num_valid_batches)
In [46]:
# TUNE HYPERPARAMETERS
# Do cross-validation to check the model performance and tune the parameters (num layers, layer types, batch size, etc.)
# Note: it's not wise to use the test set in hyperparameter tuning
# Note: cross-validation is much slower than fixed-set validation
#num_cross_valid = 10 #number of cross-validation folds
#train_accuracy = np.zeros(num_cross_valid)
# Train the model on 9 of the 10 folds at a time, recording the accuracy on the held-out fold
#for i in range(num_cross_valid):
# train_batch_X, train_batch_y = next_batch(trainx,trainy,[x for x in range(len(trainx))][i*batch_size:(i+1)*batch_size])
# sess.run(optimizer, feed_dict={X: train_batch_X, y: train_batch_y})
# test_batch_X, test_batch_y = next_batch(trainx,trainy,[x for x in range(len(trainx))][i*batch_size:(i+1)*batch_size])
# train_accuracy[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
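In [ ]:
# Sketch of how the 10-fold index split could be built with numpy (the cell above is only an
# outline; the names below are illustrative and not used elsewhere in this notebook).
num_folds = 10
fold_indices = np.array_split(np.arange(train_size), num_folds)
for k, valid_idx in enumerate(fold_indices):
    train_idx = np.setdiff1d(np.arange(train_size), valid_idx)
    # train on trainx[train_idx] / trainy[train_idx], evaluate on the held-out fold, then average
    print("fold", k, ":", len(train_idx), "train /", len(valid_idx), "validation examples")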
In [12]:
# NOW TRAIN THE MODEL OVER THE WHOLE DATASET AND REPORT TEST ACCURACY
# Note: this can take a while to run
num_batches = int(train_size/batch_size)
train_accuracy = np.zeros(num_batches)
num_test_batches = int(test_size/batch_size)
accuracy_ = np.zeros(num_test_batches)
# Initialise all variables and run the session
with tf.Session(graph=compute_graph) as sess:
    sess.run(tf.global_variables_initializer())
# Feed the batches one by one
for t in range(num_epochs):
for i in range(num_batches):
batch_X, batch_y = next_batch(trainx,trainy,[x for x in range(train_size)][i*batch_size:(i+1)*batch_size])
sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})
train_accuracy[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
if((t+1) % 20 == 0 or ((t+1) % 5 == 0 and (t+1) <= 100) or (t+1) <= 10):
print("The train accuracy at epoch", t+1, "is:", sum(train_accuracy)/num_batches)
for i in range(num_test_batches):
batch_X, batch_y = next_batch(testx,testy,[x for x in range(len(testx))][i*batch_size:(i+1)*batch_size])
accuracy_[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
print("The test accuracy is ",sum(accuracy_)/num_test_batches)
In [ ]: