In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

In [2]:
trainx = pd.read_csv("usps_trainx.data", header=None, delimiter=r"\s+")
trainy = pd.read_csv("usps_trainy.data", header=None, delimiter=r"\s+")
testx = pd.read_csv("usps_testx.data", header=None, delimiter=r"\s+")
testy = pd.read_csv("usps_testy.data", header=None, delimiter=r"\s+")

im_height = 16 #image height
im_width = 16 #image width
num_classes = 10 #number of classes for classification

#normalise the images
trainx = trainx / 256
testx  = testx  / 256

#transform the pandas DataFrames to numpy arrays (.as_matrix() is deprecated; .values works everywhere)
trainx = trainx.values
trainy = trainy.values
testx = testx.values
testy = testy.values

#drop the singleton second dimension of the labels:
trainy = trainy[:,0]
testy = testy[:,0]

#randomly shuffle the training data
import random
random_idx = random.sample(range(len(trainx)), len(trainx))
trainx = trainx[random_idx]
trainy = trainy[random_idx]
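
In [ ]:
# Quick sanity check (an added cell, not part of the original run): confirm
# the array shapes, and that the scaled pixels sit in [0, 1) if the raw
# values were 0-255.
print(trainx.shape, trainy.shape, testx.shape, testy.shape)
print(trainx.min(), trainx.max())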

In [3]:
# Reshape the images into 16x16 form; the resulting tensor is 2000x16x16 for both the training and the test data.
# Simplified by Dr Seth Flaxman
trainx = trainx.reshape(len(trainx),16,16)
testx = testx.reshape(len(testx),16,16)
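
In [ ]:
# Visual check of the reshape (an added example; assumes matplotlib is
# installed -- the original notebook never imports it).
import matplotlib.pyplot as plt
plt.imshow(trainx[0], cmap='gray')
plt.title("first training image, reshaped to 16x16")
plt.show()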

In [4]:
# 1-hot encode the labels trainy and testy
# Simplified by Dr Seth Flaxman
trainy = pd.get_dummies(trainy).values
testy = pd.get_dummies(testy).values
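
In [ ]:
# Equivalent one-hot encoding with plain numpy (an added check; assumes the
# labels are exactly the integers 0..9, which matches pd.get_dummies'
# sorted column order).
labels = np.argmax(trainy, axis=1)   # recover the integer labels
assert (np.eye(num_classes)[labels] == trainy).all()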

In [5]:
# Some notation for convenience
train_size = len(trainx)
test_size = len(testx)

In [6]:
# Batch-extraction function that returns the image and label batches for the given indices, ready to be fed to the model

def next_batch(data_x,data_y,indices):
    return data_x[indices,:,:] , data_y[indices,:]
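
In [ ]:
# A quick usage example for next_batch (added): take the first 20 training
# examples as one batch; 20 matches the batch_size chosen further below.
bx, by = next_batch(trainx, trainy, list(range(20)))
print(bx.shape, by.shape)   # (20, 16, 16) and (20, 10)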

In [7]:
# Defining the convolution operation (with relu activation), and the maxpool operation
# https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/convolutional_network.ipynb

def conv2d(x, W, b, strides=1):
    # Conv2D wrapper, with bias and relu activation
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME')
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

def maxpool2d(x, k=2, s=2):
    # MaxPool2D wrapper
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, s, s, 1],
                          padding='SAME')
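
In [ ]:
# With 'SAME' padding the spatial output size is ceil(n / stride), so the
# 16x16 images shrink only at the stride-2 convolutions (the pools in the
# graph below use stride 1). An added check of why the flattened feature
# map ends up with 2*2*conv3_size entries:
import math
n = 16
for stride in [2, 1, 2, 1, 2, 1]:   # conv1, pool1, conv2, pool2, conv3, pool3
    n = math.ceil(n / stride)
print(n)   # 2, i.e. a 2x2 map enters the fully connected layer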

In [11]:
# CONSTRUCT THE COMPUTATIONAL GRAPH


# Introduce some hyperparameters

batch_size = 20
# an epoch is a single run over all the training data
num_epochs = 150 
learning_rate = 0.001
# dropout randomly zeroes activations so the model doesn't rely on any single unit, which makes it more robust and helps prevent overfitting
dropout = 1 # probability of keeping an activation; 1 disables dropout
# l2-regularisation weight; 0 disables the penalty
alpha = 0

c = 5 #convolution window size
stride = 2 #stride of the convolution #DON'T CHANGE
conv1_size = 128 #number of outputs from the conv layer

c2 = 5 #2nd convolution window size
stride_2 = 2 #stride of the 2nd convolution #DON'T CHANGE
conv2_size = 128 #number of outputs from the 2nd conv layer

c3 = 5 #3rd convolution window size
stride_3 = 2 #stride of the 3rd convolution #DON'T CHANGE
conv3_size = 128 #number of outputs from the 3rd conv layer

full_conn_size = 2048 #number of nodes in the fully connected layer


# Construct the graph

compute_graph = tf.Graph()
with compute_graph.as_default():
    
    #introduce placeholders for the data (images and labels) to later feed batches into
    X = tf.placeholder(tf.float32, shape=(batch_size,im_height,im_width)) #images
    y = tf.placeholder(tf.int32, shape=(batch_size,num_classes)) #labels

    
    #reshape the input into a 4-D tensor with an explicit colour channel (TensorFlow's conv2d expects NHWC input)
    X_prime = tf.reshape(X, shape=[-1, im_height, im_width, 1])
    
    #define the parameters(variables) of the model
    #for the convolution layer:
    W1 = tf.Variable(tf.random_normal([c, c, 1, conv1_size]))
    b1 = tf.Variable(tf.random_normal([conv1_size]))
    #for the second convolution layer:
    W2 = tf.Variable(tf.random_normal([c2, c2, conv1_size, conv2_size]))
    b2 = tf.Variable(tf.random_normal([conv2_size]))
    #for the third convolution layer
    W3 = tf.Variable(tf.random_normal([c3, c3, conv2_size, conv3_size]))
    b3 = tf.Variable(tf.random_normal([conv3_size]))
    #for the fully connected layer (its input is the flattened 2x2 x conv3_size feature map):
    W4 = tf.Variable(tf.random_normal([2*2*conv3_size, full_conn_size]))
    b4 = tf.Variable(tf.random_normal([full_conn_size]))
    #for the output:
    WO = tf.Variable(tf.random_normal([full_conn_size, num_classes]))
    bO = tf.Variable(tf.random_normal([num_classes]))
    
    
    #define the model structure
    # c x c convolution, 1 input, conv1_size outputs
    convolution = conv2d(X_prime, W1, b1, stride)
    # then apply pooling
    pool = maxpool2d(convolution,2,1)
    
    #one more convolution+pooling layer:
    convolution2 = conv2d(pool, W2, b2, stride_2)
    # then apply pooling
    pool2 = maxpool2d(convolution2,2,1)
    
    #one more convolution+pooling layer:
    convolution3 = conv2d(pool2, W3, b3, stride_3)
    # then apply pooling
    pool3 = maxpool2d(convolution3,2,1)
    
    
    #add a fully connected layer
    # Reshape convolution output to fit the fully connected layer input
    full_conn = tf.reshape(pool3, [-1, 2*2*conv3_size])
    full_conn = tf.add(tf.matmul(full_conn, W4), b4)
    full_conn = tf.nn.relu(full_conn)
    # Dropout is applied just before the output layer
    full_conn = tf.nn.dropout(full_conn, dropout)

    # Output, class prediction
    predictions = tf.add(tf.matmul(full_conn, WO), bO)
    
    # Define loss and optimizer
    # Add the l2 loss of all the weights (but not the biases) to the cross-entropy cost
    cost = (tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=predictions, labels=y)) + \
            alpha * (tf.nn.l2_loss(WO) + tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W3)))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    
    # Evaluate model
    # https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/convolutional_network.ipynb
    correct_pred = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    
""""   
    # Feed Dict and RUN SESSION
    num_batches = int(len(trainx)/batch_size)

    sess = tf.Session()
    tf.global_variables_initializer().run(session=sess)
    # Feed the batches one by one
    for t in range(num_epochs):
        for i in range(num_batches):
            batch_X, batch_y = next_batch(trainx,trainy,[x for x in range(len(trainx))][i*batch_size:(i+1)*batch_size])
            sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})
"""




In [ ]:
# TUNE HYPERPARAMETERS
# Note: this can take a while to run
# Extract (from the training set) a small validation set to measure model accuracy while tuning the hyperparameters

valid_size = 200

trainx_ = trainx[0:train_size-valid_size]
trainy_ = trainy[0:train_size-valid_size]
validx = trainx[train_size-valid_size:]
validy = trainy[train_size-valid_size:]

num_train_batches = int(len(trainx_)/batch_size)
train_accuracy = np.zeros(num_train_batches)

num_valid_batches = int(len(validx)/batch_size)
valid_accuracy = np.zeros(num_valid_batches)


# Initialise all variables and run the session:
with tf.Session(graph=compute_graph) as sess:
    tf.global_variables_initializer().run(session=sess)
    
    # Feed the batches one by one
    for t in range(num_epochs):
        for i in range(num_train_batches):
            batch_X, batch_y = next_batch(trainx_, trainy_, list(range(i*batch_size, (i+1)*batch_size)))
            sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})
            train_accuracy[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
        
        # report train/valid accuracy regularly to allow early stopping
        if((t+1) % 10 == 0 or ((t+1) % 5 == 0 and (t+1) <= 200) or (t+1) <= 10):
            print("Train accuracy at epoch", t+1, "is:", sum(train_accuracy)/num_train_batches)
            # validation accuracy
            for i in range(num_valid_batches):
                batch_X, batch_y = next_batch(validx, validy, list(range(i*batch_size, (i+1)*batch_size)))
                valid_accuracy[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
            print("Valid accuracy at epoch", t+1, "is:", sum(valid_accuracy)/num_valid_batches)


Train accuracy at epoch 1 is: 0.493333332737
Valid accuracy at epoch 1 is: 0.635000002384
Train accuracy at epoch 2 is: 0.787222224474
Valid accuracy at epoch 2 is: 0.764999997616
Train accuracy at epoch 3 is: 0.865555553966
Valid accuracy at epoch 3 is: 0.814999991655
Train accuracy at epoch 4 is: 0.923333324989
Valid accuracy at epoch 4 is: 0.805000007153
Train accuracy at epoch 5 is: 0.938888881604
Valid accuracy at epoch 5 is: 0.839999997616
Train accuracy at epoch 6 is: 0.944999994834
Valid accuracy at epoch 6 is: 0.815000003576
Train accuracy at epoch 7 is: 0.951111106078
Valid accuracy at epoch 7 is: 0.819999998808
Train accuracy at epoch 8 is: 0.958333327373
Valid accuracy at epoch 8 is: 0.795000004768
Train accuracy at epoch 9 is: 0.973333329625
Valid accuracy at epoch 9 is: 0.860000008345
Train accuracy at epoch 10 is: 0.970555549198
Valid accuracy at epoch 10 is: 0.864999997616
Train accuracy at epoch 15 is: 0.994999998808
Valid accuracy at epoch 15 is: 0.890000003576
Train accuracy at epoch 20 is: 0.993888888094
Valid accuracy at epoch 20 is: 0.87500000596
Train accuracy at epoch 25 is: 0.992222220368
Valid accuracy at epoch 25 is: 0.885000002384
Train accuracy at epoch 30 is: 0.996111110184
Valid accuracy at epoch 30 is: 0.934999996424
Train accuracy at epoch 35 is: 0.994999998808
Valid accuracy at epoch 35 is: 0.904999989271
Train accuracy at epoch 40 is: 0.994999998808
Valid accuracy at epoch 40 is: 0.944999992847
Train accuracy at epoch 45 is: 0.996111110184
Valid accuracy at epoch 45 is: 0.909999990463
Train accuracy at epoch 50 is: 1.0
Valid accuracy at epoch 50 is: 0.894999992847
Train accuracy at epoch 55 is: 0.99722222156
Valid accuracy at epoch 55 is: 0.929999989271
Train accuracy at epoch 60 is: 0.999444444312
Valid accuracy at epoch 60 is: 0.92499999404
Train accuracy at epoch 65 is: 0.997777777248
Valid accuracy at epoch 65 is: 0.939999991655
Train accuracy at epoch 70 is: 1.0
Valid accuracy at epoch 70 is: 0.929999989271
Train accuracy at epoch 75 is: 0.995555554496
Valid accuracy at epoch 75 is: 0.879999995232
Train accuracy at epoch 80 is: 1.0
Valid accuracy at epoch 80 is: 0.939999991655
Train accuracy at epoch 85 is: 0.999444444312
Valid accuracy at epoch 85 is: 0.934999984503
Train accuracy at epoch 90 is: 0.994999998808
Valid accuracy at epoch 90 is: 0.949999988079
Train accuracy at epoch 95 is: 0.995555554496
Valid accuracy at epoch 95 is: 0.919999986887
Train accuracy at epoch 100 is: 1.0
Valid accuracy at epoch 100 is: 0.939999991655
Train accuracy at epoch 105 is: 1.0
Valid accuracy at epoch 105 is: 0.939999991655
Train accuracy at epoch 110 is: 1.0
Valid accuracy at epoch 110 is: 0.939999991655

In [46]:
# TUNE HYPERPARAMETERS
# Do cross-validation to check the model performance and tune the parameters (num layers, layer types, batch size, etc.)
# Note: it's not wise to use the test set in hyperparameter tuning
# Note: cross-validation is much slower than fixed-set validation

#num_cross_valid = 10 #number of cross-validation folds
#train_accuracy = np.zeros(num_cross_valid)

# Train the model on 9 of the 10 folds of the training data while recording
# the accuracy on the remaining fold. (This draft was never finished -- it
# reused the same batch for training and evaluation -- so a runnable sketch
# follows below.)
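
In [ ]:
# A minimal runnable 10-fold cross-validation sketch (added; it reuses the
# graph, placeholders, and next_batch from the cells above -- the fold
# construction and per-fold re-initialisation are assumptions, not the
# notebook's actual run):
k = 10
fold_size = train_size // k
fold_accuracy = np.zeros(k)

with tf.Session(graph=compute_graph) as sess:
    for f in range(k):
        valid_idx = list(range(f*fold_size, (f+1)*fold_size))
        held_out = set(valid_idx)
        train_idx = [i for i in range(k*fold_size) if i not in held_out]

        # start from freshly initialised parameters for every fold
        tf.global_variables_initializer().run(session=sess)

        for t in range(num_epochs):
            for i in range(len(train_idx)//batch_size):
                batch = train_idx[i*batch_size:(i+1)*batch_size]
                batch_X, batch_y = next_batch(trainx, trainy, batch)
                sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})

        # accuracy on the held-out fold, averaged over its batches
        accs = []
        for j in range(len(valid_idx)//batch_size):
            bx, by = next_batch(trainx, trainy, valid_idx[j*batch_size:(j+1)*batch_size])
            accs.append(sess.run(accuracy, feed_dict={X: bx, y: by}))
        fold_accuracy[f] = np.mean(accs)

print("Cross-validated accuracy:", fold_accuracy.mean())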

In [12]:
# NOW TRAIN THE MODEL OVER THE WHOLE DATASET AND REPORT TEST ACCURACY
# Note: this can take a while to run

num_batches = int(train_size/batch_size)
train_accuracy = np.zeros(num_batches)

num_test_batches = int(test_size/batch_size)
accuracy_ = np.zeros(num_test_batches)

# Initialise all variables and run the session
with tf.Session(graph=compute_graph) as sess:
    tf.global_variables_initializer().run(session=sess)

    # Feed the batches one by one
    for t in range(num_epochs):
        for i in range(num_batches):
            batch_X, batch_y = next_batch(trainx, trainy, list(range(i*batch_size, (i+1)*batch_size)))
            sess.run(optimizer, feed_dict={X: batch_X, y: batch_y})
            train_accuracy[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
        if((t+1) % 20 == 0 or ((t+1) % 5 == 0 and (t+1) <= 100) or (t+1) <= 10):
            print("The train accuracy at epoch", t+1, "is:", sum(train_accuracy)/num_batches)
            
    
        
    
    for i in range(num_test_batches):
        batch_X, batch_y = next_batch(testx, testy, list(range(i*batch_size, (i+1)*batch_size)))
        accuracy_[i] = sess.run(accuracy, feed_dict={X: batch_X, y: batch_y})
        
    print("The test accuracy is ",sum(accuracy_)/num_test_batches)


The train accuracy at epoch 1 is: 0.541999997944
The train accuracy at epoch 2 is: 0.798000003099
The train accuracy at epoch 3 is: 0.853999999762
The train accuracy at epoch 4 is: 0.893499996066
The train accuracy at epoch 5 is: 0.930499995351
The train accuracy at epoch 6 is: 0.961999991536
The train accuracy at epoch 7 is: 0.966499993801
The train accuracy at epoch 8 is: 0.971999994516
The train accuracy at epoch 9 is: 0.963499994278
The train accuracy at epoch 10 is: 0.975499994755
The train accuracy at epoch 15 is: 0.991999998093
The train accuracy at epoch 20 is: 0.991499997973
The train accuracy at epoch 25 is: 0.990499997735
The train accuracy at epoch 30 is: 0.994999998808
The train accuracy at epoch 35 is: 0.994999998808
The train accuracy at epoch 40 is: 0.996999999285
The train accuracy at epoch 45 is: 0.996499999166
The train accuracy at epoch 50 is: 0.997499999404
The train accuracy at epoch 55 is: 0.996999999285
The train accuracy at epoch 60 is: 0.997499999404
The train accuracy at epoch 65 is: 0.998499999642
The train accuracy at epoch 70 is: 1.0
The train accuracy at epoch 75 is: 0.998999999762
The train accuracy at epoch 80 is: 0.997999999523
The train accuracy at epoch 85 is: 0.999499999881
The train accuracy at epoch 90 is: 1.0
The train accuracy at epoch 95 is: 1.0
The train accuracy at epoch 100 is: 1.0
The train accuracy at epoch 120 is: 1.0
The train accuracy at epoch 140 is: 1.0
The test accuracy is  0.945999994874

In [ ]: