In [5]:
import numpy as np
import tensorflow as tf
import sklearn.utils  # for sklearn.utils.shuffle in the training loop
tf.__version__


Out[5]:
'1.1.0'

In [6]:
# get MNIST data

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)


Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
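
With one_hot=True the labels come back as length-10 indicator vectors rather than integer class ids. A small sketch of what that looks like (the exact row printed depends on the dataset ordering):

In [ ]:
# each row of mnist.train.labels is a one-hot vector over the digits 0-9,
# e.g. the digit 3 is encoded as [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
print(mnist.train.labels[0])
print(mnist.train.images[0].shape)  # each image is flattened 28x28 -> (784,)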

In [7]:
# unpack the pre-split train and test sets
X_train, Y_train, X_test, Y_test = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels
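
As a sanity check on the split sizes: this loader holds out 5,000 validation examples, so the arrays unpacked above should be 55,000 training and 10,000 test rows of 784 features each. A quick sketch to confirm:

In [ ]:
# confirm the flattened-image and one-hot label shapes before building the graph
print(X_train.shape, Y_train.shape)  # expected: (55000, 784) (55000, 10)
print(X_test.shape, Y_test.shape)    # expected: (10000, 784) (10000, 10)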

In [14]:
# Placeholders
X = tf.placeholder(tf.float32, [None, 784]) # 28 * 28 = 784 pixels per flattened image
Y = tf.placeholder(tf.float32, [None, 10])  # one-hot over the 10 digit classes 0-9

# Variables
HIDDEN_LAYER_1_SIZE = 256  # worth experimenting with these sizes
HIDDEN_LAYER_2_SIZE = 128

with tf.variable_scope("mnist_3layers_L2_expt1"):
    # note: with tf.get_variable(), unlike constructing variables directly
    #       (W1 = tf.Variable(...)), variable names must be unique within a
    #       given variable scope. If you get an error like
    #       "ValueError: Variable $variable_scope_name/$variable_name already exists, disallowed"
    #       at this stage, change the variable_scope name
    W1 = tf.get_variable("hidden_weights_1", [784, HIDDEN_LAYER_1_SIZE],
                         initializer=tf.random_normal_initializer())
    b1 = tf.get_variable("bias1", [HIDDEN_LAYER_1_SIZE],
                         initializer=tf.constant_initializer(0))
    W2 = tf.get_variable("hidden_weights_2", [HIDDEN_LAYER_1_SIZE, HIDDEN_LAYER_2_SIZE],
                         initializer=tf.random_normal_initializer())
    b2 = tf.get_variable("bias2", [HIDDEN_LAYER_2_SIZE],
                         initializer=tf.constant_initializer(0))
    W3 = tf.get_variable("hidden_weights_3", [HIDDEN_LAYER_2_SIZE, 10],
                         initializer=tf.random_normal_initializer())
    b3 = tf.get_variable("bias3", [10],
                         initializer=tf.constant_initializer(0))
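
# If a later cell needs these same variables, TF 1.x lets you reopen the scope
# with reuse=True rather than renaming it -- a sketch, commented out because
# nothing below needs it:
#     with tf.variable_scope("mnist_3layers_L2_expt1", reuse=True):
#         W1_again = tf.get_variable("hidden_weights_1")  # same variable as W1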

# Model
hidden_layer_1 = tf.nn.relu(tf.matmul(X, W1) + b1)
hidden_layer_2 = tf.nn.relu(tf.matmul(hidden_layer_1, W2) + b2)
logits = tf.matmul(hidden_layer_2, W3) + b3

# Cost function
REGULARIZATION_CONSTANT = 0.01
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)) \
     + REGULARIZATION_CONSTANT * (tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W3))
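
# Note on the penalty above: tf.nn.l2_loss(t) computes sum(t ** 2) / 2, so the
# cost adds (lambda / 2) * ||W||^2 for each weight matrix; biases are left
# unregularized, which is the usual convention. A quick sanity check to run in
# a scratch session:
#     sess.run(tf.nn.l2_loss(tf.constant([3.0, 4.0])))  # -> 12.5 == (9 + 16) / 2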

# Optimization
LEARNING_RATE = 0.05
optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cost)

# Prediction
predict = tf.argmax(logits, 1)
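
# An in-graph alternative to the NumPy accuracy computation in the training
# loop below (a sketch; defined here but not wired into the loop):
correct_prediction = tf.equal(predict, tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))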

# Train
NUM_ITERATIONS = 200  # accuracy plateaus well before this; ~50 iterations is enough
BATCH_SIZE = 128

with tf.Session() as sess:
    # initialize variables
    sess.run(tf.global_variables_initializer())
    
    # train
    for i in range(NUM_ITERATIONS):
        # minibatch training (the zipped ranges drop any final partial batch)
        X_train, Y_train = sklearn.utils.shuffle(X_train, Y_train)
        for start, end in zip(range(0, len(X_train), BATCH_SIZE), range(BATCH_SIZE, len(X_train)+1, BATCH_SIZE)):
            sess.run(optimizer, feed_dict={X: X_train[start:end], Y: Y_train[start:end]})

        # print train and test accuracy
        if i == 0 or i % 10 == 9:
            # accuracy = fraction of correct predictions on the full training set
            train_accuracy = np.mean(np.argmax(Y_train, axis=1) == sess.run(predict, feed_dict={X: X_train}))
            # and on the held-out test set
            test_accuracy = np.mean(np.argmax(Y_test, axis=1) == sess.run(predict, feed_dict={X: X_test}))
            print(i, train_accuracy, test_accuracy)


0 0.465072727273 0.467
9 0.937545454545 0.9405
19 0.9486 0.9503
29 0.951327272727 0.9518
39 0.952327272727 0.9521
49 0.953418181818 0.9534
59 0.953854545455 0.9547
69 0.954254545455 0.9538
79 0.952454545455 0.9531
89 0.9514 0.9503
99 0.953836363636 0.954
109 0.952836363636 0.9531
119 0.953581818182 0.9514
129 0.954018181818 0.954
139 0.953054545455 0.9512
149 0.953709090909 0.953
159 0.953727272727 0.9547
169 0.954981818182 0.954
179 0.954254545455 0.9536
189 0.953909090909 0.9532
199 0.954654545455 0.9545