In [5]:
import numpy as np
import tensorflow as tf
import sklearn.utils  # import the submodule explicitly so sklearn.utils.shuffle resolves below
tf.__version__
Out[5]:
In [6]:
# get MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
In [7]:
# pull out the (already split) train and test sets
X_train, Y_train = mnist.train.images, mnist.train.labels
X_test, Y_test = mnist.test.images, mnist.test.labels
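As a quick sanity check on the split (the tutorial loader also keeps a 5,000-image validation set in mnist.validation, unused here):

In [ ]:
# each 28x28 image arrives flattened to a 784-vector with a one-hot label;
# expect (55000, 784) / (55000, 10) for train and (10000, 784) / (10000, 10) for test
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)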
In [14]:
# Placeholders
X = tf.placeholder(tf.float32, [None, 784])  # 28 * 28 pixels = 784
Y = tf.placeholder(tf.float32, [None, 10])  # one-hot over digits 0-9
# Variables
HIDDEN_LAYER_1_SIZE = 256 # should play around with these
HIDDEN_LAYER_2_SIZE = 128
with tf.variable_scope("mnist_3layers_L2_expt1"):
    # note: with tf.get_variable(), unlike initializing variables directly (W1 = tf.Variable(...)),
    # variable names must be unique within a given scope. If you get an error
    # like "ValueError: Variable $variable_scope_name/$variable_name already exists, disallowed"
    # at this stage, change your variable_scope name
    W1 = tf.get_variable("hidden_weights_1", [784, HIDDEN_LAYER_1_SIZE],
                         initializer=tf.random_normal_initializer())
    b1 = tf.get_variable("bias1", [HIDDEN_LAYER_1_SIZE],
                         initializer=tf.constant_initializer(0))
    W2 = tf.get_variable("hidden_weights_2", [HIDDEN_LAYER_1_SIZE, HIDDEN_LAYER_2_SIZE],
                         initializer=tf.random_normal_initializer())
    b2 = tf.get_variable("bias2", [HIDDEN_LAYER_2_SIZE],
                         initializer=tf.constant_initializer(0))
    W3 = tf.get_variable("hidden_weights_3", [HIDDEN_LAYER_2_SIZE, 10],
                         initializer=tf.random_normal_initializer())
    b3 = tf.get_variable("bias3", [10],
                         initializer=tf.constant_initializer(0))
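# for reference, this gives 784*256 + 256*128 + 128*10 = 234,752 weights
# plus 256 + 128 + 10 = 394 biases, ~235K parameters in total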
# Model
hidden_layer_1 = tf.nn.relu(tf.matmul(X, W1) + b1)
hidden_layer_2 = tf.nn.relu(tf.matmul(hidden_layer_1, W2) + b2)
logits = tf.matmul(hidden_layer_2, W3) + b3
# Cost function
REGULARIZATION_CONSTANT = 0.01
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y)) \
       + REGULARIZATION_CONSTANT * (tf.nn.l2_loss(W1) + tf.nn.l2_loss(W2) + tf.nn.l2_loss(W3))
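# note: tf.nn.l2_loss(W) computes sum(W**2) / 2, so the penalty above is
# (lambda/2) * (sum of squared weights); biases are conventionally left unregularized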
# Optimization
LEARNING_RATE = 0.05
optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cost)
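# (tf.train.AdamOptimizer is a drop-in alternative here, usually with a smaller
# learning rate such as its 0.001 default)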
# Prediction
predict = tf.argmax(logits, 1)
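# argmax over raw logits is enough for prediction: softmax is monotonic, so the
# largest logit and the largest softmax probability pick the same digit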
# Train
NUM_ITERATIONS = 200 # epochs, i.e. full passes over the training set; honestly, 50 is enough
BATCH_SIZE = 128
with tf.Session() as sess:
    # initialize variables
    sess.run(tf.global_variables_initializer())
    # train
    for i in range(NUM_ITERATIONS):
        # minibatch training: reshuffle every epoch, then step through it in
        # BATCH_SIZE chunks (the last partial batch is dropped)
        X_train, Y_train = sklearn.utils.shuffle(X_train, Y_train)
        for start, end in zip(range(0, len(X_train), BATCH_SIZE),
                              range(BATCH_SIZE, len(X_train) + 1, BATCH_SIZE)):
            sess.run(optimizer, feed_dict={X: X_train[start:end], Y: Y_train[start:end]})
        # print train and test accuracy (the argmax comparison yields the
        # fraction of correct predictions, i.e. accuracy, not error)
        if i == 0 or i % 10 == 9:
            train_accuracy = np.mean(np.argmax(Y_train, axis=1) == sess.run(predict, feed_dict={X: X_train}))
            test_accuracy = np.mean(np.argmax(Y_test, axis=1) == sess.run(predict, feed_dict={X: X_test}))
            print(i, train_accuracy, test_accuracy)
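The shuffle-and-slice loop above can also be written with the loader's own minibatch helper, which reshuffles between passes on its own. A minimal sketch of an equivalent epoch, meant to replace the two minibatch lines inside the session block:

In [ ]:
# one epoch via DataSet.next_batch; 55000 // BATCH_SIZE steps is roughly
# one full pass over the training set
for _ in range(len(X_train) // BATCH_SIZE):
    batch_X, batch_Y = mnist.train.next_batch(BATCH_SIZE)
    sess.run(optimizer, feed_dict={X: batch_X, Y: batch_Y})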