This is a quick illustration of training a simple multi-layer perceptron (two hidden layers) on the MNIST data.
(Credit for the original workbook: Aymeric Damien, https://github.com/aymericdamien/TensorFlow-Examples)
In [ ]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline
import gzip
import pickle
In [ ]:
# Seed for reproducibility
np.random.seed(42)
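Note that this seeds only NumPy's generator (used by the random batch sampler below). The tf.random_normal weight initialisers further down draw from TensorFlow's own generator, so a fully reproducible run would also need a graph-level seed; a minimal sketch:
In [ ]:
# Also seed TensorFlow's graph-level random generator
# (affects the tf.random_normal initialisers defined below)
tf.set_random_seed(42)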
In [ ]:
# Download the MNIST digits dataset (only if not present locally)
import os
import urllib.request
mnist_data = './data/MNIST'
mnist_pkl_gz = mnist_data+'/mnist.pkl.gz'
if not os.path.isfile(mnist_pkl_gz):
    if not os.path.exists(mnist_data):
        os.makedirs(mnist_data)
    print("Downloading MNIST data file")
    urllib.request.urlretrieve(
        'http://deeplearning.net/data/mnist/mnist.pkl.gz',
        mnist_pkl_gz)
print("MNIST data file available locally")
In [ ]:
# Load training and test splits as numpy arrays
train, val, test = pickle.load(gzip.open(mnist_pkl_gz), encoding='iso-8859-1')
X_train, y_train = train
X_val, y_val = val
X_test, y_test = test
In [ ]:
# The original 28x28 pixel images are flattened into 784-dimensional feature vectors
X_train.shape
In [ ]:
# Plot the first few examples
plt.figure(figsize=(12,3))
for i in range(10):
    plt.subplot(1, 10, i+1)
    plt.imshow(X_train[i].reshape((28, 28)), cmap='gray', interpolation='nearest')
    plt.axis('off')
In [ ]:
# Network Parameters
n_input = 784 # MNIST data input (img shape: 28*28)
n_hidden_1 = 256 # 1st layer number of features
n_hidden_2 = 256 # 2nd layer number of features
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input], name='x_input')
#y = tf.placeholder(tf.int32, [None, n_classes], name='y_target')  # originally, a one-hot label
y = tf.placeholder(tf.int32, [None, ], name='y_target')  # this is the label index instead
In [ ]:
# Create model
def multilayer_perceptron(x, layer):
    # Hidden layer with ReLU activation
    layer_1 = tf.add(tf.matmul(x, layer['h1']['weights']), layer['h1']['bias'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with ReLU activation
    layer_2 = tf.add(tf.matmul(layer_1, layer['h2']['weights']), layer['h2']['bias'])
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, layer['out']['weights']) + layer['out']['bias']
    return out_layer
In [ ]:
# Store layers weight & bias
layers = dict(
    h1 = {
        'weights': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
        'bias'   : tf.Variable(tf.random_normal([n_hidden_1])),
    },
    h2 = {
        'weights': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
        'bias'   : tf.Variable(tf.random_normal([n_hidden_2])),
    },
    out = {
        'weights': tf.Variable(tf.random_normal([n_hidden_2, n_classes])),
        'bias'   : tf.Variable(tf.random_normal([n_classes])),
    },
)
# Construct model
logits = multilayer_perceptron(x, layers)
pred = tf.argmax(logits, axis=1)  # NB: the older spelling tf.arg_max is the one being deprecated
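As a quick sanity check (not part of the original notebook), we can count the trainable parameters just created. For the 784 -> 256 -> 256 -> 10 architecture above, this should come to 784*256 + 256 + 256*256 + 256 + 256*10 + 10 = 269,322.
In [ ]:
# Count trainable parameters (assumes the variables above are the only
# ones created in the default graph so far)
total = sum(int(np.prod(v.get_shape().as_list())) for v in tf.trainable_variables())
print("Trainable parameters:", total)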
In [ ]:
# Define the cost for the labels (expressed as a one-hot encoding)
labels = tf.one_hot(indices=y, depth=n_classes)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
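Since y already holds integer class indices, an equivalent formulation skips the explicit one-hot step by using the sparse variant of the same loss. A minimal sketch (cost_sparse is a name introduced here just for illustration; the cost above is the one used for training):
In [ ]:
# Equivalent loss without building the one-hot tensor:
# the labels here are the raw class indices in y
cost_sparse = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))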
In [ ]:
# Parameters for the training phase
learning_rate = 0.001
TRAINING_EPOCHS = 10
BATCH_SIZE = 100
display_step = 1
In [ ]:
# Define optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
#optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate).minimize(cost)
In [ ]:
# Define an 'op' that initializes the variables
init = tf.global_variables_initializer()
In [ ]:
# We'll choose a batch size, and calculate the number of batches in an "epoch"
# (approximately one pass through the data).
N_BATCHES = len(X_train) // BATCH_SIZE
#N_VAL_BATCHES = len(X_val) // BATCH_SIZE
In [ ]:
# For training, we want to sample examples at random in small batches
def batch_gen(X_, y_, N):
    while True:
        # Sample N indices uniformly at random (with replacement)
        idx = np.random.choice(len(y_), N)
        yield X_[idx], y_[idx]
In [ ]:
# Minibatch generator(s) for the training and validation sets
train_batches = batch_gen(X_train, y_train, BATCH_SIZE)
#val_batches = batch_gen(X_val, y_val, BATCH_SIZE)
In [ ]:
# Try sampling from the batch generator.
# Plot an image and corresponding label from the training batcher to verify they match.
X_batch, y_batch = next(train_batches)
plt.imshow(X_batch[0].reshape((28, 28)), cmap='gray', interpolation='nearest')
print(y_batch[0])
In [ ]:
# Plot an image and corresponding label from the validation set to verify they match.
X_batch, y_batch = X_val, y_val
plt.imshow(X_batch[0].reshape((28, 28)), cmap='gray', interpolation='nearest')
print(y_batch[0])
X_batch.shape, y_batch.shape,
In [ ]:
#correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))  # with one-hot labels
correct_prediction = tf.equal(pred, tf.cast(y, tf.int64))  # with label indices
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
For each epoch, we run the training op N_BATCHES times, accumulating an estimate of the training loss as we go.
TODO: Print out the ratio of the validation-set loss to the training-set loss, to help recognize overfitting (see the sketch after the training cell below).
In [ ]:
# Launch the graph
with tf.Session() as sess:
    sess.run(init)  # Running this 'op' initialises the network weights

    # Training cycle
    for epoch in range(TRAINING_EPOCHS):
        avg_cost = 0.
        # Loop over all batches
        for _ in range(N_BATCHES):
            batch_x, batch_y = next(train_batches)
            #print(batch_x.shape, batch_y.shape)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
            # Compute average loss
            avg_cost += c / N_BATCHES
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch+1), "cost=", "{:.2f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model
    accuracy_, y_, pred_ = sess.run([accuracy, y, pred], feed_dict={x: X_val[0:10], y: y_val[0:10]})
    print("Validation Accuracy: %.2f%% for first 10 examples" % (100. * accuracy_,))
    #print(y_)
    #print(pred_)

    print("Validation Accuracy: %.2f%%" % (100. * accuracy.eval({x: X_val, y: y_val}),))
print("DONE")
In [ ]:
# This demonstrates that once the session is closed, the trained network is thrown away:
# in a fresh session the variables below would no longer be initialised.
#with tf.Session() as sess:
#    accuracy_, y_, pred_ = sess.run([accuracy, y, pred], feed_dict={x:X_test[0:100], y:y_test[0:100]})
#    print("Test Accuracy: %.2f%% for first 100 examples" % ( 100. * accuracy_, ))
#print("DONE")
In [ ]:
# Residue from an earlier Theano/Lasagne version of this notebook,
# where l_out.W held a (784, 10) single-layer weight matrix:
#weights = l_out.W.get_value()
#print(weights.shape)
#with tf.Session() as sess:
#    accuracy_, y_, pred_ = sess.run([accuracy, y, pred], feed_dict={x:X_val[0:10], y:y_val[0:10]})
In [ ]:
# Plotting per-class weight images only makes sense for a (784, n_classes) weight
# matrix (the single-layer version); this MLP's output weights are (256, 10).
#plt.figure(figsize=(12,3))
#for i in range(10):
#    plt.subplot(1, 10, i+1)
#    plt.imshow(weights[:,i].reshape((28, 28)), cmap='gray', interpolation='nearest')
#    plt.axis('off')