In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.linear_model import LogisticRegression
In [2]:
def accuracy(predictions, labels):
    """Return the percentage of rows whose arg-max class matches the label.

    Args:
        predictions: 2-D array with one row of class scores per example.
        labels: 2-D array of the same layout; the class of a row is the
            index of its largest entry.

    Returns:
        Percentage (0-100) of rows where both arg-max indices agree.
    """
    predicted_classes = np.argmax(predictions, 1)
    true_classes = np.argmax(labels, 1)
    num_correct = np.sum(predicted_classes == true_classes)
    return 100.0 * num_correct / predictions.shape[0]
# Reshape flat image rows into the 4-D layout used by the convolutional networks.
def reformat(dataset):
    """Return `dataset` as float32 with shape (-1, image_size, image_size, num_channels).

    `image_size` and `num_channels` are module-level names looked up when the
    function is called, so they must be defined before the first call.
    """
    conv_shape = (-1, image_size, image_size, num_channels)
    return dataset.reshape(conv_shape).astype(np.float32)
In [3]:
# Load MNIST (from ./MNIST_data/) with one-hot encoded labels.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

# Each image is 28x28 pixels and belongs to one of 10 classes.
image_size = 28
num_labels = 10

train_dataset, train_labels = mnist.train.images, mnist.train.labels

# Use the first 10% of the official test split for validation and the
# remaining 90% as the actual test set.
num_test_examples = mnist.test.images.shape[0]
# NOTE(review): `perm` is drawn but never applied below, so the split is
# positional rather than shuffled.
perm = np.random.permutation(num_test_examples)
split_point = int(num_test_examples * 0.1)
valid_dataset = mnist.test.images[:split_point]
test_dataset = mnist.test.images[split_point:]
valid_labels = mnist.test.labels[:split_point]
test_labels = mnist.test.labels[split_point:]
Let's set a baseline with a simple logistic regression, omitting regularization, just to see where we are starting from.
In [7]:
# scikit-learn wants integer class ids rather than one-hot rows. For a one-hot
# matrix, the column index of each non-zero entry is that row's class id.
one_hot_train = mnist.train.labels
one_hot_test = mnist.test.labels[split_point:]
train_labels_not_hot = np.nonzero(one_hot_train)[1]
test_labels_not_hot = np.nonzero(one_hot_test)[1]
In [10]:
# Baseline classifier built with scikit-learn's default hyper-parameters.
# NOTE(review): the library defaults are believed to include an L2 penalty
# (C=1.0), which would make this baseline regularized even though the notebook
# text says regularization is omitted. Confirm against the installed version.
lr = LogisticRegression()
In [11]:
# Fit on the flat training images, then report the score on the held-out test
# rows. `fit` returns the estimator itself, so the calls can be chained and the
# score remains the cell's displayed value.
lr.fit(train_dataset, train_labels_not_hot).score(test_dataset, test_labels_not_hot)
Out[11]:
Building the model
In [12]:
batch_size = 128

graph = tf.Graph()
with graph.as_default():
    # Input data. The training minibatch is fed at run time through
    # placeholders; the validation and test sets are embedded as constants.
    with tf.name_scope('input'):
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size * image_size))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

    # Variables and the training-time forward pass:
    # flat image -> 1024 ReLU units -> num_labels logits.
    with tf.name_scope('hidden'):
        weights_hidden = tf.Variable(
            tf.truncated_normal([image_size * image_size, 1024], stddev=0.1),
            name='weights')
        biases_hidden = tf.Variable(tf.constant(0.1, shape=[1024]), name='biases')
        relu_output = tf.nn.relu(tf.matmul(tf_train_dataset, weights_hidden) + biases_hidden)
    with tf.name_scope('output'):
        weights_output = tf.Variable(
            tf.truncated_normal([1024, num_labels], stddev=0.1), name='weights')
        biases_output = tf.Variable(tf.constant(0.1, shape=[num_labels]), name='biases')
        logits = tf.matmul(relu_output, weights_output) + biases_output

    # Mean cross-entropy over the minibatch. The arguments are passed by name
    # so the call does not depend on the positional order of logits/labels.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    def _predict(data):
        """Run `data` through the shared hidden and output layers; return softmax probabilities."""
        hidden = tf.nn.relu(tf.matmul(data, weights_hidden) + biases_hidden)
        return tf.nn.softmax(tf.matmul(hidden, weights_output) + biases_output)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = _predict(tf_valid_dataset)
    test_prediction = _predict(tf_test_dataset)
Training the model
In [13]:
num_steps = 3001

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print("Initialized")
    for step in range(num_steps):
        # Walk through the training data in consecutive minibatches, wrapping
        # around so that a full batch is always available.
        # Note: we could use better randomization across epochs.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        # Map each placeholder node of the graph to the numpy array fed to it
        # on this step.
        feed_dict = {tf_train_dataset: batch_data,
                     tf_train_labels: batch_labels}
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if step % 500 == 0:
            print("Minibatch loss at step %d: %f" % (step, l))
            print("Minibatch accuracy: %.1f%%" % accuracy(predictions, batch_labels))
            print("Validation accuracy: %.1f%%" % accuracy(valid_prediction.eval(), valid_labels))
    print("Test accuracy: %.1f%%" % accuracy(test_prediction.eval(), test_labels))

    # Create the summary writers. The training writer records the graph under
    # ./train and a second writer is opened on ./test. `merged` is kept for
    # later use but is not evaluated in this cell.
    merged = tf.merge_all_summaries()
    train_writer = tf.train.SummaryWriter('./train', session.graph)
    test_writer = tf.train.SummaryWriter('./test')
    # Close both writers so pending events are flushed to disk and the file
    # handles are released.
    train_writer.close()
    test_writer.close()
In [ ]:
batch_size = 128
# Weight of the L2 penalty added to the loss.
beta = 0.001

graph = tf.Graph()
with graph.as_default():
    # Input data. The training minibatch is fed at run time through
    # placeholders; the validation and test sets are embedded as constants.
    with tf.name_scope('input'):
        tf_train_dataset = tf.placeholder(
            tf.float32, shape=(batch_size, image_size * image_size))
        tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
        tf_valid_dataset = tf.constant(valid_dataset)
        tf_test_dataset = tf.constant(test_dataset)

    # Keep probability for dropout, fed on every training step.
    keep_prob = tf.placeholder(tf.float32)

    # Variables. Dropout is applied to the weight matrices themselves, and only
    # on the training path. The evaluation paths below use the full weights.
    with tf.name_scope('hidden'):
        weights_hidden = tf.Variable(
            tf.truncated_normal([image_size * image_size, 1024], stddev=0.1),
            name='weights')
        weights_hidden_dropped = tf.nn.dropout(weights_hidden, keep_prob)
        biases_hidden = tf.Variable(tf.constant(0.1, shape=[1024]), name='biases')
        relu_output = tf.nn.relu(
            tf.matmul(tf_train_dataset, weights_hidden_dropped) + biases_hidden)
    with tf.name_scope('output'):
        weights_output = tf.Variable(
            tf.truncated_normal([1024, num_labels], stddev=0.1), name='weights')
        weights_output_dropped = tf.nn.dropout(weights_output, keep_prob)
        biases_output = tf.Variable(tf.constant(0.1, shape=[num_labels]), name='biases')
        logits = tf.matmul(relu_output, weights_output_dropped) + biases_output

    # Mean cross-entropy over the minibatch (arguments passed by name so the
    # call does not depend on positional order), plus the L2 penalty.
    # NOTE(review): the penalty is taken on the dropped output weights, as in
    # the original cell. Confirm that this, rather than `weights_output`, is intended.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
    loss = cross_entropy + beta * tf.nn.l2_loss(weights_output_dropped)

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

    def _predict(data):
        """Run `data` through both layers with undropped weights; return softmax probabilities."""
        hidden = tf.nn.relu(tf.matmul(data, weights_hidden) + biases_hidden)
        return tf.nn.softmax(tf.matmul(hidden, weights_output) + biases_output)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = _predict(tf_valid_dataset)
    test_prediction = _predict(tf_test_dataset)
num_steps = 3001
# Test accuracy for each keep probability, in sweep order. It is created here
# so the cell runs on a fresh kernel and does not accumulate across re-runs.
accuracy_val_nn_l2 = []

for kp in np.arange(0.5, 1, 0.1):
    with tf.Session(graph=graph) as session:
        tf.initialize_all_variables().run()
        print("Initialized")
        for step in range(num_steps):
            # The trailing `% 10` confines the offset to 0..9, so every
            # minibatch is drawn from the first batch_size + 9 training rows.
            # NOTE(review): presumably a deliberate small-data overfitting
            # experiment. Confirm.
            offset = (step * batch_size) % (train_labels.shape[0] - batch_size) % 10
            batch_data = train_dataset[offset:(offset + batch_size), :]
            batch_labels = train_labels[offset:(offset + batch_size), :]
            # Map each placeholder node of the graph to the value fed to it,
            # including the keep probability under test.
            feed_dict = {
                tf_train_dataset: batch_data,
                tf_train_labels: batch_labels,
                keep_prob: kp}
            _, l, predictions = session.run(
                [optimizer, loss, train_prediction], feed_dict=feed_dict)
        # Evaluate once and reuse the value for both the report and the record.
        test_accuracy = accuracy(test_prediction.eval(), test_labels)
        print("Keep prob: %s Test accuracy: %.1f%%" % (kp, test_accuracy))
        accuracy_val_nn_l2.append(test_accuracy)
The accuracy measures with different dropout tactics:
Dropout on both layers results:
Initialized
Keep prob: 0.5 Test accuracy: 75.3%
Initialized
Keep prob: 0.6 Test accuracy: 76.7%
Initialized
Keep prob: 0.7 Test accuracy: 76.8%
Initialized
Keep prob: 0.8 Test accuracy: 76.4%
Initialized
Keep prob: 0.9 Test accuracy: 74.1%
Dropout on both layers plus L2 regularization with a 0.0001 beta:
Initialized
Keep prob: 0.5 Test accuracy: 75.9%
Initialized
Keep prob: 0.6 Test accuracy: 76.0%
Initialized
Keep prob: 0.7 Test accuracy: 75.8%
Initialized
Keep prob: 0.8 Test accuracy: 75.3%
Initialized
Keep prob: 0.9 Test accuracy: 75.5%
In [15]:
# Hyper-parameters shared by the convolutional models below.
num_channels = 1   # the reshaped images carry a single channel
batch_size = 16
patch_size = 5     # side length of the square convolution filters
depth = 32         # number of filters per convolutional layer
num_hidden = 64    # width of the fully connected layer

# 4-D (examples, height, width, channels) versions of the flat image arrays.
train_dataset_conv = reformat(train_dataset)
valid_dataset_conv = reformat(valid_dataset)
test_dataset_conv = reformat(test_dataset)

# Show the image and label shapes of each split.
for images, labels in ((train_dataset_conv, train_labels),
                       (valid_dataset_conv, valid_labels),
                       (test_dataset_conv, test_labels)):
    print(images.shape, labels.shape)
In [16]:
depth = 16

graph = tf.Graph()
with graph.as_default():
    # Input data.
    tf_train_dataset = tf.placeholder(
        tf.float32, shape=(batch_size, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
    tf_valid_dataset = tf.constant(valid_dataset_conv)
    tf_test_dataset = tf.constant(test_dataset_conv)

    # Variables.
    layer1_weights = tf.Variable(
        tf.truncated_normal([patch_size, patch_size, num_channels, depth], stddev=0.1))
    layer1_biases = tf.Variable(tf.zeros([depth]))
    layer2_weights = tf.Variable(
        tf.truncated_normal([patch_size, patch_size, depth, depth], stddev=0.1))
    layer2_biases = tf.Variable(tf.constant(1.0, shape=[depth]))
    # Size of the flattened second-layer output. The parentheses matter:
    # without them the expression parses as
    # ((image_size // 4) * image_size) // 4 * depth, which is only equal to the
    # intended value when image_size is a multiple of 4 (as 28 is).
    flat_size = (image_size // 4) * (image_size // 4) * depth
    layer3_weights = tf.Variable(tf.truncated_normal([flat_size, num_hidden], stddev=0.1))
    layer3_biases = tf.Variable(tf.constant(1.0, shape=[num_hidden]))
    layer4_weights = tf.Variable(tf.truncated_normal([num_hidden, num_labels], stddev=0.1))
    layer4_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))

    # Model.
    def model(data):
        """Return logits for `data`: two stride-2 conv + ReLU layers, a ReLU dense layer, a linear readout."""
        conv = tf.nn.conv2d(data, layer1_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer1_biases)
        conv = tf.nn.conv2d(hidden, layer2_weights, [1, 2, 2, 1], padding='SAME')
        hidden = tf.nn.relu(conv + layer2_biases)
        # Flatten everything but the leading (example) dimension.
        shape = hidden.get_shape().as_list()
        reshape = tf.reshape(hidden, [shape[0], shape[1] * shape[2] * shape[3]])
        hidden = tf.nn.relu(tf.matmul(reshape, layer3_weights) + layer3_biases)
        return tf.matmul(hidden, layer4_weights) + layer4_biases

    # Training computation. The arguments are passed by name so the call does
    # not depend on the positional order of logits/labels.
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))

    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    # Predictions for the training, validation, and test data.
    train_prediction = tf.nn.softmax(logits)
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))
In [17]:
num_steps = 1001

with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    num_train = train_labels.shape[0]
    for step in range(num_steps):
        # Consecutive minibatches, wrapping so a full batch is always available.
        offset = (step * batch_size) % (num_train - batch_size)
        batch = slice(offset, offset + batch_size)
        batch_data = train_dataset_conv[batch]
        batch_labels = train_labels[batch]
        _, l, predictions = session.run(
            [optimizer, loss, train_prediction],
            feed_dict={tf_train_dataset: batch_data, tf_train_labels: batch_labels})
        # Report progress only on every 50th step.
        if step % 50 != 0:
            continue
        print('Minibatch loss at step %d: %f' % (step, l))
        print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
        print('Validation accuracy: %.1f%%' % accuracy(valid_prediction.eval(), valid_labels))
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
Some helper functions
In [18]:
def weight_variable(shape):
    """Create a variable of the given shape, initialised from a truncated normal with stddev 0.1."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Create a variable of the given shape with every entry initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 in every dimension and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool `x` over 2x2 windows with stride 2 and SAME padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
In [19]:
depth = 32

graph = tf.Graph()
with graph.as_default():
    # Dropout keep probability, fed at run time.
    keep_prob = tf.placeholder(tf.float32)

    # Inputs. A `None` leading dimension leaves the number of examples
    # unspecified, so the same placeholders accept batches of any size.
    tf_train_batch = tf.placeholder(
        tf.float32, shape=(None, image_size, image_size, num_channels))
    tf_train_labels = tf.placeholder(tf.float32, shape=(None, num_labels))

    # Constants.
    tf_valid_dataset = tf.constant(valid_dataset_conv)
    tf_test_dataset = tf.constant(test_dataset_conv)

    # Variables.
    h_conv1_weights = weight_variable([patch_size, patch_size, num_channels, depth])
    h_conv1_biases = bias_variable([depth])
    h_conv2_weights = weight_variable([patch_size, patch_size, depth, depth * 2])
    h_conv2_biases = bias_variable([depth * 2])
    # Side length assumed for the feature maps after the two pooling steps.
    conv_image_size = image_size // 4
    flat_features = conv_image_size * conv_image_size * depth * 2
    fc1_weights = weight_variable([flat_features, num_hidden])
    fc1_biases = bias_variable([num_hidden])
    output_softmax_weights = weight_variable([num_hidden, num_labels])
    output_softmax_biases = bias_variable([num_labels])

    # Model definition.
    # First layer: patch_size x patch_size filters producing `depth` feature
    # maps, followed by 2x2 max pooling.
    h_conv1 = tf.nn.relu(conv2d(tf_train_batch, h_conv1_weights) + h_conv1_biases)
    h_pool1 = max_pool_2x2(h_conv1)
    # Second layer: same filter size, `depth * 2` feature maps, pooled again.
    h_conv2 = tf.nn.relu(conv2d(h_pool1, h_conv2_weights) + h_conv2_biases)
    h_pool2 = max_pool_2x2(h_conv2)
    # Flatten each example for the densely connected layer.
    h_pool2_flat = tf.reshape(h_pool2, [-1, flat_features])
    # Fully connected layer with dropout on its activations.
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, fc1_weights) + fc1_biases)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    # Readout layer: class probabilities.
    readout = tf.matmul(h_fc1_drop, output_softmax_weights) + output_softmax_biases
    y_conv = tf.nn.softmax(readout)
In [20]:
with tf.Session(graph=graph) as sess:
    # Training computation: cross-entropy between the one-hot labels and the
    # softmax output. The probabilities are clipped away from zero before the
    # log, because log(0) = -inf would turn the loss and its gradients into NaN.
    clipped_probs = tf.clip_by_value(y_conv, 1e-10, 1.0)
    cross_entropy = tf.reduce_mean(
        -tf.reduce_sum(tf_train_labels * tf.log(clipped_probs), reduction_indices=[1]))
    # Optimizer.
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

    # Accuracy of the model on whatever batch is fed.
    # y_conv is a softmax output; its largest entry marks the most probable
    # class according to the model (e.g. [0.1, 0.2, 0.1, 0.6]).
    # tf_train_labels holds the one-hot labels (e.g. [0, 0, 0, 1]).
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(tf_train_labels, 1))
    # Named `accuracy_op` so it does not overwrite the `accuracy()` helper
    # function that the earlier training cells call.
    accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Initialize the variables (after the optimizer has been created, so the
    # variables it adds are initialized too).
    sess.run(tf.initialize_all_variables())

    for step in range(3001):
        # Consecutive minibatches, wrapping so a full batch is always available.
        # TODO: randomize batch selection to reduce recurring batches.
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset_conv[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        if step % 100 == 0:
            # Evaluate with dropout disabled (keep_prob = 1.0).
            train_accuracy = accuracy_op.eval(feed_dict={tf_train_batch: batch_data,
                                                         tf_train_labels: batch_labels,
                                                         keep_prob: 1.0})
            print("step %d, training accuracy %g" % (step, train_accuracy))
        # Train with half of the fully connected activations dropped.
        train_step.run(feed_dict={tf_train_batch: batch_data,
                                  tf_train_labels: batch_labels,
                                  keep_prob: 0.5})

    print("test accuracy %g" % accuracy_op.eval(feed_dict={tf_train_batch: test_dataset_conv,
                                                          tf_train_labels: test_labels,
                                                          keep_prob: 1.0}))
At 20'000 Iterations it has achieved a test accuracy of 0.986...
In [ ]: