Previously, in 2_fullyconnected.ipynb and 3_regularization.ipynb, we trained fully connected networks to classify notMNIST characters.
The goal of this assignment is to make the neural network convolutional.
In [1]:
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
import numpy as np
import tensorflow as tf
from tensorflow.contrib import layers
from six.moves import cPickle as pickle
from six.moves import range
from tensorbuilder.api import *
from plotly import offline as py, graph_objs as go, tools
import random
py.init_notebook_mode()
In [2]:
pickle_file = 'notMNIST.pickle'
with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    train_dataset = save['train_dataset']
    train_labels = save['train_labels']
    valid_dataset = save['valid_dataset']
    valid_labels = save['valid_labels']
    test_dataset = save['test_dataset']
    test_labels = save['test_labels']
    del save  # hint to help gc free up memory

print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
Reformat into a TensorFlow-friendly shape:
In [3]:
image_size = 28
num_labels = 10
num_channels = 1 # grayscale
def reformat(dataset, labels):
    dataset = dataset.reshape(
        (-1, image_size, image_size, num_channels)).astype(np.float32)
    labels = (np.arange(num_labels) == labels[:, None]).astype(np.float32)
    return dataset, labels
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
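The label reformatting above relies on a NumPy broadcasting trick: comparing the row vector np.arange(num_labels) against the labels as a column produces a boolean matrix with exactly one True per row, which casts to a one-hot encoding. A small worked example:

# Worked example of the one-hot broadcasting trick used in reformat().
labels = np.array([2, 0, 9])
one_hot = (np.arange(10) == labels[:, None]).astype(np.float32)
# one_hot[0] == [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]  -> class 2
# one_hot[1] == [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -> class 0
# one_hot[2] == [0, 0, 0, 0, 0, 0, 0, 0, 0, 1]  -> class 9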
In [4]:
training_data = T.data(x=train_dataset, y=train_labels)
test_data = T.data(x=test_dataset, y=test_labels)
validation_data = T.data(x=valid_dataset, y=valid_labels)
x_data = np.vstack((train_dataset, test_dataset, valid_dataset))
y_data = np.vstack((train_labels, test_labels, valid_labels))
In [5]:
def crange(initial, final='z'):
    """Yield the characters from `initial` to `final`, inclusive."""
    initial = ord(initial)
    final = ord(final) + 1
    for i in range(initial, final):
        yield chr(i)

char_dict = dict(enumerate(crange('a', 'j')))

def axis_ops():
    return dict(
        autorange=True,
        showgrid=False,
        zeroline=False,
        showline=False,
        autotick=True,
        ticks='',
        showticklabels=False
    )
rand = random.sample(range(len(x_data)), 10)
titles = [ "Letter {0}".format(char_dict[np.argmax(y_data[i])].upper()) for i in rand ]
layout = go.Layout(xaxis=axis_ops(), yaxis=axis_ops())
fig = tools.make_subplots(rows=2, cols=5, print_grid=False, subplot_titles=titles)
def heatmap(z, smooth='best', reversescale=True):
    return go.Heatmap(
        z=z,
        colorscale='Greys',
        reversescale=reversescale,
        showscale=False,
        showlegend=False,
        zsmooth=smooth
    )

def printmat(x):
    print(np.array2string(np.abs(x[:, :]), formatter={'float_kind': '{0:.1f}'.format}))
for row in range(2):
    for col in range(5):
        i = rand[row * 5 + col]
        trace = heatmap(x_data[i][::-1, :, 0])
        fig.append_trace(trace, row + 1, col + 1)

for i in range(10):
    fig['layout']['xaxis{0}'.format(i + 1)].update(**axis_ops())
    fig['layout']['yaxis{0}'.format(i + 1)].update(**axis_ops())
py.iplot(fig)
Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit their depth and the number of fully connected nodes.
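The cell below (In [24]) actually goes further than that plan, stacking inception modules and batch-normalized convolutions via TensorBuilder. For orientation, here is a minimal sketch of the plain two-conv-plus-one-fully-connected baseline this paragraph describes, using the tf.contrib.layers API already imported above; the filter counts and hidden size (16, 32, 64) are illustrative assumptions, not values from this notebook:

# Baseline sketch: two conv layers + one fully connected layer.
# Layer sizes here are assumptions chosen for illustration only.
baseline = tf.Graph()
with baseline.as_default():
    bx = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    by = tf.placeholder(tf.float32, shape=[None, 10])
    conv1 = layers.conv2d(bx, 16, [5, 5], stride=2)     # 28x28 -> 14x14
    conv2 = layers.conv2d(conv1, 32, [5, 5], stride=2)  # 14x14 -> 7x7
    flat = layers.flatten(conv2)
    hidden = layers.fully_connected(flat, 64)
    logits = layers.fully_connected(hidden, 10, activation_fn=None)
    baseline_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=by, logits=logits))
    baseline_train = tf.train.GradientDescentOptimizer(0.05).minimize(baseline_loss)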
In [10]:
sample_indexes = rand
sample_index = 6
sample_features = x_data[sample_indexes]
sample_labels = y_data[sample_indexes]
sample = sample_features[sample_index][:, :, 0]
hmap = heatmap(sample)
hlayout = layout.copy()
hlayout.update(width=700,height=700)
print(titles[sample_index])
py.iplot(go.Figure(data=[hmap], layout=hlayout))
printmat(sample[:, 5:-5])
In [24]:
graph = tf.Graph()

with graph.as_default():
    with tf.name_scope('inputs'):
        x = tf.placeholder('float', shape=[None, 28, 28, 1], name='x')
        y = tf.placeholder('float', shape=[None, 10], name='y')
        learning_rate = tf.placeholder('float', name='learning_rate')
        keep_prob = tf.placeholder('float', name='keep_prob')

    # Build the network, the training op, and the loss in one pipeline.
    [h, trainer, loss] = T.Pipe(
        x, T
        .inception_layer(4, activation_fn=tf.nn.elu, normalizer_fn=layers.batch_norm)
        .inception_layer(8, activation_fn=tf.nn.elu, normalizer_fn=layers.batch_norm)
        .elu_conv2d_layer(64, [3, 3], normalizer_fn=layers.batch_norm)
        .max_pool2d(2)
        .elu_conv2d_layer(128, [3, 3], normalizer_fn=layers.batch_norm)
        .max_pool2d(2)
        .elu_conv2d_layer(128, [3, 3], padding='VALID', normalizer_fn=layers.batch_norm)
        .elu_conv2d_layer(128, [3, 3], padding='VALID', normalizer_fn=layers.batch_norm)
        .elu_conv2d_layer(256, [3, 3], padding='VALID', normalizer_fn=layers.batch_norm)
        .flatten()
        .dropout(keep_prob)
        .linear_layer(10)  # logits
        .List(
            T.softmax(name='h'),  # h: predicted class probabilities

            With(tf.name_scope('loss'),
                T.softmax_cross_entropy_with_logits(y).reduce_mean()
                .summary.create_scalar('loss').Write('loss')
            )
            .minimize(tf.train.AdadeltaOptimizer(learning_rate)),  # trainer

            Read('loss')  # loss
        )
    )

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(h, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    summaries = tf.summary.merge_all()
    saver = tf.train.Saver()

model_name = "not-mnist-inception-batch-norm.model"
logs_dir = model_dir = "/logs/{0}".format(model_name)
writer = tf.summary.FileWriter(logs_dir, graph=graph)

def feed_dict(data, rate, prob):
    return {
        x: data.x,
        y: data.y,
        learning_rate: rate,
        keep_prob: prob
    }

def count_parameters(graph):
    import operator
    from functools import reduce  # reduce is not a builtin in Python 3
    with graph.as_default():
        prod = lambda iterable: reduce(operator.mul, iterable, 1)
        dims = [[int(d) for d in v.get_shape()] for v in tf.global_variables()]
        return sum(prod(shape) for shape in dims)

print("Number of parameters:", count_parameters(graph))
In [25]:
[sample_test, _] = test_data.split(0.3, 0.7)
[sample_train, _] = training_data.split(0.02, 0.98)
[sample_validation, _] = validation_data.split(0.3, 0.7)

with tf.Session(graph=graph, config=tf.ConfigProto(log_device_placement=True)) as sess:
    # To train from scratch instead of resuming, initialize the variables:
    # sess.run(tf.global_variables_initializer())
    rate = 0.01
    best = 0.0  # best test accuracy seen so far; checkpoint when it improves
    saver.restore(sess, model_dir)

    for step, datum in training_data.epochs(20000).batches_of(500).enumerated():
        [_, _summaries] = sess.run(
            [trainer, summaries],
            feed_dict=feed_dict(datum, rate, 0.4)
        )

        if step % 5 == 0:
            writer.add_summary(_summaries, step)

        if step % 200 == 0:
            print("Step", step)

            [test_loss, test_accuracy] = sess.run(
                [loss, accuracy],
                feed_dict=feed_dict(sample_test, rate, 1.0)
            )
            print("[Test] loss: {0}, accuracy: {1}".format(test_loss, test_accuracy))

            [train_loss, train_accuracy] = sess.run(
                [loss, accuracy],
                feed_dict=feed_dict(sample_train, rate, 1.0)
            )
            print("[Train] loss: {0}, accuracy: {1}".format(train_loss, train_accuracy))

            [validation_loss, validation_accuracy] = sess.run(
                [loss, accuracy],
                feed_dict=feed_dict(sample_validation, rate, 1.0)
            )
            print("[Validation] loss: {0}, accuracy: {1}".format(validation_loss, validation_accuracy))

            if test_accuracy > best:
                print("Saving")
                saver.save(sess, model_dir)
                best = test_accuracy

            print("")
In [27]:
rand = random.sample(range(len(x_data)), 30)
answers = np.argmax(y_data[rand], 1)
samples = x_data[rand]

with tf.Session(graph=graph) as sess:
    saver.restore(sess, model_dir)
    predictions = sess.run(h, feed_dict={x: samples, keep_prob: 1.0})
    prediction_vals = np.argmax(predictions, 1)

for answer, sample, prediction, prediction_val in zip(answers, samples, predictions, prediction_vals):
    fig = tools.make_subplots(
        rows=1, cols=2, print_grid=False,
        subplot_titles=(
            'Answer {0}'.format(char_dict[answer].upper()),
            'Prediction {0} - {1}'.format(char_dict[prediction_val].upper(), prediction_val == answer)
        )
    )
    heatmap_trace = heatmap(sample[::-1, :, 0])
    bar_trace = go.Bar(
        x=list(crange('a', 'j')),
        y=prediction
    )
    fig.append_trace(heatmap_trace, 1, 1)
    fig.append_trace(bar_trace, 1, 2)
    for i in range(2):
        fig['layout']['xaxis{0}'.format(i + 1)].update(**axis_ops())
        fig['layout']['yaxis{0}'.format(i + 1)].update(**axis_ops())
    py.iplot(fig)