Previously in 2_fullyconnected.ipynb and 3_regularization.ipynb, we trained fully connected networks to classify notMNIST characters.
The goal of this assignment is to make the neural network convolutional.
In [1]:
    
# These are all the modules we'll be using later. Make sure you can import them
# before proceeding further.
import numpy as np
import tensorflow as tf
from tensorflow.contrib import layers
from six.moves import cPickle as pickle
from six.moves import range
from tensorbuilder.api import *
from plotly import offline as py, graph_objs as go, tools
import random
py.init_notebook_mode()
    
    
    
    
In [2]:
    
# Load the notMNIST splits from a local pickle file.
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only point pickle_file at a file you created yourself or otherwise trust.
pickle_file = 'notMNIST.pickle'
with open(pickle_file, 'rb') as f:
  save = pickle.load(f)
  # The unpickled object is indexed by name: one dataset/labels pair per split.
  train_dataset = save['train_dataset']
  train_labels = save['train_labels']
  valid_dataset = save['valid_dataset']
  valid_labels = save['valid_labels']
  test_dataset = save['test_dataset']
  test_labels = save['test_labels']
  del save  # hint to help gc free up memory
  # Show the shapes of every split as a quick sanity check.
  print('Training set', train_dataset.shape, train_labels.shape)
  print('Validation set', valid_dataset.shape, valid_labels.shape)
  print('Test set', test_dataset.shape, test_labels.shape)
    
    
Reformat into a TensorFlow-friendly shape:
In [3]:
    
# Constants read by reformat() below.
image_size = 28  # used for both spatial dimensions of the reshaped images
num_labels = 10  # number of columns in the one-hot label matrix
num_channels = 1 # grayscale
import numpy as np
def reformat(dataset, labels):
  """Reshape images to 4-D float32 and one-hot encode the labels.

  Args:
    dataset: array of images; reshaped to
      (-1, image_size, image_size, num_channels).
    labels: array of integer class ids, indexed as labels[:, None]
      (expected to be 1-D).

  Returns:
    Tuple (images, one_hot) of float32 arrays. one_hot has num_labels
    columns, with 1.0 where the column index equals the class id.
  """
  image_shape = (-1, image_size, image_size, num_channels)
  images = dataset.reshape(image_shape).astype(np.float32)
  # Broadcasting a row of class ids against a column of labels gives a
  # boolean matrix that is True exactly at each label's class index.
  class_ids = np.arange(num_labels)
  one_hot = (class_ids == labels[:, None]).astype(np.float32)
  return images, one_hot
# Convert every split to the 4-D image / one-hot label layout.
# NOTE(review): the original names are rebound, so running this cell a second
# time would pass already-reformatted arrays back into reformat().
train_dataset, train_labels = reformat(train_dataset, train_labels)
valid_dataset, valid_labels = reformat(valid_dataset, valid_labels)
test_dataset, test_labels = reformat(test_dataset, test_labels)
# Shapes after reformatting, for comparison with the shapes printed at load time.
print('Training set', train_dataset.shape, train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)
    
    
In [4]:
    
# Wrap each split with T.data so the training cell can call .split() and
# .epochs() on it. T is presumably provided by the tensorbuilder star import
# -- TODO confirm.
training_data = T.data(x=train_dataset, y=train_labels)
test_data = T.data(x=test_dataset, y=test_labels)
validation_data = T.data(x=valid_dataset, y=valid_labels)
# All three splits stacked along the first axis (train, test, validation
# order); the visualisation cells sample from these combined arrays.
x_data = np.vstack((train_dataset, test_dataset, valid_dataset))
y_data = np.vstack((train_labels, test_labels, valid_labels))
    
In [5]:
    
def crange(initial, final='z'):
    """Yield consecutive characters from `initial` through `final`, inclusive.

    Args:
        initial: single character the sequence starts at.
        final: single character the sequence ends at (inclusive);
            defaults to 'z'.

    Yields:
        One-character strings in increasing code-point order. Nothing is
        yielded when `initial` comes after `final`.
    """
    # `range` replaces the Python-2-only `xrange`, which raises NameError on
    # Python 3. The file imports six.moves.range, so this stays lazy on
    # Python 2 as well.
    for code in range(ord(initial), ord(final) + 1):
        yield chr(code)
        
# Maps class index 0..9 to the letters 'a'..'j'; used to label plots.
char_dict = dict(enumerate(crange('a', 'j')))
def axis_ops():
    """Return a fresh dict of axis settings that hide grid, lines and ticks.

    A new dict is built on every call, so each caller gets its own copy to
    pass to `go.Layout(...)` or `.update(**...)`.
    """
    hidden_axis = {
        'autorange': True,
        'showgrid': False,
        'zeroline': False,
        'showline': False,
        'autotick': True,
        'ticks': '',
        'showticklabels': False,
    }
    return hidden_axis
# Choose 10 distinct random positions in the combined data set to preview.
# NOTE(review): no random seed is set in the visible cells, so the selection
# differs from run to run.
rand = random.sample(range(len(x_data)), 10)
# One title per sampled image ("Letter A" ... "Letter J"), taken from the
# arg-max of its one-hot label.
titles = [ "Letter {0}".format(char_dict[np.argmax(y_data[i])].upper()) for i in rand ]
# Base layout with both axes hidden; copied later for the single-sample plot.
layout = go.Layout(xaxis=axis_ops(), yaxis=axis_ops())
# 2 x 5 grid, one subplot per sampled image.
fig = tools.make_subplots(rows=2, cols=5, print_grid=False, subplot_titles=titles)
def heatmap(z, smooth='best', reversescale=True, title=None):
    """Build a 'Greys' plotly Heatmap trace for the values in `z`.

    Args:
        z: values to plot, passed through as the trace's `z`.
        smooth: forwarded as the trace's `zsmooth` setting.
        reversescale: forwarded as the trace's `reversescale` setting.
        title: accepted but not used by this function.

    Returns:
        The `go.Heatmap` trace, created with `showscale` and `showlegend`
        both set to False.
    """
    trace_options = {
        'z': z,
        'colorscale': 'Greys',
        'reversescale': reversescale,
        'showscale': False,
        'showlegend': False,
        'zsmooth': smooth,
    }
    return go.Heatmap(**trace_options)
def printmat(x):
    """Print the absolute values of `x`, formatting floats with one decimal.

    Args:
        x: array indexed as x[:, :], so it needs at least two dimensions.
    """
    one_decimal = '{0:.1f}'.format
    magnitudes = np.abs(x[:, :])
    rendered = np.array2string(magnitudes, formatter={'float_kind': one_decimal})
    print(rendered)
# Fill the 2 x 5 grid: the subplot at (row, col) shows sample number
# row * 5 + col of the random selection.
for row in range(2):
    for col in range(5):
        i = row * 5 + col
        i = rand[i]  # grid position -> index into x_data
        # Channel 0 with the row order reversed; presumably so the glyph is
        # drawn upright in the heatmap -- TODO confirm.
        trace = heatmap(x_data[i][::-1, :, 0])
        fig.append_trace(trace, row + 1, col + 1)  # subplot positions start at 1
# Apply the hidden-axis settings to xaxis1..xaxis10 and yaxis1..yaxis10.
for i in range(10):
    fig['layout']['xaxis{0}'.format(i+1)].update(**axis_ops())
    fig['layout']['yaxis{0}'.format(i+1)].update(**axis_ops())
py.iplot(fig)
    
    
Let's build a small network with two convolutional layers, followed by one fully connected layer. Convolutional networks are more expensive computationally, so we'll limit the network's depth and its number of fully connected nodes.
In [10]:
    
# Look at one of the previewed samples in detail: a large heatmap plus the
# raw pixel magnitudes.
sample_indexes = rand
sample_index = 6  # position within the random selection, not within x_data
sample_features = x_data[sample_indexes]
sample_labels = y_data[sample_indexes]
# Channel 0 of the chosen image.
# NOTE(review): unlike the preview grid, the rows are not reversed ([::-1])
# before plotting -- confirm the orientation is intended.
sample = sample_features[sample_index][:, :, 0]
hmap = heatmap(sample)
hlayout = layout.copy()
hlayout.update(width=700,height=700)
# print() with a single argument produces the same output on Python 2 and 3;
# the former print statement was a SyntaxError on Python 3.
print(titles[sample_index])
py.iplot(go.Figure(data=[hmap], layout=hlayout))
# Drop 5 columns on each side so the printed matrix is narrower.
printmat(sample[:, 5:-5])
    
    
    
    
In [24]:
    
import tflearn
# Build the model graph: input placeholders, the tensorbuilder pipeline that
# yields the prediction / training op / loss, an accuracy metric, merged
# summaries, a checkpoint saver and a summary writer.
graph = tf.Graph()
with graph.as_default():

    with tf.name_scope('inputs'):
        # Image batches of shape [batch, 28, 28, 1] and label rows of width 10.
        x = tf.placeholder('float', shape=[None, 28, 28, 1], name='x')
        y = tf.placeholder('float', shape=[None, 10], name='y')
        # Fed on every sess.run so they can change without rebuilding the graph.
        learning_rate = tf.placeholder('float', name='learning_rate')
        keep_prob = tf.placeholder('float', name='keep_prob')


    # NOTE(review): Pipe, List, With, Write and Read are tensorbuilder DSL
    # helpers. Presumably the final List fans the 10-unit linear output out
    # into three branches, unpacked here as [h, trainer, loss] -- confirm
    # against the tensorbuilder documentation.
    [h, trainer, loss] = T.Pipe(
            x, T
            .inception_layer(4, activation_fn=tf.nn.elu, normalizer_fn=layers.batch_norm)
            .inception_layer(8, activation_fn=tf.nn.elu, normalizer_fn=layers.batch_norm)

            .elu_conv2d_layer(64, [3, 3], normalizer_fn=layers.batch_norm)
            .max_pool2d(2)

            .elu_conv2d_layer(128, [3, 3], normalizer_fn=layers.batch_norm)
            .max_pool2d(2)

            # Three 3x3 convolutions with 'VALID' padding, then flatten.
            .elu_conv2d_layer(128, [3, 3], padding='VALID', normalizer_fn=layers.batch_norm)
            .elu_conv2d_layer(128, [3, 3], padding='VALID', normalizer_fn=layers.batch_norm)
            .elu_conv2d_layer(256, [3, 3], padding='VALID', normalizer_fn=layers.batch_norm)
            .flatten()

            # Dropout is driven by the keep_prob placeholder.
            .dropout(keep_prob)
            .linear_layer(10) #, scope='logits'),

            .List(
                # Branch 1: softmax of the linear output, named 'h'.
                T.softmax(name='h')
            ,
                # Branch 2: mean softmax cross-entropy against y, recorded as
                # the 'loss' scalar summary, then minimised with Adadelta.
                With( tf.name_scope('loss'),
                    T.softmax_cross_entropy_with_logits(y).reduce_mean().summary.create_scalar('loss').Write('loss')
                )
                .minimize(tf.train.AdadeltaOptimizer(learning_rate))
            ,
                # Branch 3: presumably returns the value stored by
                # Write('loss') above -- TODO confirm.
                Read('loss')
            )
        )

    with tf.name_scope('accuracy'):
        # Fraction of rows whose arg-max prediction equals the arg-max label.
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(h, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    summaries = tf.summary.merge_all()
    saver = tf.train.Saver()

    model_name = "not-mnist-inception-batch-norm.model"
    # The same path is used for summary logs and for checkpoints.
    # NOTE(review): this is an absolute path under the filesystem root;
    # writing there may need elevated permissions.
    logs_dir = model_dir = "/logs/{0}".format(model_name)
    writter = tf.summary.FileWriter(logs_dir, graph=graph)
    
def feed_dict(data, rate, prob):
    """Build the placeholder-to-value mapping for one `sess.run` call.

    Args:
        data: object exposing `.x` and `.y`, fed to the `x` and `y`
            placeholders respectively.
        rate: value fed to the `learning_rate` placeholder.
        prob: value fed to the `keep_prob` placeholder.

    Returns:
        dict keyed by the module-level placeholders x, y, learning_rate and
        keep_prob.
    """
    feeds = {}
    feeds[x] = data.x
    feeds[y] = data.y
    feeds[learning_rate] = rate
    feeds[keep_prob] = prob
    return feeds
def count_parameters(graph):
    """Return the total number of scalar entries in `graph`'s global variables.

    Args:
        graph: graph whose variables are counted; it is made the default
            graph while `tf.global_variables()` is queried.

    Returns:
        Sum over all global variables of the product of their dimensions.
        A variable with an empty shape contributes 1; no variables gives 0.
    """
    # functools.reduce is available on both Python 2.6+ and Python 3, whereas
    # the bare `reduce` builtin only exists on Python 2.
    from functools import reduce
    import operator

    with graph.as_default():
        total = 0
        for variable in tf.global_variables():
            shape = [int(dim) for dim in variable.get_shape()]
            total += reduce(operator.mul, shape, 1)
        return total
# A single pre-formatted string keeps the printed text identical to the old
# print statement while also being valid syntax on Python 3.
print("Number of paramters: {0}".format(count_parameters(graph)))
    
    
In [25]:
    
# Keep only the first part of each split() result; these subsets are reused
# for every periodic evaluation below.
[sample_test, _] = test_data.split(0.3, 0.7)
[sample_train, _] = training_data.split(0.02, 0.98)
[sample_validation, _] = validation_data.split(0.3, 0.7)

with tf.Session(graph=graph, config=tf.ConfigProto(log_device_placement=True)) as sess:
    rate = 0.01

    # Best test accuracy seen in this run. It has to exist before the first
    # comparison below; previously it was only assigned in a commented-out
    # line, so a fresh kernel hit a NameError at the first evaluation step.
    # It starts at 0.0 even when resuming, so the first evaluation always
    # saves a checkpoint.
    best = 0.0

    # Resume from the saved checkpoint when one exists; otherwise start from
    # freshly initialised variables instead of failing inside restore().
    if tf.train.checkpoint_exists(model_dir):
        saver.restore(sess, model_dir)
    else:
        sess.run(tf.global_variables_initializer())

    for step, datum in training_data.epochs(20000).batches_of(500).enumerated():

        # One optimisation step with keep_prob 0.4; the merged summaries are
        # computed on the same batch.
        [_, _summaries] = sess.run(
            [trainer, summaries],
            feed_dict=feed_dict(datum, rate, 0.4)
        )

        if step % 5 == 0:
            writter.add_summary(_summaries, step)

        if step % 200 == 0:
            # Evaluation uses keep_prob 1.0 (no dropout) and does not run the
            # trainer op, so no variables are updated here.
            print("Step  {0}".format(step))
            [test_loss, test_accuracy] = sess.run(
                [loss, accuracy],
                feed_dict=feed_dict(sample_test, rate, 1.0)
            )
            print("[Test]  loss: {0}, accuracy: {1}".format(test_loss, test_accuracy))

            [train_loss, train_accuracy] = sess.run(
                [loss, accuracy],
                feed_dict=feed_dict(sample_train, rate, 1.0)
            )
            print("[Train] loss: {0}, accuracy: {1}".format(train_loss, train_accuracy))

            [validation_loss, validation_accuracy] = sess.run(
                [loss, accuracy],
                feed_dict=feed_dict(sample_validation, rate, 1.0)
            )
            print("[Validation] loss: {0}, accuracy: {1}".format(validation_loss, validation_accuracy))

            # NOTE(review): checkpoints are selected on test accuracy, which
            # lets the test split influence the saved model; consider
            # comparing validation_accuracy instead.
            if test_accuracy > best:
                print("Saving")
                saver.save(sess, model_dir)
                best = test_accuracy

            print("")
    
    
    
In [27]:
    
# Draw 30 random samples from the combined data set and compare the model's
# predictions with the true labels.
# NOTE(review): this rebinds `rand`, which the sample-inspection cell also
# reads; `titles` was built from the earlier 10-element selection, so
# re-running that cell afterwards would pair a title with a different image.
rand = random.sample(range(len(x_data)), 30)
answers = np.argmax(y_data[rand], 1)  # true class index of each sample
samples = x_data[rand]
with tf.Session(graph=graph) as sess:
    # Predictions come from the checkpoint written by the training cell.
    saver.restore(sess, model_dir)
    # keep_prob 1.0 disables dropout; only x and keep_prob are fed here.
    predictions = sess.run(h, feed_dict={x: samples, keep_prob: 1.0})
    prediction_vals = np.argmax(predictions, 1)


# One figure per sample: the image on the left and the predicted
# distribution over the ten letters on the right.
for answer, sample, prediction, prediction_val in zip(answers, samples, predictions, prediction_vals):
    fig = tools.make_subplots(
        rows=1, cols=2, print_grid=False,
        subplot_titles=(
            'Answer {0}'.format(char_dict[answer].upper()),
            # The trailing True/False says whether the prediction matches the answer.
            'Prediction {0} - {1}'.format(char_dict[prediction_val].upper(), prediction_val == answer)
        )
    )
    # Channel 0 with the row order reversed, as in the preview grid.
    heatmap_trace = heatmap(sample[::-1, :, 0])
    bar_trace = go.Bar(
        x=list(crange('a', 'j')),
        y=prediction
    )
    fig.append_trace(heatmap_trace, 1, 1)
    fig.append_trace(bar_trace, 1, 2)
    # Hide the axes of both subplots (this includes the bar chart's axes).
    for i in range(2):
        fig['layout']['xaxis{0}'.format(i+1)].update(**axis_ops())
        fig['layout']['yaxis{0}'.format(i+1)].update(**axis_ops())
    py.iplot(fig)