Test and Demonstration of convnet, convevo and mutate Modules

This notebook shows how convnet can be used to simplify constructing tensorflow convnets, and how convevo can be used to further define convnets in a way that allows them to be mutated.


In [1]:
%matplotlib inline
from __future__ import print_function

import copy
import gzip
import os
import random
import sys

import numpy as np
import tensorflow as tf

from six.moves import cPickle as pickle

import outputer
import convnet
import mutate
import convevo

Load Data

Note: Requires notmnist_setup notebook to be run first.


In [2]:
def setup_data(pickle_file):
    data = {
        "image_size": 28,
        "label_count": 10,
        "channel_count": 1
    }
    data["total_image_size"] = data["image_size"] * data["image_size"]
    
    with gzip.open(pickle_file, 'rb') as f:
        save = pickle.load(f)
        inputs_train = save['train_dataset']
        labels_train = save['train_labels']
        inputs_test = save['test_dataset']
        labels_test = save['test_labels']
        print('Training set', inputs_train.shape, labels_train.shape)
        print('Test set', inputs_test.shape, labels_test.shape)

    def setup_data(inputs, labels, name):
        shape = (-1, data["image_size"], data["image_size"], data["channel_count"])
        inputs = inputs.reshape(shape).astype(np.float32)
        # Map 2 to [0.0, 1.0, 0.0 ...], 3 to [0.0, 0.0, 1.0 ...]
        labels = (np.arange(data["label_count"]) == labels[:,None]).astype(np.float32)
        print(name + " set", inputs.shape, labels.shape)
        return inputs, labels
    data["train"], data["train_labels"]=setup_data(inputs_train, labels_train, "Training")
    data["test"], data["test_labels"] = setup_data(inputs_test, labels_test, "Test")
    return data

full_data = setup_data('notMNIST/full.pickle')
print(full_data.keys())


Training set (529114, 28, 28) (529114,)
Test set (18724, 28, 28) (18724,)
Training set (529114, 28, 28, 1) (529114, 10)
Test set (18724, 28, 28, 1) (18724, 10)
dict_keys(['test', 'channel_count', 'total_image_size', 'label_count', 'test_labels', 'train', 'image_size', 'train_labels'])

Create training/validation split from the full data.


In [3]:
def setup_validate(data, train_count, validate_count, seed=None):
    if seed:
        np.random.seed(seed)

    def randomize(inputs, labels):
        permutation = np.random.permutation(labels.shape[0])
        shuffled_inputs = inputs[permutation,:,:,:]
        shuffled_labels = labels[permutation,:]
        return shuffled_inputs, shuffled_labels

    train_inputs = data["train"][:]
    train_labels = data["train_labels"][:]
    cross_data = copy.copy(data)

    train_inputs, train_labels = randomize(train_inputs, train_labels)
    cross_data["train"] = train_inputs[:train_count]
    cross_data["train_labels"] = train_labels[:train_count]

    cross_data["valid"] = train_inputs[train_count:train_count + validate_count]
    cross_data["valid_labels"] = train_labels[train_count:train_count + validate_count]
    return cross_data

In [4]:
datasets = setup_validate(full_data, 200000, 10000)

print(datasets["train_labels"].shape)
print(datasets["train_labels"][0])
print(full_data["train_labels"][0])
print(datasets["valid"].shape)


(200000, 10)
[ 0.  0.  0.  0.  0.  0.  0.  1.  0.  0.]
[ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
(10000, 28, 28, 1)

Tensorflow Graph Execution


In [5]:
def accuracy(predictions, labels):
    correct_predictions = np.argmax(predictions, 1) == np.argmax(labels, 1)
    return (100.0 * np.sum(correct_predictions) / predictions.shape[0])

In [6]:
def batch_accuracy(session, graph_info, inputs, labels, batch_size):
    total_accuracy = 0
    batch_count = len(inputs) // batch_size
    for b in range(batch_count):
        batch_data = inputs[b * batch_size: (b + 1) * batch_size]
        feed_dict = {graph_info["verify"] : batch_data}
        predictions = session.run([graph_info["verify_predictions"]],
                                  feed_dict=feed_dict)[0]
        total_accuracy += accuracy(
            predictions,
            labels[b * batch_size: (b + 1) * batch_size]
        )
    return total_accuracy / float(batch_count)

In [7]:
def run_graph(
    graph_info,
    data,
    step_count,
    report_every=50,
    verbose=True,
    accuracy_minimum=None
):
    with tf.Session(graph=graph_info["graph"]) as session:
        tf.initialize_all_variables().run()
        print("Initialized")
        batch_size = graph_info["batch_size"]
        valid_accuracy = 0
        for step in range(step_count + 1):
            # Generate a minibatch.
            offset = (step * batch_size) % (data["train_labels"].shape[0] - batch_size)
            batch_data = data["train"][offset:(offset + batch_size), :, :, :]
            batch_labels = data["train_labels"][offset:(offset + batch_size), :]
            
            # Set up graph inputs and targets
            feed_dict = {
                graph_info["train"] : batch_data,
                graph_info["labels"] : batch_labels
            }
            targets = [
                graph_info["optimizer"],
                graph_info["loss"],
                graph_info["predictions"]
            ]
            _, loss, predictions = session.run(targets, feed_dict=feed_dict)
            
            if np.isnan(loss):
                print("Error computing loss")
                return 0
            if (step % report_every == 0):
                if verbose:
                    print("Minibatch loss at step", step, ":", loss)
                    print("Minibatch accuracy: %.1f%%" % accuracy(predictions,
                                                                  batch_labels))
                valid_accuracy = batch_accuracy(
                    session, graph_info, data["valid"], data["valid_labels"], batch_size
                )
                print("Validation accuracy: %.1f%%" % valid_accuracy)
                if accuracy_minimum and step > 0 and valid_accuracy < accuracy_minimum:
                    print("Early out.")
                    break
        if verbose:
            test_accuracy = batch_accuracy(
                session, graph_info, data["test"], data["test_labels"], batch_size
            )
            print("Test accuracy: %.1f%%" % test_accuracy)
        return valid_accuracy

Tests the shape calculations for convolution and pooling operations


In [8]:
def shape_test(shape, options, func):
    graph = tf.Graph()
    with graph.as_default():
        input = tf.placeholder(tf.float32, shape=shape)
        parameters = convnet.setup_matrix(options)
        result = func(input, False, parameters, options)
        return tuple(int(d) for d in result.get_shape())
    
default_init = convnet.setup_initializer()
correct = 0
# For images of up to 7x7
for w in range(1, 7):
    # And patch sizes up to the image size
    for p in range(1, w + 1):
        # And strides up to the patch size
        for s in range(1, p + 1):
            # And for both same and valid padding
            for pad in ["SAME", "VALID"]:
                # And for both convolutions and pooling
                for func in [convnet.apply_pool, convnet.apply_conv]:
                    options = {
                        "size":(p, p, 1, 1),
                        "stride": (s, s),
                        "padding":pad,
                        "pool_type": "max",
                        "bias":False,
                        "init":default_init}
                    # Check if calculated shape matches what Tensorflow actually does.
                    calc = convnet.image_output_shape([1, w, w, 1], options)
                    shape = shape_test([1, w, w, 1], options, func)
                    if calc == shape:
                        correct += 1
                    else:
                        print("mismatch for", w, p, s, pad, shape, calc)
if correct == 224:
    print("All shapes match.")


All shapes match.

Setup for a graph using convnet directly

Graph is a simple two convolution layers with relu, then flatten followed by a hidden layer, relu then output layer.


In [9]:
def convnet_two_layer(batch_size, patch_size, depth, hidden_size, data):
    image_size = data["image_size"]
    total_image_size = image_size * image_size * depth
    label_count = data["label_count"]
    channel_count = data["channel_count"]
    graph = tf.Graph()
    with graph.as_default():
        # Define the placeholders
        input_shape = (batch_size, image_size, image_size, channel_count)
        output_shape = (batch_size, label_count)
        train = tf.placeholder(tf.float32, shape=input_shape)
        labels= tf.placeholder(tf.float32, shape=output_shape)
        verify= tf.placeholder(tf.float32, shape=input_shape)
        
        stride = 2
        
        # Define the layers
        operations = [
            convnet.create_conv((patch_size, patch_size), (stride, stride), 1, depth),
            convnet.create_relu(),
            convnet.create_conv((patch_size, patch_size), (stride, stride), depth, depth),
            convnet.create_relu(),
            convnet.create_flatten(),
            convnet.create_matrix(total_image_size // pow(stride, 4), hidden_size),
            convnet.create_relu(),
            convnet.create_matrix(hidden_size, label_count)
        ]
        
        # Set up the graph variables for matrices and biases.
        for op in operations:
            op.setup_parameters()
        
        # Construct the graph nodes
        def model(input, train):
            nodes = [input]
            for op in operations:
                nodes.append(op.connect(nodes[-1], train))
            return nodes[-1]

        # Build the model for training.
        logits = model(train, True)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels))
        
        # Pass out all the neccesary bits.
        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "labels": labels,
            "loss": loss,
            "optimizer": tf.train.GradientDescentOptimizer(0.05).minimize(loss),

            # Predictions for the training, validation, and test data.
            "predictions": tf.nn.softmax(logits),
            "verify": verify,
            "verify_predictions": tf.nn.softmax(model(verify, False))
        }
    return info

In [10]:
graph_2conv = convnet_two_layer(batch_size=16, patch_size=5,
                                depth=16, hidden_size=64, data=datasets)

run_graph(graph_2conv, datasets, 1000, 100, True, 50)


Initialized
Minibatch loss at step 0 : 2.44979
Minibatch accuracy: 0.0%
Validation accuracy: 11.2%
Minibatch loss at step 100 : 1.2123
Minibatch accuracy: 75.0%
Validation accuracy: 76.0%
Minibatch loss at step 200 : 0.915699
Minibatch accuracy: 81.2%
Validation accuracy: 77.0%
Minibatch loss at step 300 : 0.601131
Minibatch accuracy: 81.2%
Validation accuracy: 80.7%
Minibatch loss at step 400 : 0.969889
Minibatch accuracy: 81.2%
Validation accuracy: 82.1%
Minibatch loss at step 500 : 0.570695
Minibatch accuracy: 87.5%
Validation accuracy: 81.8%
Minibatch loss at step 600 : 0.461254
Minibatch accuracy: 87.5%
Validation accuracy: 83.0%
Minibatch loss at step 700 : 1.00383
Minibatch accuracy: 62.5%
Validation accuracy: 83.9%
Minibatch loss at step 800 : 0.546082
Minibatch accuracy: 81.2%
Validation accuracy: 83.2%
Minibatch loss at step 900 : 0.190906
Minibatch accuracy: 93.8%
Validation accuracy: 83.8%
Minibatch loss at step 1000 : 0.618016
Minibatch accuracy: 81.2%
Validation accuracy: 84.6%
Test accuracy: 90.3%
Out[10]:
84.560000000000002

Setup for graph using convevo and mutate

Graph is same as above: two convolution layers with relu, then flatten followed by and hidden layer, relu then output layer.


In [11]:
def convnet_two_layer_stack(batch_size, patch_size, depth, hidden_size, data):
    image_size = data["image_size"]
    label_count = data["label_count"]
    channel_count = data["channel_count"]
    graph = tf.Graph()
    with graph.as_default():
        # Define the placeholders
        input_shape = (batch_size, image_size, image_size, channel_count)
        output_shape = (batch_size, label_count)
        train = tf.placeholder(tf.float32, shape=input_shape)
        labels= tf.placeholder(tf.float32, shape=output_shape)
        verify= tf.placeholder(tf.float32, shape=input_shape)
        
        stride = 2
        
        # Define the layer stack
        stack = convevo.LayerStack(flatten=True)
        init = lambda: convevo.Initializer("normal", scale=1)
        
        def add(layer, relu):
            stack.add_layer(layer, relu=relu)
        
        add(convevo.ImageLayer("conv_bias",patch_size, stride, depth, "SAME", init()),True)
        add(convevo.ImageLayer("conv_bias",patch_size, stride, depth, "SAME", init()),True)
        add(convevo.HiddenLayer(hidden_size, bias=True, initializer=init()), True)
        add(convevo.HiddenLayer(label_count, bias=True, initializer=init()), False)
        
        # Clone the stack and mutate it.
        evo_copy = copy.deepcopy(stack)
        evo_copy.mutate(input_shape, output_shape, None, random.Random(55))
        
        # Reset the seeds to ensure consistent initialization
        evo_copy.reseed(random.Random(101))
        
        # Convert the stack to convnet style operations.
        operations = evo_copy.construct(input_shape)
        
        # Setup the parameters for the operations (matrix and bias variables)
        convnet.setup(operations)

        # Construct the graph operations - the last one is the output.
        logits = convnet.connect_model(train, operations, True)[-1]
        verify_logits = convnet.connect_model(verify, operations, False)[-1]
        
        # Setup loss
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels))
        
        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "labels": labels,
            "loss": loss,
            "optimizer": tf.train.GradientDescentOptimizer(0.05).minimize(loss),

            # Predictions for the training, validation, and test data.
            "predictions": tf.nn.softmax(logits),
            "verify": verify,
            "verify_predictions": tf.nn.softmax(verify_logits)
        }
    return info

In [12]:
tf.set_random_seed(42)

graph_2conv_stack = convnet_two_layer_stack(batch_size=16, patch_size=5,
                                            depth=16, hidden_size=64, data=datasets)

run_graph(graph_2conv, datasets, 1000)


Initialized
Minibatch loss at step 0 : 2.32898
Minibatch accuracy: 0.0%
Validation accuracy: 11.6%
Minibatch loss at step 50 : 1.51666
Minibatch accuracy: 50.0%
Validation accuracy: 55.5%
Minibatch loss at step 100 : 1.16974
Minibatch accuracy: 75.0%
Validation accuracy: 74.8%
Minibatch loss at step 150 : 1.03057
Minibatch accuracy: 75.0%
Validation accuracy: 77.7%
Minibatch loss at step 200 : 0.962429
Minibatch accuracy: 75.0%
Validation accuracy: 75.4%
Minibatch loss at step 250 : 0.700694
Minibatch accuracy: 81.2%
Validation accuracy: 79.4%
Minibatch loss at step 300 : 0.622969
Minibatch accuracy: 75.0%
Validation accuracy: 80.5%
Minibatch loss at step 350 : 0.795481
Minibatch accuracy: 68.8%
Validation accuracy: 81.0%
Minibatch loss at step 400 : 0.885985
Minibatch accuracy: 75.0%
Validation accuracy: 80.9%
Minibatch loss at step 450 : 0.334447
Minibatch accuracy: 87.5%
Validation accuracy: 81.3%
Minibatch loss at step 500 : 0.468145
Minibatch accuracy: 81.2%
Validation accuracy: 81.2%
Minibatch loss at step 550 : 0.940139
Minibatch accuracy: 68.8%
Validation accuracy: 82.3%
Minibatch loss at step 600 : 0.527906
Minibatch accuracy: 81.2%
Validation accuracy: 82.6%
Minibatch loss at step 650 : 0.721136
Minibatch accuracy: 75.0%
Validation accuracy: 81.9%
Minibatch loss at step 700 : 1.26754
Minibatch accuracy: 56.2%
Validation accuracy: 83.0%
Minibatch loss at step 750 : 0.500304
Minibatch accuracy: 81.2%
Validation accuracy: 83.1%
Minibatch loss at step 800 : 0.474956
Minibatch accuracy: 81.2%
Validation accuracy: 82.8%
Minibatch loss at step 850 : 0.20388
Minibatch accuracy: 93.8%
Validation accuracy: 83.5%
Minibatch loss at step 900 : 0.150118
Minibatch accuracy: 100.0%
Validation accuracy: 83.1%
Minibatch loss at step 950 : 0.556124
Minibatch accuracy: 75.0%
Validation accuracy: 83.4%
Minibatch loss at step 1000 : 0.611481
Minibatch accuracy: 81.2%
Validation accuracy: 82.8%
Test accuracy: 88.8%
Out[12]:
82.799999999999997

Construct Optimizer Using convevo


In [13]:
def convnet_optimize(
    batch_size,
    patch_size,
    depth,
    hidden_size,
    data,
    rate_alpha=0.05,
    decay_rate=1.0,
    decay_steps=1000
):
    image_size = data["image_size"]
    label_count = data["label_count"]
    channel_count = data["channel_count"]
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        input_shape = (batch_size, image_size, image_size, channel_count)
        output_shape = (batch_size, label_count)
        train = tf.placeholder(tf.float32, shape=input_shape)
        labels= tf.placeholder(tf.float32, shape=output_shape)
        verify= tf.placeholder(tf.float32, shape=input_shape)
        
        stride = 2
        
        # Set up LayerStack
        optimizer = convevo.Optimizer("GradientDescent",rate_alpha,decay_rate,decay_steps)
        stack = convevo.LayerStack(flatten=True, optimizer=optimizer)
        init = lambda: convevo.Initializer("normal", scale=0.1)

        def add(layer, relu):
            stack.add_layer(layer, relu=relu)
        
        add(convevo.ImageLayer("conv_bias",patch_size, stride, depth, "SAME", init()),True)
        add(convevo.ImageLayer("conv_bias",patch_size, stride, depth, "SAME", init()),True)
        add(convevo.HiddenLayer(hidden_size, bias=True, initializer=init()), True)
        add(convevo.HiddenLayer(label_count, bias=True, initializer=init()), False)

        operations = stack.construct(input_shape)
        l2_loss = convnet.setup(operations)

        logits = convnet.connect_model(train, operations, True)[-1]
        verify_logits = convnet.connect_model(verify, operations, False)[-1]
        loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits,labels))+l2_loss
       
        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "labels": labels,
            "loss": loss,
            "optimizer": stack.construct_optimizer(loss),

            # Predictions for the training, validation, and test data.
            "predictions": tf.nn.softmax(logits),
            "verify": verify,
            "verify_predictions": tf.nn.softmax(verify_logits)
        }
    return info

In [14]:
optimal_steps = 100000
# convnet_optimize doesn't reset the seeds on the stack,
# so initializers all default to using the global seed.
tf.set_random_seed(45654)

graph_connive = convnet_optimize(
    batch_size=16, patch_size=5, depth=64, hidden_size=128,
    rate_alpha=0.02, decay_rate=0.9, decay_steps=optimal_steps//4,
    data=datasets
)

run_graph(graph_connive, datasets, optimal_steps, report_every=10000)


Initialized
Minibatch loss at step 0 : 3.23063
Minibatch accuracy: 0.0%
Validation accuracy: 11.5%
Minibatch loss at step 10000 : 0.0329743
Minibatch accuracy: 100.0%
Validation accuracy: 88.5%
Minibatch loss at step 20000 : 0.295979
Minibatch accuracy: 87.5%
Validation accuracy: 89.6%
Minibatch loss at step 30000 : 0.586623
Minibatch accuracy: 81.2%
Validation accuracy: 90.2%
Minibatch loss at step 40000 : 0.145257
Minibatch accuracy: 93.8%
Validation accuracy: 90.6%
Minibatch loss at step 50000 : 0.710837
Minibatch accuracy: 93.8%
Validation accuracy: 91.0%
Minibatch loss at step 60000 : 0.308152
Minibatch accuracy: 87.5%
Validation accuracy: 91.3%
Minibatch loss at step 70000 : 0.532527
Minibatch accuracy: 87.5%
Validation accuracy: 91.2%
Minibatch loss at step 80000 : 0.0911913
Minibatch accuracy: 93.8%
Validation accuracy: 91.3%
Minibatch loss at step 90000 : 0.0588651
Minibatch accuracy: 100.0%
Validation accuracy: 90.9%
Minibatch loss at step 100000 : 0.327164
Minibatch accuracy: 93.8%
Validation accuracy: 91.6%
Test accuracy: 96.1%
Out[14]:
91.640000000000001

In [ ]: