In [1]:
%matplotlib inline
from __future__ import print_function
import copy
import gzip
import os
import random
import sys
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
import outputer
import convnet
import mutate
import convevo
In [2]:
def setup_data(pickle_file):
    data = {
        "image_size": 28,
        "label_count": 10,
        "channel_count": 1
    }
    data["total_image_size"] = data["image_size"] * data["image_size"]
    with gzip.open(pickle_file, 'rb') as f:
        save = pickle.load(f)
        inputs_train = save['train_dataset']
        labels_train = save['train_labels']
        inputs_test = save['test_dataset']
        labels_test = save['test_labels']
    print('Training set', inputs_train.shape, labels_train.shape)
    print('Test set', inputs_test.shape, labels_test.shape)

    def reformat(inputs, labels, name):
        shape = (-1, data["image_size"], data["image_size"], data["channel_count"])
        inputs = inputs.reshape(shape).astype(np.float32)
        # Map label 2 to [0.0, 0.0, 1.0, 0.0, ...], 3 to [0.0, 0.0, 0.0, 1.0, ...]
        labels = (np.arange(data["label_count"]) == labels[:, None]).astype(np.float32)
        print(name + " set", inputs.shape, labels.shape)
        return inputs, labels

    data["train"], data["train_labels"] = reformat(inputs_train, labels_train, "Training")
    data["test"], data["test_labels"] = reformat(inputs_test, labels_test, "Test")
    return data
full_data = setup_data('notMNIST/full.pickle')
print(full_data.keys())
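The one-hot conversion above relies on NumPy broadcasting: comparing np.arange(label_count) (a row) against labels[:, None] (a column) yields a boolean matrix with a single True per row. A tiny standalone sketch with made-up labels:

labels = np.array([2, 0, 3])
one_hot = (np.arange(4) == labels[:, None]).astype(np.float32)
# one_hot is:
# [[0. 0. 1. 0.]
#  [1. 0. 0. 0.]
#  [0. 0. 0. 1.]]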
In [3]:
def setup_validate(data, train_count, validate_count, seed=None):
    if seed is not None:
        np.random.seed(seed)

    def randomize(inputs, labels):
        # Shuffle inputs and labels with one shared permutation to keep pairs aligned.
        permutation = np.random.permutation(labels.shape[0])
        shuffled_inputs = inputs[permutation, :, :, :]
        shuffled_labels = labels[permutation, :]
        return shuffled_inputs, shuffled_labels

    cross_data = copy.copy(data)
    train_inputs, train_labels = randomize(data["train"], data["train_labels"])
    cross_data["train"] = train_inputs[:train_count]
    cross_data["train_labels"] = train_labels[:train_count]
    cross_data["valid"] = train_inputs[train_count:train_count + validate_count]
    cross_data["valid_labels"] = train_labels[train_count:train_count + validate_count]
    return cross_data
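Because the same permutation indexes both arrays, every image stays matched to its label; a minimal sketch with toy arrays:

inputs = np.arange(4).reshape(4, 1, 1, 1)
labels = np.eye(4)
permutation = np.random.permutation(4)
# The same index order is applied to both arrays, preserving input/label pairs.
shuffled_inputs = inputs[permutation]
shuffled_labels = labels[permutation]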
In [4]:
datasets = setup_validate(full_data, 200000, 10000)
print(datasets["train_labels"].shape)
print(datasets["train_labels"][0])
print(full_data["train_labels"][0])
print(datasets["valid"].shape)
In [5]:
def accuracy(predictions, labels):
    correct_predictions = np.argmax(predictions, 1) == np.argmax(labels, 1)
    return 100.0 * np.sum(correct_predictions) / predictions.shape[0]
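For example, with two made-up prediction rows:

predictions = np.array([[0.1, 0.9], [0.8, 0.2]])
labels = np.array([[0.0, 1.0], [1.0, 0.0]])
print(accuracy(predictions, labels))  # Both argmax rows match the labels: 100.0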
In [6]:
def batch_accuracy(session, graph_info, inputs, labels, batch_size):
    total_accuracy = 0
    batch_count = len(inputs) // batch_size
    for b in range(batch_count):
        batch_data = inputs[b * batch_size: (b + 1) * batch_size]
        feed_dict = {graph_info["verify"]: batch_data}
        predictions = session.run([graph_info["verify_predictions"]],
                                  feed_dict=feed_dict)[0]
        total_accuracy += accuracy(
            predictions,
            labels[b * batch_size: (b + 1) * batch_size]
        )
    return total_accuracy / float(batch_count)
In [7]:
def run_graph(
    graph_info,
    data,
    step_count,
    report_every=50,
    verbose=True,
    accuracy_minimum=None
):
    with tf.Session(graph=graph_info["graph"]) as session:
        tf.initialize_all_variables().run()
        print("Initialized")
        batch_size = graph_info["batch_size"]
        valid_accuracy = 0
        for step in range(step_count + 1):
            # Generate a minibatch.
            offset = (step * batch_size) % (data["train_labels"].shape[0] - batch_size)
            batch_data = data["train"][offset:(offset + batch_size), :, :, :]
            batch_labels = data["train_labels"][offset:(offset + batch_size), :]
            # Set up graph inputs and targets.
            feed_dict = {
                graph_info["train"]: batch_data,
                graph_info["labels"]: batch_labels
            }
            targets = [
                graph_info["optimizer"],
                graph_info["loss"],
                graph_info["predictions"]
            ]
            _, loss, predictions = session.run(targets, feed_dict=feed_dict)
            if np.isnan(loss):
                print("Error computing loss")
                return 0
            if step % report_every == 0:
                if verbose:
                    print("Minibatch loss at step", step, ":", loss)
                    print("Minibatch accuracy: %.1f%%" % accuracy(predictions,
                                                                  batch_labels))
                valid_accuracy = batch_accuracy(
                    session, graph_info, data["valid"], data["valid_labels"], batch_size
                )
                print("Validation accuracy: %.1f%%" % valid_accuracy)
                if accuracy_minimum and step > 0 and valid_accuracy < accuracy_minimum:
                    print("Early out.")
                    break
        if verbose:
            test_accuracy = batch_accuracy(
                session, graph_info, data["test"], data["test_labels"], batch_size
            )
            print("Test accuracy: %.1f%%" % test_accuracy)
        return valid_accuracy
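The offset arithmetic cycles the minibatch window through the training set, wrapping before it can run off the end; a sketch with made-up sizes:

batch_size = 16
train_size = 100  # hypothetical; the real training set has 200000 examples
for step in range(8):
    offset = (step * batch_size) % (train_size - batch_size)
    # offsets: 0, 16, 32, 48, 64, 80, 12, 28 - wraps mid-epoch at step 6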
In [8]:
def shape_test(shape, options, func):
    graph = tf.Graph()
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, shape=shape)
        parameters = convnet.setup_matrix(options)
        result = func(inputs, False, parameters, options)
        return tuple(int(d) for d in result.get_shape())

default_init = convnet.setup_initializer()
correct = 0
# For images from 1x1 up to 6x6
for w in range(1, 7):
    # And patch sizes up to the image size
    for p in range(1, w + 1):
        # And strides up to the patch size
        for s in range(1, p + 1):
            # And for both same and valid padding
            for pad in ["SAME", "VALID"]:
                # And for both convolutions and pooling
                for func in [convnet.apply_pool, convnet.apply_conv]:
                    options = {
                        "size": (p, p, 1, 1),
                        "stride": (s, s),
                        "padding": pad,
                        "pool_type": "max",
                        "bias": False,
                        "init": default_init
                    }
                    # Check that the calculated shape matches what TensorFlow actually does.
                    calc = convnet.image_output_shape([1, w, w, 1], options)
                    shape = shape_test([1, w, w, 1], options, func)
                    if calc == shape:
                        correct += 1
                    else:
                        print("mismatch for", w, p, s, pad, shape, calc)
if correct == 224:
    print("All shapes match.")
In [9]:
def convnet_two_layer(batch_size, patch_size, depth, hidden_size, data):
    image_size = data["image_size"]
    total_image_size = image_size * image_size * depth
    label_count = data["label_count"]
    channel_count = data["channel_count"]
    graph = tf.Graph()
    with graph.as_default():
        # Define the placeholders.
        input_shape = (batch_size, image_size, image_size, channel_count)
        output_shape = (batch_size, label_count)
        train = tf.placeholder(tf.float32, shape=input_shape)
        labels = tf.placeholder(tf.float32, shape=output_shape)
        verify = tf.placeholder(tf.float32, shape=input_shape)
        stride = 2
        # Define the layers.
        operations = [
            convnet.create_conv((patch_size, patch_size), (stride, stride), 1, depth),
            convnet.create_relu(),
            convnet.create_conv((patch_size, patch_size), (stride, stride), depth, depth),
            convnet.create_relu(),
            convnet.create_flatten(),
            convnet.create_matrix(total_image_size // pow(stride, 4), hidden_size),
            convnet.create_relu(),
            convnet.create_matrix(hidden_size, label_count)
        ]
        # Set up the graph variables for matrices and biases.
        for op in operations:
            op.setup_parameters()

        # Construct the graph nodes.
        def model(inputs, is_training):
            nodes = [inputs]
            for op in operations:
                nodes.append(op.connect(nodes[-1], is_training))
            return nodes[-1]

        # Build the model for training.
        logits = model(train, True)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels))
        # Pass out all the necessary bits.
        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "labels": labels,
            "loss": loss,
            "optimizer": tf.train.GradientDescentOptimizer(0.05).minimize(loss),
            # Predictions for the training, validation, and test data.
            "predictions": tf.nn.softmax(logits),
            "verify": verify,
            "verify_predictions": tf.nn.softmax(model(verify, False))
        }
        return info
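To sanity-check the total_image_size // pow(stride, 4) term with the values used below (image_size=28, stride=2, depth=16): two stride-2 SAME convolutions shrink 28x28 to 7x7, so the flattened size entering the hidden layer is 7 * 7 * depth:

image_size, stride, depth = 28, 2, 16
after_convs = image_size // stride // stride  # 7
assert after_convs ** 2 * depth == (image_size ** 2 * depth) // stride ** 4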
In [10]:
graph_2conv = convnet_two_layer(batch_size=16, patch_size=5,
depth=16, hidden_size=64, data=datasets)
run_graph(graph_2conv, datasets, 1000, 100, True, 50)
Out[10]:
In [11]:
def convnet_two_layer_stack(batch_size, patch_size, depth, hidden_size, data):
    image_size = data["image_size"]
    label_count = data["label_count"]
    channel_count = data["channel_count"]
    graph = tf.Graph()
    with graph.as_default():
        # Define the placeholders.
        input_shape = (batch_size, image_size, image_size, channel_count)
        output_shape = (batch_size, label_count)
        train = tf.placeholder(tf.float32, shape=input_shape)
        labels = tf.placeholder(tf.float32, shape=output_shape)
        verify = tf.placeholder(tf.float32, shape=input_shape)
        stride = 2
        # Define the layer stack.
        stack = convevo.LayerStack(flatten=True)
        init = lambda: convevo.Initializer("normal", scale=1)

        def add(layer, relu):
            stack.add_layer(layer, relu=relu)

        add(convevo.ImageLayer("conv_bias", patch_size, stride, depth, "SAME", init()), True)
        add(convevo.ImageLayer("conv_bias", patch_size, stride, depth, "SAME", init()), True)
        add(convevo.HiddenLayer(hidden_size, bias=True, initializer=init()), True)
        add(convevo.HiddenLayer(label_count, bias=True, initializer=init()), False)
        # Clone the stack and mutate it.
        evo_copy = copy.deepcopy(stack)
        evo_copy.mutate(input_shape, output_shape, None, random.Random(55))
        # Reset the seeds to ensure consistent initialization.
        evo_copy.reseed(random.Random(101))
        # Convert the stack to convnet style operations.
        operations = evo_copy.construct(input_shape)
        # Set up the parameters for the operations (matrix and bias variables).
        convnet.setup(operations)
        # Construct the graph operations - the last one is the output.
        logits = convnet.connect_model(train, operations, True)[-1]
        verify_logits = convnet.connect_model(verify, operations, False)[-1]
        # Set up the loss.
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, labels))
        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "labels": labels,
            "loss": loss,
            "optimizer": tf.train.GradientDescentOptimizer(0.05).minimize(loss),
            # Predictions for the training, validation, and test data.
            "predictions": tf.nn.softmax(logits),
            "verify": verify,
            "verify_predictions": tf.nn.softmax(verify_logits)
        }
        return info
In [12]:
tf.set_random_seed(42)
graph_2conv_stack = convnet_two_layer_stack(batch_size=16, patch_size=5,
depth=16, hidden_size=64, data=datasets)
run_graph(graph_2conv_stack, datasets, 1000)
Out[12]:
In [13]:
def convnet_optimize(
    batch_size,
    patch_size,
    depth,
    hidden_size,
    data,
    rate_alpha=0.05,
    decay_rate=1.0,
    decay_steps=1000
):
    image_size = data["image_size"]
    label_count = data["label_count"]
    channel_count = data["channel_count"]
    graph = tf.Graph()
    with graph.as_default():
        # Input data.
        input_shape = (batch_size, image_size, image_size, channel_count)
        output_shape = (batch_size, label_count)
        train = tf.placeholder(tf.float32, shape=input_shape)
        labels = tf.placeholder(tf.float32, shape=output_shape)
        verify = tf.placeholder(tf.float32, shape=input_shape)
        stride = 2
        # Set up the LayerStack.
        optimizer = convevo.Optimizer("GradientDescent", rate_alpha, decay_rate, decay_steps)
        stack = convevo.LayerStack(flatten=True, optimizer=optimizer)
        init = lambda: convevo.Initializer("normal", scale=0.1)

        def add(layer, relu):
            stack.add_layer(layer, relu=relu)

        add(convevo.ImageLayer("conv_bias", patch_size, stride, depth, "SAME", init()), True)
        add(convevo.ImageLayer("conv_bias", patch_size, stride, depth, "SAME", init()), True)
        add(convevo.HiddenLayer(hidden_size, bias=True, initializer=init()), True)
        add(convevo.HiddenLayer(label_count, bias=True, initializer=init()), False)
        operations = stack.construct(input_shape)
        l2_loss = convnet.setup(operations)
        logits = convnet.connect_model(train, operations, True)[-1]
        verify_logits = convnet.connect_model(verify, operations, False)[-1]
        loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits, labels)) + l2_loss
        info = {
            "graph": graph,
            "batch_size": batch_size,
            "train": train,
            "labels": labels,
            "loss": loss,
            "optimizer": stack.construct_optimizer(loss),
            # Predictions for the training, validation, and test data.
            "predictions": tf.nn.softmax(logits),
            "verify": verify,
            "verify_predictions": tf.nn.softmax(verify_logits)
        }
        return info
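Assuming convevo.Optimizer wraps TensorFlow's standard exponential decay schedule (an assumption about its implementation, not confirmed here), the effective learning rate after a given step would be rate_alpha * decay_rate ** (step / decay_steps):

def decayed_rate(step, rate_alpha=0.02, decay_rate=0.9, decay_steps=25000):
    # Mirrors tf.train.exponential_decay with staircase=False.
    return rate_alpha * decay_rate ** (step / float(decay_steps))

print(decayed_rate(100000))  # ~0.0131 by the end of the 100000-step run below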
In [14]:
optimal_steps = 100000
# convnet_optimize doesn't reset the seeds on the stack,
# so initializers all default to using the global seed.
tf.set_random_seed(45654)
graph_connive = convnet_optimize(
batch_size=16, patch_size=5, depth=64, hidden_size=128,
rate_alpha=0.02, decay_rate=0.9, decay_steps=optimal_steps//4,
data=datasets
)
run_graph(graph_connive, datasets, optimal_steps, report_every=10000)
Out[14]: