In [1]:
%matplotlib inline
from __future__ import print_function
import datetime
import gzip
import os
import sys
import copy
import random
import ipywidgets
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
import outputer
import convnet
import mutate
import convevo
import darwin
In [ ]:
# For easy incorporation of module changes
from imp import reload
reload (convnet)
reload (mutate)
reload (convevo)
reload (darwin)
In [2]:
def setup_data(pickle_file):
data = {
"image_size": 28,
"label_count": 10,
"channel_count": 1
}
data["total_image_size"] = data["image_size"] * data["image_size"]
with gzip.open(pickle_file, 'rb') as f:
save = pickle.load(f)
inputs_train = save['train_dataset']
labels_train = save['train_labels']
inputs_test = save['test_dataset']
labels_test = save['test_labels']
print('Training set', inputs_train.shape, labels_train.shape)
print('Test set', inputs_test.shape, labels_test.shape)
def setup_data(inputs, labels, name):
shape = (-1, data["image_size"], data["image_size"], data["channel_count"])
inputs = inputs.reshape(shape).astype(np.float32)
# Map 2 to [0.0, 1.0, 0.0 ...], 3 to [0.0, 0.0, 1.0 ...]
labels = (np.arange(data["label_count"]) == labels[:,None]).astype(np.float32)
print(name + " set", inputs.shape, labels.shape)
return inputs, labels
data["train"], data["train_labels"] = setup_data(inputs_train, labels_train, "Training")
data["test"], data["test_labels"] = setup_data(inputs_test, labels_test, "Test")
return data
full_data = setup_data('notMNIST/full.pickle')
print(full_data.keys())
In [3]:
def setup_validate(data, train_count, validate_count, entropy=random):
def randomize(inputs, labels):
permutation = np.arange(labels.shape[0], dtype=np.int32)
mutate.fisher_yates_shuffle(permutation, entropy)
shuffled_inputs = inputs[permutation,:,:,:]
shuffled_labels = labels[permutation,:]
return shuffled_inputs, shuffled_labels
train_inputs = data["train"][:]
train_labels = data["train_labels"][:]
cross_data = copy.copy(data)
train_inputs, train_labels = randomize(train_inputs, train_labels)
cross_data["train"] = train_inputs[:train_count]
cross_data["train_labels"] = train_labels[:train_count]
cross_data["valid"] = train_inputs[train_count:train_count + validate_count]
cross_data["valid_labels"] = train_labels[train_count:train_count + validate_count]
return cross_data
In [4]:
def test_setup_validate():
cross_data = setup_validate(full_data, 1000, 100)
print(cross_data["train_labels"].shape)
print(cross_data["train_labels"][0])
print(full_data["train_labels"][0])
print(cross_data["valid"].shape)
test_setup_validate()
In [5]:
def graph_input_shape(batch_size, data):
image_size = data["image_size"]
channel_count = data["channel_count"]
return (batch_size, image_size, image_size, channel_count)
def graph_output_shape(batch_size, data):
return (batch_size, data["label_count"])
def setup_graph(batch_size, data, stack):
graph = tf.Graph()
with graph.as_default():
# Input data.
input_shape = graph_input_shape(batch_size, data)
output_shape = graph_output_shape(batch_size, data)
train = tf.placeholder(tf.float32, shape=input_shape)
labels= tf.placeholder(tf.float32, shape=output_shape)
verify= tf.placeholder(tf.float32, shape=input_shape)
operations = stack.construct(input_shape, output_shape)
l2_loss = convnet.setup(operations)
logits = convnet.connect_model(train, operations, True)[-1]
verify_logits = convnet.connect_model(verify, operations, False)[-1]
loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=labels))+l2_loss
info = {
"graph": graph,
"batch_size": batch_size,
"train": train,
"labels": labels,
"loss": loss,
"optimizer": stack.construct_optimizer(loss),
"predictions": tf.nn.softmax(logits),
"verify": verify,
"verify_predictions": tf.nn.softmax(verify_logits),
"saver": tf.train.Saver()
}
return info
In [6]:
def accuracy(predictions, labels):
correct_predictions = np.argmax(predictions, 1) == np.argmax(labels, 1)
return (100.0 * np.sum(correct_predictions) / predictions.shape[0])
In [7]:
def batch_accuracy(session, graph_info, inputs, labels, batch_size):
total_accuracy = 0
batch_count = len(inputs) // batch_size
for b in range(batch_count):
batch_data = inputs[b * batch_size: (b + 1) * batch_size]
feed_dict = { graph_info["verify"] : batch_data }
predictions, = session.run([graph_info["verify_predictions"]], feed_dict=feed_dict)
total_accuracy += accuracy(predictions, labels[b*batch_size : (b + 1)*batch_size])
return total_accuracy / float(batch_count)
In [8]:
def run_graph(
graph_info,
data,
step_count,
report_every=50,
verbose=True,
eval_test=False,
accuracy_minimum=None,
tracker=None
):
with tf.Session(graph=graph_info["graph"]) as session:
tf.global_variables_initializer().run()
print("Initialized")
# Optionally restore graph parameters from disk.
convnet.restore_model(graph_info, session)
batch_size = graph_info["batch_size"]
valid_accuracy = 0
last = step_count + 1 if step_count else 0
try:
for step in range(last):
if tracker:
tracker.update_progress(step)
# Pick an offset within the training data, which has been randomized.
# Note: we could use better randomization across epochs.
offset = (step * batch_size) % (data["train_labels"].shape[0] - batch_size)
# Generate a minibatch.
batch_data = data["train"][offset:(offset + batch_size), :, :, :]
batch_labels = data["train_labels"][offset:(offset + batch_size), :]
# Graph evaluation targets:
targets = [
graph_info["optimizer"],
graph_info["loss"],
graph_info["predictions"]
]
# Graph inputs:
feed_dict = {
graph_info["train"] : batch_data,
graph_info["labels"] : batch_labels
}
_, loss, predictions = session.run(targets, feed_dict=feed_dict)
reporting = step % report_every == 0
if reporting or tracker:
batch_score = (loss, accuracy(predictions, batch_labels))
if tracker:
tracker.record_score(batch_score)
if np.isnan(loss):
print("Error computing loss")
return valid_accuracy
if reporting:
if verbose:
print("Minibatch loss at step", step, ":", loss)
print("Minibatch accuracy: %.1f%%" % batch_score[1])
valid_accuracy = batch_accuracy(
session, graph_info, data["valid"], data["valid_labels"],batch_size
)
print("Validation accuracy: %.1f%%" % valid_accuracy)
if accuracy_minimum and step > 0 and valid_accuracy < accuracy_minimum:
print("Early out.")
break
if eval_test:
test_accuracy = batch_accuracy(
session, graph_info, data["test"], data["test_labels"], batch_size
)
print("Test accuracy: %.1f%%" % test_accuracy)
return valid_accuracy
finally:
# Optionally save out graph parameters to disk.
convnet.save_model(graph_info, session)
In [9]:
results_path = outputer.setup_directory("temp", "notMNIST_results")
In [10]:
def make_eval(
batch_size=16,
eval_steps=100000,
valid_steps=5000,
eval_test=False
):
progress_tracker = outputer.ProgressTracker(
["Loss", "Accuracy"], eval_steps, results_path, convevo.serialize
)
def evaluate(stack, entropy):
data = setup_validate(full_data, eval_steps, valid_steps, entropy)
progress_tracker.setup_eval(stack)
# Set up the graph.
try:
graph_info = setup_graph(batch_size, data, stack)
except KeyboardInterrupt:
raise
except:
progress_tracker.error(sys.exc_info())
return -10
progress_tracker.start_eval(graph_info)
# Run the graph
try:
return run_graph(
graph_info,
data,
eval_steps,
report_every=eval_steps/4,
verbose=False,
eval_test=eval_test,
accuracy_minimum=50,
tracker=progress_tracker
)
except KeyboardInterrupt:
raise
except:
progress_tracker.error(sys.exc_info())
return -1
finally:
progress_tracker.output()
return evaluate
In [11]:
def create_stack(
patch_size, stride, depth, hidden_size, label_count, init_scale, optimizer_name=None
):
if optimizer_name:
optimizer = convevo.Optimizer(optimizer_name, 0.05)
optimizer.default_parameters()
else:
optimizer = None
conv_layers = [
("conv_bias", patch_size, stride, depth, "SAME", True),
("conv_bias", patch_size, stride, depth, "SAME", True)
]
return convevo.create_stack(
conv_layers, [], True, [hidden_size, label_count], 0.0, init_scale, 0.0, optimizer
)
In [12]:
# Test optimizers
def test_optimizers():
for optimizer in ["GradientDescent","Adadelta","Adagrad","Momentum","Adam","RMSProp"]:
# As of this writing "Ftrl" is not supported on the GPU
batch_size = 16
eval_steps = 10000
test_stack = create_stack(5, 2, 64, 128, 10, 0.1, optimizer)
test_data = setup_validate(full_data, eval_steps * batch_size, 500)
test_graph = setup_graph(batch_size, test_data, test_stack)
result = run_graph(test_graph, test_data,
eval_steps, report_every=eval_steps//4, verbose=False)
print(result)
test_optimizers()
In [13]:
# Test make_eval
with outputer.TeeOutput(os.path.join(results_path, outputer.timestamp("Eval", "txt"))):
test_stack = create_stack(5, 2, 64, 128, 10, 0.1)
print(convevo.serialize(test_stack))
make_eval()(test_stack, random.Random(42))
In [14]:
# Test darwin setup
mutate_entropy = random.Random(42)
scored_mutants = []
breed_options = {
"input_shape": graph_input_shape(16, full_data),
"output_shape": graph_output_shape(16, full_data)
}
mutator = darwin.Darwin(convevo.serialize, lambda s,e: 0, convevo.breed)
prototypes = [create_stack(5, 2, 64, 64, 10, 0.1)]
for mutant in mutator.init_population(prototypes, 20, False, breed_options,mutate_entropy):
scored_mutants.append((mutant, 0.0))
convevo.output_results(scored_mutants, "temp", "mutants.xml", 0, 0)
In [15]:
# Test mutation
mutate_entropy = random.Random(42)
mutant_children = []
for _ in range(20):
mutant_a = mutate_entropy.choice(scored_mutants)[0]
mutant_b = mutate_entropy.choice(scored_mutants)[0]
mutant_children.append(
(convevo.breed([mutant_a, mutant_b], breed_options, mutate_entropy), 0.0)
)
convevo.output_results(mutant_children, "temp", "mutant_offspring.xml", 42, 0)
In [16]:
# Prototypes to seed the population
prototypes = [
create_stack(5, 2, 64, 128, 10, 0.10, "GradientDescent"),
create_stack(6, 2, 128, 64, 10, 0.05, "Adadelta"),
create_stack(4, 2, 64, 128, 10, 0.10, "Adagrad"),
create_stack(5, 1, 32, 64, 10, 0.02, "Adam"),
create_stack(2, 2, 64, 128, 10, 0.20, "RMSProp")
]
In [17]:
prototypes.append(convevo.load_stack("testing/notMNIST_optimized.xml"))
In [ ]:
# Evolve!
BATCH_SIZE = 128
with outputer.TeeOutput(os.path.join("temp", outputer.timestamp("notMNIST_evo_", "txt"))):
# Setup seeds and entropy
mutate_seed = random.randint(1, 100000)
print("Mutate Seed:", mutate_seed)
mutate_entropy = random.Random(mutate_seed)
eval_seed = random.randint(1, 100000)
print("Eval Seed:", eval_seed)
eval_entropy = random.Random(eval_seed)
# Number of stacks to test each generation
population_size = 20
# Number of generations to evaluate
generations = 10
breed_options = {
"input_shape": graph_input_shape(BATCH_SIZE, full_data),
"output_shape": graph_output_shape(BATCH_SIZE, full_data)
}
# Ensure the prototypes have the correct structure (eg, output layer size)
for stack in prototypes:
stack.make_safe(breed_options["input_shape"], breed_options["output_shape"])
# Construct an evolver
charles = darwin.Darwin(
convevo.serialize,
make_eval(batch_size=BATCH_SIZE, eval_steps=100000),
convevo.breed
)
# Initialize the population (True to include the protototypes as population members)
charles.init_population(prototypes,population_size,True,breed_options,mutate_entropy)
# Evaluate, output, select and breed/mutate
for g in range(generations):
print("Generation", g)
results = charles.evaluate(eval_entropy)
convevo.output_results(results, "temp", outputer.timestamp() + ".xml")
charles.repopulate(population_size, 0.25, 4, results, breed_options,mutate_entropy)
In [ ]:
# Show the best result
best = charles.best()
print("Best score:", best[1])
print(convevo.serialize(best[0]))
In [ ]:
# Store all the results in one place. (note that each generation gets stored above too)
results = darwin.descending_score(charles.history.values())
convevo.output_results(
results, "testing", "notminist_full_evolve_run_optimized.xml", mutate_seed, eval_seed
)
len(results)
In [ ]:
# If something went wrong, this helps to summarize error reports
errors = []
for root, dirs, files in os.walk('temp'):
for name in files:
path = os.path.join(root, name)
low_name = name.lower()
if low_name.startswith("err"):
with open (path, "r") as error_file:
lines=error_file.readlines()
errors.append((path, lines[-1]))
for path, error in sorted(errors, key=lambda e: e[0]):
print(path)
print(error)
In [ ]:
# Show and re-run a stack (for debugging)
rerun = 0
print (convevo.serialize(charles.population[rerun]))
make_eval()(charles.population[rerun], eval_entropy)
In [ ]:
BATCH_SIZE = 128
with outputer.TeeOutput(os.path.join("temp", outputer.timestamp("notMNIST_test_", "txt"))):
for stack, score in results[:5]:
stack_graph = setup_graph(BATCH_SIZE, full_data, stack)
checkpoint_path = stack.checkpoint_path()
if checkpoint_path:
convnet.setup_restore_model(stack_graph, checkpoint_path)
run_graph(stack_graph, test_data, 0, verbose=True, eval_test=True)
In [18]:
full_eval = make_eval(batch_size=128, eval_steps=100000, valid_steps=10000, eval_test=True)
results_file = os.path.join("temp", outputer.timestamp("notMNIST_compare_", "txt"))
with outputer.TeeOutput(results_file):
seeds = [43214]
reseed = random.Random(seeds[0])
for _ in range(4):
seeds.append(reseed.randint(1, 100000))
print("Seeds:", seeds)
print("Baseline optimized network:")
# Network from https://discussions.udacity.com/t/assignment-4-problem-2/46525/26
baseline = convevo.load_stack("testing/notMNIST_optimized.xml")
print(convevo.serialize(baseline))
for seed in seeds:
full_eval(baseline, random.Random(seed))
print("\n------------------------------------------------")
print("Evolved from prototypes only:")
population,_,_ = convevo.load_population(
"testing/notMNIST_Evolve_2016-06-06~01_16_10_287.xml", False
)
best = population[0]
print(convevo.serialize(best))
for seed in seeds:
full_eval(best, random.Random(seed))
print("\n------------------------------------------------")
print("Evolved from prototypes plus optimized baseline:")
population,_,_ = convevo.load_population(
"testing/notMNIST_full_evolve_run_optimized_2016-07-07~16_08_54_561.xml", False
)
best = population[0]
print(convevo.serialize(best))
for seed in seeds:
full_eval(best, random.Random(seed))
In [ ]: