In [0]:
#@title ##### License
# Copyright 2018 The GraphNets Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
This notebook and the accompanying code demonstrates how to use the Graph Nets library to learn to predict the shortest path between two nodes in graph.
The network is trained to label the nodes and edges of the shortest path, given the start and end nodes.
After training, the network's prediction ability is illustrated by comparing its output to the true shortest path. Then the network's ability to generalise is tested, by using it to predict the shortest path in similar but larger graphs.
In [0]:
#@title ### Install the Graph Nets library on this Colaboratory runtime { form-width: "60%", run: "auto"}
#@markdown <br>1. Connect to a local or hosted Colaboratory runtime by clicking the **Connect** button at the top-right.<br>2. Choose "Yes" below to install the Graph Nets library on the runtime machine with the correct dependencies. Note, this works both with local and hosted Colaboratory runtimes.
install_graph_nets_library = "No" #@param ["Yes", "No"]
if install_graph_nets_library.lower() == "yes":
print("Installing Graph Nets library and dependencies:")
print("Output message from command:\n")
!pip install graph_nets "dm-sonnet<2" "tensorflow_probability<0.9"
else:
print("Skipping installation of Graph Nets library")
If you are running this notebook locally (i.e., not through Colaboratory), you will also need to install a few more dependencies. Run the following on the command line to install the graph networks library, as well as a few other dependencies:
pip install graph_nets matplotlib scipy "tensorflow>=1.15,<2" "dm-sonnet<2" "tensorflow_probability<0.9"
In [0]:
#@title Imports { form-width: "30%" }
%tensorflow_version 1.x # For Google Colab only.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import itertools
import time
from graph_nets import graphs
from graph_nets import utils_np
from graph_nets import utils_tf
from graph_nets.demos import models
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from scipy import spatial
import tensorflow as tf
SEED = 1
np.random.seed(SEED)
tf.set_random_seed(SEED)
In [0]:
#@title Helper functions { form-width: "30%" }
# pylint: disable=redefined-outer-name
DISTANCE_WEIGHT_NAME = "distance" # The name for the distance edge attribute.
def pairwise(iterable):
"""s -> (s0,s1), (s1,s2), (s2, s3), ..."""
a, b = itertools.tee(iterable)
next(b, None)
return zip(a, b)
def set_diff(seq0, seq1):
"""Return the set difference between 2 sequences as a list."""
return list(set(seq0) - set(seq1))
def to_one_hot(indices, max_value, axis=-1):
one_hot = np.eye(max_value)[indices]
if axis not in (-1, one_hot.ndim):
one_hot = np.moveaxis(one_hot, -1, axis)
return one_hot
def get_node_dict(graph, attr):
"""Return a `dict` of node:attribute pairs from a graph."""
return {k: v[attr] for k, v in graph.nodes.items()}
def generate_graph(rand,
num_nodes_min_max,
dimensions=2,
theta=1000.0,
rate=1.0):
"""Creates a connected graph.
The graphs are geographic threshold graphs, but with added edges via a
minimum spanning tree algorithm, to ensure all nodes are connected.
Args:
rand: A random seed for the graph generator. Default= None.
num_nodes_min_max: A sequence [lower, upper) number of nodes per graph.
dimensions: (optional) An `int` number of dimensions for the positions.
Default= 2.
theta: (optional) A `float` threshold parameters for the geographic
threshold graph's threshold. Large values (1000+) make mostly trees. Try
20-60 for good non-trees. Default=1000.0.
rate: (optional) A rate parameter for the node weight exponential sampling
distribution. Default= 1.0.
Returns:
The graph.
"""
# Sample num_nodes.
num_nodes = rand.randint(*num_nodes_min_max)
# Create geographic threshold graph.
pos_array = rand.uniform(size=(num_nodes, dimensions))
pos = dict(enumerate(pos_array))
weight = dict(enumerate(rand.exponential(rate, size=num_nodes)))
geo_graph = nx.geographical_threshold_graph(
num_nodes, theta, pos=pos, weight=weight)
# Create minimum spanning tree across geo_graph's nodes.
distances = spatial.distance.squareform(spatial.distance.pdist(pos_array))
i_, j_ = np.meshgrid(range(num_nodes), range(num_nodes), indexing="ij")
weighted_edges = list(zip(i_.ravel(), j_.ravel(), distances.ravel()))
mst_graph = nx.Graph()
mst_graph.add_weighted_edges_from(weighted_edges, weight=DISTANCE_WEIGHT_NAME)
mst_graph = nx.minimum_spanning_tree(mst_graph, weight=DISTANCE_WEIGHT_NAME)
# Put geo_graph's node attributes into the mst_graph.
for i in mst_graph.nodes():
mst_graph.nodes[i].update(geo_graph.nodes[i])
# Compose the graphs.
combined_graph = nx.compose_all((mst_graph, geo_graph.copy()))
# Put all distance weights into edge attributes.
for i, j in combined_graph.edges():
combined_graph.get_edge_data(i, j).setdefault(DISTANCE_WEIGHT_NAME,
distances[i, j])
return combined_graph, mst_graph, geo_graph
def add_shortest_path(rand, graph, min_length=1):
"""Samples a shortest path from A to B and adds attributes to indicate it.
Args:
rand: A random seed for the graph generator. Default= None.
graph: A `nx.Graph`.
min_length: (optional) An `int` minimum number of edges in the shortest
path. Default= 1.
Returns:
The `nx.DiGraph` with the shortest path added.
Raises:
ValueError: All shortest paths are below the minimum length
"""
# Map from node pairs to the length of their shortest path.
pair_to_length_dict = {}
try:
# This is for compatibility with older networkx.
lengths = nx.all_pairs_shortest_path_length(graph).items()
except AttributeError:
# This is for compatibility with newer networkx.
lengths = list(nx.all_pairs_shortest_path_length(graph))
for x, yy in lengths:
for y, l in yy.items():
if l >= min_length:
pair_to_length_dict[x, y] = l
if max(pair_to_length_dict.values()) < min_length:
raise ValueError("All shortest paths are below the minimum length")
# The node pairs which exceed the minimum length.
node_pairs = list(pair_to_length_dict)
# Computes probabilities per pair, to enforce uniform sampling of each
# shortest path lengths.
# The counts of pairs per length.
counts = collections.Counter(pair_to_length_dict.values())
prob_per_length = 1.0 / len(counts)
probabilities = [
prob_per_length / counts[pair_to_length_dict[x]] for x in node_pairs
]
# Choose the start and end points.
i = rand.choice(len(node_pairs), p=probabilities)
start, end = node_pairs[i]
path = nx.shortest_path(
graph, source=start, target=end, weight=DISTANCE_WEIGHT_NAME)
# Creates a directed graph, to store the directed path from start to end.
digraph = graph.to_directed()
# Add the "start", "end", and "solution" attributes to the nodes and edges.
digraph.add_node(start, start=True)
digraph.add_node(end, end=True)
digraph.add_nodes_from(set_diff(digraph.nodes(), [start]), start=False)
digraph.add_nodes_from(set_diff(digraph.nodes(), [end]), end=False)
digraph.add_nodes_from(set_diff(digraph.nodes(), path), solution=False)
digraph.add_nodes_from(path, solution=True)
path_edges = list(pairwise(path))
digraph.add_edges_from(set_diff(digraph.edges(), path_edges), solution=False)
digraph.add_edges_from(path_edges, solution=True)
return digraph
def graph_to_input_target(graph):
"""Returns 2 graphs with input and target feature vectors for training.
Args:
graph: An `nx.DiGraph` instance.
Returns:
The input `nx.DiGraph` instance.
The target `nx.DiGraph` instance.
Raises:
ValueError: unknown node type
"""
def create_feature(attr, fields):
return np.hstack([np.array(attr[field], dtype=float) for field in fields])
input_node_fields = ("pos", "weight", "start", "end")
input_edge_fields = ("distance",)
target_node_fields = ("solution",)
target_edge_fields = ("solution",)
input_graph = graph.copy()
target_graph = graph.copy()
solution_length = 0
for node_index, node_feature in graph.nodes(data=True):
input_graph.add_node(
node_index, features=create_feature(node_feature, input_node_fields))
target_node = to_one_hot(
create_feature(node_feature, target_node_fields).astype(int), 2)[0]
target_graph.add_node(node_index, features=target_node)
solution_length += int(node_feature["solution"])
solution_length /= graph.number_of_nodes()
for receiver, sender, features in graph.edges(data=True):
input_graph.add_edge(
sender, receiver, features=create_feature(features, input_edge_fields))
target_edge = to_one_hot(
create_feature(features, target_edge_fields).astype(int), 2)[0]
target_graph.add_edge(sender, receiver, features=target_edge)
input_graph.graph["features"] = np.array([0.0])
target_graph.graph["features"] = np.array([solution_length], dtype=float)
return input_graph, target_graph
def generate_networkx_graphs(rand, num_examples, num_nodes_min_max, theta):
"""Generate graphs for training.
Args:
rand: A random seed (np.RandomState instance).
num_examples: Total number of graphs to generate.
num_nodes_min_max: A 2-tuple with the [lower, upper) number of nodes per
graph. The number of nodes for a graph is uniformly sampled within this
range.
theta: (optional) A `float` threshold parameters for the geographic
threshold graph's threshold. Default= the number of nodes.
Returns:
input_graphs: The list of input graphs.
target_graphs: The list of output graphs.
graphs: The list of generated graphs.
"""
input_graphs = []
target_graphs = []
graphs = []
for _ in range(num_examples):
graph = generate_graph(rand, num_nodes_min_max, theta=theta)[0]
graph = add_shortest_path(rand, graph)
input_graph, target_graph = graph_to_input_target(graph)
input_graphs.append(input_graph)
target_graphs.append(target_graph)
graphs.append(graph)
return input_graphs, target_graphs, graphs
def create_placeholders(rand, batch_size, num_nodes_min_max, theta):
"""Creates placeholders for the model training and evaluation.
Args:
rand: A random seed (np.RandomState instance).
batch_size: Total number of graphs per batch.
num_nodes_min_max: A 2-tuple with the [lower, upper) number of nodes per
graph. The number of nodes for a graph is uniformly sampled within this
range.
theta: A `float` threshold parameters for the geographic threshold graph's
threshold. Default= the number of nodes.
Returns:
input_ph: The input graph's placeholders, as a graph namedtuple.
target_ph: The target graph's placeholders, as a graph namedtuple.
"""
# Create some example data for inspecting the vector sizes.
input_graphs, target_graphs, _ = generate_networkx_graphs(
rand, batch_size, num_nodes_min_max, theta)
input_ph = utils_tf.placeholders_from_networkxs(input_graphs)
target_ph = utils_tf.placeholders_from_networkxs(target_graphs)
return input_ph, target_ph
def create_feed_dict(rand, batch_size, num_nodes_min_max, theta, input_ph,
target_ph):
"""Creates placeholders for the model training and evaluation.
Args:
rand: A random seed (np.RandomState instance).
batch_size: Total number of graphs per batch.
num_nodes_min_max: A 2-tuple with the [lower, upper) number of nodes per
graph. The number of nodes for a graph is uniformly sampled within this
range.
theta: A `float` threshold parameters for the geographic threshold graph's
threshold. Default= the number of nodes.
input_ph: The input graph's placeholders, as a graph namedtuple.
target_ph: The target graph's placeholders, as a graph namedtuple.
Returns:
feed_dict: The feed `dict` of input and target placeholders and data.
raw_graphs: The `dict` of raw networkx graphs.
"""
inputs, targets, raw_graphs = generate_networkx_graphs(
rand, batch_size, num_nodes_min_max, theta)
input_graphs = utils_np.networkxs_to_graphs_tuple(inputs)
target_graphs = utils_np.networkxs_to_graphs_tuple(targets)
feed_dict = {input_ph: input_graphs, target_ph: target_graphs}
return feed_dict, raw_graphs
def compute_accuracy(target, output, use_nodes=True, use_edges=False):
"""Calculate model accuracy.
Returns the number of correctly predicted shortest path nodes and the number
of completely solved graphs (100% correct predictions).
Args:
target: A `graphs.GraphsTuple` that contains the target graph.
output: A `graphs.GraphsTuple` that contains the output graph.
use_nodes: A `bool` indicator of whether to compute node accuracy or not.
use_edges: A `bool` indicator of whether to compute edge accuracy or not.
Returns:
correct: A `float` fraction of correctly labeled nodes/edges.
solved: A `float` fraction of graphs that are completely correctly labeled.
Raises:
ValueError: Nodes or edges (or both) must be used
"""
if not use_nodes and not use_edges:
raise ValueError("Nodes or edges (or both) must be used")
tdds = utils_np.graphs_tuple_to_data_dicts(target)
odds = utils_np.graphs_tuple_to_data_dicts(output)
cs = []
ss = []
for td, od in zip(tdds, odds):
xn = np.argmax(td["nodes"], axis=-1)
yn = np.argmax(od["nodes"], axis=-1)
xe = np.argmax(td["edges"], axis=-1)
ye = np.argmax(od["edges"], axis=-1)
c = []
if use_nodes:
c.append(xn == yn)
if use_edges:
c.append(xe == ye)
c = np.concatenate(c, axis=0)
s = np.all(c)
cs.append(c)
ss.append(s)
correct = np.mean(np.concatenate(cs, axis=0))
solved = np.mean(np.stack(ss))
return correct, solved
def create_loss_ops(target_op, output_ops):
loss_ops = [
tf.losses.softmax_cross_entropy(target_op.nodes, output_op.nodes) +
tf.losses.softmax_cross_entropy(target_op.edges, output_op.edges)
for output_op in output_ops
]
return loss_ops
def make_all_runnable_in_session(*args):
"""Lets an iterable of TF graphs be output from a session as NP graphs."""
return [utils_tf.make_runnable_in_session(a) for a in args]
class GraphPlotter(object):
def __init__(self, ax, graph, pos):
self._ax = ax
self._graph = graph
self._pos = pos
self._base_draw_kwargs = dict(G=self._graph, pos=self._pos, ax=self._ax)
self._solution_length = None
self._nodes = None
self._edges = None
self._start_nodes = None
self._end_nodes = None
self._solution_nodes = None
self._intermediate_solution_nodes = None
self._solution_edges = None
self._non_solution_nodes = None
self._non_solution_edges = None
self._ax.set_axis_off()
@property
def solution_length(self):
if self._solution_length is None:
self._solution_length = len(self._solution_edges)
return self._solution_length
@property
def nodes(self):
if self._nodes is None:
self._nodes = self._graph.nodes()
return self._nodes
@property
def edges(self):
if self._edges is None:
self._edges = self._graph.edges()
return self._edges
@property
def start_nodes(self):
if self._start_nodes is None:
self._start_nodes = [
n for n in self.nodes if self._graph.nodes[n].get("start", False)
]
return self._start_nodes
@property
def end_nodes(self):
if self._end_nodes is None:
self._end_nodes = [
n for n in self.nodes if self._graph.nodes[n].get("end", False)
]
return self._end_nodes
@property
def solution_nodes(self):
if self._solution_nodes is None:
self._solution_nodes = [
n for n in self.nodes if self._graph.nodes[n].get("solution", False)
]
return self._solution_nodes
@property
def intermediate_solution_nodes(self):
if self._intermediate_solution_nodes is None:
self._intermediate_solution_nodes = [
n for n in self.nodes
if self._graph.nodes[n].get("solution", False) and
not self._graph.nodes[n].get("start", False) and
not self._graph.nodes[n].get("end", False)
]
return self._intermediate_solution_nodes
@property
def solution_edges(self):
if self._solution_edges is None:
self._solution_edges = [
e for e in self.edges
if self._graph.get_edge_data(e[0], e[1]).get("solution", False)
]
return self._solution_edges
@property
def non_solution_nodes(self):
if self._non_solution_nodes is None:
self._non_solution_nodes = [
n for n in self.nodes
if not self._graph.nodes[n].get("solution", False)
]
return self._non_solution_nodes
@property
def non_solution_edges(self):
if self._non_solution_edges is None:
self._non_solution_edges = [
e for e in self.edges
if not self._graph.get_edge_data(e[0], e[1]).get("solution", False)
]
return self._non_solution_edges
def _make_draw_kwargs(self, **kwargs):
kwargs.update(self._base_draw_kwargs)
return kwargs
def _draw(self, draw_function, zorder=None, **kwargs):
draw_kwargs = self._make_draw_kwargs(**kwargs)
collection = draw_function(**draw_kwargs)
if collection is not None and zorder is not None:
try:
# This is for compatibility with older matplotlib.
collection.set_zorder(zorder)
except AttributeError:
# This is for compatibility with newer matplotlib.
collection[0].set_zorder(zorder)
return collection
def draw_nodes(self, **kwargs):
"""Useful kwargs: nodelist, node_size, node_color, linewidths."""
if ("node_color" in kwargs and
isinstance(kwargs["node_color"], collections.Sequence) and
len(kwargs["node_color"]) in {3, 4} and
not isinstance(kwargs["node_color"][0],
(collections.Sequence, np.ndarray))):
num_nodes = len(kwargs.get("nodelist", self.nodes))
kwargs["node_color"] = np.tile(
np.array(kwargs["node_color"])[None], [num_nodes, 1])
return self._draw(nx.draw_networkx_nodes, **kwargs)
def draw_edges(self, **kwargs):
"""Useful kwargs: edgelist, width."""
return self._draw(nx.draw_networkx_edges, **kwargs)
def draw_graph(self,
node_size=200,
node_color=(0.4, 0.8, 0.4),
node_linewidth=1.0,
edge_width=1.0):
# Plot nodes.
self.draw_nodes(
nodelist=self.nodes,
node_size=node_size,
node_color=node_color,
linewidths=node_linewidth,
zorder=20)
# Plot edges.
self.draw_edges(edgelist=self.edges, width=edge_width, zorder=10)
def draw_graph_with_solution(self,
node_size=200,
node_color=(0.4, 0.8, 0.4),
node_linewidth=1.0,
edge_width=1.0,
start_color="w",
end_color="k",
solution_node_linewidth=3.0,
solution_edge_width=3.0):
node_border_color = (0.0, 0.0, 0.0, 1.0)
node_collections = {}
# Plot start nodes.
node_collections["start nodes"] = self.draw_nodes(
nodelist=self.start_nodes,
node_size=node_size,
node_color=start_color,
linewidths=solution_node_linewidth,
edgecolors=node_border_color,
zorder=100)
# Plot end nodes.
node_collections["end nodes"] = self.draw_nodes(
nodelist=self.end_nodes,
node_size=node_size,
node_color=end_color,
linewidths=solution_node_linewidth,
edgecolors=node_border_color,
zorder=90)
# Plot intermediate solution nodes.
if isinstance(node_color, dict):
c = [node_color[n] for n in self.intermediate_solution_nodes]
else:
c = node_color
node_collections["intermediate solution nodes"] = self.draw_nodes(
nodelist=self.intermediate_solution_nodes,
node_size=node_size,
node_color=c,
linewidths=solution_node_linewidth,
edgecolors=node_border_color,
zorder=80)
# Plot solution edges.
node_collections["solution edges"] = self.draw_edges(
edgelist=self.solution_edges, width=solution_edge_width, zorder=70)
# Plot non-solution nodes.
if isinstance(node_color, dict):
c = [node_color[n] for n in self.non_solution_nodes]
else:
c = node_color
node_collections["non-solution nodes"] = self.draw_nodes(
nodelist=self.non_solution_nodes,
node_size=node_size,
node_color=c,
linewidths=node_linewidth,
edgecolors=node_border_color,
zorder=20)
# Plot non-solution edges.
node_collections["non-solution edges"] = self.draw_edges(
edgelist=self.non_solution_edges, width=edge_width, zorder=10)
# Set title as solution length.
self._ax.set_title("Solution length: {}".format(self.solution_length))
return node_collections
# pylint: enable=redefined-outer-name
In [0]:
#@title Visualize example graphs { form-width: "30%" }
seed = 1 #@param{type: 'integer'}
rand = np.random.RandomState(seed=seed)
num_examples = 15 #@param{type: 'integer'}
# Large values (1000+) make trees. Try 20-60 for good non-trees.
theta = 20 #@param{type: 'integer'}
num_nodes_min_max = (16, 17)
input_graphs, target_graphs, graphs = generate_networkx_graphs(
rand, num_examples, num_nodes_min_max, theta)
num = min(num_examples, 16)
w = 3
h = int(np.ceil(num / w))
fig = plt.figure(40, figsize=(w * 4, h * 4))
fig.clf()
for j, graph in enumerate(graphs):
ax = fig.add_subplot(h, w, j + 1)
pos = get_node_dict(graph, "pos")
plotter = GraphPlotter(ax, graph, pos)
plotter.draw_graph_with_solution()
In [0]:
#@title Set up model training and evaluation { form-width: "30%" }
# The model we explore includes three components:
# - An "Encoder" graph net, which independently encodes the edge, node, and
# global attributes (does not compute relations etc.).
# - A "Core" graph net, which performs N rounds of processing (message-passing)
# steps. The input to the Core is the concatenation of the Encoder's output
# and the previous output of the Core (labeled "Hidden(t)" below, where "t" is
# the processing step).
# - A "Decoder" graph net, which independently decodes the edge, node, and
# global attributes (does not compute relations etc.), on each
# message-passing step.
#
# Hidden(t) Hidden(t+1)
# | ^
# *---------* | *------* | *---------*
# | | | | | | | |
# Input --->| Encoder | *->| Core |--*->| Decoder |---> Output(t)
# | |---->| | | |
# *---------* *------* *---------*
#
# The model is trained by supervised learning. Input graphs are procedurally
# generated, and output graphs have the same structure with the nodes and edges
# of the shortest path labeled (using 2-element 1-hot vectors). We could have
# predicted the shortest path only by labeling either the nodes or edges, and
# that does work, but we decided to predict both to demonstrate the flexibility
# of graph nets' outputs.
#
# The training loss is computed on the output of each processing step. The
# reason for this is to encourage the model to try to solve the problem in as
# few steps as possible. It also helps make the output of intermediate steps
# more interpretable.
#
# There's no need for a separate evaluate dataset because the inputs are
# never repeated, so the training loss is the measure of performance on graphs
# from the input distribution.
#
# We also evaluate how well the models generalize to graphs which are up to
# twice as large as those on which it was trained. The loss is computed only
# on the final processing step.
#
# Variables with the suffix _tr are training parameters, and variables with the
# suffix _ge are test/generalization parameters.
#
# After around 2000-5000 training iterations the model reaches near-perfect
# performance on graphs with between 8-16 nodes.
tf.reset_default_graph()
seed = 2
rand = np.random.RandomState(seed=seed)
# Model parameters.
# Number of processing (message-passing) steps.
num_processing_steps_tr = 10
num_processing_steps_ge = 10
# Data / training parameters.
num_training_iterations = 10000
theta = 20 # Large values (1000+) make trees. Try 20-60 for good non-trees.
batch_size_tr = 32
batch_size_ge = 100
# Number of nodes per graph sampled uniformly from this range.
num_nodes_min_max_tr = (8, 17)
num_nodes_min_max_ge = (16, 33)
# Data.
# Input and target placeholders.
input_ph, target_ph = create_placeholders(rand, batch_size_tr,
num_nodes_min_max_tr, theta)
# Connect the data to the model.
# Instantiate the model.
model = models.EncodeProcessDecode(edge_output_size=2, node_output_size=2)
# A list of outputs, one per processing step.
output_ops_tr = model(input_ph, num_processing_steps_tr)
output_ops_ge = model(input_ph, num_processing_steps_ge)
# Training loss.
loss_ops_tr = create_loss_ops(target_ph, output_ops_tr)
# Loss across processing steps.
loss_op_tr = sum(loss_ops_tr) / num_processing_steps_tr
# Test/generalization loss.
loss_ops_ge = create_loss_ops(target_ph, output_ops_ge)
loss_op_ge = loss_ops_ge[-1] # Loss from final processing step.
# Optimizer.
learning_rate = 1e-3
optimizer = tf.train.AdamOptimizer(learning_rate)
step_op = optimizer.minimize(loss_op_tr)
# Lets an iterable of TF graphs be output from a session as NP graphs.
input_ph, target_ph = make_all_runnable_in_session(input_ph, target_ph)
In [0]:
#@title Reset session { form-width: "30%" }
# This cell resets the Tensorflow session, but keeps the same computational
# graph.
try:
sess.close()
except NameError:
pass
sess = tf.Session()
sess.run(tf.global_variables_initializer())
last_iteration = 0
logged_iterations = []
losses_tr = []
corrects_tr = []
solveds_tr = []
losses_ge = []
corrects_ge = []
solveds_ge = []
In [0]:
#@title Run training { form-width: "30%" }
# You can interrupt this cell's training loop at any time, and visualize the
# intermediate results by running the next cell (below). You can then resume
# training by simply executing this cell again.
# How much time between logging and printing the current results.
log_every_seconds = 20
print("# (iteration number), T (elapsed seconds), "
"Ltr (training loss), Lge (test/generalization loss), "
"Ctr (training fraction nodes/edges labeled correctly), "
"Str (training fraction examples solved correctly), "
"Cge (test/generalization fraction nodes/edges labeled correctly), "
"Sge (test/generalization fraction examples solved correctly)")
start_time = time.time()
last_log_time = start_time
for iteration in range(last_iteration, num_training_iterations):
last_iteration = iteration
feed_dict, _ = create_feed_dict(rand, batch_size_tr, num_nodes_min_max_tr,
theta, input_ph, target_ph)
train_values = sess.run({
"step": step_op,
"target": target_ph,
"loss": loss_op_tr,
"outputs": output_ops_tr
},
feed_dict=feed_dict)
the_time = time.time()
elapsed_since_last_log = the_time - last_log_time
if elapsed_since_last_log > log_every_seconds:
last_log_time = the_time
feed_dict, raw_graphs = create_feed_dict(
rand, batch_size_ge, num_nodes_min_max_ge, theta, input_ph, target_ph)
test_values = sess.run({
"target": target_ph,
"loss": loss_op_ge,
"outputs": output_ops_ge
},
feed_dict=feed_dict)
correct_tr, solved_tr = compute_accuracy(
train_values["target"], train_values["outputs"][-1], use_edges=True)
correct_ge, solved_ge = compute_accuracy(
test_values["target"], test_values["outputs"][-1], use_edges=True)
elapsed = time.time() - start_time
losses_tr.append(train_values["loss"])
corrects_tr.append(correct_tr)
solveds_tr.append(solved_tr)
losses_ge.append(test_values["loss"])
corrects_ge.append(correct_ge)
solveds_ge.append(solved_ge)
logged_iterations.append(iteration)
print("# {:05d}, T {:.1f}, Ltr {:.4f}, Lge {:.4f}, Ctr {:.4f}, Str"
" {:.4f}, Cge {:.4f}, Sge {:.4f}".format(
iteration, elapsed, train_values["loss"], test_values["loss"],
correct_tr, solved_tr, correct_ge, solved_ge))
In [0]:
#@title Visualize results { form-width: "30%" }
# This cell visualizes the results of training. You can visualize the
# intermediate results by interrupting execution of the cell above, and running
# this cell. You can then resume training by simply executing the above cell
# again.
def softmax_prob_last_dim(x): # pylint: disable=redefined-outer-name
e = np.exp(x)
return e[:, -1] / np.sum(e, axis=-1)
# Plot results curves.
fig = plt.figure(1, figsize=(18, 3))
fig.clf()
x = np.array(logged_iterations)
# Loss.
y_tr = losses_tr
y_ge = losses_ge
ax = fig.add_subplot(1, 3, 1)
ax.plot(x, y_tr, "k", label="Training")
ax.plot(x, y_ge, "k--", label="Test/generalization")
ax.set_title("Loss across training")
ax.set_xlabel("Training iteration")
ax.set_ylabel("Loss (binary cross-entropy)")
ax.legend()
# Correct.
y_tr = corrects_tr
y_ge = corrects_ge
ax = fig.add_subplot(1, 3, 2)
ax.plot(x, y_tr, "k", label="Training")
ax.plot(x, y_ge, "k--", label="Test/generalization")
ax.set_title("Fraction correct across training")
ax.set_xlabel("Training iteration")
ax.set_ylabel("Fraction nodes/edges correct")
# Solved.
y_tr = solveds_tr
y_ge = solveds_ge
ax = fig.add_subplot(1, 3, 3)
ax.plot(x, y_tr, "k", label="Training")
ax.plot(x, y_ge, "k--", label="Test/generalization")
ax.set_title("Fraction solved across training")
ax.set_xlabel("Training iteration")
ax.set_ylabel("Fraction examples solved")
# Plot graphs and results after each processing step.
# The white node is the start, and the black is the end. Other nodes are colored
# from red to purple to blue, where red means the model is confident the node is
# off the shortest path, blue means the model is confident the node is on the
# shortest path, and purplish colors mean the model isn't sure.
max_graphs_to_plot = 6
num_steps_to_plot = 4
node_size = 120
min_c = 0.3
num_graphs = len(raw_graphs)
targets = utils_np.graphs_tuple_to_data_dicts(test_values["target"])
step_indices = np.floor(
np.linspace(0, num_processing_steps_ge - 1,
num_steps_to_plot)).astype(int).tolist()
outputs = list(
zip(*(utils_np.graphs_tuple_to_data_dicts(test_values["outputs"][i])
for i in step_indices)))
h = min(num_graphs, max_graphs_to_plot)
w = num_steps_to_plot + 1
fig = plt.figure(101, figsize=(18, h * 3))
fig.clf()
ncs = []
for j, (graph, target, output) in enumerate(zip(raw_graphs, targets, outputs)):
if j >= h:
break
pos = get_node_dict(graph, "pos")
ground_truth = target["nodes"][:, -1]
# Ground truth.
iax = j * (1 + num_steps_to_plot) + 1
ax = fig.add_subplot(h, w, iax)
plotter = GraphPlotter(ax, graph, pos)
color = {}
for i, n in enumerate(plotter.nodes):
color[n] = np.array([1.0 - ground_truth[i], 0.0, ground_truth[i], 1.0
]) * (1.0 - min_c) + min_c
plotter.draw_graph_with_solution(node_size=node_size, node_color=color)
ax.set_axis_on()
ax.set_xticks([])
ax.set_yticks([])
try:
ax.set_facecolor([0.9] * 3 + [1.0])
except AttributeError:
ax.set_axis_bgcolor([0.9] * 3 + [1.0])
ax.grid(None)
ax.set_title("Ground truth\nSolution length: {}".format(
plotter.solution_length))
# Prediction.
for k, outp in enumerate(output):
iax = j * (1 + num_steps_to_plot) + 2 + k
ax = fig.add_subplot(h, w, iax)
plotter = GraphPlotter(ax, graph, pos)
color = {}
prob = softmax_prob_last_dim(outp["nodes"])
for i, n in enumerate(plotter.nodes):
color[n] = np.array([1.0 - prob[n], 0.0, prob[n], 1.0
]) * (1.0 - min_c) + min_c
plotter.draw_graph_with_solution(node_size=node_size, node_color=color)
ax.set_title("Model-predicted\nStep {:02d} / {:02d}".format(
step_indices[k] + 1, step_indices[-1] + 1))