Implementation of CNNs for graph-structured data :
Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering
M Defferrard, X Bresson, P Vandergheynst
arXiv preprint arXiv:1606.09375
NIPS 2016
In [1]:
import tensorflow as tf
import numpy as np
import time
import collections
import shutil
import os
import sklearn
In [3]:
# Auto-reloading external modules
%load_ext autoreload
%autoreload 1
# Import functions in lib folder
import sys
sys.path.insert(0, 'lib/')
%aimport graph
%aimport coarsening
In [4]:
flags = tf.app.flags # tf.app = a wrapper python-gflags
FLAGS = flags.FLAGS
# Data folder
flags.DEFINE_string('dir_data', 'datasets', 'Directory to store data')
flags.DEFINE_float('learning_rate', 0.2, 'Initial learning rate.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.')
flags.DEFINE_float('regularization', 0.0, 'L2 regularizations of weights and biases.')
flags.DEFINE_float('dropout', 1.0, 'Dropout')
# Graphs
flags.DEFINE_integer('number_edges', 8, 'Graph: minimum number of edges per vertex.')
flags.DEFINE_string('metric', 'euclidean', 'Graph: similarity measure (between features).')
# TODO: change cgcnn for combinatorial Laplacians.
flags.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.')
flags.DEFINE_integer('coarsening_levels', 4, 'Number of coarsened graphs.')
In [5]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False) # load data in local folder
In [6]:
train_data = mnist.train.images.astype(np.float32)
val_data = mnist.validation.images.astype(np.float32)
test_data = mnist.test.images.astype(np.float32)
train_labels = mnist.train.labels
val_labels = mnist.validation.labels
test_labels = mnist.test.labels
print(train_data.shape)
print(train_labels.shape)
print(val_data.shape)
print(val_labels.shape)
print(test_data.shape)
print(test_labels.shape)
In [7]:
# Graph of Euclidean grid
def grid_graph(m, corners=False):
z = graph.grid(m)
dist, idx = graph.distance_sklearn_metrics(z, k=FLAGS.number_edges, metric=FLAGS.metric)
A = graph.adjacency(dist, idx)
# Connections are only vertical or horizontal on the grid.
# Corner vertices are connected to 2 neightbors only.
if corners:
import scipy.sparse
A = A.toarray()
A[A < A.max()/1.5] = 0
A = scipy.sparse.csr_matrix(A)
print('{} edges'.format(A.nnz))
print("{} > {} edges".format(A.nnz//2, FLAGS.number_edges*m**2//2))
return A
In [8]:
# Coarsening with Metis-like technique
def coarsen(A, levels):
graphs, parents = coarsening.metis(A, levels)
perms = coarsening.compute_perm(parents)
laplacians = []
for i,A in enumerate(graphs):
M, M = A.shape
# No self-connections.
if True:
A = A.tocoo()
A.setdiag(0)
if i < levels:
A = coarsening.perm_adjacency(A, perms[i])
A = A.tocsr()
A.eliminate_zeros()
Mnew, Mnew = A.shape
print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added), |E| = {3} edges'.format(i, Mnew, Mnew-M, A.nnz//2))
L = graph.laplacian(A, normalized=FLAGS.normalized_laplacian)
laplacians.append(L)
return laplacians, perms[0] if len(perms) > 0 else None
In [9]:
# Construct graph
t_start = time.process_time()
A = grid_graph(28, corners=False)
#A = graph.replace_random_edges(A, 0)
# Compute coarsened graphs
L, perm = coarsen(A, FLAGS.coarsening_levels)
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
# Compute max eigenvalue of graph Laplacians
lmax = []
lmax.append(graph.lmaxX(L[0]))
lmax.append(graph.lmaxX(L[1]))
lmax.append(graph.lmaxX(L[2]))
lmax.append(graph.lmaxX(L[3]))
#print(lmax[0],lmax[1],lmax[2],lmax[3])
In [11]:
# Reindex nodes to satisfy a binary tree structure
t_start = time.process_time()
train_data = coarsening.perm_data(train_data, perm)
val_data = coarsening.perm_data(val_data, perm)
test_data = coarsening.perm_data(test_data, perm)
print(train_data.shape)
print(val_data.shape)
print(test_data.shape)
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
#del perm
In [12]:
# Define generic class of Neural Networks
class base_model(object):
# Constructor
def __init__(self):
self.regularizers = 0 # L2 regularizers
# Private methods
def _weight_variable(self, shape, regularization=True):
initial = tf.truncated_normal(shape, stddev=0.1)
var = tf.Variable(initial, name='weights')
if regularization:
self.regularizers += tf.nn.l2_loss(var)
#tf.histogram_summary(var.op.name, var)
return var
def _weight_variable_spectral(self, shape, regularization=True):
initial = tf.truncated_normal(shape, stddev=0.1)
#initial = tf.abs(initial) # XAV
var = tf.Variable(initial, name='weights')
if regularization:
self.regularizers += tf.nn.l2_loss(var)
#tf.histogram_summary(var.op.name, var)
return var
def _bias_variable(self, shape, regularization=True):
initial = tf.constant(0.1, shape=shape)
var = tf.Variable(initial, name='bias')
if regularization:
self.regularizers += tf.nn.l2_loss(var)
#tf.histogram_summary(var.op.name, var)
return var
# Public methods
def loss(self, logits, labels, regularization):
labels = tf.to_int64(labels)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels, name='xentropy')
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
loss += regularization * self.regularizers
#tf.scalar_summary('loss', loss) # Tensorboard
return loss
# Optimization
def training(self, loss, learning_rate, train_size, batch_size):
# Optimizer: set up a variable that's incremented once per batch and
# controls the learning rate decay.
batch = tf.Variable(0)
# Decay once per epoch, using an exponential schedule starting at 0.01.
learning_rate = tf.train.exponential_decay(
0.01, # Base learning rate.
batch * batch_size, # Current index into the dataset.
train_size, # Decay step.
0.95, # Decay rate.
staircase=True)
# Use simple momentum for the optimization.
optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
train_op = optimizer.minimize(loss, global_step=batch)
return train_op
def evaluation(self, logits, labels):
output_classes = tf.cast(tf.argmax(tf.nn.softmax(logits),1), tf.int32)
acc = 100.* tf.reduce_sum(tf.cast(tf.equal(output_classes,labels), tf.float32))/ tf.cast(tf.shape(logits)[0], tf.float32)
return acc
def prediction(self, logits):
"""Return the predicted classes."""
output_classes = tf.cast(tf.argmax(tf.nn.softmax(logits),1), tf.int32)
return output_classes
def mpool1(self, x, p):
"""Max pooling of size p. Should be a power of 2."""
if p > 1:
x = tf.expand_dims(x, 3) # N x M x F x 1
x = tf.nn.max_pool(x, ksize=[1,p,1,1], strides=[1,p,1,1], padding='SAME')
return tf.squeeze(x, [3]) # N x M/p x F
else:
return x
def chebyshev5(self, x, L, lmax, Fout, K, W):
N, M, Fin = x.get_shape()
N, M, Fin = int(N), int(M), int(Fin)
L = graph.rescale_L(L, lmax)
L = L.tocoo()
indices = np.column_stack((L.row, L.col))
L = tf.SparseTensor(indices, L.data, L.shape)
L = tf.sparse_reorder(L)
# Transform to Chebyshev basis
x0 = tf.transpose(x, perm=[1, 2, 0]) # M x Fin x N
x0 = tf.reshape(x0, [M, Fin*N]) # M x Fin*N
x = tf.expand_dims(x0, 0) # 1 x M x Fin*N
def concat(x, x_):
x_ = tf.expand_dims(x_, 0) # 1 x M x Fin*N
return tf.concat(0, [x, x_]) # K x M x Fin*N
if K > 1:
x1 = tf.sparse_tensor_dense_matmul(L, x0)
x = concat(x, x1)
for k in range(2, K):
x2 = 2 * tf.sparse_tensor_dense_matmul(L, x1) - x0 # M x Fin*N
x = concat(x, x2)
x0, x1 = x1, x2
x = tf.reshape(x, [K, M, Fin, N]) # K x M x Fin x N
x = tf.transpose(x, perm=[3,1,2,0]) # N x M x Fin x K
x = tf.reshape(x, [N*M, Fin*K]) # N*M x Fin*K
# Filter: Fin*Fout filters of order K, i.e. one filterbank per feature.
x = tf.matmul(x, W) # N*M x Fout
out = tf.reshape(x, [N, M, Fout]) # N x M x Fout
return out
In [13]:
F1 = 32 # Number of features
K1 = 20 # Spatial support = polynomial order
NCLASSES = 10 # Number of classes
M0 = L[0].shape[0]
class GCNN_1CL_1MP_1FC(base_model):
def __init__(self):
print('Graph CNN Architecture: 1CL+1MP+1FC')
super().__init__()
self.W1 = self._weight_variable_spectral([K1, F1], regularization=False)
self.b1 = self._bias_variable([1, 1, F1], regularization=False)
self.W2 = self._weight_variable([int(M0/4*F1), NCLASSES], regularization=True)
self.b2 = self._bias_variable([NCLASSES], regularization=True)
def inference(self, x, L, lmax, d):
# Graph convolutional layers
layer_name = 'CN32'
with tf.name_scope(layer_name):
# Graph filtering
x = tf.expand_dims(x, 2) # N x M x F=1
x = self.chebyshev5(x, L[0], lmax[0], F1, K1, self.W1)
x = x + self.b1
# Non-linear activation
x = tf.nn.relu(x)
# Max pooling
layer_name = 'MP4'
with tf.name_scope(layer_name):
x = self.mpool1(x,4)
# Dropout
x = tf.nn.dropout(x, d)
# Fully connected hidden layers
layer_name = 'FC10'
with tf.name_scope(layer_name):
N, M, F = x.get_shape()
x = tf.reshape(x, [int(N), int(M0/4*F1)]) # N x M
x = tf.matmul(x, self.W2) + self.b2
return x
In [14]:
F1 = 32 # Number of features of 1st CL layer
K1 = 20 # Spatial support = polynomial order of 1st CL layer
F2 = 64 # Number of features of 2nd CL layer
K2 = 20 # Spatial support = polynomial order of 2nd CL layer
NFC = 512 # Number of nodes for 1st FC layer
NCLASSES = 10 # Number of classes
M0 = L[0].shape[0]
class GCNN_LeNet5(base_model):
def __init__(self):
print('Graph CNN Architecture: LeNet5')
#print(K1,K2)
super().__init__()
self.W1 = self._weight_variable_spectral([K1, F1], regularization=False)
self.b1 = self._bias_variable([1, 1, F1], regularization=False)
self.W2 = self._weight_variable_spectral([F1*K1, F2], regularization=False)
self.b2 = self._bias_variable([1, 1, F2], regularization=False)
self.W3 = self._weight_variable([int(M0/16*F2), NFC], regularization=True)
self.b3 = self._bias_variable([NFC], regularization=True)
self.W4 = self._weight_variable([NFC, NCLASSES], regularization=True)
self.b4 = self._bias_variable([NCLASSES], regularization=True)
def inference(self, x, L, lmax, d):
# Graph convolutional layers
layer_name = 'CN32'
with tf.name_scope(layer_name):
# Graph filtering
x = tf.expand_dims(x, 2) # N x M x F=1
x = self.chebyshev5(x, L[0], lmax[0], F1, K1, self.W1)
x = x + self.b1
# Non-linear activation
x = tf.nn.relu(x)
# Max pooling
layer_name = 'MP4'
with tf.name_scope(layer_name):
x = self.mpool1(x,4)
# Graph convolutional layer
layer_name = 'CN64'
with tf.name_scope(layer_name):
# Graph filtering
x = self.chebyshev5(x, L[2], lmax[2], F2, K2, self.W2)
x = x + self.b2
# Non-linear activation
x = tf.nn.relu(x)
# Max pooling
layer_name = 'MP4'
with tf.name_scope(layer_name):
x = self.mpool1(x,4)
# Fully connected hidden layer
layer_name = 'FC512'
with tf.name_scope(layer_name):
N, M, F = x.get_shape()
x = tf.reshape(x, [int(N), int(M0/16*F2)]) # N x M_0/4*F2
x = tf.matmul(x, self.W3) + self.b3
x = tf.nn.relu(x)
# Dropout
x = tf.nn.dropout(x, d)
layer_name = 'FC10'
with tf.name_scope(layer_name):
x = tf.matmul(x, self.W4) + self.b4
return x
In [24]:
# Comment/uncomment
NN_1Layer=False
NN_1Layer=True
if NN_1Layer==True:
model = GCNN_1CL_1MP_1FC()
FLAGS.learning_rate = 0.05
FLAGS.regularization = 5e-2
FLAGS.dropout = 0.75
print(FLAGS.learning_rate,FLAGS.regularization,FLAGS.dropout)
In [26]:
# Comment/uncomment
NN_LeNet5=False
#NN_LeNet5=True
if NN_LeNet5==True:
model = GCNN_LeNet5()
FLAGS.learning_rate = 0.5
FLAGS.regularization = 5e-4
FLAGS.dropout = 0.5
print(FLAGS.learning_rate,FLAGS.regularization,FLAGS.dropout)
In [27]:
# Parameters
num_epochs = 10
num_epochs = 2 # Early stop
train_size = train_data.shape[0]
nb_iter = int(num_epochs * train_size) // FLAGS.batch_size
print('num_epochs=',num_epochs,', train_size=',train_size,', nb_iter=',nb_iter)
# Construct computational graph
x = tf.placeholder(tf.float32, (FLAGS.batch_size, L[0].shape[0]))
y = tf.placeholder(tf.int32, (FLAGS.batch_size))
d = tf.placeholder(tf.float32)
logits = model.inference(x,L,lmax,d)
loss = model.loss(logits, y, FLAGS.regularization)
train_op = model.training(loss, FLAGS.learning_rate, train_size, FLAGS.batch_size)
evaluation = model.evaluation(logits, y)
prediction = model.prediction(logits)
In [28]:
# Faster test evaluation
test_data_evaluation = test_data
test_labels_evaluation = test_labels
if 1==1:
print(test_data.shape)
nb_selected = 1200
nb_selected = 200
test_data_evaluation = test_data[:nb_selected,:]
test_labels_evaluation = test_labels[:nb_selected]
print(test_data_evaluation.shape)
In [29]:
# Train
init = tf.initialize_all_variables()
sess = tf.Session()
# TensorFlow
# Merge all the summaries and write them out to /tmp/mnist_logs (by default)
writer = tf.train.SummaryWriter('tmp/mnist_logs' + '/run2', sess.graph)
op_summary = tf.merge_all_summaries()
# Start
sess.run(init)
indices = collections.deque()
tab_results = []
tab_last_epoch = []
start_last_epoch = nb_iter - train_size // FLAGS.batch_size
nb_samples_last_epoch = 25
freq_save_last_epoch = int(train_size // FLAGS.batch_size // (nb_samples_last_epoch-1))
acc_train = -1.0
loss_train = -1.0
print('num_epochs=',num_epochs,', nb_iter=',nb_iter)
t_start = time.process_time()
for i in range(nb_iter):
# Computational time
freq_iter = 10
if (i%freq_iter==0) & (i<=freq_iter):
print('iter={:d}, freq_iter={:d}, training time: {:.2f}s, acc_train={:2.2f}, loss_train={:2.2f}'
.format(i,freq_iter,time.process_time() - t_start,acc_train,loss_train))
t_start = time.process_time()
# Generic batch extraction
if len(indices) < FLAGS.batch_size:
indices.extend(np.random.permutation(train_data.shape[0])) # rand permutation
idx = [indices.popleft() for i in range(FLAGS.batch_size)] # extract batch_size data
batch_xs, batch_ys = train_data[idx,:], train_labels[idx]
if type(batch_xs) is not np.ndarray:
batch_xs = batch_xs.toarray() # convert to full matrices if sparse
# Run computational graph for weight learning
_,acc_train,loss_train = sess.run([train_op,evaluation,loss], feed_dict={x: batch_xs, y: batch_ys, d: FLAGS.dropout})
# Display, save results
if (i+1)%100==0:
t_start_testset = time.process_time()
# Compute test accuracy
data = test_data_evaluation
size = data.shape[0]
predictions = np.zeros(size, dtype='int32')
predictions_nodropout = np.zeros(size, dtype='int32')
for begin in range(0, size, FLAGS.batch_size):
end = begin + FLAGS.batch_size
end = min([end, size])
batch_data = np.zeros((FLAGS.batch_size, data.shape[1]))
tmp_data = data[begin:end,:]
if type(tmp_data) is not np.ndarray:
tmp_data = tmp_data.toarray()
batch_data[:end-begin] = tmp_data
batch_label = np.zeros((FLAGS.batch_size))
batch_pred = np.zeros((FLAGS.batch_size))
batch_pred = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: FLAGS.dropout}) # for test set, comment to speedup computations
predictions[begin:end] = batch_pred[:end-begin]
batch_pred_nodropout = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: 1.0})
predictions_nodropout[begin:end] = batch_pred_nodropout[:end-begin]
acc_test = 100.* float( sum(predictions == test_labels_evaluation) ) / float(size)
acc_test_nodropout = 100.* float( sum(predictions_nodropout == test_labels_evaluation) ) / float(size)
t_testset = time.process_time() - t_start_testset
print('iter={:d}, acc_train={:2.2f}, loss_train={:2.2f}, acc_test={:2.2f}, acc_test_nodropout={:2.2f}, test time={:.2f}s'
.format(i+1,acc_train,loss_train,acc_test,acc_test_nodropout,t_testset))
# Summaries for TensorBoard.
acc_train *= 1.0
acc_test *= 1.0
acc_test_nodropout *= 1.0
summary = tf.Summary()
summary.value.add(tag='acc_train', simple_value=acc_train)
summary.value.add(tag='acc_test', simple_value=acc_test)
summary.value.add(tag='acc_test_nodropout', simple_value=acc_test_nodropout)
writer.add_summary(summary, i+1)
# Save accuracy for last batch
data = test_data_evaluation
size = data.shape[0]
predictions_nodropout = np.zeros(size, dtype='int32')
for begin in range(0, size, FLAGS.batch_size):
end = begin + FLAGS.batch_size
end = min([end, size])
batch_data = np.zeros((FLAGS.batch_size, data.shape[1]))
tmp_data = data[begin:end,:]
if type(tmp_data) is not np.ndarray:
tmp_data = tmp_data.toarray() # convert sparse matrices
batch_data[:end-begin] = tmp_data
batch_label = np.zeros((FLAGS.batch_size))
batch_pred = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: FLAGS.dropout})
predictions[begin:end] = batch_pred[:end-begin]
batch_pred_nodropout = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: 1.0})
predictions_nodropout[begin:end] = batch_pred_nodropout[:end-begin]
acc_test_nodropout = 100.* float( sum(predictions_nodropout == test_labels_evaluation) ) / float(size)
print('final accuracy=',acc_test_nodropout)
writer.close()
print('Training time: {:.2f}s'.format(time.process_time() - t_start))
In [ ]: