# Data Science Course

## Code 2 : Convolutional Neural Networks for Graph-Structured Data

Implementation of CNNs for graph-structured data :
Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering
M Defferrard, X Bresson, P Vandergheynst
arXiv preprint arXiv:1606.09375
NIPS 2016

``````

In [1]:

import tensorflow as tf
import numpy as np
import time
import collections
import shutil
import os
import sklearn

``````
``````

In [3]:

# Import functions in lib folder
import sys
sys.path.insert(0, 'lib/')

%aimport graph
%aimport coarsening

``````
``````

``````

# MNIST Data

``````

In [4]:

flags = tf.app.flags # tf.app = a wrapper python-gflags
FLAGS = flags.FLAGS

# Data folder
flags.DEFINE_string('dir_data', 'datasets', 'Directory to store data')
flags.DEFINE_float('learning_rate', 0.2, 'Initial learning rate.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.')
flags.DEFINE_float('regularization', 0.0, 'L2 regularizations of weights and biases.')
flags.DEFINE_float('dropout', 1.0, 'Dropout')

# Graphs
flags.DEFINE_integer('number_edges', 8, 'Graph: minimum number of edges per vertex.')
flags.DEFINE_string('metric', 'euclidean', 'Graph: similarity measure (between features).')
# TODO: change cgcnn for combinatorial Laplacians.
flags.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.')
flags.DEFINE_integer('coarsening_levels', 4, 'Number of coarsened graphs.')

``````
``````

In [5]:

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False) # load data in local folder

``````
``````

Extracting datasets/train-images-idx3-ubyte.gz
Extracting datasets/train-labels-idx1-ubyte.gz
Extracting datasets/t10k-images-idx3-ubyte.gz
Extracting datasets/t10k-labels-idx1-ubyte.gz

``````
``````

In [6]:

train_data = mnist.train.images.astype(np.float32)
val_data = mnist.validation.images.astype(np.float32)
test_data = mnist.test.images.astype(np.float32)
train_labels = mnist.train.labels
val_labels = mnist.validation.labels
test_labels = mnist.test.labels
print(train_data.shape)
print(train_labels.shape)
print(val_data.shape)
print(val_labels.shape)
print(test_data.shape)
print(test_labels.shape)

``````
``````

(55000, 784)
(55000,)
(5000, 784)
(5000,)
(10000, 784)
(10000,)

``````

# Import Graph

Data lie on a network/graph

``````

In [7]:

# Graph of Euclidean grid
def grid_graph(m, corners=False):
z = graph.grid(m)
dist, idx = graph.distance_sklearn_metrics(z, k=FLAGS.number_edges, metric=FLAGS.metric)
A = graph.adjacency(dist, idx)

# Connections are only vertical or horizontal on the grid.
# Corner vertices are connected to 2 neightbors only.
if corners:
import scipy.sparse
A = A.toarray()
A[A < A.max()/1.5] = 0
A = scipy.sparse.csr_matrix(A)
print('{} edges'.format(A.nnz))

print("{} > {} edges".format(A.nnz//2, FLAGS.number_edges*m**2//2))
return A

``````
``````

In [8]:

# Coarsening with Metis-like technique
def coarsen(A, levels):
graphs, parents = coarsening.metis(A, levels)
perms = coarsening.compute_perm(parents)

laplacians = []
for i,A in enumerate(graphs):
M, M = A.shape

# No self-connections.
if True:
A = A.tocoo()
A.setdiag(0)

if i < levels:
A = coarsening.perm_adjacency(A, perms[i])

A = A.tocsr()
A.eliminate_zeros()
Mnew, Mnew = A.shape
print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added), |E| = {3} edges'.format(i, Mnew, Mnew-M, A.nnz//2))

L = graph.laplacian(A, normalized=FLAGS.normalized_laplacian)
laplacians.append(L)
return laplacians, perms[0] if len(perms) > 0 else None

``````
``````

In [9]:

# Construct graph
t_start = time.process_time()
A = grid_graph(28, corners=False)
#A = graph.replace_random_edges(A, 0)

# Compute coarsened graphs
L, perm = coarsen(A, FLAGS.coarsening_levels)
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))

# Compute max eigenvalue of graph Laplacians
lmax = []
lmax.append(graph.lmaxX(L[0]))
lmax.append(graph.lmaxX(L[1]))
lmax.append(graph.lmaxX(L[2]))
lmax.append(graph.lmaxX(L[3]))
#print(lmax[0],lmax[1],lmax[2],lmax[3])

``````
``````

3198 > 3136 edges
Layer 0: M_0 = |V| = 1056 nodes (272 added), |E| = 3198 edges
Layer 1: M_1 = |V| = 528 nodes (112 added), |E| = 1428 edges
Layer 2: M_2 = |V| = 264 nodes (42 added), |E| = 692 edges
Layer 3: M_3 = |V| = 132 nodes (15 added), |E| = 331 edges
Layer 4: M_4 = |V| = 66 nodes (0 added), |E| = 172 edges
Execution time: 0.14s

``````
``````

In [11]:

# Reindex nodes to satisfy a binary tree structure
t_start = time.process_time()
train_data = coarsening.perm_data(train_data, perm)
val_data = coarsening.perm_data(val_data, perm)
test_data = coarsening.perm_data(test_data, perm)
print(train_data.shape)
print(val_data.shape)
print(test_data.shape)
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
#del perm

``````
``````

(55000, 1056)
(5000, 1056)
(10000, 1056)
Execution time: 1.59s

``````

# Neural networks

``````

In [12]:

# Define generic class of Neural Networks
class base_model(object):

# Constructor
def __init__(self):
self.regularizers = 0 # L2 regularizers

# Private methods
def _weight_variable(self, shape, regularization=True):
initial = tf.truncated_normal(shape, stddev=0.1)
var = tf.Variable(initial, name='weights')
if regularization:
self.regularizers += tf.nn.l2_loss(var)
#tf.histogram_summary(var.op.name, var)
return var

def _weight_variable_spectral(self, shape, regularization=True):
initial = tf.truncated_normal(shape, stddev=0.1)
#initial = tf.abs(initial) # XAV
var = tf.Variable(initial, name='weights')
if regularization:
self.regularizers += tf.nn.l2_loss(var)
#tf.histogram_summary(var.op.name, var)
return var

def _bias_variable(self, shape, regularization=True):
initial = tf.constant(0.1, shape=shape)
var = tf.Variable(initial, name='bias')
if regularization:
self.regularizers += tf.nn.l2_loss(var)
#tf.histogram_summary(var.op.name, var)
return var

# Public methods
def loss(self, logits, labels, regularization):
labels = tf.to_int64(labels)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels, name='xentropy')
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
loss += regularization * self.regularizers
#tf.scalar_summary('loss', loss) # Tensorboard
return loss

# Optimization
def training(self, loss, learning_rate, train_size, batch_size):
# Optimizer: set up a variable that's incremented once per batch and
# controls the learning rate decay.
batch = tf.Variable(0)
# Decay once per epoch, using an exponential schedule starting at 0.01.
learning_rate = tf.train.exponential_decay(
0.01,                # Base learning rate.
batch * batch_size,  # Current index into the dataset.
train_size,          # Decay step.
0.95,                # Decay rate.
staircase=True)
# Use simple momentum for the optimization.
optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
train_op = optimizer.minimize(loss, global_step=batch)
return train_op

def evaluation(self, logits, labels):
output_classes = tf.cast(tf.argmax(tf.nn.softmax(logits),1), tf.int32)
acc = 100.* tf.reduce_sum(tf.cast(tf.equal(output_classes,labels), tf.float32))/ tf.cast(tf.shape(logits)[0], tf.float32)
return acc

def prediction(self, logits):
"""Return the predicted classes."""
output_classes = tf.cast(tf.argmax(tf.nn.softmax(logits),1), tf.int32)
return output_classes

def mpool1(self, x, p):
"""Max pooling of size p. Should be a power of 2."""
if p > 1:
x = tf.expand_dims(x, 3)  # N x M x F x 1
x = tf.nn.max_pool(x, ksize=[1,p,1,1], strides=[1,p,1,1], padding='SAME')
return tf.squeeze(x, [3])  # N x M/p x F
else:
return x

def chebyshev5(self, x, L, lmax, Fout, K, W):
N, M, Fin = x.get_shape()
N, M, Fin = int(N), int(M), int(Fin)
L = graph.rescale_L(L, lmax)
L = L.tocoo()
indices = np.column_stack((L.row, L.col))
L = tf.SparseTensor(indices, L.data, L.shape)
L = tf.sparse_reorder(L)
# Transform to Chebyshev basis
x0 = tf.transpose(x, perm=[1, 2, 0])  # M x Fin x N
x0 = tf.reshape(x0, [M, Fin*N])  # M x Fin*N
x = tf.expand_dims(x0, 0)  # 1 x M x Fin*N
def concat(x, x_):
x_ = tf.expand_dims(x_, 0)  # 1 x M x Fin*N
return tf.concat(0, [x, x_])  # K x M x Fin*N
if K > 1:
x1 = tf.sparse_tensor_dense_matmul(L, x0)
x = concat(x, x1)
for k in range(2, K):
x2 = 2 * tf.sparse_tensor_dense_matmul(L, x1) - x0  # M x Fin*N
x = concat(x, x2)
x0, x1 = x1, x2
x = tf.reshape(x, [K, M, Fin, N])  # K x M x Fin x N
x = tf.transpose(x, perm=[3,1,2,0])  # N x M x Fin x K
x = tf.reshape(x, [N*M, Fin*K])  # N*M x Fin*K
# Filter: Fin*Fout filters of order K, i.e. one filterbank per feature.
x = tf.matmul(x, W)  # N*M x Fout
out = tf.reshape(x, [N, M, Fout])  # N x M x Fout
return out

``````

# Simple Test: CL32-MP4-FC10

``````

In [13]:

F1 = 32 # Number of features
K1 = 20 # Spatial support = polynomial order
NCLASSES = 10 # Number of classes
M0 = L[0].shape[0]

class GCNN_1CL_1MP_1FC(base_model):

def __init__(self):

print('Graph CNN Architecture: 1CL+1MP+1FC')
super().__init__()
self.W1 = self._weight_variable_spectral([K1, F1], regularization=False)
self.b1 = self._bias_variable([1, 1, F1], regularization=False)
self.W2 = self._weight_variable([int(M0/4*F1), NCLASSES], regularization=True)
self.b2 = self._bias_variable([NCLASSES], regularization=True)

def inference(self, x, L, lmax, d):

# Graph convolutional layers
layer_name = 'CN32'
with tf.name_scope(layer_name):
# Graph filtering
x = tf.expand_dims(x, 2)  # N x M x F=1
x = self.chebyshev5(x, L[0], lmax[0], F1, K1, self.W1)
x = x + self.b1
# Non-linear activation
x = tf.nn.relu(x)

# Max pooling
layer_name = 'MP4'
with tf.name_scope(layer_name):
x = self.mpool1(x,4)
# Dropout
x = tf.nn.dropout(x, d)

# Fully connected hidden layers
layer_name = 'FC10'
with tf.name_scope(layer_name):
N, M, F = x.get_shape()
x = tf.reshape(x, [int(N), int(M0/4*F1)])  # N x M
x = tf.matmul(x, self.W2) + self.b2

return x

``````

# LeNet5: CL32-MP4-CL64-MP4-FC512-FC10

``````

In [14]:

F1 = 32 # Number of features of 1st CL layer
K1 = 20 # Spatial support = polynomial order of 1st CL layer
F2 = 64 # Number of features of 2nd CL layer
K2 = 20 # Spatial support = polynomial order of 2nd CL layer
NFC = 512 # Number of nodes for 1st FC layer
NCLASSES = 10 # Number of classes
M0 = L[0].shape[0]

class GCNN_LeNet5(base_model):

def __init__(self):

print('Graph CNN Architecture: LeNet5')
#print(K1,K2)
super().__init__()
self.W1 = self._weight_variable_spectral([K1, F1], regularization=False)
self.b1 = self._bias_variable([1, 1, F1], regularization=False)
self.W2 = self._weight_variable_spectral([F1*K1, F2], regularization=False)
self.b2 = self._bias_variable([1, 1, F2], regularization=False)
self.W3 = self._weight_variable([int(M0/16*F2), NFC], regularization=True)
self.b3 = self._bias_variable([NFC], regularization=True)
self.W4 = self._weight_variable([NFC, NCLASSES], regularization=True)
self.b4 = self._bias_variable([NCLASSES], regularization=True)

def inference(self, x, L, lmax, d):

# Graph convolutional layers
layer_name = 'CN32'
with tf.name_scope(layer_name):
# Graph filtering
x = tf.expand_dims(x, 2)  # N x M x F=1
x = self.chebyshev5(x, L[0], lmax[0], F1, K1, self.W1)
x = x + self.b1
# Non-linear activation
x = tf.nn.relu(x)

# Max pooling
layer_name = 'MP4'
with tf.name_scope(layer_name):
x = self.mpool1(x,4)

# Graph convolutional layer
layer_name = 'CN64'
with tf.name_scope(layer_name):
# Graph filtering
x = self.chebyshev5(x, L[2], lmax[2], F2, K2, self.W2)
x = x + self.b2
# Non-linear activation
x = tf.nn.relu(x)

# Max pooling
layer_name = 'MP4'
with tf.name_scope(layer_name):
x = self.mpool1(x,4)

# Fully connected hidden layer
layer_name = 'FC512'
with tf.name_scope(layer_name):
N, M, F = x.get_shape()
x = tf.reshape(x, [int(N), int(M0/16*F2)])  # N x M_0/4*F2
x = tf.matmul(x, self.W3) + self.b3
x = tf.nn.relu(x)
# Dropout
x = tf.nn.dropout(x, d)

layer_name = 'FC10'
with tf.name_scope(layer_name):
x = tf.matmul(x, self.W4) + self.b4

return x

``````

# Select NN model

``````

In [24]:

# Comment/uncomment
NN_1Layer=False
NN_1Layer=True
if NN_1Layer==True:
model = GCNN_1CL_1MP_1FC()
FLAGS.learning_rate = 0.05
FLAGS.regularization = 5e-2
FLAGS.dropout = 0.75
print(FLAGS.learning_rate,FLAGS.regularization,FLAGS.dropout)

``````
``````

CNN Architecture: 1CL+1MP+1FC
0.05 0.05 0.75

``````
``````

In [26]:

# Comment/uncomment
NN_LeNet5=False
#NN_LeNet5=True
if NN_LeNet5==True:
model = GCNN_LeNet5()
FLAGS.learning_rate = 0.5
FLAGS.regularization = 5e-4
FLAGS.dropout = 0.5
print(FLAGS.learning_rate,FLAGS.regularization,FLAGS.dropout)

``````
``````

In [27]:

# Parameters
num_epochs = 10
num_epochs = 2 # Early stop
train_size = train_data.shape[0]
nb_iter = int(num_epochs * train_size) // FLAGS.batch_size
print('num_epochs=',num_epochs,', train_size=',train_size,', nb_iter=',nb_iter)

# Construct computational graph
x = tf.placeholder(tf.float32, (FLAGS.batch_size, L[0].shape[0]))
y = tf.placeholder(tf.int32, (FLAGS.batch_size))
d = tf.placeholder(tf.float32)
logits = model.inference(x,L,lmax,d)
loss = model.loss(logits, y, FLAGS.regularization)
train_op = model.training(loss, FLAGS.learning_rate, train_size, FLAGS.batch_size)
evaluation = model.evaluation(logits, y)
prediction = model.prediction(logits)

``````
``````

num_epochs= 2 , train_size= 55000 , nb_iter= 1100

``````
``````

In [28]:

# Faster test evaluation
test_data_evaluation = test_data
test_labels_evaluation = test_labels
if 1==1:
print(test_data.shape)
nb_selected = 1200
nb_selected = 200
test_data_evaluation = test_data[:nb_selected,:]
test_labels_evaluation = test_labels[:nb_selected]
print(test_data_evaluation.shape)

``````
``````

(10000, 1056)
(200, 1056)

``````
``````

In [29]:

# Train
init = tf.initialize_all_variables()
sess = tf.Session()

# TensorFlow
# Merge all the summaries and write them out to /tmp/mnist_logs (by default)
writer = tf.train.SummaryWriter('tmp/mnist_logs' + '/run2', sess.graph)
op_summary = tf.merge_all_summaries()

# Start
sess.run(init)
indices = collections.deque()
tab_results = []
tab_last_epoch = []
start_last_epoch = nb_iter - train_size // FLAGS.batch_size
nb_samples_last_epoch = 25
freq_save_last_epoch = int(train_size // FLAGS.batch_size // (nb_samples_last_epoch-1))
acc_train = -1.0
loss_train = -1.0
print('num_epochs=',num_epochs,', nb_iter=',nb_iter)
t_start = time.process_time()
for i in range(nb_iter):

# Computational time
freq_iter = 10
if (i%freq_iter==0) & (i<=freq_iter):
print('iter={:d}, freq_iter={:d}, training time: {:.2f}s, acc_train={:2.2f}, loss_train={:2.2f}'
.format(i,freq_iter,time.process_time() - t_start,acc_train,loss_train))
t_start = time.process_time()

# Generic batch extraction
if len(indices) < FLAGS.batch_size:
indices.extend(np.random.permutation(train_data.shape[0])) # rand permutation
idx = [indices.popleft() for i in range(FLAGS.batch_size)] # extract batch_size data
batch_xs, batch_ys = train_data[idx,:], train_labels[idx]
if type(batch_xs) is not np.ndarray:
batch_xs = batch_xs.toarray()  # convert to full matrices if sparse

# Run computational graph for weight learning
_,acc_train,loss_train = sess.run([train_op,evaluation,loss], feed_dict={x: batch_xs, y: batch_ys, d: FLAGS.dropout})

# Display, save results
if (i+1)%100==0:

t_start_testset = time.process_time()

# Compute test accuracy
data = test_data_evaluation
size = data.shape[0]
predictions = np.zeros(size, dtype='int32')
predictions_nodropout = np.zeros(size, dtype='int32')
for begin in range(0, size, FLAGS.batch_size):
end = begin + FLAGS.batch_size
end = min([end, size])
batch_data = np.zeros((FLAGS.batch_size, data.shape[1]))
tmp_data = data[begin:end,:]
if type(tmp_data) is not np.ndarray:
tmp_data = tmp_data.toarray()
batch_data[:end-begin] = tmp_data
batch_label = np.zeros((FLAGS.batch_size))
batch_pred = np.zeros((FLAGS.batch_size))
batch_pred = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: FLAGS.dropout}) # for test set, comment to speedup computations
predictions[begin:end] = batch_pred[:end-begin]
batch_pred_nodropout = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: 1.0})
predictions_nodropout[begin:end] = batch_pred_nodropout[:end-begin]
acc_test = 100.* float( sum(predictions == test_labels_evaluation) ) / float(size)
acc_test_nodropout = 100.* float( sum(predictions_nodropout == test_labels_evaluation) ) / float(size)

t_testset = time.process_time() - t_start_testset

print('iter={:d}, acc_train={:2.2f}, loss_train={:2.2f}, acc_test={:2.2f}, acc_test_nodropout={:2.2f}, test time={:.2f}s'
.format(i+1,acc_train,loss_train,acc_test,acc_test_nodropout,t_testset))

# Summaries for TensorBoard.
acc_train *= 1.0
acc_test *= 1.0
acc_test_nodropout *= 1.0
summary = tf.Summary()

# Save accuracy for last batch
data = test_data_evaluation
size = data.shape[0]
predictions_nodropout = np.zeros(size, dtype='int32')
for begin in range(0, size, FLAGS.batch_size):
end = begin + FLAGS.batch_size
end = min([end, size])
batch_data = np.zeros((FLAGS.batch_size, data.shape[1]))
tmp_data = data[begin:end,:]
if type(tmp_data) is not np.ndarray:
tmp_data = tmp_data.toarray()  # convert sparse matrices
batch_data[:end-begin] = tmp_data
batch_label = np.zeros((FLAGS.batch_size))
batch_pred = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: FLAGS.dropout})
predictions[begin:end] = batch_pred[:end-begin]
batch_pred_nodropout = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: 1.0})
predictions_nodropout[begin:end] = batch_pred_nodropout[:end-begin]
acc_test_nodropout = 100.* float( sum(predictions_nodropout == test_labels_evaluation) ) / float(size)
print('final accuracy=',acc_test_nodropout)

writer.close()

print('Training time: {:.2f}s'.format(time.process_time() - t_start))

``````
``````

num_epochs= 2 , nb_iter= 1100
iter=0, freq_iter=10, training time: 0.00s, acc_train=-1.00, loss_train=-1.00
iter=10, freq_iter=10, training time: 6.13s, acc_train=15.00, loss_train=18.09
iter=100, acc_train=87.00, loss_train=6.96, acc_test=86.00, acc_test_nodropout=89.50, test time=1.52s
iter=200, acc_train=83.00, loss_train=2.80, acc_test=92.00, acc_test_nodropout=92.50, test time=1.45s
iter=300, acc_train=89.00, loss_train=1.31, acc_test=90.50, acc_test_nodropout=92.00, test time=1.55s
iter=400, acc_train=87.00, loss_train=0.90, acc_test=90.50, acc_test_nodropout=91.50, test time=1.42s
iter=500, acc_train=87.00, loss_train=0.63, acc_test=88.00, acc_test_nodropout=90.00, test time=1.50s
iter=600, acc_train=84.00, loss_train=0.57, acc_test=89.00, acc_test_nodropout=93.00, test time=1.43s
iter=700, acc_train=86.00, loss_train=0.62, acc_test=91.00, acc_test_nodropout=91.00, test time=1.52s
iter=800, acc_train=88.00, loss_train=0.68, acc_test=88.00, acc_test_nodropout=89.00, test time=1.46s
iter=900, acc_train=90.00, loss_train=0.44, acc_test=92.50, acc_test_nodropout=92.00, test time=1.44s
iter=1000, acc_train=94.00, loss_train=0.35, acc_test=90.50, acc_test_nodropout=90.00, test time=1.42s
iter=1100, acc_train=91.00, loss_train=0.48, acc_test=91.50, acc_test_nodropout=91.50, test time=1.49s
final accuracy= 91.5
Training time: 679.35s

``````
``````

In [ ]:

``````