Data Science Course

      Xavier Bresson, Sept. 2016

Lecture 11 : Deep Learning 3 - Convolutional Neural Networks

Code 2 : Convolutional Neural Networks for Graph-Structured Data

Implementation of CNNs for graph-structured data :
Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering
M Defferrard, X Bresson, P Vandergheynst
arXiv preprint arXiv:1606.09375
NIPS 2016


In [1]:
import tensorflow as tf
import numpy as np
import time
import collections
import shutil
import os
import sklearn

In [3]:
# Auto-reloading external modules
%load_ext autoreload
%autoreload 1

# Import functions in lib folder
import sys
sys.path.insert(0, 'lib/')

%aimport graph
%aimport coarsening


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

MNIST Data


In [4]:
flags = tf.app.flags # tf.app = a wrapper python-gflags
FLAGS = flags.FLAGS

# Data folder
flags.DEFINE_string('dir_data', 'datasets', 'Directory to store data')
flags.DEFINE_float('learning_rate', 0.2, 'Initial learning rate.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.')
flags.DEFINE_float('regularization', 0.0, 'L2 regularizations of weights and biases.')
flags.DEFINE_float('dropout', 1.0, 'Dropout')

# Graphs
flags.DEFINE_integer('number_edges', 8, 'Graph: minimum number of edges per vertex.')
flags.DEFINE_string('metric', 'euclidean', 'Graph: similarity measure (between features).')
# TODO: change cgcnn for combinatorial Laplacians.
flags.DEFINE_bool('normalized_laplacian', True, 'Graph Laplacian: normalized.')
flags.DEFINE_integer('coarsening_levels', 4, 'Number of coarsened graphs.')

In [5]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets(FLAGS.dir_data, one_hot=False) # load data in local folder


Extracting datasets/train-images-idx3-ubyte.gz
Extracting datasets/train-labels-idx1-ubyte.gz
Extracting datasets/t10k-images-idx3-ubyte.gz
Extracting datasets/t10k-labels-idx1-ubyte.gz

In [6]:
train_data = mnist.train.images.astype(np.float32)
val_data = mnist.validation.images.astype(np.float32)
test_data = mnist.test.images.astype(np.float32)
train_labels = mnist.train.labels
val_labels = mnist.validation.labels
test_labels = mnist.test.labels
print(train_data.shape)
print(train_labels.shape)
print(val_data.shape)
print(val_labels.shape)
print(test_data.shape)
print(test_labels.shape)


(55000, 784)
(55000,)
(5000, 784)
(5000,)
(10000, 784)
(10000,)

Import Graph

Data lie on a network/graph


In [7]:
# Graph of Euclidean grid
def grid_graph(m, corners=False):
    z = graph.grid(m)
    dist, idx = graph.distance_sklearn_metrics(z, k=FLAGS.number_edges, metric=FLAGS.metric)
    A = graph.adjacency(dist, idx)

    # Connections are only vertical or horizontal on the grid.
    # Corner vertices are connected to 2 neightbors only.
    if corners:
        import scipy.sparse
        A = A.toarray()
        A[A < A.max()/1.5] = 0
        A = scipy.sparse.csr_matrix(A)
        print('{} edges'.format(A.nnz))

    print("{} > {} edges".format(A.nnz//2, FLAGS.number_edges*m**2//2))
    return A

In [8]:
# Coarsening with Metis-like technique
def coarsen(A, levels):
    graphs, parents = coarsening.metis(A, levels)
    perms = coarsening.compute_perm(parents)

    laplacians = []
    for i,A in enumerate(graphs):
        M, M = A.shape

        # No self-connections.
        if True:
            A = A.tocoo()
            A.setdiag(0)

        if i < levels:
            A = coarsening.perm_adjacency(A, perms[i])

        A = A.tocsr()
        A.eliminate_zeros()
        Mnew, Mnew = A.shape
        print('Layer {0}: M_{0} = |V| = {1} nodes ({2} added), |E| = {3} edges'.format(i, Mnew, Mnew-M, A.nnz//2))

        L = graph.laplacian(A, normalized=FLAGS.normalized_laplacian)
        laplacians.append(L)
    return laplacians, perms[0] if len(perms) > 0 else None

In [9]:
# Construct graph
t_start = time.process_time()
A = grid_graph(28, corners=False)
#A = graph.replace_random_edges(A, 0)

# Compute coarsened graphs
L, perm = coarsen(A, FLAGS.coarsening_levels)
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))

# Compute max eigenvalue of graph Laplacians
lmax = []
lmax.append(graph.lmaxX(L[0]))
lmax.append(graph.lmaxX(L[1]))
lmax.append(graph.lmaxX(L[2]))
lmax.append(graph.lmaxX(L[3]))
#print(lmax[0],lmax[1],lmax[2],lmax[3])


3198 > 3136 edges
Layer 0: M_0 = |V| = 1056 nodes (272 added), |E| = 3198 edges
Layer 1: M_1 = |V| = 528 nodes (112 added), |E| = 1428 edges
Layer 2: M_2 = |V| = 264 nodes (42 added), |E| = 692 edges
Layer 3: M_3 = |V| = 132 nodes (15 added), |E| = 331 edges
Layer 4: M_4 = |V| = 66 nodes (0 added), |E| = 172 edges
Execution time: 0.14s

In [11]:
# Reindex nodes to satisfy a binary tree structure
t_start = time.process_time()
train_data = coarsening.perm_data(train_data, perm)
val_data = coarsening.perm_data(val_data, perm)
test_data = coarsening.perm_data(test_data, perm)
print(train_data.shape)
print(val_data.shape)
print(test_data.shape)
print('Execution time: {:.2f}s'.format(time.process_time() - t_start))
#del perm


(55000, 1056)
(5000, 1056)
(10000, 1056)
Execution time: 1.59s

Neural networks


In [12]:
# Define generic class of Neural Networks
class base_model(object):
    
    # Constructor
    def __init__(self):
        self.regularizers = 0 # L2 regularizers
      
    # Private methods
    def _weight_variable(self, shape, regularization=True):
        initial = tf.truncated_normal(shape, stddev=0.1) 
        var = tf.Variable(initial, name='weights')
        if regularization:
            self.regularizers += tf.nn.l2_loss(var)
        #tf.histogram_summary(var.op.name, var)
        return var

    def _weight_variable_spectral(self, shape, regularization=True):
        initial = tf.truncated_normal(shape, stddev=0.1) 
        #initial = tf.abs(initial) # XAV
        var = tf.Variable(initial, name='weights')
        if regularization:
            self.regularizers += tf.nn.l2_loss(var)
        #tf.histogram_summary(var.op.name, var)
        return var

    def _bias_variable(self, shape, regularization=True):
        initial = tf.constant(0.1, shape=shape)
        var = tf.Variable(initial, name='bias')
        if regularization:
            self.regularizers += tf.nn.l2_loss(var)
        #tf.histogram_summary(var.op.name, var)
        return var
    
    # Public methods
    def loss(self, logits, labels, regularization): 
        labels = tf.to_int64(labels)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels, name='xentropy')
        loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
        loss += regularization * self.regularizers     
        #tf.scalar_summary('loss', loss) # Tensorboard     
        return loss
   
    # Optimization
    def training(self, loss, learning_rate, train_size, batch_size):            
        # Optimizer: set up a variable that's incremented once per batch and
        # controls the learning rate decay.
        batch = tf.Variable(0)
        # Decay once per epoch, using an exponential schedule starting at 0.01.
        learning_rate = tf.train.exponential_decay(
                0.01,                # Base learning rate.
                batch * batch_size,  # Current index into the dataset.
                train_size,          # Decay step.
                0.95,                # Decay rate.
                staircase=True)
        # Use simple momentum for the optimization.
        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)  
        train_op = optimizer.minimize(loss, global_step=batch) 
        return train_op
    
    def evaluation(self, logits, labels):
        output_classes = tf.cast(tf.argmax(tf.nn.softmax(logits),1), tf.int32)
        acc = 100.* tf.reduce_sum(tf.cast(tf.equal(output_classes,labels), tf.float32))/ tf.cast(tf.shape(logits)[0], tf.float32)
        return acc

    def prediction(self, logits):
        """Return the predicted classes."""
        output_classes = tf.cast(tf.argmax(tf.nn.softmax(logits),1), tf.int32)
        return output_classes
    
    def mpool1(self, x, p):
        """Max pooling of size p. Should be a power of 2."""
        if p > 1:
            x = tf.expand_dims(x, 3)  # N x M x F x 1
            x = tf.nn.max_pool(x, ksize=[1,p,1,1], strides=[1,p,1,1], padding='SAME')
            return tf.squeeze(x, [3])  # N x M/p x F
        else:
            return x        

    def chebyshev5(self, x, L, lmax, Fout, K, W):
        N, M, Fin = x.get_shape()
        N, M, Fin = int(N), int(M), int(Fin) 
        L = graph.rescale_L(L, lmax) 
        L = L.tocoo() 
        indices = np.column_stack((L.row, L.col)) 
        L = tf.SparseTensor(indices, L.data, L.shape) 
        L = tf.sparse_reorder(L) 
        # Transform to Chebyshev basis
        x0 = tf.transpose(x, perm=[1, 2, 0])  # M x Fin x N 
        x0 = tf.reshape(x0, [M, Fin*N])  # M x Fin*N
        x = tf.expand_dims(x0, 0)  # 1 x M x Fin*N
        def concat(x, x_):
            x_ = tf.expand_dims(x_, 0)  # 1 x M x Fin*N
            return tf.concat(0, [x, x_])  # K x M x Fin*N        
        if K > 1: 
            x1 = tf.sparse_tensor_dense_matmul(L, x0) 
            x = concat(x, x1) 
        for k in range(2, K):
            x2 = 2 * tf.sparse_tensor_dense_matmul(L, x1) - x0  # M x Fin*N
            x = concat(x, x2)
            x0, x1 = x1, x2      
        x = tf.reshape(x, [K, M, Fin, N])  # K x M x Fin x N
        x = tf.transpose(x, perm=[3,1,2,0])  # N x M x Fin x K
        x = tf.reshape(x, [N*M, Fin*K])  # N*M x Fin*K
        # Filter: Fin*Fout filters of order K, i.e. one filterbank per feature.
        x = tf.matmul(x, W)  # N*M x Fout
        out = tf.reshape(x, [N, M, Fout])  # N x M x Fout
        return out

Simple Test: CL32-MP4-FC10


In [13]:
F1 = 32 # Number of features
K1 = 20 # Spatial support = polynomial order
NCLASSES = 10 # Number of classes
M0 = L[0].shape[0]
    
class GCNN_1CL_1MP_1FC(base_model):
    
    def __init__(self):
        
        print('Graph CNN Architecture: 1CL+1MP+1FC')
        super().__init__()
        self.W1 = self._weight_variable_spectral([K1, F1], regularization=False)
        self.b1 = self._bias_variable([1, 1, F1], regularization=False)
        self.W2 = self._weight_variable([int(M0/4*F1), NCLASSES], regularization=True)
        self.b2 = self._bias_variable([NCLASSES], regularization=True)
        
    def inference(self, x, L, lmax, d):
        
        # Graph convolutional layers
        layer_name = 'CN32'
        with tf.name_scope(layer_name):
            # Graph filtering
            x = tf.expand_dims(x, 2)  # N x M x F=1 
            x = self.chebyshev5(x, L[0], lmax[0], F1, K1, self.W1)           
            x = x + self.b1            
            # Non-linear activation
            x = tf.nn.relu(x)
    
        # Max pooling
        layer_name = 'MP4' 
        with tf.name_scope(layer_name):
            x = self.mpool1(x,4)
            # Dropout
            x = tf.nn.dropout(x, d)      
            
        # Fully connected hidden layers
        layer_name = 'FC10'
        with tf.name_scope(layer_name):
            N, M, F = x.get_shape()
            x = tf.reshape(x, [int(N), int(M0/4*F1)])  # N x M   
            x = tf.matmul(x, self.W2) + self.b2
                    
        return x

LeNet5: CL32-MP4-CL64-MP4-FC512-FC10


In [14]:
F1 = 32 # Number of features of 1st CL layer
K1 = 20 # Spatial support = polynomial order of 1st CL layer
F2 = 64 # Number of features of 2nd CL layer
K2 = 20 # Spatial support = polynomial order of 2nd CL layer
NFC = 512 # Number of nodes for 1st FC layer
NCLASSES = 10 # Number of classes
M0 = L[0].shape[0]
    
class GCNN_LeNet5(base_model):
    
    def __init__(self):
        
        print('Graph CNN Architecture: LeNet5')
        #print(K1,K2)
        super().__init__()
        self.W1 = self._weight_variable_spectral([K1, F1], regularization=False)
        self.b1 = self._bias_variable([1, 1, F1], regularization=False)
        self.W2 = self._weight_variable_spectral([F1*K1, F2], regularization=False)
        self.b2 = self._bias_variable([1, 1, F2], regularization=False)
        self.W3 = self._weight_variable([int(M0/16*F2), NFC], regularization=True)
        self.b3 = self._bias_variable([NFC], regularization=True)
        self.W4 = self._weight_variable([NFC, NCLASSES], regularization=True)
        self.b4 = self._bias_variable([NCLASSES], regularization=True)
        
    def inference(self, x, L, lmax, d):
        
        # Graph convolutional layers
        layer_name = 'CN32' 
        with tf.name_scope(layer_name):
            # Graph filtering
            x = tf.expand_dims(x, 2)  # N x M x F=1 
            x = self.chebyshev5(x, L[0], lmax[0], F1, K1, self.W1)
            x = x + self.b1
            # Non-linear activation
            x = tf.nn.relu(x)
    
        # Max pooling
        layer_name = 'MP4' 
        with tf.name_scope(layer_name):
            x = self.mpool1(x,4)
            
        # Graph convolutional layer
        layer_name = 'CN64' 
        with tf.name_scope(layer_name):
            # Graph filtering
            x = self.chebyshev5(x, L[2], lmax[2], F2, K2, self.W2) 
            x = x + self.b2
            # Non-linear activation
            x = tf.nn.relu(x)
            
        # Max pooling
        layer_name = 'MP4' 
        with tf.name_scope(layer_name):
            x = self.mpool1(x,4)
            
        # Fully connected hidden layer
        layer_name = 'FC512'
        with tf.name_scope(layer_name):
            N, M, F = x.get_shape()
            x = tf.reshape(x, [int(N), int(M0/16*F2)])  # N x M_0/4*F2
            x = tf.matmul(x, self.W3) + self.b3 
            x = tf.nn.relu(x)           
            # Dropout
            x = tf.nn.dropout(x, d)
                
        layer_name = 'FC10'
        with tf.name_scope(layer_name):
            x = tf.matmul(x, self.W4) + self.b4 
                        
        return x

Select NN model


In [24]:
# Comment/uncomment
NN_1Layer=False
NN_1Layer=True
if NN_1Layer==True:
    model = GCNN_1CL_1MP_1FC()
    FLAGS.learning_rate = 0.05
    FLAGS.regularization = 5e-2
    FLAGS.dropout = 0.75
    print(FLAGS.learning_rate,FLAGS.regularization,FLAGS.dropout)


CNN Architecture: 1CL+1MP+1FC
0.05 0.05 0.75

In [26]:
# Comment/uncomment
NN_LeNet5=False
#NN_LeNet5=True
if NN_LeNet5==True:
    model = GCNN_LeNet5()
    FLAGS.learning_rate = 0.5
    FLAGS.regularization = 5e-4
    FLAGS.dropout = 0.5
    print(FLAGS.learning_rate,FLAGS.regularization,FLAGS.dropout)

In [27]:
# Parameters
num_epochs = 10
num_epochs = 2 # Early stop
train_size = train_data.shape[0]
nb_iter = int(num_epochs * train_size) // FLAGS.batch_size
print('num_epochs=',num_epochs,', train_size=',train_size,', nb_iter=',nb_iter)

# Construct computational graph
x = tf.placeholder(tf.float32, (FLAGS.batch_size, L[0].shape[0]))
y = tf.placeholder(tf.int32, (FLAGS.batch_size))
d = tf.placeholder(tf.float32)
logits = model.inference(x,L,lmax,d) 
loss = model.loss(logits, y, FLAGS.regularization)
train_op = model.training(loss, FLAGS.learning_rate, train_size, FLAGS.batch_size)
evaluation = model.evaluation(logits, y)
prediction = model.prediction(logits)


num_epochs= 2 , train_size= 55000 , nb_iter= 1100

In [28]:
# Faster test evaluation
test_data_evaluation = test_data
test_labels_evaluation = test_labels
if 1==1:
    print(test_data.shape)
    nb_selected = 1200
    nb_selected = 200
    test_data_evaluation = test_data[:nb_selected,:]
    test_labels_evaluation = test_labels[:nb_selected]
print(test_data_evaluation.shape)


(10000, 1056)
(200, 1056)

In [29]:
# Train
init = tf.initialize_all_variables()
sess = tf.Session()

# TensorFlow
# Merge all the summaries and write them out to /tmp/mnist_logs (by default)
writer = tf.train.SummaryWriter('tmp/mnist_logs' + '/run2', sess.graph)
op_summary = tf.merge_all_summaries()

# Start
sess.run(init)
indices = collections.deque() 
tab_results = []
tab_last_epoch = []
start_last_epoch = nb_iter - train_size // FLAGS.batch_size
nb_samples_last_epoch = 25 
freq_save_last_epoch = int(train_size // FLAGS.batch_size // (nb_samples_last_epoch-1))
acc_train = -1.0
loss_train = -1.0
print('num_epochs=',num_epochs,', nb_iter=',nb_iter)
t_start = time.process_time()
for i in range(nb_iter): 
    
    # Computational time
    freq_iter = 10 
    if (i%freq_iter==0) & (i<=freq_iter):
        print('iter={:d}, freq_iter={:d}, training time: {:.2f}s, acc_train={:2.2f}, loss_train={:2.2f}'
              .format(i,freq_iter,time.process_time() - t_start,acc_train,loss_train))
        t_start = time.process_time()
        
    # Generic batch extraction
    if len(indices) < FLAGS.batch_size:
        indices.extend(np.random.permutation(train_data.shape[0])) # rand permutation
    idx = [indices.popleft() for i in range(FLAGS.batch_size)] # extract batch_size data
    batch_xs, batch_ys = train_data[idx,:], train_labels[idx]
    if type(batch_xs) is not np.ndarray:
        batch_xs = batch_xs.toarray()  # convert to full matrices if sparse

    # Run computational graph for weight learning
    _,acc_train,loss_train = sess.run([train_op,evaluation,loss], feed_dict={x: batch_xs, y: batch_ys, d: FLAGS.dropout})    
    
    # Display, save results
    if (i+1)%100==0: 
        
        t_start_testset = time.process_time()
        
        # Compute test accuracy
        data = test_data_evaluation
        size = data.shape[0]
        predictions = np.zeros(size, dtype='int32')
        predictions_nodropout = np.zeros(size, dtype='int32')
        for begin in range(0, size, FLAGS.batch_size):
            end = begin + FLAGS.batch_size
            end = min([end, size])
            batch_data = np.zeros((FLAGS.batch_size, data.shape[1]))
            tmp_data = data[begin:end,:]
            if type(tmp_data) is not np.ndarray:
                tmp_data = tmp_data.toarray()  
            batch_data[:end-begin] = tmp_data
            batch_label = np.zeros((FLAGS.batch_size))
            batch_pred = np.zeros((FLAGS.batch_size))
            batch_pred = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: FLAGS.dropout}) # for test set, comment to speedup computations 
            predictions[begin:end] = batch_pred[:end-begin]
            batch_pred_nodropout = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: 1.0})
            predictions_nodropout[begin:end] = batch_pred_nodropout[:end-begin]   
        acc_test = 100.* float( sum(predictions == test_labels_evaluation) ) / float(size)
        acc_test_nodropout = 100.* float( sum(predictions_nodropout == test_labels_evaluation) ) / float(size)
        
        t_testset = time.process_time() - t_start_testset
        
        print('iter={:d}, acc_train={:2.2f}, loss_train={:2.2f}, acc_test={:2.2f}, acc_test_nodropout={:2.2f}, test time={:.2f}s'
              .format(i+1,acc_train,loss_train,acc_test,acc_test_nodropout,t_testset))
        
        # Summaries for TensorBoard.
        acc_train *= 1.0
        acc_test *= 1.0
        acc_test_nodropout *= 1.0
        summary = tf.Summary()
        summary.value.add(tag='acc_train', simple_value=acc_train)
        summary.value.add(tag='acc_test', simple_value=acc_test)
        summary.value.add(tag='acc_test_nodropout', simple_value=acc_test_nodropout)
        writer.add_summary(summary, i+1)
         
# Save accuracy for last batch
data = test_data_evaluation
size = data.shape[0]
predictions_nodropout = np.zeros(size, dtype='int32')
for begin in range(0, size, FLAGS.batch_size):
    end = begin + FLAGS.batch_size
    end = min([end, size])
    batch_data = np.zeros((FLAGS.batch_size, data.shape[1]))
    tmp_data = data[begin:end,:]
    if type(tmp_data) is not np.ndarray:
        tmp_data = tmp_data.toarray()  # convert sparse matrices
    batch_data[:end-begin] = tmp_data
    batch_label = np.zeros((FLAGS.batch_size))
    batch_pred = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: FLAGS.dropout})
    predictions[begin:end] = batch_pred[:end-begin]
    batch_pred_nodropout = sess.run(prediction, feed_dict= {x: batch_data, y: batch_label, d: 1.0})
    predictions_nodropout[begin:end] = batch_pred_nodropout[:end-begin]   
acc_test_nodropout = 100.* float( sum(predictions_nodropout == test_labels_evaluation) ) / float(size)
print('final accuracy=',acc_test_nodropout)
                
writer.close()  
        
print('Training time: {:.2f}s'.format(time.process_time() - t_start))


num_epochs= 2 , nb_iter= 1100
iter=0, freq_iter=10, training time: 0.00s, acc_train=-1.00, loss_train=-1.00
iter=10, freq_iter=10, training time: 6.13s, acc_train=15.00, loss_train=18.09
iter=100, acc_train=87.00, loss_train=6.96, acc_test=86.00, acc_test_nodropout=89.50, test time=1.52s
iter=200, acc_train=83.00, loss_train=2.80, acc_test=92.00, acc_test_nodropout=92.50, test time=1.45s
iter=300, acc_train=89.00, loss_train=1.31, acc_test=90.50, acc_test_nodropout=92.00, test time=1.55s
iter=400, acc_train=87.00, loss_train=0.90, acc_test=90.50, acc_test_nodropout=91.50, test time=1.42s
iter=500, acc_train=87.00, loss_train=0.63, acc_test=88.00, acc_test_nodropout=90.00, test time=1.50s
iter=600, acc_train=84.00, loss_train=0.57, acc_test=89.00, acc_test_nodropout=93.00, test time=1.43s
iter=700, acc_train=86.00, loss_train=0.62, acc_test=91.00, acc_test_nodropout=91.00, test time=1.52s
iter=800, acc_train=88.00, loss_train=0.68, acc_test=88.00, acc_test_nodropout=89.00, test time=1.46s
iter=900, acc_train=90.00, loss_train=0.44, acc_test=92.50, acc_test_nodropout=92.00, test time=1.44s
iter=1000, acc_train=94.00, loss_train=0.35, acc_test=90.50, acc_test_nodropout=90.00, test time=1.42s
iter=1100, acc_train=91.00, loss_train=0.48, acc_test=91.50, acc_test_nodropout=91.50, test time=1.49s
final accuracy= 91.5
Training time: 679.35s

In [ ]: