Trial 4: graph coarsening

  • First Python implementation of the greedy METIS and Graclus coarsening algorithms.
  • Comparison of the results with a previously developed MATLAB implementation.
  • Comparison of the results with the newer version in the coarsening module.

METIS coarsening implementation as proposed in: X. Bresson, H. Hu, T. Laurent, A. Szlam, J. von Brecht, "An incremental reseeding strategy for clustering", arXiv preprint arXiv:1406.3837, 3 May 2016.

In [ ]:
import os
import scipy.io
import scipy.sparse
import numpy as np

In [ ]:
if False:
    # CHECK PYTHON RESULTS WITH MATLAB CODE
    path = os.path.join('..', 'data', 'metis_matlab.mat')
    mat = scipy.io.loadmat(path)
    W = mat['W']
    W = scipy.sparse.csr_matrix(W)
    rid = mat['rid'] - 1  # convert the MATLAB 1-based indices to 0-based
    rid = rid.T
    rid = rid.squeeze().astype(int)  # loadmat returns float64, but rid is used as an index array
    #print(type(W))
    #print(type(rid))
    print(W.shape)
    print(W.nnz)
    #print(rid.shape)

else:
    # Generate a random sparse weight matrix (note: not symmetrized)
    N = 533
    #np.random.seed(0)
    rid = np.random.permutation(range(N))
    W = np.random.uniform(0.01, 0.99, size=(N,N))
    mask = np.random.uniform(size=(N,N))
    W[mask < 0.99] = 0  # keep roughly 1% of the entries
    W = scipy.sparse.csr_matrix(W)
    print(W.nnz)
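
The coarsening routine below expects a symmetric sparse weight matrix, but the randomly generated W above is not symmetrized. A minimal sketch of how it could be made symmetric (an assumption, not run here, so the recorded results further down stay as they are):

In [ ]:
# Optional sketch (not part of the original trial): enforce symmetry
# by averaging W with its transpose.
if False:
    W = scipy.sparse.csr_matrix((W + W.T) / 2)
    print(abs(W - W.T).max())  # 0.0 confirms the matrix is now symmetric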

In [ ]:
# INPUT
# W = symmetric sparse weight matrix
# maxsize = maximum number of nodes allowed in the coarsest graph
# rid = order in which the vertices are visited at the first pass
# OUTPUT
# graphs[0]: original graph of size N_1
# graphs[1]: coarser graph of size N_2 < N_1
# etc...
# graphs[k-1]: coarsest graph of size N_k < ... < N_2 < N_1
# parents[i] is a vector of size N_i with entries ranging from 0 to N_{i+1}-1
# which indicate the parents in the coarser graphs[i+1]
# nd_sz[i] is a vector of size N_i that contains the size of the supernodes in graphs[i]
# (the supernode sizes are still a TO DO below)
# NOTE
# if "graphs" is a list of length k, then "parents" is a list of length k-1

def metis_coarsening(W,maxsize,rid):
    
    N = W.shape[0]
    print('Size of original graph=',N)
    parents = []
    degree = W.sum(axis=0) - W.diagonal()
    graphs = []
    graphs.append(W)
    supernode_size = np.ones(N)
    nd_sz = [supernode_size]
    count = 0
    
    while N > maxsize:
        
        count = count + 1
        print('level=',count)
        
        # CHOOSE THE WEIGHTS FOR THE PAIRING
        # weights = ones(N,1)       # metis weights
        weights = degree            # graclus weights
        # weights = supernode_size  # other possibility
        weights = weights.T
        weights = np.array(weights)
        weights = weights.squeeze()
        
         # PAIR THE VERTICES AND CONSTRUCT THE ROOT VECTOR
        idx_row,idx_col,val = scipy.sparse.find(W) 
        perm = np.argsort(idx_row)
        rr = idx_row[perm]
        cc = idx_col[perm]
        vv = val[perm]
        cluster_id = one_level_coarsening(rr,cc,vv,rid,weights) # rr is ordered  
        parents.append(cluster_id)
        
        # TO DO
        # COMPUTE THE SIZE OF THE SUPERNODES AND THEIR DEGREE 
        #supernode_size = full(   sparse(cluster_id,  ones(N,1) , supernode_size )     )
        #print(cluster_id)
        #print(supernode_size)
        #nd_sz{count+1}=supernode_size;
    
        # COMPUTE THE EDGE WEIGHTS FOR THE NEW GRAPH
        nrr = cluster_id[rr]
        ncc = cluster_id[cc]
        nvv = vv
        Nnew = int(cluster_id.max()) + 1
        print('Size of coarser graph=',Nnew)
        # Duplicate (row,col) pairs are summed by the csr_matrix constructor,
        # which accumulates the weights between supernodes
        W = scipy.sparse.csr_matrix((nvv,(nrr,ncc)),shape=(Nnew,Nnew))
        # Add new graph to the list of all coarsened graphs
        graphs.append(W)
        N = W.shape[0]
        
        # COMPUTE THE DEGREE (WITH OR WITHOUT SELF-LOOPS)
        degree = W.sum(axis=0)
        #degree = W.sum(axis=0) - W.diagonal()
    
        # CHOOSE THE ORDER IN WHICH VERTICES WILL BE VISITED AT THE NEXT PASS
        #[~, rid]=sort(ss);                # arthur strategy
        #[~, rid]=sort(supernode_size);    # thomas strategy
        #rid=randperm(N);                  # metis/graclus strategy
        ss = np.asarray(W.sum(axis=0)).squeeze()
        rid = np.argsort(ss)  # visit vertices by increasing degree, i.e. [~, rid]=sort(ss)
        
        
    # Remove all diagonal entries in similarity matrices
    for i in range(len(graphs)): 
        csr_setdiag_val(graphs[i])
        scipy.sparse.csr_matrix.eliminate_zeros(graphs[i])
    
        
    return graphs,parents
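
The supernode-size bookkeeping above is still marked as TO DO. A minimal sketch, using np.bincount, of how the commented MATLAB line full(sparse(cluster_id, ones(N,1), supernode_size)) could be translated (an assumption, not part of the tested code):

In [ ]:
# Hypothetical helper: accumulate the sizes of all vertices that share the same parent cluster.
def supernode_sizes(cluster_id, supernode_size):
    cluster_id = np.asarray(cluster_id, dtype=int)
    return np.bincount(cluster_id, weights=supernode_size)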

In [ ]:
#http://nbviewer.ipython.org/gist/Midnighter/9992103
def csr_setdiag_val(csr, value=0):
    """Set all diagonal nonzero elements
    (elements currently in the sparsity pattern)
    to the given value. Useful to set to 0 mostly.
    """
    if csr.format != "csr":
        raise ValueError('Matrix given must be of CSR format.')
    csr.sort_indices()
    pointer = csr.indptr
    indices = csr.indices
    data = csr.data
    for i in range(min(csr.shape)):
        ind = indices[pointer[i]: pointer[i + 1]]
        j =  ind.searchsorted(i)
        # matrix has only elements up until diagonal (in row i)
        if j == len(ind):
            continue
        j += pointer[i]
        # in case matrix has only elements after diagonal (in row i)
        if indices[j] == i:
            data[j] = value
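
A tiny sanity check of csr_setdiag_val on a hypothetical 3x3 matrix: diagonal entries already stored in the sparsity pattern are zeroed in place, and eliminate_zeros() then drops them.

In [ ]:
# Illustrative data only
A = scipy.sparse.csr_matrix(np.array([[1., 2., 0.],
                                      [0., 3., 4.],
                                      [5., 0., 0.]]))
csr_setdiag_val(A)   # zeros the stored diagonal entries (here 1. and 3.)
A.eliminate_zeros()  # removes them from the sparsity pattern
print(A.toarray())   # [[0. 2. 0.] [0. 0. 4.] [5. 0. 0.]]
print(A.nnz)         # 3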

In [ ]:
# Coarsen a graph given by rr,cc,vv.  rr is assumed to be ordered
def one_level_coarsening(rr,cc,vv,rid,weights):
    
    nnz = rr.shape[0]
    N = rr[nnz-1] + 1  # assumes rr is sorted and the last vertex has at least one edge
    #print(nnz,N)
    
    marked = np.zeros(N)
    rowstart = np.zeros(N, dtype=np.int32)   # integer dtype: used as indices into cc/vv below
    rowlength = np.zeros(N, dtype=np.int32)
    cluster_id = np.zeros(N, dtype=np.int32) # supernode assigned to each vertex
    
    oldval = rr[0]
    count = 0
    clustercount = 0
    
    for ii in range(nnz):
        rowlength[count] = rowlength[count] + 1
        if rr[ii] > oldval:
            oldval = rr[ii]
            rowstart[count+1] = ii
            count = count + 1
            
    for ii in range(N):
        tid = rid[ii]
        if marked[tid] == 0.0:
            wmax = 0.0
            rs = rowstart[tid]
            marked[tid] = 1.0
            bestneighbor = -1
            # pick the unmarked neighbor maximizing vv * (1/weights[tid] + 1/weights[nid])
            for jj in range(int(rowlength[tid])):
                nid = cc[rs+jj]
                tval = (1.0-marked[nid]) * vv[rs+jj] * (1.0/weights[tid] + 1.0/weights[nid])
                if tval > wmax:
                    wmax = tval
                    bestneighbor = nid
                    
            cluster_id[tid] = clustercount
            
            if bestneighbor > -1:
                cluster_id[bestneighbor] = clustercount
                marked[bestneighbor] = 1.0
            
            clustercount = clustercount + 1
            
    return cluster_id
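
An illustrative run of one_level_coarsening on a hypothetical 4-node graph (not part of the original trial): with a uniform visiting order, vertex 0 pairs with vertex 1, its heaviest neighbor under the normalized weight w_ij*(1/w_i + 1/w_j), and vertices 2 and 3 form the second supernode.

In [ ]:
# Hypothetical 4-node example (assumed data)
W_small = scipy.sparse.csr_matrix(np.array([[0., 3., 1., 0.],
                                            [3., 0., 0., 1.],
                                            [1., 0., 0., 2.],
                                            [0., 1., 2., 0.]]))
rr, cc, vv = scipy.sparse.find(W_small)
perm = np.argsort(rr)                                      # rr must be ordered
rr, cc, vv = rr[perm], cc[perm], vv[perm]
weights_small = np.asarray(W_small.sum(axis=0)).squeeze()  # degrees [4. 4. 3. 3.]
rid_small = np.arange(4)                                   # visit vertices in natural order
print(one_level_coarsening(rr, cc, vv, rid_small, weights_small))
# expected: [0 0 1 1] -- supernodes {0,1} and {2,3}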

In [ ]:
maxsize = 200
N = W.shape[0]
#rid = np.random.permutation(range(N))
#print(N)
#print(rid[0:10])

graphs,parents = metis_coarsening(W.copy(),maxsize,rid)
#print(graph)
#print(parents)


# CHECK RESULTS WITH MATLAB CODE
graph0 = graphs[0]
print(graph0.shape)
print(graph0[0,:])

graph1 = graphs[1]
print(graph1.shape)
print(graph1[0,:])

graph2 = graphs[2]
print(graph2.shape)
print(graph2[0,:])

parents0 = parents[0]
print(parents0.shape)
print(parents0[0:10])

parents1 = parents[1]
print(parents1.shape)
print(parents1[0:10])
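
The parents vectors chain the levels together: parents[0] maps each node of graphs[0] to its supernode in graphs[1], and parents[1] maps those supernodes to graphs[2]. A small sketch that traces where node 0 ends up at each level:

In [ ]:
# Follow node 0 through the coarsening hierarchy
node = 0
for level, parent in enumerate(parents):
    node = int(parent[node])
    print('node 0 belongs to supernode {} of graphs[{}]'.format(node, level + 1))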

In [ ]:
import sys
sys.path.append('..')
from lib import coarsening

graphs, parents = coarsening.metis(W, 2, rid)

for i,A in enumerate(graphs):
    M, M = A.shape
    A = A.tocoo()
    A.setdiag(0)
    A = A.tocsr()
    A.eliminate_zeros()
    graphs[i] = A
    print('Layer {0}: M_{0} = {1} nodes, {2} edges'.format(i, M, A.nnz))

# CHECK RESULTS WITH MATLAB CODE
graph0 = graphs[0]
print(graph0.shape)
print(graph0[0,:])

graph1 = graphs[1].tocsr()
print(graph1.shape)
print(graph1[0,:])

graph2 = graphs[2].tocsr()
print(graph2.shape)
print(graph2[0,:])

parents0 = parents[0]
print(parents0.shape)
print(parents0[0:10])

parents1 = parents[1]
print(parents1.shape)
print(parents1[0:10])
# Python results
# Size of original graph= 533
# level= 1
# Size of coarser graph= 279
# level= 2
# Size of coarser graph= 147
#
# graphs[0], shape (533, 533), first row:
#   (0, 18) 0.810464124165   (0, 59) 0.349678536711   (0, 60) 0.591336229831
#   (0, 83) 0.388420442335   (0, 105) 0.255134781894  (0, 210) 0.656852096558
#   (0, 226) 0.900257809833  (0, 299) 0.065093756932  (0, 340) 0.810464124165
#   (0, 407) 0.431454676752
#
# graphs[1], shape (279, 279), first row:
#   (0, 44) 1.63660876872    (0, 58) 2.42459126058    (0, 71) 0.186153138092
#   (0, 115) 1.99313658383   (0, 167) 1.24818832639   (0, 168) 2.95891026039
#   (0, 179) 0.388420442335  (0, 240) 0.431454676752
#
# graphs[2], shape (147, 147), first row:
#   (0, 21) 5.1886032791     (0, 85) 1.08484314421    (0, 87) 0.353738954483
#   (0, 127) 0.186153138092  (0, 135) 1.88273900708   (0, 141) 0.255134781894
#
# parents[0], shape (533,), first 10 entries:
#   [ 57. 148. 184. 237. 93. 93. 47. 28. 133. 71.]
# parents[1], shape (279,), first 10 entries:
#   [ 127. 4. 88. 128. 50. 120. 54. 123. 146. 26.]

# Matlab results (MATLAB indices are 1-based, so they are shifted by one
# with respect to the Python results above)
#   ans = (1,19) 0.8105   (1,60) 0.3497   (1,61) 0.5913   (1,84) 0.3884   (1,106) 0.2551
#         (1,211) 0.6569  (1,227) 0.9003  (1,300) 0.0651  (1,341) 0.8105  (1,408) 0.4315
#   ans = (1,45) 1.6366   (1,59) 2.4246   (1,72) 0.1862   (1,116) 1.9931  (1,168) 1.2482
#         (1,169) 2.9589  (1,180) 0.3884  (1,241) 0.4315
#   ans = (1,22) 5.1886   (1,86) 1.0848   (1,88) 0.3537   (1,128) 0.1862  (1,136) 1.8827
#         (1,142) 0.2551
#   ans = 58 149 185 238 94 94 48 29 134 72
#   ans = 128 5 89 129 51 121 55 124 147 27