In [ ]:
import os
import scipy.io
import scipy.sparse
import numpy as np
In [ ]:
if False:
# CHECK PYTHON RESULTS WITH MATLAB CODE
folder = os.path.join('..', 'data', 'metis_matlab.mat')
mat = scipy.io.loadmat(folder)
W = mat['W']
W = scipy.sparse.csr_matrix(W)
rid = mat['rid']-1
rid = rid.T
rid = rid.squeeze()
#print(type(W))
#print(type(rid))
print(W.shape)
print(W.nnz)
#print(rid.shape)
else:
N = 533
#np.random.seed(0)
rid = np.random.permutation(range(N))
W = np.random.uniform(0.01, 0.99, size=(N,N))
mask = np.random.uniform(size=(N,N))
W[mask<0.99] = 0
W = scipy.sparse.csr_matrix(W)
print(W.nnz)
In [ ]:
# INPUT
# W = symmetric sparse weight matrix
# maxsize = the number of nodes for the coarsest graph
# OUTPUT
# graph{1}: original graph of size N_1
# graph{2}: coarser graph of size N_2 < N_1
# etc...
# graph{k}: corsest graph of Size N_k <...< N_2 < N_1
# parents{i} is a vector of size N_i with entries ranging from 1 to N_{i+1}
# which indicate the parents in the coarser graph{i+1}
# nd_sz{i} is a vector of size N_i that contains the size of the supernode in the graph{i}
# NOTE
# if "graph" is a cell of size k, then "parents" will be a cell of size k-1
def metis_coarsening(W,maxsize,rid):
N = W.shape[0]
print('Size of original graph=',N)
parents = []
degree = W.sum(axis=0) - W.diagonal()
graphs = []
graphs.append(W)
supernode_size = np.ones(N)
nd_sz = [supernode_size]
count = 0
while N > maxsize:
count = count + 1;
print('level=',count)
# CHOOSE THE WEIGHTS FOR THE PAIRING
# weights = ones(N,1) # metis weights
weights = degree # graclus weights
# weights = supernode_size # other possibility
weights = weights.T
weights = np.array(weights)
weights = weights.squeeze()
# PAIR THE VERTICES AND CONSTRUCT THE ROOT VECTOR
idx_row,idx_col,val = scipy.sparse.find(W)
perm = np.argsort(idx_row)
rr = idx_row[perm]
cc = idx_col[perm]
vv = val[perm]
cluster_id = one_level_coarsening(rr,cc,vv,rid,weights) # rr is ordered
parents.append(cluster_id)
# TO DO
# COMPUTE THE SIZE OF THE SUPERNODES AND THEIR DEGREE
#supernode_size = full( sparse(cluster_id, ones(N,1) , supernode_size ) )
#print(cluster_id)
#print(supernode_size)
#nd_sz{count+1}=supernode_size;
# COMPUTE THE EDGES WEIGHTS FOR THE NEW GRAPH
nrr = cluster_id[rr]
ncc = cluster_id[cc]
nvv = vv
Nnew = int(cluster_id.max()) + 1
print('Size of coarser graph=',Nnew)
W = scipy.sparse.csr_matrix((nvv,(nrr,ncc)),shape=(Nnew,Nnew))
# Add new graph to the list of all coarsened graphs
graphs.append(W)
N = W.shape[0]
# COMPUTE THE DEGREE (OMIT OR NOT SELF LOOPS)
degree = W.sum(axis=0)
#degree = W.sum(axis=0) - W.diagonal()
# CHOOSE THE ORDER IN WHICH VERTICES WILL BE VISTED AT THE NEXT PASS
#[~, rid]=sort(ss); # arthur strategy
#[~, rid]=sort(supernode_size); # thomas strategy
#rid=randperm(N); # metis/graclus strategy
ss = W.sum(axis=0).T
rid = [i[0] for i in sorted(enumerate(ss), key=lambda x:x[1])] # [~, rid]=sort(ss);
# Remove all diagonal entries in similarity matrices
for i in range(len(graphs)):
csr_setdiag_val(graphs[i])
scipy.sparse.csr_matrix.eliminate_zeros(graphs[i])
return graphs,parents
In [ ]:
#http://nbviewer.ipython.org/gist/Midnighter/9992103
def csr_setdiag_val(csr, value=0):
"""Set all diagonal nonzero elements
(elements currently in the sparsity pattern)
to the given value. Useful to set to 0 mostly.
"""
if csr.format != "csr":
raise ValueError('Matrix given must be of CSR format.')
csr.sort_indices()
pointer = csr.indptr
indices = csr.indices
data = csr.data
for i in range(min(csr.shape)):
ind = indices[pointer[i]: pointer[i + 1]]
j = ind.searchsorted(i)
# matrix has only elements up until diagonal (in row i)
if j == len(ind):
continue
j += pointer[i]
# in case matrix has only elements after diagonal (in row i)
if indices[j] == i:
data[j] = value
In [ ]:
# Coarsen a graph given by rr,cc,vv. rr is assumed to be ordered
def one_level_coarsening(rr,cc,vv,rid,weights):
nnz = rr.shape[0]
N = rr[nnz-1]+1
#print(nnz,N)
marked = np.zeros(N)
rowstart = np.zeros(N)
rowlength = np.zeros(N)
cluster_id = np.zeros(N)
oldval = rr[0]
count = 0
clustercount = 0
for ii in range(nnz):
rowlength[count] = rowlength[count] + 1
if rr[ii] > oldval:
oldval = rr[ii]
rowstart[count+1] = ii
count = count + 1
for ii in range(N):
tid = rid[ii]
if marked[tid]==0.0:
wmax = 0.0
rs = rowstart[tid]
marked[tid] = 1.0
bestneighbor = -1
for jj in range(int(rowlength[tid])):
nid = cc[rs+jj]
tval = (1.0-marked[nid]) * vv[rs+jj] * (1.0/weights[tid]+ 1.0/weights[nid])
if tval > wmax:
wmax = tval
bestneighbor = nid
cluster_id[tid] = clustercount;
if bestneighbor > -1:
cluster_id[bestneighbor] = clustercount
marked[bestneighbor] = 1.0
clustercount = clustercount + 1
return cluster_id
In [ ]:
maxsize = 200
N = W.shape[0]
#rid = np.random.permutation(range(N))
#print(N)
#print(rid[0:10])
graphs,parents = metis_coarsening(W.copy(),maxsize,rid)
#print(graph)
#print(parents)
# CHECK RESULTS WITH MATLAB CODE
graph0 = graphs[0]
print(graph0.shape)
print(graph0[0,:])
graph1 = graphs[1]
print(graph1.shape)
print(graph1[0,:])
graph2 = graphs[2]
print(graph2.shape)
print(graph2[0,:])
parents0 = parents[0]
print(parents0.shape)
print(parents0[0:10])
parents1 = parents[1]
print(parents1.shape)
print(parents1[0:10])
In [ ]:
import sys
sys.path.append('..')
from lib import coarsening
graphs, parents = coarsening.metis(W, 2, rid)
for i,A in enumerate(graphs):
M, M = A.shape
A = A.tocoo()
A.setdiag(0)
A = A.tocsr()
A.eliminate_zeros()
graphs[i] = A
print('Layer {0}: M_{0} = {1} nodes, {2} edges'.format(i, M, A.nnz))
# CHECK RESULTS WITH MATLAB CODE
graph0 = graphs[0]
print(graph0.shape)
print(graph0[0,:])
graph1 = graphs[1].tocsr()
print(graph1.shape)
print(graph1[0,:])
graph2 = graphs[2].tocsr()
print(graph2.shape)
print(graph2[0,:])
parents0 = parents[0]
print(parents0.shape)
print(parents0[0:10])
parents1 = parents[1]
print(parents1.shape)
print(parents1[0:10])