In [1]:
from scipy.stats import gaussian_kde
from ipywidgets import widgets
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
import collections
import os
%matplotlib inline
import matplotlib

# Default font for every figure drawn below: bold weight, size 18.
font = dict(weight='bold', size=18)
matplotlib.rc('font', **font)
In [2]:
# Dataset directories to scan.
dnames = ['../data/desikan/MRN114', '../data/desikan/KKI2009', '../data/desikan/SWU4']
print("Datasets: " + ", ".join(dnames))
print("D = " + str(len(dnames)))

# Map each dataset directory to the .graphml files found anywhere beneath it.
# A generator expression is used so the loop variables do not leak into the
# notebook namespace (the previous list comprehension rebound the builtin
# `dir` under Python 2).  The paths are sorted because os.walk yields entries
# in an arbitrary, filesystem-dependent order, which would otherwise change
# the order of the graphs in every per-graph plot from machine to machine.
fs = dict()
for dd in dnames:
    fs[dd] = sorted(os.path.join(root, fl)
                    for root, _subdirs, files in os.walk(dd)
                    for fl in files if fl.endswith(".graphml"))
In [3]:
def loadGraphs(filenames, printer=False):
    """Read GraphML files into an ordered mapping keyed by file path.

    Parameters
    ----------
    filenames : iterable of str
        Paths of the GraphML files to read.
    printer : bool, optional
        When true, print a ``Loading: <path>`` line before each file is read.

    Returns
    -------
    collections.OrderedDict
        Maps each path to the object returned by ``nx.read_graphml`` for it,
        in the order the paths were given.
    """
    gstruct = collections.OrderedDict()
    for path in filenames:
        if printer:
            print("Loading: " + path)
        gstruct[path] = nx.read_graphml(path)
    return gstruct
In [4]:
# Load only the KKI2009 graphs.  The dataset is looked up by name through
# dnames instead of by position in fs.keys(): fs is a plain dict, so the
# position of a key says nothing about which dataset it is, and KKI2009 is
# entry 1 of dnames (entry 2 is SWU4).
mygs = loadGraphs(fs[dnames[1]], printer=False)
In [5]:
# Edge count of every loaded graph, keyed by file path in load order.
nnz = collections.OrderedDict(
    (path, graph.number_of_edges()) for path, graph in mygs.items()
)
In [6]:
# Bar chart with one bar per graph showing its edge count; the figure is
# written to disk and then displayed.
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(range(len(nnz)), nnz.values(), alpha=0.7, color='#888888')
ax.set_title('Number of Non-Zeros in KKI2009 Dataset', y=1.04)
ax.set_ylabel('Count')
ax.set_xlabel('Graph')
ax.set_xlim((0, len(nnz)))
fig.tight_layout()
fig.savefig('../figs/distribs/KKI2009-nnz.png')
plt.show()
In [7]:
# Degree sequence of every graph (one array per graph), followed by the mean
# degree of each graph in the same order.
degrees = collections.OrderedDict(
    (path, np.array(nx.degree(graph).values())) for path, graph in mygs.items()
)
avg_degrees = [np.mean(sequence) for sequence in degrees.values()]
In [8]:
# Overlay one kernel-density estimate of the degree sequence per graph.
# There is no plt.hold(True) call: successive plot() calls already draw onto
# the same axes, and pyplot.hold was deprecated in matplotlib 2.0 and removed
# in 3.0.
fig, ax = plt.subplots(figsize=(8, 6))
for key in degrees.keys():
    dens = gaussian_kde(degrees[key])
    # Evaluate the density from 0 up to 20% beyond the largest observed degree.
    x = np.linspace(0, 1.2*np.max(degrees[key]), 1000)
    ax.plot(x, dens.pdf(x), color='#888888', alpha=0.4)
ax.set_title('Degree Sequence in KKI2009 Dataset', y=1.04)
ax.set_ylabel('Probability')
ax.set_xlabel('Vertex Degree')
fig.tight_layout()
fig.savefig('../figs/distribs/KKI2009-degree.png')
plt.show()
In [9]:
# 'weight' attribute of every edge, collected per graph.
e_weights = collections.OrderedDict(
    (path, [attrs['weight'] for _u, _v, attrs in graph.edges(data=True)])
    for path, graph in mygs.items()
)
# Per-graph summary of those weights: how many there are, their mean and
# their standard deviation.
avg_e_weights = [
    dict(N=len(weights), mean=np.mean(weights), std=np.std(weights))
    for weights in e_weights.values()
]
In [10]:
# Overlay one kernel-density estimate of the edge weights per graph.
# There is no plt.hold(True) call: successive plot() calls already draw onto
# the same axes, and pyplot.hold was deprecated in matplotlib 2.0 and removed
# in 3.0.
fig, ax = plt.subplots(figsize=(8, 6))
for key in e_weights.keys():
    dens = gaussian_kde(e_weights[key])
    # Evaluate the density from 0 up to 20% beyond the largest edge weight.
    x = np.linspace(0, 1.2*np.max(e_weights[key]), 1000)
    # The x values are divided by 10^3 and the density multiplied by 10^3,
    # matching the units stated in the axis labels.
    ax.plot(x / 1000, 10**3 * dens.pdf(x), color='#888888', alpha=0.4)
ax.set_title('Edge Weight Distributions in KKI2009 Dataset', y=1.04)
# The closing parenthesis was missing from this label.
ax.set_ylabel('Probability (10^(-3))')
ax.set_xlabel('Edge Weight (10^3)')
fig.tight_layout()
fig.savefig('../figs/distribs/KKI2009-edgeweight.png')
plt.show()
In [11]:
# Number of collected edge weights (i.e. edges) per graph.
e_count = collections.OrderedDict(
    (path, len(weights)) for path, weights in e_weights.items()
)
In [34]:
# Bar chart with one bar per graph showing its edge count; the figure is
# written to disk and then displayed.
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(range(len(e_count)), e_count.values(), alpha=0.7, color='#888888')
ax.set_title('Edge Count in KKI2009 Dataset', y=1.04)
ax.set_ylabel('Count')
ax.set_xlabel('Graph')
ax.set_xlim((0, len(e_count)))
fig.savefig('../figs/distribs/KKI2009-edges.png')
plt.show()
In [12]:
# Clustering coefficient of every vertex, collected per graph, followed by
# the mean coefficient of each graph in the same order.
ccoefs = collections.OrderedDict(
    (path, nx.clustering(graph).values()) for path, graph in mygs.items()
)
avg_ccoefs = [np.mean(values) for values in ccoefs.values()]
In [13]:
# Overlay one kernel-density estimate of the clustering coefficients per graph.
# There is no plt.hold(True) call: successive plot() calls already draw onto
# the same axes, and pyplot.hold was deprecated in matplotlib 2.0 and removed
# in 3.0.
fig, ax = plt.subplots(figsize=(8, 6))
for key in ccoefs.keys():
    dens = gaussian_kde(ccoefs[key])
    # Evaluate the density from 0 up to 20% beyond the largest coefficient.
    x = np.linspace(0, 1.2*np.max(ccoefs[key]), 1000)
    ax.plot(x, dens.pdf(x), color='#888888', alpha=0.4)
ax.set_title('Clustering Coefficient in KKI2009 Dataset', y=1.04)
ax.set_ylabel('Probability')
ax.set_xlabel('Clustering Coefficient')
fig.tight_layout()
fig.savefig('../figs/distribs/KKI2009-cc.png')
plt.show()
In [14]:
i = 1  # neighbourhood radius used for the first scan statistic


def scan_statistic(mygs, i):
    """Compute the order-``i`` scan statistic of every vertex of every graph.

    For each vertex, the statistic is the sum of the ``'weight'`` attribute
    over all edges of the ego graph of radius ``i`` centred on that vertex.

    Parameters
    ----------
    mygs : mapping
        Graphs keyed by name; results follow this mapping's iteration order.
    i : int
        Radius passed to ``nx.ego_graph``.

    Returns
    -------
    collections.OrderedDict
        Maps each key of ``mygs`` to a 1-D float array with one value per
        vertex, in the order given by ``g.nodes()``.
    """
    ss = collections.OrderedDict()
    for key in mygs.keys():
        g = mygs[key]
        totals = []
        for n in g.nodes():
            subgraph = nx.ego_graph(g, n, radius=i)
            totals.append(np.sum([attrs['weight']
                                  for _u, _v, attrs in subgraph.edges(data=True)]))
        # Convert once at the end: appending to an ndarray inside the loop
        # copies the whole array for every vertex.  dtype=float keeps the
        # float array that the append-based version produced.
        ss[key] = np.array(totals, dtype=float)
    return ss
# Scan statistic of every vertex of every loaded graph, using the radius
# currently held in i.
ss1 = scan_statistic(mygs=mygs, i=i)
In [15]:
# Overlay one kernel-density estimate of the scan statistic-1 values per graph.
# There is no plt.hold(True) call: successive plot() calls already draw onto
# the same axes, and pyplot.hold was deprecated in matplotlib 2.0 and removed
# in 3.0.
fig, ax = plt.subplots(figsize=(8, 6))
for key in ss1.keys():
    dens = gaussian_kde(ss1[key])
    # Evaluate the density from 0 up to 20% beyond the largest value.
    x = np.linspace(0, 1.2*np.max(ss1[key]), 1000)
    # The x values are divided by 10^6 and the density multiplied by 10^6,
    # matching the units stated in the axis labels.
    ax.plot(x / 10**6, 10**6 * dens.pdf(x), color='#888888', alpha=0.4)
ax.set_title('Scan Statistic-1 Distributions in KKI2009 Dataset', y=1.04)
ax.set_ylabel('Probability (10^(-6))')
ax.set_xlabel('Scan Statistic-1 (10^(6))')
fig.tight_layout()
fig.savefig('../figs/distribs/KKI2009-ss1.png')
plt.show()
In [23]:
# Repeat the scan statistic with a neighbourhood radius of 2.
i = 2
ss2 = scan_statistic(mygs=mygs, i=i)
In [42]:
# Overlay one kernel-density estimate of the scan statistic-2 values per graph.
# There is no plt.hold(True) call: successive plot() calls already draw onto
# the same axes, and pyplot.hold was deprecated in matplotlib 2.0 and removed
# in 3.0.
fig, ax = plt.subplots(figsize=(6, 6))
for key in ss2.keys():
    dens = gaussian_kde(ss2[key])
    # Evaluate the density from 0 up to 20% beyond the largest value.
    x = np.linspace(0, 1.2*np.max(ss2[key]), 1000)
    # The x values are divided by 10^6 and the density multiplied by 10^6,
    # matching the units stated in the axis labels.
    ax.plot(x / 10**6, 10**6 * dens.pdf(x), color='#888888', alpha=0.4)
ax.set_title('Scan Statistic-2 Distributions in KKI2009 Dataset', y=1.04)
ax.set_ylabel('Probability (10^(-6))')
ax.set_xlabel('Scan Statistic-2 (10^(6))')
plt.show()
In [16]:
# Normalized Laplacian matrix of every graph, keyed by file path.
laplacian = collections.OrderedDict(
    (key, nx.normalized_laplacian_matrix(mygs[key])) for key in mygs)
# Spectrum of each Laplacian, largest eigenvalue first.
# The normalized Laplacian of an undirected graph is symmetric, so eigvalsh is
# used: it returns real eigenvalues in ascending order (hence the reversal),
# whereas the general eigvals solver may return complex values with spurious
# imaginary parts.  NOTE(review): this assumes the loaded graphs are
# undirected - confirm against the GraphML files.
# toarray() is the explicit spelling of the sparse-matrix `.A` shorthand.
eigs = collections.OrderedDict(
    (key, np.linalg.eigvalsh(laplacian[key].toarray())[::-1]) for key in laplacian)
In [17]:
# Plot the eigenvalues of each graph's normalized Laplacian against their
# index, one curve per graph.
# There is no plt.hold(True) call: successive plot() calls already draw onto
# the same axes, and pyplot.hold was deprecated in matplotlib 2.0 and removed
# in 3.0.
fig, ax = plt.subplots(figsize=(8, 6))
for key in eigs.keys():
    # The format string carries only the marker and line style; the colour is
    # given once, through the keyword (the former 'ro-' also asked for red,
    # which the keyword overrode).
    ax.plot(eigs[key], 'o-', markersize=0.4, color='#888888', alpha=0.4)
# Title previously read 'KKI209'.
ax.set_title('Eigen Values in KKI2009 Dataset', y=1.04)
ax.set_ylabel('Value')
ax.set_xlabel('Dimension')
fig.tight_layout()
fig.savefig('../figs/distribs/KKI2009-eigen.png')
plt.show()
In [18]:
# Betweenness centrality of every vertex, collected per graph.
centrality = collections.OrderedDict(
    (path, nx.algorithms.betweenness_centrality(graph).values())
    for path, graph in mygs.items()
)
In [19]:
# Overlay one kernel-density estimate of the betweenness centralities per graph.
# There is no plt.hold(True) call: successive plot() calls already draw onto
# the same axes, and pyplot.hold was deprecated in matplotlib 2.0 and removed
# in 3.0.
fig, ax = plt.subplots(figsize=(8, 6))
for key in centrality.keys():
    dens = gaussian_kde(centrality[key])
    # Evaluate the density from 0 up to 20% beyond the largest centrality.
    x = np.linspace(0, 1.2*np.max(centrality[key]), 1000)
    ax.plot(x, dens.pdf(x), color='#888888', alpha=0.4)
ax.set_title('Centrality Distributions in KKI2009 Dataset', y=1.04)
ax.set_ylabel('Probability')
ax.set_xlabel('Betweenness Centrality')
fig.tight_layout()
fig.savefig('../figs/distribs/KKI2009-centrality.png')
plt.show()
In [32]:
# All cliques of exactly three vertices in every graph, keyed by file path.
three_cliques = collections.OrderedDict()
for key in mygs:
    triangles = []
    for clique in nx.algorithms.clique.enumerate_all_cliques(mygs[key]):
        # enumerate_all_cliques yields cliques in order of increasing size, so
        # once a clique larger than three appears no further 3-cliques can
        # follow; stopping here avoids enumerating every larger clique.
        if len(clique) > 3:
            break
        if len(clique) == 3:
            triangles.append(clique)
    three_cliques[key] = triangles
# Number of 3-cliques per graph, in the same order.
n_three_cliques = [len(three_cliques[key]) for key in three_cliques]
In [10]:
# Bar chart with one bar per graph showing how many 3-cliques it contains.
fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(range(len(n_three_cliques)), n_three_cliques, alpha=0.7)
ax.set_title('Number of local 3-cliques in KKI2009 Dataset', y=1.04)
ax.set_ylabel('Count')
ax.set_xlabel('Graph')
ax.set_xlim((0, len(three_cliques)))
plt.show()
In [ ]:
In [25]:
# Connected-component subgraphs of every graph, keyed by file path.
# The components are materialised into lists: storing the raw generators meant
# that computing lccs below consumed them, leaving ccs empty for any later use
# (for example counting components with len(ccs[key])) and making a re-run of
# the max() step fail on an empty sequence.  OrderedDict keeps the graphs in
# load order, like the other per-graph structures in this notebook.
ccs = collections.OrderedDict(
    (key, list(nx.connected_component_subgraphs(mygs[key]))) for key in mygs)
# Largest connected component of each graph, i.e. the one with the most vertices.
lccs = collections.OrderedDict(
    (key, max(ccs[key], key=len)) for key in ccs)