Graph Analysis - II

Imports


In [176]:
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import networkx as nx
from sklearn.cluster import KMeans

%matplotlib inline

K-core decomposition of a graph


In [205]:
G=nx.karate_club_graph()
G = nx.Graph(G)

print len(G.nodes())


34

In [85]:
degree_sequence=sorted(nx.degree(G).values(),reverse=True) # degree sequence
dmax=max(degree_sequence)
print dmax


17

Computing the k-core decomposition of a graph


In [86]:
core_dec = nx.core_number(G)
print core_dec


{0: 4, 1: 4, 2: 4, 3: 4, 4: 3, 5: 3, 6: 3, 7: 4, 8: 4, 9: 2, 10: 3, 11: 1, 12: 2, 13: 4, 14: 2, 15: 2, 16: 2, 17: 2, 18: 2, 19: 3, 20: 2, 21: 2, 22: 2, 23: 3, 24: 3, 25: 3, 26: 2, 27: 3, 28: 3, 29: 3, 30: 4, 31: 3, 32: 4, 33: 4}

Plotting the graph; nodes with the same color belong in the same core


In [120]:
colors = ['#d7191c', '#fdae61', '#ffffbf', '#abdda4', '#2b83ba']
node_colors = [ colors[core_dec[v]] for v in G.nodes()]

nx.draw(G, node_color=node_colors, with_labels=True)


Minimun Cuts


In [206]:
cut_edges = nx.minimum_edge_cut(G)
print cut_edges


[(11, 0)]

In [207]:
Gcopy = G.copy()
Gcopy.remove_edges_from(cut_edges)

In [208]:
cc = nx.connected_components(Gcopy)
node_set = {}
i = 1
for s in cc:
    for node in s:
        node_set[node] = i
    i+=1
print node_set


{0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 2, 12: 1, 13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 1, 33: 1}

In [209]:
colors = ['#d7191c', '#2b83ba']
node_colors = [ colors[node_set[v]-1] for v in G.nodes()]
nx.draw(G, node_color=node_colors, with_labels='True')



In [210]:
cut_edges = nx.minimum_edge_cut(G, s=0, t=33)
print cut_edges


set([(13, 33), (19, 33), (2, 28), (1, 30), (2, 32), (0, 31), (2, 8), (0, 8), (2, 27), (9, 33)])

In [211]:
Gcopy = G.copy()
Gcopy.remove_edges_from(cut_edges)

In [212]:
cc = nx.connected_components(Gcopy)
node_set = {}
for i, s in enumerate(cc):
    for node in s:
        node_set[node] = i
colors = ['#d7191c', '#2b83ba']
node_colors = [ colors[node_set[v]-1] for v in G.nodes()]
nx.draw(G, node_color=node_colors, with_labels='True')


Graph spectral clustering

Exploring the Fiedler vector of the Karate-club graph


In [213]:
G=nx.karate_club_graph()
G = nx.Graph(G)

In [214]:
f = nx.fiedler_vector(G)
print f


[ 0.11213743  0.04128789 -0.02321896  0.05549979  0.28460452  0.32372722
  0.32372722  0.05258601 -0.05160128 -0.09280089  0.28460452  0.21099295
  0.10946132  0.01474198 -0.1627508  -0.1627508   0.42276533  0.10018141
 -0.1627508   0.01363713 -0.1627508   0.10018141 -0.1627508  -0.15569456
 -0.15302557 -0.16096291 -0.18710953 -0.12766404 -0.0951523  -0.1676503
 -0.07349964 -0.09875343 -0.13034546 -0.11890326]

In [215]:
s = np.zeros(len(f))
s[f>0]=1
s = s.astype(int)
#s = s.tolist()
print s, type(s)


[1 1 0 1 1 1 1 1 0 0 1 1 1 1 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0] <type 'numpy.ndarray'>

In [216]:
colors = ['#d7191c', '#2b83ba']
node_colors = [ colors[s[v]] for v in G.nodes()]
node_colors = ['#d7191c' if f[i] < 0 else '#2b83ba' for i, v in enumerate(G.nodes())]
nx.draw(G, node_color=node_colors, with_labels='True')


Exploring the Fiedler vector of a union of noisy cliques


In [182]:
from numpy.random import RandomState

def generate_noisy_subcliques(nodes_per_clique, inside_p, across_p, min_node_label=0, seed=None):
    """Generates a graph which consists of small cliques connected with each other.
    The noise within a clique and across cliques can be set by the `inside_p` and 
    `across_p` parameters respectively.
    
    
    Parameters
    ----------
    nodes_per_clique : list
        The size of this list corresponds to the number of cliques that will be
        generated. The value of each element will be the size of the corresponding 
        clique.
        
    inside_p : float
        The probability of an edge inside a clique. The higher this number, the more 
        each clique will resemble a fully connected graph.
        
    across_p : float
        The probability of an edge across cliques.
        
    min_node_label : int, default is 0
        The minimum node label of the graph.
        
    seed : int, default is None
        The seed to the pseudorandom number generator.
        
    
    Returns
    -------
    G : networkX graph
        The generated graph.
    """

    prng = RandomState(seed)
    clique_list = []
    number_of_cliques = len(nodes_per_clique)

    # Make the independent cliques
    starting_node = min_node_label
    for clique in range(number_of_cliques):
        G = nx.Graph()
        for u in range(starting_node, starting_node + nodes_per_clique[clique]):
            for v in range(u + 1, starting_node + nodes_per_clique[clique]):
                if prng.rand() < inside_p:
                    G.add_edge(u, v)
        clique_list.append(G)
        starting_node += nodes_per_clique[clique]

    # Combine them in one graph
    G = nx.Graph()
    for clique in range(number_of_cliques):
        G.add_edges_from(clique_list[clique].edges())

    # Connect edges across the cliques
    for i in range(number_of_cliques):
        clique_from = clique_list[i]
        for j in range(i + 1, number_of_cliques):
            clique_to = clique_list[j]
            for u in clique_from.nodes():
                for v in clique_to.nodes():
                    if prng.rand() < across_p:
                        G.add_edge(u, v)
    return G

In [217]:
nodes_per_clique = [10, 10, 10]
across_p = 0.05
inside_p = 0.9
cliques = generate_noisy_subcliques(nodes_per_clique, inside_p, across_p)
nx.draw(cliques, with_labels=True)



In [218]:
f = nx.fiedler_vector(cliques)
print f


[-0.13172645 -0.13048803 -0.09405882 -0.09853715 -0.13659028 -0.13134603
 -0.13475063 -0.13111461 -0.06175771 -0.12851544  0.24984342  0.21405709
  0.24747274  0.28613525  0.24545554  0.28613525  0.29155544  0.25183293
  0.24523071  0.24304742 -0.1194048  -0.11421938 -0.15436763 -0.15217566
 -0.11512428 -0.15440879 -0.15473606 -0.11424198 -0.14879327 -0.15440879]

In [219]:
s = np.zeros(len(f))
s[f>0]=1
s = s.astype(int)
s = s.tolist()
print s, type(s)


[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] <type 'list'>

In [220]:
colors = ['#d7191c', '#2b83ba']
node_colors = [ colors[s[v]] for v in cliques.nodes()]
nx.draw(cliques, node_color=node_colors, with_labels='True')


Beyond the Fiedler vector


In [221]:
L = nx.laplacian_matrix(cliques).astype(float)
w,v = sp.sparse.linalg.eigsh(L, k = 3, which='SM')

In [222]:
print w
print v


[ -1.81054597e-15   1.05050559e+00   1.16157956e+00]
[[ 0.18257419  0.13172645  0.25630534]
 [ 0.18257419  0.13048803  0.21513737]
 [ 0.18257419  0.09405882  0.23177457]
 [ 0.18257419  0.09853715  0.17996764]
 [ 0.18257419  0.13659028  0.26768062]
 [ 0.18257419  0.13134603  0.26236449]
 [ 0.18257419  0.13475063  0.20881118]
 [ 0.18257419  0.13111461  0.25485412]
 [ 0.18257419  0.06175771  0.2133549 ]
 [ 0.18257419  0.12851544  0.17508414]
 [ 0.18257419 -0.24984342 -0.03384428]
 [ 0.18257419 -0.21405709 -0.00180997]
 [ 0.18257419 -0.24747274  0.00647296]
 [ 0.18257419 -0.28613525 -0.01315664]
 [ 0.18257419 -0.24545554 -0.03443766]
 [ 0.18257419 -0.28613525 -0.01315664]
 [ 0.18257419 -0.29155544 -0.01674384]
 [ 0.18257419 -0.25183293  0.01551845]
 [ 0.18257419 -0.24523071 -0.04008765]
 [ 0.18257419 -0.24304742  0.01496132]
 [ 0.18257419  0.1194048  -0.14872245]
 [ 0.18257419  0.11421938 -0.22193482]
 [ 0.18257419  0.15436763 -0.24215991]
 [ 0.18257419  0.15217566 -0.19113596]
 [ 0.18257419  0.11512428 -0.18524937]
 [ 0.18257419  0.15440879 -0.2431487 ]
 [ 0.18257419  0.15473606 -0.25089925]
 [ 0.18257419  0.11424198 -0.2225091 ]
 [ 0.18257419  0.14879327 -0.20014217]
 [ 0.18257419  0.15440879 -0.2431487 ]]

In [223]:
print w.shape, v.shape
X = v*w


(3,) (30, 3)

In [224]:
kmeans = KMeans(init='k-means++', n_clusters=3, n_init=10)
kmeans.fit_predict(X)
centroids = kmeans.cluster_centers_
labels = kmeans.labels_
error = kmeans.inertia_

In [225]:
print labels


[0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1]

In [226]:
colors = ['#d7191c', '#ffffbf', '#2b83ba']
node_colors = [ colors[labels[v]] for v in cliques.nodes()]
nx.draw(cliques, node_color=node_colors, with_labels='True')



In [181]:
# Code for setting the style of the notebook
from IPython.core.display import HTML
def css_styling():
    styles = open("../theme/custom.css", "r").read()
    return HTML(styles)
css_styling()


---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-181-a3b95627d7cd> in <module>()
      4     styles = open("../theme/custom.css", "r").read()
      5     return HTML(styles)
----> 6 css_styling()

<ipython-input-181-a3b95627d7cd> in css_styling()
      2 from IPython.core.display import HTML
      3 def css_styling():
----> 4     styles = open("../theme/custom.css", "r").read()
      5     return HTML(styles)
      6 css_styling()

IOError: [Errno 2] No such file or directory: '../theme/custom.css'