Graph Analysis - II


In [176]:
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import scipy as sp
import seaborn as sns
from scipy.sparse.linalg import eigsh
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler

%matplotlib inline

K-core decomposition of a graph

In [205]:
# Build the graph explicitly so this cell works on a fresh kernel instead of
# depending on a `G` left behind by an earlier session.
# NOTE(review): the original loading line is missing from the notebook; Zachary's
# karate-club graph is inferred from the "Karate-club graph" heading further down
# and from the 34 nodes (0-33) shown in the recorded outputs -- confirm.
G = nx.Graph(nx.karate_club_graph())

print(len(G.nodes()))


In [85]:
# Node degrees, largest first. Wrapping in dict(...) works whether G.degree()
# returns a plain dict (NetworkX 1.x) or a DegreeView (NetworkX 2+).
degree_sequence = sorted(dict(G.degree()).values(), reverse=True)  # degree sequence
dmax = max(degree_sequence)  # largest degree in the graph
print(dmax)


Computing the k-core decomposition of a graph

In [86]:
# Dictionary mapping each node of G to its core number.
core_dec = nx.core_number(G)
# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print(core_dec)

{0: 4, 1: 4, 2: 4, 3: 4, 4: 3, 5: 3, 6: 3, 7: 4, 8: 4, 9: 2, 10: 3, 11: 1, 12: 2, 13: 4, 14: 2, 15: 2, 16: 2, 17: 2, 18: 2, 19: 3, 20: 2, 21: 2, 22: 2, 23: 3, 24: 3, 25: 3, 26: 2, 27: 3, 28: 3, 29: 3, 30: 4, 31: 3, 32: 4, 33: 4}

Plotting the graph; nodes with the same color belong to the same core

In [120]:
# Palette indexed directly by core number: a node whose core number is k is
# drawn with colors[k].
colors = ['#d7191c', '#fdae61', '#ffffbf', '#abdda4', '#2b83ba']
node_colors = []
for v in G.nodes():
    node_colors.append(colors[core_dec[v]])

nx.draw(G, with_labels=True, node_color=node_colors)

Minimum Cuts

In [206]:
# No source/target given: a minimum-cardinality set of edges whose removal
# disconnects G.
cut_edges = nx.minimum_edge_cut(G)
# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print(cut_edges)

[(11, 0)]

In [207]:
# Work on a copy so G stays intact, and actually delete the cut edges from it.
# Without the removal the copy is still connected, so the connected-component
# labelling that follows would only ever find one component.
Gcopy = G.copy()
Gcopy.remove_edges_from(cut_edges)

In [208]:
# Map every node to the 1-based index of the connected component of Gcopy it
# belongs to. The counter used to be set to 1 and never advanced, so all nodes
# received the same label; enumerate() now supplies the running index.
cc = nx.connected_components(Gcopy)
node_set = {}
for i, component in enumerate(cc, start=1):
    for node in component:
        node_set[node] = i
print(node_set)

{0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 2, 12: 1, 13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 1, 33: 1}

In [209]:
# Two-colour palette, one colour per side of the cut.
colors = ['#d7191c', '#2b83ba']
# Component labels in node_set start at 1, hence the -1 when indexing the palette.
node_colors = [colors[node_set[v] - 1] for v in G.nodes()]
# with_labels expects a boolean; the string 'True' only worked because any
# non-empty string is truthy.
nx.draw(G, node_color=node_colors, with_labels=True)

In [210]:
# Minimum-cardinality set of edges whose removal separates node 0 from node 33.
cut_edges = nx.minimum_edge_cut(G, s=0, t=33)
# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print(cut_edges)

set([(13, 33), (19, 33), (2, 28), (1, 30), (2, 32), (0, 31), (2, 8), (0, 8), (2, 27), (9, 33)])

In [211]:
# Fresh copy of G with the current cut edges deleted, so that the two sides of
# the cut show up as separate connected components. G itself is left untouched.
Gcopy = G.copy()
Gcopy.remove_edges_from(cut_edges)

In [212]:
# Label each node with the 1-based index of its connected component in Gcopy.
# Labels used to start at 0 while the palette lookup subtracted 1, so the first
# component was coloured through the negative index colors[-1]; starting the
# enumeration at 1 makes the lookup a plain 0-based index.
cc = nx.connected_components(Gcopy)
node_set = {}
for i, component in enumerate(cc, start=1):
    for node in component:
        node_set[node] = i
colors = ['#d7191c', '#2b83ba']
node_colors = [colors[node_set[v] - 1] for v in G.nodes()]
# with_labels expects a boolean, not the string 'True'.
nx.draw(G, node_color=node_colors, with_labels=True)

Graph spectral clustering

Exploring the Fiedler vector of the Karate-club graph

In [213]:
# Rebuild G as a plain nx.Graph from the graph that is already bound to G.
# NOTE(review): this depends on G having been defined by an earlier cell; it
# does not load any data itself -- confirm that is the intent.
G = nx.Graph(G)

In [214]:
# Fiedler vector of G: one real entry per node.
f = nx.fiedler_vector(G)
# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print(f)

[ 0.11213743  0.04128789 -0.02321896  0.05549979  0.28460452  0.32372722
  0.32372722  0.05258601 -0.05160128 -0.09280089  0.28460452  0.21099295
  0.10946132  0.01474198 -0.1627508  -0.1627508   0.42276533  0.10018141
 -0.1627508   0.01363713 -0.1627508   0.10018141 -0.1627508  -0.15569456
 -0.15302557 -0.16096291 -0.18710953 -0.12766404 -0.0951523  -0.1676503
 -0.07349964 -0.09875343 -0.13034546 -0.11890326]

In [215]:
# Split the nodes by the sign of their Fiedler-vector entry: 1 where the entry
# is positive, 0 otherwise. The thresholding step was missing, which left s
# all zeros and put every node in the same group.
s = (np.asarray(f) > 0).astype(int)
# %-formatting reproduces the "value type" layout on both Python 2 and Python 3.
print("%s %s" % (s, type(s)))

[1 1 0 1 1 1 1 1 0 0 1 1 1 1 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0] <type 'numpy.ndarray'>

In [216]:
# Colour nodes by the sign of their Fiedler-vector entry. Entries of f are
# matched to nodes by position in G.nodes(), not by node label.
# (A first node_colors assignment that indexed s by node label was dead code,
# immediately overwritten, and has been dropped.)
colors = ['#d7191c', '#2b83ba']
node_colors = [colors[0] if f[i] < 0 else colors[1] for i, _ in enumerate(G.nodes())]
# with_labels expects a boolean, not the string 'True'.
nx.draw(G, node_color=node_colors, with_labels=True)

Exploring the Fiedler vector of a union of noisy cliques

In [182]:
from numpy.random import RandomState

def generate_noisy_subcliques(nodes_per_clique, inside_p, across_p, min_node_label=0, seed=None):
    """Generates a graph which consists of small cliques connected with each other.

    The noise within a clique and across cliques can be set by the `inside_p` and
    `across_p` parameters respectively.

    Parameters
    ----------
    nodes_per_clique : list
        The size of this list corresponds to the number of cliques that will be
        generated. The value of each element will be the size of the corresponding
        clique.
    inside_p : float
        The probability of an edge inside a clique. The higher this number, the more
        each clique will resemble a fully connected graph.
    across_p : float
        The probability of an edge across cliques.
    min_node_label : int, default is 0
        The minimum node label of the graph.
    seed : int, default is None
        The seed to the pseudorandom number generator.

    Returns
    -------
    G : networkX graph
        The generated graph.
    """
    prng = RandomState(seed)
    clique_list = []

    # Make the independent cliques
    starting_node = min_node_label
    for clique_size in nodes_per_clique:
        end_node = starting_node + clique_size
        clique = nx.Graph()
        # Add the nodes explicitly: a node that happens to draw no intra-clique
        # edge must still be part of the graph, and adding them up front keeps
        # the nodes in label order.
        clique.add_nodes_from(range(starting_node, end_node))
        for u in range(starting_node, end_node):
            for v in range(u + 1, end_node):
                if prng.rand() < inside_p:
                    clique.add_edge(u, v)
        clique_list.append(clique)
        starting_node = end_node

    # Combine them in one graph
    G = nx.Graph()
    for clique in clique_list:
        G.add_nodes_from(clique.nodes())
        G.add_edges_from(clique.edges())

    # Connect edges across the cliques: every pair of nodes taken from two
    # different cliques is joined with probability across_p.
    number_of_cliques = len(clique_list)
    for i in range(number_of_cliques):
        clique_from = clique_list[i]
        for j in range(i + 1, number_of_cliques):
            clique_to = clique_list[j]
            for u in clique_from.nodes():
                for v in clique_to.nodes():
                    if prng.rand() < across_p:
                        G.add_edge(u, v)
    return G

In [217]:
# Three groups of ten nodes: dense inside each group, sparse between groups.
nodes_per_clique = [10, 10, 10]
across_p = 0.05
inside_p = 0.9
# Pass a fixed seed so that re-running the notebook regenerates the same graph;
# without it every run produces a different random graph.
cliques = generate_noisy_subcliques(nodes_per_clique, inside_p, across_p, seed=0)
nx.draw(cliques, with_labels=True)

In [218]:
# Fiedler vector of the noisy-cliques graph: one real entry per node.
f = nx.fiedler_vector(cliques)
# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print(f)

[-0.13172645 -0.13048803 -0.09405882 -0.09853715 -0.13659028 -0.13134603
 -0.13475063 -0.13111461 -0.06175771 -0.12851544  0.24984342  0.21405709
  0.24747274  0.28613525  0.24545554  0.28613525  0.29155544  0.25183293
  0.24523071  0.24304742 -0.1194048  -0.11421938 -0.15436763 -0.15217566
 -0.11512428 -0.15440879 -0.15473606 -0.11424198 -0.14879327 -0.15440879]

In [219]:
# Group indicator from the sign of the Fiedler vector: 1 where the entry is
# positive, 0 otherwise, as a plain Python list. The thresholding step was
# missing, which left s all zeros.
s = (np.asarray(f) > 0).astype(int).tolist()
# %-formatting reproduces the "value type" layout on both Python 2 and Python 3.
print("%s %s" % (s, type(s)))

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] <type 'list'>

In [220]:
colors = ['#d7191c', '#2b83ba']
# s has one entry per node, in the same order as f; pair colours with nodes by
# position rather than using the node label as an index into s.
node_colors = [colors[group] for group in s]
# with_labels expects a boolean, not the string 'True'.
nx.draw(cliques, node_color=node_colors, with_labels=True)

Beyond the Fiedler vector

In [221]:
# Laplacian of the cliques graph as a float sparse matrix, then three of its
# eigenpairs selected with which='SM' (smallest magnitude): w holds the
# eigenvalues and the columns of v the corresponding eigenvectors.
L = nx.laplacian_matrix(cliques).astype(float)
# eigsh is imported by name: reaching it through `sp.sparse.linalg` only works
# when some other import has already loaded that submodule.
w, v = eigsh(L, k=3, which='SM')

In [222]:
# Show the eigenvalues and then the eigenvector matrix.
# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print(w)
print(v)

[ -1.81054597e-15   1.05050559e+00   1.16157956e+00]
[[ 0.18257419  0.13172645  0.25630534]
 [ 0.18257419  0.13048803  0.21513737]
 [ 0.18257419  0.09405882  0.23177457]
 [ 0.18257419  0.09853715  0.17996764]
 [ 0.18257419  0.13659028  0.26768062]
 [ 0.18257419  0.13134603  0.26236449]
 [ 0.18257419  0.13475063  0.20881118]
 [ 0.18257419  0.13111461  0.25485412]
 [ 0.18257419  0.06175771  0.2133549 ]
 [ 0.18257419  0.12851544  0.17508414]
 [ 0.18257419 -0.24984342 -0.03384428]
 [ 0.18257419 -0.21405709 -0.00180997]
 [ 0.18257419 -0.24747274  0.00647296]
 [ 0.18257419 -0.28613525 -0.01315664]
 [ 0.18257419 -0.24545554 -0.03443766]
 [ 0.18257419 -0.28613525 -0.01315664]
 [ 0.18257419 -0.29155544 -0.01674384]
 [ 0.18257419 -0.25183293  0.01551845]
 [ 0.18257419 -0.24523071 -0.04008765]
 [ 0.18257419 -0.24304742  0.01496132]
 [ 0.18257419  0.1194048  -0.14872245]
 [ 0.18257419  0.11421938 -0.22193482]
 [ 0.18257419  0.15436763 -0.24215991]
 [ 0.18257419  0.15217566 -0.19113596]
 [ 0.18257419  0.11512428 -0.18524937]
 [ 0.18257419  0.15440879 -0.2431487 ]
 [ 0.18257419  0.15473606 -0.25089925]
 [ 0.18257419  0.11424198 -0.2225091 ]
 [ 0.18257419  0.14879327 -0.20014217]
 [ 0.18257419  0.15440879 -0.2431487 ]]

In [223]:
# %-formatting reproduces the "shape shape" layout on both Python 2 and Python 3.
print("%s %s" % (w.shape, v.shape))
# Broadcasting multiplies column j of v by eigenvalue w[j].
X = v * w

(3,) (30, 3)

In [224]:
# Cluster the rows of X (one row per node) into three groups.
kmeans = KMeans(init='k-means++', n_clusters=3, n_init=10)
# The estimator has to be fitted before cluster_centers_, labels_ and inertia_
# exist; reading them from an unfitted KMeans raises AttributeError.
kmeans.fit(X)
centroids = kmeans.cluster_centers_
labels = kmeans.labels_
error = kmeans.inertia_

In [225]:
# Cluster index assigned to each row of X.
# Parenthesised single-argument print behaves the same on Python 2 and Python 3.
print(labels)

[0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1]

In [226]:
# One colour per k-means cluster.
colors = ['#d7191c', '#ffffbf', '#2b83ba']
# labels has one entry per row of the clustered matrix; pair colours with nodes
# by position rather than using the node label as an index into labels.
node_colors = [colors[label] for label in labels]
# with_labels expects a boolean, not the string 'True'.
nx.draw(cliques, node_color=node_colors, with_labels=True)

