# Graph Analysis - II

## Imports

``````

In [176]:

import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import networkx as nx
from sklearn.cluster import KMeans

%matplotlib inline

``````

### K-core decomposition of a graph

``````

In [205]:

G=nx.karate_club_graph()
G = nx.Graph(G)

print len(G.nodes())

``````
``````

34

``````
``````

In [85]:

degree_sequence=sorted(nx.degree(G).values(),reverse=True) # degree sequence
dmax=max(degree_sequence)
print dmax

``````
``````

17

``````

Computing the k-core decomposition of a graph

``````

In [86]:

core_dec = nx.core_number(G)
print core_dec

``````
``````

{0: 4, 1: 4, 2: 4, 3: 4, 4: 3, 5: 3, 6: 3, 7: 4, 8: 4, 9: 2, 10: 3, 11: 1, 12: 2, 13: 4, 14: 2, 15: 2, 16: 2, 17: 2, 18: 2, 19: 3, 20: 2, 21: 2, 22: 2, 23: 3, 24: 3, 25: 3, 26: 2, 27: 3, 28: 3, 29: 3, 30: 4, 31: 3, 32: 4, 33: 4}

``````

Plotting the graph; nodes with the same color belong in the same core

``````

In [120]:

colors = ['#d7191c', '#fdae61', '#ffffbf', '#abdda4', '#2b83ba']
node_colors = [ colors[core_dec[v]] for v in G.nodes()]

nx.draw(G, node_color=node_colors, with_labels=True)

``````
``````

``````

### Minimun Cuts

``````

In [206]:

cut_edges = nx.minimum_edge_cut(G)
print cut_edges

``````
``````

[(11, 0)]

``````
``````

In [207]:

Gcopy = G.copy()
Gcopy.remove_edges_from(cut_edges)

``````
``````

In [208]:

cc = nx.connected_components(Gcopy)
node_set = {}
i = 1
for s in cc:
for node in s:
node_set[node] = i
i+=1
print node_set

``````
``````

{0: 1, 1: 1, 2: 1, 3: 1, 4: 1, 5: 1, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 2, 12: 1, 13: 1, 14: 1, 15: 1, 16: 1, 17: 1, 18: 1, 19: 1, 20: 1, 21: 1, 22: 1, 23: 1, 24: 1, 25: 1, 26: 1, 27: 1, 28: 1, 29: 1, 30: 1, 31: 1, 32: 1, 33: 1}

``````
``````

In [209]:

colors = ['#d7191c', '#2b83ba']
node_colors = [ colors[node_set[v]-1] for v in G.nodes()]
nx.draw(G, node_color=node_colors, with_labels='True')

``````
``````

``````
``````

In [210]:

cut_edges = nx.minimum_edge_cut(G, s=0, t=33)
print cut_edges

``````
``````

set([(13, 33), (19, 33), (2, 28), (1, 30), (2, 32), (0, 31), (2, 8), (0, 8), (2, 27), (9, 33)])

``````
``````

In [211]:

Gcopy = G.copy()
Gcopy.remove_edges_from(cut_edges)

``````
``````

In [212]:

cc = nx.connected_components(Gcopy)
node_set = {}
for i, s in enumerate(cc):
for node in s:
node_set[node] = i
colors = ['#d7191c', '#2b83ba']
node_colors = [ colors[node_set[v]-1] for v in G.nodes()]
nx.draw(G, node_color=node_colors, with_labels='True')

``````
``````

``````

### Graph spectral clustering

#### Exploring the Fiedler vector of the Karate-club graph

``````

In [213]:

G=nx.karate_club_graph()
G = nx.Graph(G)

``````
``````

In [214]:

f = nx.fiedler_vector(G)
print f

``````
``````

[ 0.11213743  0.04128789 -0.02321896  0.05549979  0.28460452  0.32372722
0.32372722  0.05258601 -0.05160128 -0.09280089  0.28460452  0.21099295
0.10946132  0.01474198 -0.1627508  -0.1627508   0.42276533  0.10018141
-0.1627508   0.01363713 -0.1627508   0.10018141 -0.1627508  -0.15569456
-0.15302557 -0.16096291 -0.18710953 -0.12766404 -0.0951523  -0.1676503
-0.07349964 -0.09875343 -0.13034546 -0.11890326]

``````
``````

In [215]:

s = np.zeros(len(f))
s[f>0]=1
s = s.astype(int)
#s = s.tolist()
print s, type(s)

``````
``````

[1 1 0 1 1 1 1 1 0 0 1 1 1 1 0 0 1 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0] <type 'numpy.ndarray'>

``````
``````

In [216]:

colors = ['#d7191c', '#2b83ba']
node_colors = [ colors[s[v]] for v in G.nodes()]
node_colors = ['#d7191c' if f[i] < 0 else '#2b83ba' for i, v in enumerate(G.nodes())]
nx.draw(G, node_color=node_colors, with_labels='True')

``````
``````

``````

#### Exploring the Fiedler vector of a union of noisy cliques

``````

In [182]:

from numpy.random import RandomState

def generate_noisy_subcliques(nodes_per_clique, inside_p, across_p, min_node_label=0, seed=None):
"""Generates a graph which consists of small cliques connected with each other.
The noise within a clique and across cliques can be set by the `inside_p` and
`across_p` parameters respectively.

Parameters
----------
nodes_per_clique : list
The size of this list corresponds to the number of cliques that will be
generated. The value of each element will be the size of the corresponding
clique.

inside_p : float
The probability of an edge inside a clique. The higher this number, the more
each clique will resemble a fully connected graph.

across_p : float
The probability of an edge across cliques.

min_node_label : int, default is 0
The minimum node label of the graph.

seed : int, default is None
The seed to the pseudorandom number generator.

Returns
-------
G : networkX graph
The generated graph.
"""

prng = RandomState(seed)
clique_list = []
number_of_cliques = len(nodes_per_clique)

# Make the independent cliques
starting_node = min_node_label
for clique in range(number_of_cliques):
G = nx.Graph()
for u in range(starting_node, starting_node + nodes_per_clique[clique]):
for v in range(u + 1, starting_node + nodes_per_clique[clique]):
if prng.rand() < inside_p:
clique_list.append(G)
starting_node += nodes_per_clique[clique]

# Combine them in one graph
G = nx.Graph()
for clique in range(number_of_cliques):

# Connect edges across the cliques
for i in range(number_of_cliques):
clique_from = clique_list[i]
for j in range(i + 1, number_of_cliques):
clique_to = clique_list[j]
for u in clique_from.nodes():
for v in clique_to.nodes():
if prng.rand() < across_p:
return G

``````
``````

In [217]:

nodes_per_clique = [10, 10, 10]
across_p = 0.05
inside_p = 0.9
cliques = generate_noisy_subcliques(nodes_per_clique, inside_p, across_p)
nx.draw(cliques, with_labels=True)

``````
``````

``````
``````

In [218]:

f = nx.fiedler_vector(cliques)
print f

``````
``````

[-0.13172645 -0.13048803 -0.09405882 -0.09853715 -0.13659028 -0.13134603
-0.13475063 -0.13111461 -0.06175771 -0.12851544  0.24984342  0.21405709
0.24747274  0.28613525  0.24545554  0.28613525  0.29155544  0.25183293
0.24523071  0.24304742 -0.1194048  -0.11421938 -0.15436763 -0.15217566
-0.11512428 -0.15440879 -0.15473606 -0.11424198 -0.14879327 -0.15440879]

``````
``````

In [219]:

s = np.zeros(len(f))
s[f>0]=1
s = s.astype(int)
s = s.tolist()
print s, type(s)

``````
``````

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] <type 'list'>

``````
``````

In [220]:

colors = ['#d7191c', '#2b83ba']
node_colors = [ colors[s[v]] for v in cliques.nodes()]
nx.draw(cliques, node_color=node_colors, with_labels='True')

``````
``````

``````

#### Beyond the Fiedler vector

``````

In [221]:

L = nx.laplacian_matrix(cliques).astype(float)
w,v = sp.sparse.linalg.eigsh(L, k = 3, which='SM')

``````
``````

In [222]:

print w
print v

``````
``````

[ -1.81054597e-15   1.05050559e+00   1.16157956e+00]
[[ 0.18257419  0.13172645  0.25630534]
[ 0.18257419  0.13048803  0.21513737]
[ 0.18257419  0.09405882  0.23177457]
[ 0.18257419  0.09853715  0.17996764]
[ 0.18257419  0.13659028  0.26768062]
[ 0.18257419  0.13134603  0.26236449]
[ 0.18257419  0.13475063  0.20881118]
[ 0.18257419  0.13111461  0.25485412]
[ 0.18257419  0.06175771  0.2133549 ]
[ 0.18257419  0.12851544  0.17508414]
[ 0.18257419 -0.24984342 -0.03384428]
[ 0.18257419 -0.21405709 -0.00180997]
[ 0.18257419 -0.24747274  0.00647296]
[ 0.18257419 -0.28613525 -0.01315664]
[ 0.18257419 -0.24545554 -0.03443766]
[ 0.18257419 -0.28613525 -0.01315664]
[ 0.18257419 -0.29155544 -0.01674384]
[ 0.18257419 -0.25183293  0.01551845]
[ 0.18257419 -0.24523071 -0.04008765]
[ 0.18257419 -0.24304742  0.01496132]
[ 0.18257419  0.1194048  -0.14872245]
[ 0.18257419  0.11421938 -0.22193482]
[ 0.18257419  0.15436763 -0.24215991]
[ 0.18257419  0.15217566 -0.19113596]
[ 0.18257419  0.11512428 -0.18524937]
[ 0.18257419  0.15440879 -0.2431487 ]
[ 0.18257419  0.15473606 -0.25089925]
[ 0.18257419  0.11424198 -0.2225091 ]
[ 0.18257419  0.14879327 -0.20014217]
[ 0.18257419  0.15440879 -0.2431487 ]]

``````
``````

In [223]:

print w.shape, v.shape
X = v*w

``````
``````

(3,) (30, 3)

``````
``````

In [224]:

kmeans = KMeans(init='k-means++', n_clusters=3, n_init=10)
kmeans.fit_predict(X)
centroids = kmeans.cluster_centers_
labels = kmeans.labels_
error = kmeans.inertia_

``````
``````

In [225]:

print labels

``````
``````

[0 0 0 0 0 0 0 0 0 0 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1]

``````
``````

In [226]:

colors = ['#d7191c', '#ffffbf', '#2b83ba']
node_colors = [ colors[labels[v]] for v in cliques.nodes()]
nx.draw(cliques, node_color=node_colors, with_labels='True')

``````
``````

``````
``````

In [181]:

# Code for setting the style of the notebook
from IPython.core.display import HTML
def css_styling():
return HTML(styles)
css_styling()

``````
``````

---------------------------------------------------------------------------
IOError                                   Traceback (most recent call last)
<ipython-input-181-a3b95627d7cd> in <module>()
5     return HTML(styles)
----> 6 css_styling()

<ipython-input-181-a3b95627d7cd> in css_styling()
2 from IPython.core.display import HTML
3 def css_styling():
----> 4     styles = open("../theme/custom.css", "r").read()
5     return HTML(styles)
6 css_styling()

IOError: [Errno 2] No such file or directory: '../theme/custom.css'

``````