In [2]:
from collections import defaultdict
import os
import sys

PROJ_ROOT = os.path.join(os.pardir)

# add local python functions
sys.path.append(os.path.join(PROJ_ROOT, "src"))

import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import distance
from scipy.cluster import hierarchy
import networkx as nx

In [4]:
%load_ext watermark

In [6]:
%watermark -a "Arya D. McCarthy" -d -t -v -p networkx -g


Arya D. McCarthy 2017-03-07 00:16:53 

CPython 3.5.3
IPython 5.3.0

networkx 1.11
Git hash: 9a2a7f362aa2c544a43425934b1bd131d4f4c9a3

In [18]:
G_ = nx.read_pajek("../data/raw/dolphins.paj")
G = nx.Graph(G_)
G = nx.convert_node_labels_to_integers(G, label_attribute="name")

In [19]:
def create_hc(G):
    """Creates hierarchical cluster of graph from distance matrix.
    
    Parameters
    ----------
    G : nx.Graph
        The graph to cluster.
        
    Returns
    -------
    lst : list
        The partition values.
    """
    path_length = nx.all_pairs_shortest_path_length(G)
    distances = np.zeros((len(G), len(G)))
    for u, p in path_length.items():
        for v, d in p.items():
            distances[u, v] = d
    print(distances)
    # Create hierarchical cluster.
    Y = distance.squareform(distances)
    Z = hierarchy.complete(Y)
    # This partition selection is arbitrary.
    membership = list(hierarchy.fcluster(Z, t=1.15))
    # Create collection of lists for blockmodel
    partition = defaultdict(list)
    for n, p in zip(list(range(len(G))), membership):
        partition[p].append(n)
    return list(partition.values())

In [20]:
from sklearn.cluster import DBSCAN

In [40]:
path_length = nx.all_pairs_shortest_path_length(G)
distances = np.zeros((len(G), len(G)))
print(distances)
for (u, p) in path_length.items():
    for (v, d) in p.items():
        distances[u][v] = d

# Create hierarchical cluster.
Y = distance.squareform(distances)


[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]

In [24]:
distances


Out[24]:
array([[ 0.,  4.,  3., ...,  4.,  3.,  3.],
       [ 4.,  0.,  4., ...,  4.,  5.,  2.],
       [ 3.,  4.,  0., ...,  2.,  3.,  3.],
       ..., 
       [ 4.,  4.,  2., ...,  0.,  5.,  3.],
       [ 3.,  5.,  3., ...,  5.,  0.,  4.],
       [ 3.,  2.,  3., ...,  3.,  4.,  0.]])

In [34]:
from scipy.sparse import csr_matrix
distances = csr_matrix(distances)

In [38]:
from scipy.cluster.hierarchy import complete

In [43]:
Z = complete(distances)
Z


Out[43]:
array([[  32.        ,   45.        ,    2.64575131,    2.        ],
       [  54.        ,   55.        ,    2.82842712,    2.        ],
       [   8.        ,   18.        ,    2.82842712,    2.        ],
       [  12.        ,   36.        ,    3.31662479,    2.        ],
       [   3.        ,   20.        ,    4.        ,    2.        ],
       [   9.        ,   56.        ,    4.        ,    2.        ],
       [   7.        ,   19.        ,    4.12310563,    2.        ],
       [  35.        ,   66.        ,    4.12310563,    3.        ],
       [  50.        ,   52.        ,    4.12310563,    2.        ],
       [  15.        ,   27.        ,    4.24264069,    2.        ],
       [  37.        ,   44.        ,    4.35889894,    2.        ],
       [  47.        ,   61.        ,    4.47213595,    2.        ],
       [  46.        ,   69.        ,    4.47213595,    4.        ],
       [   2.        ,   13.        ,    4.58257569,    2.        ],
       [  41.        ,   49.        ,    4.89897949,    2.        ],
       [  30.        ,   68.        ,    4.89897949,    3.        ],
       [  14.        ,   62.        ,    4.89897949,    3.        ],
       [   4.        ,   25.        ,    5.        ,    2.        ],
       [  22.        ,   53.        ,    5.19615242,    2.        ],
       [  33.        ,   72.        ,    5.19615242,    3.        ],
       [  43.        ,   65.        ,    5.19615242,    3.        ],
       [  10.        ,   40.        ,    5.29150262,    2.        ],
       [  48.        ,   64.        ,    5.29150262,    3.        ],
       [  31.        ,   74.        ,    5.56776436,    5.        ],
       [  73.        ,   79.        ,    5.56776436,    4.        ],
       [  17.        ,   57.        ,    5.65685425,    2.        ],
       [   0.        ,   39.        ,    5.83095189,    2.        ],
       [  24.        ,   51.        ,    5.91607978,    2.        ],
       [  38.        ,   82.        ,    5.91607978,    4.        ],
       [  59.        ,   63.        ,    6.        ,    3.        ],
       [  42.        ,   76.        ,    6.08276253,    3.        ],
       [  16.        ,   29.        ,    6.244998  ,    2.        ],
       [  11.        ,   23.        ,    6.244998  ,    2.        ],
       [  21.        ,   70.        ,    6.4807407 ,    3.        ],
       [  81.        ,   93.        ,    6.8556546 ,    5.        ],
       [  71.        ,   85.        ,    6.8556546 ,    7.        ],
       [  67.        ,   78.        ,    7.        ,    5.        ],
       [  34.        ,   86.        ,    7.07106781,    5.        ],
       [   1.        ,   95.        ,    7.07106781,    4.        ],
       [  89.        ,   92.        ,    7.28010989,    5.        ],
       [  58.        ,  100.        ,    7.34846923,    5.        ],
       [  28.        ,   94.        ,    7.54983444,    3.        ],
       [  80.        ,   83.        ,    7.74596669,    4.        ],
       [  90.        ,   99.        ,    7.87400787,    9.        ],
       [  26.        ,   91.        ,    8.        ,    4.        ],
       [  84.        ,   98.        ,    8.18535277,    8.        ],
       [   5.        ,   96.        ,    8.30662386,    6.        ],
       [  75.        ,  101.        ,    8.60232527,    7.        ],
       [  60.        ,   77.        ,    8.71779789,    4.        ],
       [  87.        ,  107.        ,    9.53939201,   10.        ],
       [  97.        ,  105.        ,    9.8488578 ,   16.        ],
       [  88.        ,  104.        ,   10.24695077,    6.        ],
       [ 108.        ,  110.        ,   10.81665383,   10.        ],
       [ 103.        ,  112.        ,   10.90871211,   19.        ],
       [ 109.        ,  113.        ,   12.08304597,   13.        ],
       [ 102.        ,  106.        ,   12.32882801,    9.        ],
       [ 115.        ,  117.        ,   14.03566885,   28.        ],
       [   6.        ,  111.        ,   15.84297952,   11.        ],
       [ 114.        ,  116.        ,   18.70828693,   23.        ],
       [ 118.        ,  120.        ,   24.41311123,   51.        ],
       [ 119.        ,  121.        ,   33.24154028,   62.        ]])

In [45]:
hierarchy.dendrogram(Z)
plt.show()



In [49]:
Z
import pandas as pd
data = pd.DataFrame(Z, columns=['child1', 'child2', 'distance', 'cluster_size'])

In [50]:
data.dtypes


Out[50]:
child1          float64
child2          float64
distance        float64
cluster_size    float64
dtype: object

In [51]:
data.describe()


Out[51]:
child1 child2 distance cluster_size
count 61.000000 61.000000 61.000000 61.000000
mean 48.344262 72.655738 7.573316 6.967213
std 36.301922 30.059322 5.076642 10.595545
min 0.000000 13.000000 2.645751 2.000000
25% 16.000000 52.000000 4.898979 2.000000
50% 41.000000 74.000000 6.082763 3.000000
75% 80.000000 98.000000 8.185353 6.000000
max 119.000000 121.000000 33.241540 62.000000

In [57]:
ffg = nx.convert_node_labels_to_integers(nx.florentine_families_graph(), first_label=1)

In [63]:
%matplotlib inline

In [61]:
%pylab --no-import-all


Using matplotlib backend: MacOSX
Populating the interactive namespace from numpy and matplotlib

In [72]:
nx.draw_spectral(nx.florentine_families_graph(), iterations=100)


/Users/Arya/anaconda/envs/thesis/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py:126: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  b = plt.ishold()
/Users/Arya/anaconda/envs/thesis/lib/python3.5/site-packages/networkx/drawing/nx_pylab.py:138: MatplotlibDeprecationWarning: pyplot.hold is deprecated.
    Future behavior will be consistent with the long-time default:
    plot commands add elements without first clearing the
    Axes and/or Figure.
  plt.hold(b)
/Users/Arya/anaconda/envs/thesis/lib/python3.5/site-packages/matplotlib/__init__.py:917: UserWarning: axes.hold is deprecated. Please remove it from your matplotlibrc and/or style files.
  warnings.warn(self.msg_depr_set % key)
/Users/Arya/anaconda/envs/thesis/lib/python3.5/site-packages/matplotlib/rcsetup.py:152: UserWarning: axes.hold is deprecated, will be removed in 3.0
  warnings.warn("axes.hold is deprecated, will be removed in 3.0")

In [ ]: