# Comparing Front-End and Back-End Clustering

``````

In [3]:

import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform

``````
``````

In [4]:

``````
``````

In [5]:

# Pull the underlying NumPy array out of `df` and report its shape.
# NOTE(review): `df` is not defined in any cell visible here — presumably it was
# loaded in the (now empty) In [4] cell; confirm before a Restart & Run All.
inst_dm = df.values
print(inst_dm.shape)

``````
``````

(38, 38)

``````
``````

In [6]:

# Display the array for a visual check (the recorded output shows zeros on the
# visible diagonal entries and mirrored off-diagonal values).
inst_dm

``````
``````

Out[6]:

array([[ 0.        ,  1.13819232,  1.0733403 , ...,  0.94640097,
0.94702352,  0.95850345],
[ 1.13819232,  0.        ,  0.93692049, ...,  1.06063839,
0.81090475,  1.03730952],
[ 1.0733403 ,  0.93692049,  0.        , ...,  1.03371267,
1.05430428,  1.09115233],
...,
[ 0.94640097,  1.06063839,  1.03371267, ...,  0.        ,
1.08551295,  0.92644981],
[ 0.94702352,  0.81090475,  1.05430428, ...,  1.08551295,
0.        ,  1.14962644],
[ 0.95850345,  1.03730952,  1.09115233, ...,  0.92644981,
1.14962644,  0.        ]])

``````
``````

In [7]:

import scipy.cluster.hierarchy as hier

# Convert the square distance matrix to the condensed vector form.
# NOTE(review): squareform converts square -> condensed AND condensed -> square,
# so re-running this cell flips `inst_dm` back to a matrix; run once per kernel.
inst_dm = squareform(inst_dm)

# Build the linkage matrix. `Y` is used by dendrogram() here and by every
# fcluster() call below, but was never defined in any visible cell, so a fresh
# kernel raised NameError.
# NOTE(review): 'average' linkage is an assumption — confirm the method that
# produced the recorded outputs.
Y = hier.linkage(inst_dm, method='average')

# Compute the dendrogram structure without drawing it.
Z = hier.dendrogram(Y, no_plot=True)

``````
``````

In [8]:

def group_cutoffs():
    """Return the eleven fractional cutoffs 0.0, 0.1, ..., 1.0 as a list of floats."""
    return [float(step) / 10 for step in range(11)]

``````
``````

In [9]:

# Leaf ordering taken from the dendrogram result.
inst_clust_order = Z['leaves']
# Fractional cutoffs 0.0, 0.1, ..., 1.0.
all_dist = group_cutoffs()

``````
``````

In [10]:

def group_cutoffs():
    """Build the list of fractional cutoffs from 0.0 to 1.0 in steps of one tenth."""
    # NOTE(review): an identical definition already appears in an earlier cell;
    # this one silently shadows it and could be deleted.
    fractions = []
    for tenth in range(11):
        fractions.append(float(tenth) / 10)
    return fractions

``````

``````
groups = {}

for inst_dist in all_dist:
    inst_key = str(inst_dist).replace('.', '')

    cutoff_dist = inst_dist * inst_dm.max()

    groups[inst_key] = hier.fcluster(Y, inst_dist * inst_dm.max(), 'distance')
    groups[inst_key] = groups[inst_key].tolist()

    print(cutoff_dist)
    # print(groups[inst_key])
``````

``````

In [11]:

# Flat cluster labels when the tree is cut at distance 2.5.
hier.fcluster(Y, t=2.5, criterion='distance')

``````
``````

Out[11]:

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

``````
``````

In [12]:

# Flat cluster labels when the tree is cut at distance 0.91322.
hier.fcluster(Y, t=0.91322, criterion='distance')

``````
``````

Out[12]:

array([3, 5, 6, 6, 3, 8, 3, 8, 7, 8, 1, 8, 1, 3, 6, 3, 8, 4, 3, 2, 6, 7, 5,
1, 4, 2, 7, 4, 8, 3, 6, 4, 1, 2, 1, 6, 1, 7], dtype=int32)

``````
``````

In [13]:

# Flat cluster labels when the tree is cut at distance 1.1.
hier.fcluster(Y, t=1.1, criterion='distance')

``````
``````

Out[13]:

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int32)

``````

# Cutoff for the first split into two groups

### 1.07794 gives one group; 1.07793 gives two groups

``````

In [14]:

# Distinct cluster labels at cutoff 1.07794.
sorted(set(hier.fcluster(Y, 1.07794, 'distance')))

``````
``````

Out[14]:

[1]

``````
``````

In [15]:

# Distinct cluster labels at cutoff 1.07793.
sorted(set(hier.fcluster(Y, 1.07793, 'distance')))

``````
``````

Out[15]:

[1, 2]

``````

### 1.06 gives two groups

``````

In [16]:

# Distinct cluster labels at cutoff 1.06.
sorted(set(hier.fcluster(Y, 1.06, 'distance')))

``````
``````

Out[16]:

[1, 2]

``````

### 1.0517 gives two groups

``````

In [17]:

# Distinct cluster labels at cutoff 1.0517.
sorted(set(hier.fcluster(Y, 1.0517, 'distance')))

``````
``````

Out[17]:

[1, 2]

``````

### 1.05 gives three groups

``````

In [18]:

# Distinct cluster labels at cutoff 1.05.
sorted(set(hier.fcluster(Y, 1.05, 'distance')))

``````
``````

Out[18]:

[1, 2, 3]

``````

### 1.04 gives 4 groups

``````

In [19]:

# Distinct cluster labels at cutoff 1.04.
sorted(set(hier.fcluster(Y, 1.04, 'distance')))

``````
``````

Out[19]:

[1, 2, 3, 4]

``````

### 1.03 gives 5 groups

``````

In [20]:

# Distinct cluster labels at cutoff 1.03.
sorted(set(hier.fcluster(Y, 1.03, 'distance')))

``````
``````

Out[20]:

[1, 2, 3, 4, 5]

``````
``````

In [ ]:

``````

### 0.07 gives 37 groups

``````

In [21]:

# Distinct cluster labels at cutoff 0.07.
sorted(set(hier.fcluster(Y, 0.07, 'distance')))

``````
``````

Out[21]:

[1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37]

``````

### 0.70 gives 22 groups

``````

In [22]:

# Distinct cluster labels at cutoff 0.70.
sorted(set(hier.fcluster(Y, 0.70, 'distance')))

``````
``````

Out[22]:

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]

``````

### 0.01 gives 38 groups

``````

In [23]:

# Distinct cluster labels at cutoff 0.01.
sorted(set(hier.fcluster(Y, 0.01, 'distance')))

``````
``````

Out[23]:

[1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38]

``````
``````

In [ ]:

``````
``````

In [ ]:

``````