In [3]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform
In [4]:
# Read the tab-delimited file; header=None keeps the first row as data
# instead of promoting it to column names.
df = pd.read_csv(
    'output_1.13819231887',
    header=None,
    sep='\t',
)
In [5]:
# Take the DataFrame's underlying array and report its dimensions.
inst_dm = df.values
print(np.shape(inst_dm))
In [6]:
# Echo the array taken from df.values as a quick visual check.
inst_dm
Out[6]:
In [7]:
# NOTE(review): this import belongs in the notebook's first import cell; it is
# kept here so that this cell still runs on its own.
import scipy.cluster.hierarchy as hier

linkage_type = 'average'

# hier.linkage expects a condensed (1-D) distance vector. squareform() toggles
# between the square and the condensed form, so calling it unconditionally
# made this cell non-idempotent: on a second run inst_dm would be expanded
# back into a square matrix, which linkage would then interpret as raw
# observation vectors rather than as distances.
if inst_dm.ndim == 2:
    inst_dm = squareform(inst_dm)

# Y: linkage matrix produced by hierarchical clustering with `linkage_type`.
Y = hier.linkage(inst_dm, method=linkage_type)
# Z: dendrogram description (including the leaf order) computed without plotting.
Z = hier.dendrogram(Y, no_plot=True)
In [8]:
def group_cutoffs(num_steps=10):
    """Return evenly spaced cutoff values covering the range 0.0 to 1.0.

    Parameters
    ----------
    num_steps : int, optional
        Number of equal intervals the unit range is divided into. The
        default of 10 yields the cutoffs 0.0, 0.1, ..., 1.0.

    Returns
    -------
    list of float
        ``num_steps + 1`` cutoffs in ascending order, starting at 0.0 and
        ending at 1.0.

    Raises
    ------
    ValueError
        If ``num_steps`` is smaller than 1.
    """
    if num_steps < 1:
        raise ValueError('num_steps must be a positive integer')
    # float() guarantees true division regardless of the integer-division rules in effect.
    return [float(i) / num_steps for i in range(num_steps + 1)]
In [9]:
# Candidate cutoff values returned by group_cutoffs().
all_dist = group_cutoffs()
# Leaf ordering taken from the dendrogram description.
inst_clust_order = Z['leaves']
In [10]:
# NOTE(review): group_cutoffs is already defined in an earlier cell of this
# notebook; this cell re-defines it and one of the two should be deleted.
def group_cutoffs(num_steps=10):
    """Return evenly spaced cutoff values covering the range 0.0 to 1.0.

    Parameters
    ----------
    num_steps : int, optional
        Number of equal intervals the unit range is divided into. The
        default of 10 yields the cutoffs 0.0, 0.1, ..., 1.0.

    Returns
    -------
    list of float
        ``num_steps + 1`` cutoffs in ascending order, starting at 0.0 and
        ending at 1.0.

    Raises
    ------
    ValueError
        If ``num_steps`` is smaller than 1.
    """
    if num_steps < 1:
        raise ValueError('num_steps must be a positive integer')
    # float() guarantees true division regardless of the integer-division rules in effect.
    return [float(i) / num_steps for i in range(num_steps + 1)]
In [11]:
# Flat cluster label for every observation when the tree is cut at distance 2.5.
hier.fcluster(Y, t=2.5, criterion='distance')
Out[11]:
In [12]:
# Flat cluster label for every observation when the tree is cut at distance 0.91322.
hier.fcluster(Y, t=0.91322, criterion='distance')
Out[12]:
In [13]:
# Flat cluster label for every observation when the tree is cut at distance 1.1.
hier.fcluster(Y, t=1.1, criterion='distance')
Out[13]:
In [14]:
# Sorted distinct flat-cluster labels at cutoff 1.07794 (list length = number of clusters).
sorted(set(hier.fcluster(Y, t=1.07794, criterion='distance')))
Out[14]:
In [15]:
# Sorted distinct flat-cluster labels at cutoff 1.07793 (list length = number of clusters).
sorted(set(hier.fcluster(Y, t=1.07793, criterion='distance')))
Out[15]:
In [16]:
# Sorted distinct flat-cluster labels at cutoff 1.06 (list length = number of clusters).
sorted(set(hier.fcluster(Y, t=1.06, criterion='distance')))
Out[16]:
In [17]:
# Sorted distinct flat-cluster labels at cutoff 1.0517 (list length = number of clusters).
sorted(set(hier.fcluster(Y, t=1.0517, criterion='distance')))
Out[17]:
In [18]:
# Sorted distinct flat-cluster labels at cutoff 1.05 (list length = number of clusters).
sorted(set(hier.fcluster(Y, t=1.05, criterion='distance')))
Out[18]:
In [19]:
# Sorted distinct flat-cluster labels at cutoff 1.04 (list length = number of clusters).
sorted(set(hier.fcluster(Y, t=1.04, criterion='distance')))
Out[19]:
In [20]:
# Sorted distinct flat-cluster labels at cutoff 1.03 (list length = number of clusters).
sorted(set(hier.fcluster(Y, t=1.03, criterion='distance')))
Out[20]:
In [ ]:
In [21]:
# Sorted distinct flat-cluster labels at cutoff 0.07 (list length = number of clusters).
# NOTE(review): 0.07 is far below the ~1.0 cutoffs probed in the preceding cells and
# the next cell uses 0.70 -- confirm this value is intentional and not a typo.
sorted(set(hier.fcluster(Y, t=0.07, criterion='distance')))
Out[21]:
In [22]:
# Sorted distinct flat-cluster labels at cutoff 0.70 (list length = number of clusters).
sorted(set(hier.fcluster(Y, t=0.70, criterion='distance')))
Out[22]:
In [23]:
# Sorted distinct flat-cluster labels at cutoff 0.01 (list length = number of clusters).
sorted(set(hier.fcluster(Y, t=0.01, criterion='distance')))
Out[23]:
In [ ]:
In [ ]: