In [2]:
from bhc import bhclust
In [3]:
import cppimport
hh = cppimport.imp('helper')
from bhc_fast import bhclust_fast
In [21]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
In [48]:
# Toy data set: 18 two-dimensional points, hard-coded so the example needs no input file.
mdat = np.array([[ 0.93637874, 1.61258974],
[ 1.95192875, 2.84452075],
[ 2.07671748, 3.24442548],
[ 3.122903 , 4.516753 ],
[ 3.56202194, 5.17531994],
[ 3.53211875, 5.75857675],
[ 4.65794237, 6.66995537],
[ 5.83738797, 8.46562797],
[ 6.22595817, 9.28082817],
[ 6.51552067, 9.36110867],
[ 7.24619975, 3.68958775],
[ 6.50554148, 3.69771048],
[ 6.58213752, 4.31283952],
[ 6.02279742, 4.52753342],
[ 5.83280398, 4.85751598],
[ 5.12305078, 4.76874878],
[ 5.0430706 , 5.2911986 ],
[ 2.44081699, 6.35402999]])
In [94]:
# Class labels for the toy points (first 10 rows 'A', remaining 8 rows 'B')
# and the colour used for each class on later dendrogram tick labels.
labm = ['A'] * 10 + ['B'] * 8
labcols = {'A': 'y', 'B':'g'}

# Scatter plot of the toy data with every point annotated by its class label.
plt.figure(figsize=(5.5,5))
plt.scatter(mdat[:,0], mdat[:,1])
for point_label, (px, py) in zip(labm, mdat):
    plt.annotate(point_label, (px, py))
plt.show()
In [7]:
%%timeit
Zm, colorm = bhclust(mdat, family = "multivariate", alpha = 1)
In [8]:
%%timeit
Zm, colorm = bhclust_fast(mdat, family = "multivariate", alpha = 1)
In [57]:
# Side-by-side dendrograms of the toy data: `bhclust_fast` on the left,
# `bhclust` on the right. Each panel is produced by the same steps, so the
# panels are described as (subplot position, clustering function, title).
plt.figure(figsize=(15,6))
toy_panels = [
    (121, bhclust_fast, "BHC(fast)"),
    (122, bhclust, "BHC(original)"),
]
for subplot_pos, cluster_fn, panel_title in toy_panels:
    plt.subplot(subplot_pos)
    Zm, colorm = cluster_fn(mdat, family="multivariate", alpha=1)
    # Links are coloured by looking up the link id in `colorm`.
    dendrogram(Zm, link_color_func=lambda k: colorm[k], labels=labm)
    # Colour every leaf label according to its class.
    for tick in plt.gca().get_xmajorticklabels():
        tick.set_color(labcols[tick.get_text()])
    plt.title(panel_title)
plt.show()
In [95]:
# Stand-alone dendrogram of the toy data clustered with `bhclust_fast`.
plt.figure(figsize=(5.5,5))
Zm, colorm = bhclust_fast(mdat, family="multivariate", alpha=1)
# Links are coloured by looking up the link id in `colorm`.
dendrogram(Zm, link_color_func=lambda k: colorm[k], labels=labm)
# Colour every leaf label according to its class.
for tick in plt.gca().get_xmajorticklabels():
    tick.set_color(labcols[tick.get_text()])
plt.title("BHC(fast)")
plt.show()
In [62]:
# Average-linkage clustering of the toy data, drawn with all links in blue.
plt.figure(figsize=(5.5,5))
Zm3 = linkage(mdat, method='average')
dendrogram(Zm3, link_color_func=lambda k: 'b', labels=labm)
# Colour every leaf label according to its class.
for tick in plt.gca().get_xmajorticklabels():
    tick.set_color(labcols[tick.get_text()])
plt.title("Average linkage")
plt.show()
In [26]:
# Load the spiral data set from a text file; the trailing bare expression
# displays the array's dimensions as the cell output.
spi = np.genfromtxt('../tests/spiral.txt')
spi.shape
Out[26]:
In [18]:
# Scatter plot of the spiral points (columns 0 and 1). To keep the figure
# readable only every fifth point is annotated with the integer in column 2.
plt.scatter(spi[:,0], spi[:,1])
for spiral_row in spi[::5]:
    plt.annotate(spiral_row[2].astype('int'), (spiral_row[0], spiral_row[1]))
plt.show()
In [ ]:
# Scratch cell: the %%timeit magic below is commented out, so each call simply
# runs once. The second call overwrites the Zm/colorm produced by the first.
### %%timeit
Zm, colorm = bhclust(mdat, family = "multivariate", alpha = 1)
Zm, colorm = bhclust_fast(mdat, family = "multivariate", alpha = 1)
In [47]:
# Time a single run of `bhclust_fast` on the first two columns of the spiral data.
%time Zs1, colors1 = bhclust_fast(spi[:,:2], family = "multivariate", alpha = 1, r = 0.1)
In [ ]:
# Time a single run of `bhclust` on the first two columns of the spiral data,
# with the same arguments as the `bhclust_fast` run.
%time Zs2, colors2 = bhclust(spi[:,:2], family = "multivariate", alpha = 1, r = 0.1)
In [20]:
# Leaf labels are the integer class ids in column 2 of the spiral data.
# They are read from `spi`, the array that was clustered; the previous name
# `dd` is not defined anywhere in this notebook and raised NameError on a
# fresh kernel.
labss = spi[:,2].astype('int').tolist()
# Colour per class id, keyed by the tick-label text.
# NOTE(review): assumes the class ids are exactly 1, 2 and 3 — confirm against the data.
labcolss = {'1': 'r', '2':'g', '3':'b'}

# Stacked dendrograms of the spiral data: `bhclust_fast` result on top,
# `bhclust` result below. Each panel is (subplot position, tree, link colours, title).
plt.figure(figsize=(15,15))
spiral_panels = [
    (211, Zs1, colors1, "fast"),
    (212, Zs2, colors2, "original"),
]
for subplot_pos, tree, link_colors, panel_title in spiral_panels:
    plt.subplot(subplot_pos)
    # Links are coloured by looking up the link id in this panel's colour table.
    dendrogram(tree, link_color_func=lambda k: link_colors[k], labels=labss)
    # Colour every leaf label according to its class id.
    for tick in plt.gca().get_xmajorticklabels():
        tick.set_color(labcolss[tick.get_text()])
    plt.title(panel_title)
plt.show()
In [39]:
# Load the aggregation data set from a text file; the trailing bare expression
# displays the array's dimensions as the cell output.
ag = np.genfromtxt('../tests/aggregation.txt')
ag.shape
Out[39]:
In [12]:
# Labels and colours for the aggregation data.
import seaborn as sns

# Integer class ids taken from column 2.
laba = ag[:,2].astype('int').tolist()
# One "Paired" palette colour per class id, keyed by the id formatted as text ("1".."7").
paired_seven = sns.color_palette("Paired", 7)
labcola = {"%d" % (idx + 1): paired_seven[idx] for idx in range(7)}
# Per-point colour list used by the scatter plot.
cs = [labcola["%d" % class_id] for class_id in laba]
In [13]:
# Scatter plot of the aggregation points (columns 0 and 1), coloured per point via `cs`.
plt.scatter(ag[:,0], ag[:,1], c = cs)
plt.show()
In [14]:
# Time a single run of `bhclust_fast` on the first two columns of the aggregation data.
# (Disabled alternative: a one-loop, one-repeat %%timeit.)
#%%timeit -n 1 -r 1
%time Za1, colora1 = bhclust_fast(ag[:,:2], family = "multivariate", alpha = 1, r = 0.001)
In [40]:
# Time a single run of `bhclust` on the first two columns of the aggregation data,
# with the same arguments as the `bhclust_fast` run.
%time Za2, colora2 = bhclust(ag[:,:2], family = "multivariate", alpha = 1, r = 0.001)
In [40]:
# Average-linkage clustering of the same two columns, plotted later under the title "Average linkage".
Za3 = linkage(ag[:,:2], method = "average")
In [44]:
# Dendrogram of the aggregation data clustered with `bhclust_fast`.
plt.figure(figsize=(12,4))
# Links are coloured by looking up the link id in `colora1`.
dendrogram(Za1, link_color_func=lambda k: colora1[k], labels=laba)
# Colour every leaf label with the palette colour of its class id.
for tick in plt.gca().get_xmajorticklabels():
    tick.set_color(labcola[tick.get_text()])
plt.title("BHC(fast)")
plt.show()
pass
In [41]:
# Dendrogram of the average-linkage tree for the aggregation data, all links in blue.
plt.figure(figsize=(12,4))
dendrogram(Za3, link_color_func=lambda k: 'b', labels=laba)
# Colour every leaf label with the palette colour of its class id.
for tick in plt.gca().get_xmajorticklabels():
    tick.set_color(labcola[tick.get_text()])
plt.title("Average linkage")
plt.show()
In [63]:
# Load the cedar data set from a comma-separated file; the trailing bare
# expression displays the array's dimensions as the cell output.
ce = np.genfromtxt('../tests/cedar.csv', delimiter = ',')
ce.shape
Out[63]:
In [93]:
# Pseudocolour view of the whole cedar matrix, with the x-axis limited to 0..64.
plt.pcolor(ce)
plt.xlim(0,64)
plt.show()
In [75]:
# Leaf labels for the cedar data: three label strings, 40 entries each, plus one colour per label.
# NOTE(review): assumes the rows of `ce` are ordered in the same three groups of 40 — confirm.
labc = ['00000'] * 40 + ['22222'] * 40 + ['44444'] * 40
labcolc = {'00000': 'y', '22222':'g', '44444':'k'}
In [78]:
%timeit Zc, colorc = bhclust_fast(ce[:,:7], family = "bernoulli", alpha = 0.001)
In [87]:
# Dendrogram of the cedar data clustered with `bhclust_fast`.
plt.figure(figsize=(12,3.5))
# Links are coloured by looking up the link id in `colorc`.
dendrogram(Zc, link_color_func=lambda k: colorc[k], labels=labc)
# Colour every leaf label according to its label string.
for tick in plt.gca().get_xmajorticklabels():
    tick.set_color(labcolc[tick.get_text()])
plt.title("BHC")
plt.show()
In [80]:
# Average-linkage clustering of the cedar data using every column of `ce`.
Zc3 = linkage(ce, method = 'average')
In [88]:
# Dendrogram of the average-linkage tree for the cedar data, all links in blue.
plt.figure(figsize=(12,3.5))
dendrogram(Zc3, link_color_func=lambda k: 'b', labels=labc)
# Colour every leaf label according to its label string.
for tick in plt.gca().get_xmajorticklabels():
    tick.set_color(labcolc[tick.get_text()])
plt.title("Average linkage")
plt.show()
pass
In [15]:
# Load the seeds data set from a text file; the trailing bare expression
# displays the array's dimensions as the cell output.
sd = np.genfromtxt('../tests/seeds.txt')
sd.shape
Out[15]:
In [16]:
# Leaf labels for the seeds data: three names, 70 consecutive entries each,
# with one "Paired" palette colour assigned per name.
seed_names = ['Kama', 'Rosa', 'Canadian']
labd = [seed_name for seed_name in seed_names for _ in range(70)]
paired_three = sns.color_palette("Paired", 3)
labcold = {seed_name: paired_three[pos] for pos, seed_name in enumerate(seed_names)}
In [37]:
# Pseudocolour view of the first seven columns of the seeds data, with the y-axis limited to 0..210.
plt.pcolor(sd[:,:7])
plt.ylim(0,210)
plt.show()
In [28]:
# Time a single run of `bhclust_fast` on the first seven columns of the seeds data.
%time Zd1, colord1 = bhclust_fast(sd[:,:7], family = "multivariate", alpha = 1)
In [ ]:
# Time a single run of `bhclust` on the first seven columns of the seeds data,
# with the same arguments as the `bhclust_fast` run.
%time Zd2, colord2 = bhclust(sd[:,:7], family = "multivariate", alpha = 1)
In [26]:
# Average-linkage clustering of the same seven columns, plotted later under the title "Average linkage".
Zd3 = linkage(sd[:,:7], method = "average")
In [36]:
# Dendrogram of the seeds data clustered with `bhclust_fast`.
plt.figure(figsize=(12,4))
# Links are coloured by looking up the link id in `colord1`.
dendrogram(Zd1, link_color_func=lambda k: colord1[k], labels=labd)
# Colour every leaf label with the palette colour of its name.
for tick in plt.gca().get_xmajorticklabels():
    tick.set_color(labcold[tick.get_text()])
plt.title("BHC(fast)")
plt.show()
In [33]:
# Dendrogram of the average-linkage tree for the seeds data, all links in blue.
plt.figure(figsize=(12,4))
dendrogram(Zd3, link_color_func=lambda k: 'b', labels=labd)
# Colour every leaf label with the palette colour of its name.
for tick in plt.gca().get_xmajorticklabels():
    tick.set_color(labcold[tick.get_text()])
plt.title("Average linkage")
plt.show()
pass
In [ ]: