In [68]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import linkage, dendrogram
from IPython.display import HTML
In [3]:
# Load the input matrix from a path relative to the notebook's working directory.
# NOTE(review): np.loadtxt splits on whitespace by default; if this ".csv" file is
# actually comma-separated, a delimiter=',' argument would be needed — confirm.
data = np.loadtxt("../../ratabase/ratabase-dists.csv")
In [87]:
# View the loaded matrix as float64 and transpose it so that each original
# column becomes one row (one observation) for pdist.
# np.float64 replaces the np.float alias, which was deprecated in NumPy 1.20
# and removed in NumPy 1.24 (the old spelling raises AttributeError there).
data_array = data.view(np.float64).transpose()
def calc_distances(*args):
    """Compute pairwise distances for the notebook-level ``data_array`` and cluster them.

    Parameters
    ----------
    *args
        Extra positional arguments forwarded unchanged to
        ``scipy.spatial.distance.pdist`` after the data (the notebook passes a
        metric name such as ``'euclidean'``).

    Returns
    -------
    tuple
        ``(distances, link)`` where ``distances`` is the ``pdist`` result and
        ``link`` is ``linkage(distances)`` computed with linkage's defaults.
    """
    # Reads the module-level `data_array`; it is not passed in as an argument.
    condensed = pdist(data_array, *args)
    return condensed, linkage(condensed)
def draw_dendrogram(data_link):
    """Plot a dendrogram of ``data_link`` on the current axes and label it.

    Parameters
    ----------
    data_link
        Linkage matrix passed straight to ``scipy.cluster.hierarchy.dendrogram``.

    Returns
    -------
    None
        The function only draws; the figure is whatever pyplot considers current.
    """
    dendrogram(data_link)

    # Label through the axes/figure objects that pyplot currently points at.
    axes = plt.gca()
    axes.set_xlabel('Samples')
    axes.set_ylabel('Distance')
    plt.gcf().suptitle('Samples clustering', fontsize=14, fontweight='bold')
def draw_heatmap(data_link, data_dist):
    """Draw a distance heatmap framed by a left-hand and a top dendrogram.

    Parameters
    ----------
    data_link
        Linkage matrix used for both dendrograms. Previously this argument was
        ignored and the linkage was recomputed from ``data_dist`` with
        ``method='single'``; ``calc_distances`` already builds its linkage with
        ``linkage``'s default method, so the passed matrix is used directly.
    data_dist
        Condensed distance vector; expanded with ``squareform`` for the heatmap.

    Returns
    -------
    None
        A new 8x8 inch figure is created and drawn into. Nothing is returned,
        so a notebook cell ending with this call does not echo the figure twice.
    """
    fig = plt.figure(figsize=(8, 8))

    # Axes rectangles are [left, bottom, width, height] in figure fractions.

    # Left dendrogram. Each dendrogram is given its axes explicitly instead of
    # relying on pyplot's "current axes" state.
    ax_left = fig.add_axes([0.05, 0.1, 0.2, 0.6])
    left_tree = dendrogram(
        data_link,
        orientation='right',
        # Labels come from the notebook-level `data`.
        # NOTE(review): for a plain (non-structured) array `dtype.names` is
        # None, in which case dendrogram falls back to its default labels.
        labels=data.dtype.names,
        ax=ax_left,
    )
    ax_left.set_xticks([])

    # Top dendrogram (default orientation), with all ticks hidden.
    ax_top = fig.add_axes([0.3, 0.71, 0.6, 0.2])
    top_tree = dendrogram(data_link, ax=ax_top)
    ax_top.set_xticks([])
    ax_top.set_yticks([])

    # Heatmap: reorder rows by the left tree's leaf order and columns by the
    # top tree's leaf order so the matrix lines up with both dendrograms.
    ax_matrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
    square = squareform(data_dist)
    ordered = square[np.ix_(left_tree['leaves'], top_tree['leaves'])]
    im = ax_matrix.matshow(ordered, aspect='auto', origin='lower', cmap=plt.cm.YlGnBu)
    ax_matrix.set_xticks([])
    ax_matrix.set_yticks([])

    # Colorbar in its own narrow axes to the right of the heatmap.
    ax_cbar = fig.add_axes([0.91, 0.1, 0.02, 0.6])
    fig.colorbar(im, cax=ax_cbar)
In [105]:
# Cluster with the pdist metric 'seuclidean', then plot the dendrogram and the heatmap.
data_dist, data_link = calc_distances('seuclidean')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)
In [89]:
# Cluster with the pdist metric 'euclidean', then plot the dendrogram and the heatmap.
data_dist, data_link = calc_distances('euclidean')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)
In [106]:
# Cluster with the pdist metric 'cosine', then plot the dendrogram and the heatmap.
data_dist, data_link = calc_distances('cosine')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)
In [99]:
# Cluster with the pdist metric 'correlation', then plot the dendrogram and the heatmap.
data_dist, data_link = calc_distances('correlation')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)
In [100]:
# Cluster with the pdist metric 'mahalanobis', then plot the dendrogram and the heatmap.
data_dist, data_link = calc_distances('mahalanobis')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)
In [101]:
# Cluster with the pdist metric 'chebyshev', then plot the dendrogram and the heatmap.
data_dist, data_link = calc_distances('chebyshev')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)
In [103]:
# Cluster with the pdist metric 'canberra', then plot the dendrogram and the heatmap.
data_dist, data_link = calc_distances('canberra')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)
In [104]:
# Cluster with the pdist metric 'braycurtis', then plot the dendrogram and the heatmap.
data_dist, data_link = calc_distances('braycurtis')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)
In [ ]: