In [68]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform
from scipy.cluster.hierarchy import linkage, dendrogram
from IPython.display import HTML

In [3]:
# Load the raw table from disk into a NumPy array.
# NOTE(review): np.loadtxt splits on whitespace unless `delimiter` is given,
# while the file carries a .csv extension — confirm the file's actual separator.
data = np.loadtxt("../../ratabase/ratabase-dists.csv")

In [87]:
# `np.float` was only an alias for the builtin `float`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the explicit float64 dtype is used here.
# The array is then transposed because pdist() treats each ROW as one
# observation, so the columns of the loaded table become the clustered items.
data_array = data.view(np.float64).transpose()


def calc_distances(*args):
    """Compute pairwise distances over ``data_array`` and cluster them.

    Positional arguments are forwarded unchanged to ``pdist`` after the
    observation matrix (in the notebook this is the metric name, e.g.
    ``'euclidean'``).

    Returns
    -------
    tuple
        ``(condensed_distances, linkage_matrix)`` where the linkage is built
        with ``linkage``'s default settings.
    """
    condensed = pdist(data_array, *args)
    return condensed, linkage(condensed)
    
def draw_dendrogram(data_link):
    """Plot the dendrogram of a linkage matrix with axis labels and a title.

    Drawing goes through the pyplot state machine, i.e. onto whatever figure
    and axes are current when the function is called.
    """
    title_style = {'fontweight': 'bold', 'fontsize': 14}

    dendrogram(data_link)
    plt.ylabel('Distance')
    plt.xlabel('Samples')
    plt.suptitle('Samples clustering', **title_style)
    
def draw_heatmap(data_link, data_dist):
    """Draw a clustered distance heatmap framed by two dendrograms.

    A new 8x8 inch figure is created with four axes: a dendrogram on the
    left, a dendrogram on top, the reordered distance matrix in the centre
    and a colorbar on the right.

    Parameters
    ----------
    data_link : ndarray or None
        Linkage matrix describing the clustering of ``data_dist``. It is used
        as given; when ``None``, a single-linkage clustering is computed from
        ``data_dist`` (this is what the function previously always did,
        ignoring the argument).
    data_dist : ndarray
        Condensed pairwise-distance vector, as returned by ``pdist``.

    Notes
    -----
    Leaf labels for the left dendrogram are read from the module-level
    ``data.dtype.names``.
    """
    # Honour the caller's linkage instead of silently recomputing it; keep the
    # old single-linkage computation only as a fallback.
    if data_link is None:
        data_link = linkage(data_dist, method='single')

    fig = plt.figure(figsize=(8, 8))

    # Every add_axes rectangle is [x, y, width, height] in figure fractions.

    # Left dendrogram (leaf labels are kept on its y axis).
    ax1 = fig.add_axes([0.05, 0.1, 0.2, 0.6])
    Z1 = dendrogram(data_link, orientation='right',
                    labels=data.dtype.names, ax=ax1)
    ax1.set_xticks([])

    # Top dendrogram, drawn without any ticks.
    ax2 = fig.add_axes([0.3, 0.71, 0.6, 0.2])
    Z2 = dendrogram(data_link, ax=ax2)
    ax2.set_xticks([])
    ax2.set_yticks([])

    # Heatmap: expand the condensed distances to a square matrix and reorder
    # rows/columns to follow the leaf order of the two dendrograms.
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.6])
    idx1 = Z1['leaves']
    idx2 = Z2['leaves']
    D = squareform(data_dist)
    D = D[idx1, :]
    D = D[:, idx2]
    im = axmatrix.matshow(D, aspect='auto', origin='lower', cmap=plt.cm.YlGnBu)
    axmatrix.set_xticks([])
    axmatrix.set_yticks([])

    # Colorbar in its own narrow axes to the right of the heatmap.
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.6])
    fig.colorbar(im, cax=axcolor)

Standardized Euclidean


In [105]:
# Cluster with pdist's 'seuclidean' metric, then plot the dendrogram and the
# dendrogram-framed heatmap for the result.
data_dist, data_link = calc_distances('seuclidean')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)


Euclidean


In [89]:
# Cluster with pdist's 'euclidean' metric, then plot the dendrogram and the
# dendrogram-framed heatmap for the result.
data_dist, data_link = calc_distances('euclidean')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)


Cosine

$$ 1 - \frac{u \cdot v}{\|u\|_2 \|v\|_2} $$

In [106]:
# Cluster with pdist's 'cosine' metric, then plot the dendrogram and the
# dendrogram-framed heatmap for the result.
data_dist, data_link = calc_distances('cosine')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)


Correlation

$$ 1 - \frac{(u - \bar{u}) \cdot (v - \bar{v})}{\|u - \bar{u}\|_2 \|v - \bar{v}\|_2} $$

In [99]:
# Cluster with pdist's 'correlation' metric, then plot the dendrogram and the
# dendrogram-framed heatmap for the result.
data_dist, data_link = calc_distances('correlation')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)


Mahalanobis


In [100]:
# Cluster with pdist's 'mahalanobis' metric, then plot the dendrogram and the
# dendrogram-framed heatmap for the result.
data_dist, data_link = calc_distances('mahalanobis')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)


Chebyshev

$$ d(u, v) = \max_i |u_i - v_i| $$

In [101]:
# Cluster with pdist's 'chebyshev' metric, then plot the dendrogram and the
# dendrogram-framed heatmap for the result.
data_dist, data_link = calc_distances('chebyshev')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)


Canberra

$$ d(u,v) = \sum_i \frac{|u_i - v_i|}{|u_i| + |v_i|} $$


In [103]:
# Cluster with pdist's 'canberra' metric, then plot the dendrogram and the
# dendrogram-framed heatmap for the result.
data_dist, data_link = calc_distances('canberra')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)


Braycurtis

$$ d(u,v) = \frac{\sum_i |u_i - v_i|}{\sum_i |u_i + v_i|} $$


In [104]:
# Cluster with pdist's 'braycurtis' metric, then plot the dendrogram and the
# dendrogram-framed heatmap for the result.
data_dist, data_link = calc_distances('braycurtis')
draw_dendrogram(data_link)
draw_heatmap(data_link, data_dist)



In [ ]: