In [1]:
%matplotlib inline
import matplotlib
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import cophenet
from scipy.spatial.distance import pdist
In [2]:
# Toy dataset 1: four small Gaussian blobs, one per corner of the square
# [-1, 1] x [-1, 1], with N points each and an isotropic covariance of 0.1.
np.random.seed(5)  # fixed seed so the sample is the same on every run
N = 4
k = 4
cov_mat = np.eye(2) / 10
centers = [[-1, -1], [-1, +1], [+1, -1], [+1, +1]]
# Blobs are drawn in the order of `centers`, then stacked row-wise.
X1 = np.vstack([np.random.multivariate_normal(center, cov_mat, N)
                for center in centers])
# Blob index (0..3) of every row of X1, as a plain Python list.
cols1 = np.repeat(np.arange(len(centers)), N).tolist()
In [3]:
# Scatter plot of the small dataset X1, with every point annotated by its
# row index in X1.
fig = plt.figure(figsize=(5,5))
ax = fig.add_subplot(1,1,1)
ax.scatter(X1[:,0], X1[:,1], s=10, c='grey', alpha=1, linewidth=0)
# `range` instead of the Python-2-only `xrange`, so the cell runs on both
# Python 2 and Python 3.
for i in range(X1.shape[0]):
    ax.text(X1[i, 0], X1[i, 1], '%s' % i)
plt.show()
In [4]:
# Agglomerative clustering of X1 using single linkage on Euclidean
# distances; the resulting merge tree is drawn as a dendrogram.
Z1_single = linkage(X1, metric='euclidean', method='single')
fig, ax = plt.subplots(figsize=(5, 5))
dendrogram(Z1_single, ax=ax, leaf_font_size=8, leaf_rotation=90)
ax.set(title='Single link, Euclidean distances')
plt.show()
In [5]:
# Same small dataset X1, this time clustered with average linkage on
# Euclidean distances, and plotted as a dendrogram.
Z1_average = linkage(X1, metric='euclidean', method='average')
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.set_title('Average link, Euclidean distances')
dendrogram(Z1_average, ax=ax, leaf_rotation=90, leaf_font_size=8)
plt.show()
In [6]:
# Toy dataset 2: two concentric noisy rings with N points each.
# Rows 0..N-1 have radius in [0.8, 1.2); rows N..2N-1 have radius in [1.8, 2.2).
np.random.seed(1)  # fixed seed so the sample is the same on every run
N = 200
# One angle per point, uniform on [0, 2*pi); drawn before the radii.
theta = np.random.uniform(size=(2 * N, 1)) * (2 * np.pi)
r_inner = np.random.uniform(low=0.8, high=1.2, size=(N, 1))
r_outer = np.random.uniform(low=1.8, high=2.2, size=(N, 1))
r = np.vstack([r_inner, r_outer])
# Polar -> Cartesian, kept as (2N, 1) columns.
x = r * np.cos(theta)
y = r * np.sin(theta)
X2 = np.column_stack([x, y])
In [7]:
# Scatter plot of the two-ring dataset X2.
# Per-point index labels are deliberately not drawn for this dataset.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(X2[:, 0], X2[:, 1], c='grey', s=10, linewidth=0, alpha=1)
plt.show()
In [8]:
# Single-linkage agglomerative clustering of the ring dataset X2 on
# Euclidean distances, shown as a dendrogram.
Z2_single = linkage(X2, metric='euclidean', method='single')
fig, ax = plt.subplots(figsize=(5, 5))
dendrogram(Z2_single, ax=ax, leaf_font_size=8, leaf_rotation=90)
ax.set(title='Single link, Euclidean distances')
plt.show()
In [9]:
# Average-linkage agglomerative clustering of the ring dataset X2 on
# Euclidean distances, shown as a dendrogram.
Z2_average = linkage(X2, metric='euclidean', method='average')
fig, ax = plt.subplots(1, 1, figsize=(5, 5))
ax.set_title('Average link, Euclidean distances')
dendrogram(Z2_average, ax=ax, leaf_rotation=90, leaf_font_size=8)
plt.show()