In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import scipy.spatial.distance as spd
plt.rcParams['figure.figsize'] = (15.0, 12.0)
In [2]:
def plot_clusters(x, c, sizes=100):
    """Scatter-plot the data points and, optionally, mark the current centers.

    Parameters
    ----------
    x : array indexed as ``x[:, 0]`` / ``x[:, 1]``
        Data points; the first two columns are used as plot coordinates.
    c : array indexed like ``x``, or None
        Cluster centers, drawn as large red crosses. Nothing is drawn for
        centers when ``c`` is None.
    sizes : scalar or array, default 100
        Forwarded to ``plt.scatter`` as the marker size ``s``.

    The figure is displayed immediately via ``plt.show()``; nothing is returned.
    """
    point_xs, point_ys = x[:, 0], x[:, 1]
    plt.scatter(point_xs, point_ys, s=sizes, alpha=0.7)

    have_centers = c is not None
    if have_centers:
        center_xs, center_ys = c[:, 0], c[:, 1]
        plt.plot(center_xs, center_ys, 'rx', markersize=30, markeredgewidth=5)

    plt.show()
In [3]:
# Synthetic data set: three 2-D Gaussian blobs of equal size.
POINTS_PER_CLUSTER = 70
OFFSET = 4
SEED = 0

# Seed NumPy's legacy global RNG before the first draw so that re-running the
# notebook from a fresh kernel regenerates exactly the same points (and the
# same sequence for any later np.random.* call).
np.random.seed(SEED)

# randn yields standard-normal samples; adding a length-2 tuple broadcasts the
# shift over every row, moving the blob away from the origin.
x1 = np.random.randn(POINTS_PER_CLUSTER, 2)
x2 = (OFFSET, OFFSET) + np.random.randn(POINTS_PER_CLUSTER, 2)
x3 = (OFFSET, -OFFSET) + np.random.randn(POINTS_PER_CLUSTER, 2)

# Stack the three blobs into one (3 * POINTS_PER_CLUSTER, 2) array.
x = np.vstack((x1, x2, x3))
plot_clusters(x, None)
In [4]:
# Pick initial cluster centers by D^2-weighted sampling (the seeding scheme
# used by k-means++): the first center is uniform over the data, each further
# center is drawn with probability proportional to a point's squared distance
# to its nearest already-chosen center.
NUM_CENTERS = 3   # total number of centers to select
SIZE_SCALE = 3    # marker size per unit of squared distance in the weight plots

# Choose first center uniformly at random
first_idx = np.random.choice(x.shape[0])
c = np.array(x[first_idx, :], ndmin=2)  # ndmin=2 keeps c as a (1, 2) row matrix
plot_clusters(x, c)

# Choose next centers weighted by squared distance
for step in range(NUM_CENTERS - 1):
    # ds[j, k] is the squared Euclidean distance from center j to point k.
    ds = spd.cdist(c, x, 'sqeuclidean')
    # Distance of every point to its closest current center (already 1-D).
    mindist = np.min(ds, axis=0)
    # Visualize the sampling weights: marker size grows with the distance.
    plot_clusters(x, c, sizes=SIZE_SCALE * mindist)

    # Normalize the weights into a probability vector. If every point sits on
    # a chosen center the total is 0 and the division would yield NaNs, so
    # fall back to uniform sampling (p=None) in that degenerate case.
    total = np.sum(mindist)
    probs = mindist / total if total > 0 else None
    idx = np.random.choice(x.shape[0], p=probs)

    c = np.vstack((c, x[idx, :]))
    # Same weights as above, now with the newly drawn center marked.
    plot_clusters(x, c, sizes=SIZE_SCALE * mindist)

# Final view: all selected centers over uniformly sized points.
plot_clusters(x, c)