In [1]:
# Demonstrate the K-Means algorithm
In [1]:
import numpy as np  # used below for array construction and multivariate-normal sampling
import matplotlib.pyplot as plt
# NOTE: sklearn.datasets.samples_generator was removed in scikit-learn 0.24;
# make_blobs lives directly in sklearn.datasets.
from sklearn.datasets import make_blobs
In [3]:
# Build a two-class 2-D dataset as a mixture of two Gaussians.
# Seed the global RNG so the demo is reproducible across kernel restarts.
np.random.seed(0)

# Class +1: 300 points from N(m1, C1)
n1 = 300
m1 = np.array([0, 0])
C1 = np.array([[8, 10], [10, 6]])
X1 = np.random.multivariate_normal(m1, C1, n1)

# Class -1: 500 points from N(m2, C2)
n2 = 500
m2 = np.array([12, 5])
C2 = np.array([[5., -5], [-5, 10]])
X2 = np.random.multivariate_normal(m2, C2, n2)

# Stack samples; label the first n1 rows +1 and the rest -1.
X = np.vstack((X1, X2))
y = np.ones(n1 + n2)
y[n1:] = -1
n_centers = 2
In [43]:
# Replace the hand-built mixture above with well-separated isotropic blobs,
# so the true cluster structure is unambiguous for the K-means demo.
n_centers = 3
X, y = make_blobs(
    n_samples=1000,
    centers=n_centers,
    n_features=2,
    cluster_std=0.7,
    random_state=0,
)
In [4]:
import importlib

import kmeans
# Bare reload() is Python 2 only; Python 3 requires importlib.reload.
# Reloading picks up edits to kmeans.py without restarting the kernel.
importlib.reload(kmeans)
# kmeans.kmeans returns: per-point cluster assignments, final centers,
# and the objective value after each iteration.
y_pred, centers, obj_val_seq = kmeans.kmeans(X, n_centers)
print(centers)
In [5]:
# Plot how the K-means objective decreases over iterations
# (it should be monotonically non-increasing until convergence).
fig, ax = plt.subplots()
ax.plot(obj_val_seq, 'b-', marker='*')
fig.suptitle("Change in K-means objective value")
ax.set_xlabel("Iteration")
ax.set_ylabel("Objective value")
Out[5]:
In [7]:
from itertools import cycle

# cycle() already repeats the sequence indefinitely, so the base palette
# does not need to be hand-repeated.
colors = cycle('bgrcmyk')
fig = plt.figure(figsize=plt.figaspect(0.5))  # twice as wide to accommodate both panels

# Left panel: data colored by true label, plus initial and final centers.
ax = fig.add_subplot(121)
ax.set_title("Data with true labels and final centers")
for k, color in zip(range(n_centers), colors):
    ax.plot(X[y == k, 0], X[y == k, 1], color + '.')
# Recomputing the initial centers here matches the run above because
# init_centers always uses the same random seed.
initial_centers = kmeans.init_centers(X, n_centers, 2)
# Initial centers in magenta, final centers in cyan.
for x in initial_centers:
    ax.plot(x[0], x[1], "mo", markeredgecolor="k", markersize=8)
for x in centers:
    ax.plot(x[0], x[1], "co", markeredgecolor="k", markersize=8)

# Right panel: same data colored by the K-means assignment.
ax = fig.add_subplot(122)
ax.set_title("Data with final assignments")
for k, color in zip(range(n_centers), colors):
    ax.plot(X[y_pred == k, 0], X[y_pred == k, 1], color + '.')

fig.tight_layout()
# fig.show() warns (or is a no-op) without a running GUI event loop;
# plt.show() works in both script and notebook contexts.
plt.show()