In [1]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use("ggplot")

from sklearn.cluster import KMeans

In [2]:
# Toy dataset: six 2-D points, kept as two parallel coordinate lists.
x = [1, 5, 1.5, 8, 1, 9]     # horizontal coordinates
y = [2, 8, 1.8, 8, 0.6, 11]  # vertical coordinates

# First look at the raw, unlabeled points.
plt.scatter(x, y)


Out[2]:
<matplotlib.collections.PathCollection at 0x11330a438>

In [20]:
# Pair the coordinate lists up into a 2-D array: one row per point, columns (x, y).
# Equivalent literal: np.array([[1,2], [5,8], [1.5,1.8], [8,8], [1,0.6], [9,11]])
X = np.array(list(zip(x, y)))

In [27]:
# Flat clustering with 2 clusters.
# random_state pins the centroid initialisation so the clustering (and which
# cluster ends up as label 0 vs. label 1) is identical on every re-run;
# n_init is spelled out because its default differs between scikit-learn versions.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0)
# Fit the model to the data X (the trailing ';' suppresses the estimator repr).
kmeans.fit(X);

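K-means assigns each point to the cluster whose centroid is nearest, choosing the centroids so that the variance within each cluster (the sum of squared distances from the points to their centroid) is as small as possible. A centroid is the center of a cluster: the mean of the points assigned to it.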


In [28]:
# cluster centres (the μ's) together with the per-point cluster labels
# that the fitted KMeans model supplies
centroids, labels = kmeans.cluster_centers_, kmeans.labels_

In [29]:
# show the centroid coordinates first, then the label assigned to each point
print(centroids, labels, sep="\n")


[[ 7.33333333  9.        ]
 [ 1.16666667  1.46666667]]
[1 0 1 0 1 0]

In [32]:
# matplotlib format strings indexed by cluster label: green dot, red dot
colors = ["g.", "r."]

# draw each data point in the colour of its cluster, echoing its label
for point, label in zip(X, labels):
    print("coordinate:", point, "label:", label)
    plt.plot(point[0], point[1], colors[label], markersize=10)

# mark the centroids with large crosses drawn in front of the points
centroid_x, centroid_y = centroids[:, 0], centroids[:, 1]
plt.scatter(centroid_x, centroid_y, marker="x", s=150, linewidths=5, zorder=10)
plt.show()


coordinate: [ 1.  2.] label: 1
coordinate: [ 5.  8.] label: 0
coordinate: [ 1.5  1.8] label: 1
coordinate: [ 8.  8.] label: 0
coordinate: [ 1.   0.6] label: 1
coordinate: [  9.  11.] label: 0

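Special syntax for indexing NumPy arrays: centroids[:, n] selects the nth column across all (:) rows of centroids. s is the marker size (in points squared). zorder controls the drawing order: an artist with a higher zorder is drawn on top of those with a lower one, i.e. it has greater foreground priority.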

Now, what if we force the algorithm to have 3 clusters?


In [34]:
n_clusters = 3
# Seeded so the clustering and its label numbering are reproducible on re-run;
# n_init is explicit because its default differs between scikit-learn versions.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
kmeans.fit(X)
centroids = kmeans.cluster_centers_
labels = kmeans.labels_

# One matplotlib format string per cluster label: green, red, cyan dots.
colors = ["g.", "r.", "c."]
for point, label in zip(X, labels):
    print("coordinate:", point, "label:", label)
    plt.plot(point[0], point[1], colors[label], markersize=10)

# A high zorder keeps the centroid crosses in front of the data points;
# at zorder=1 they would sit below the points, which plt.plot draws at zorder 2.
plt.scatter(centroids[:, 0], centroids[:, 1], marker="x", s=150, linewidths=5, zorder=10)
plt.show()


coordinate: [ 1.  2.] label: 1
coordinate: [ 5.  8.] label: 0
coordinate: [ 1.5  1.8] label: 1
coordinate: [ 8.  8.] label: 0
coordinate: [ 1.   0.6] label: 1
coordinate: [  9.  11.] label: 2

In [ ]:


In [ ]:


In [ ]:


In [ ]:


In [ ]: