In [1]:

    
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
style.use("ggplot")

from sklearn.cluster import KMeans



In [2]:

    
# Toy data set: six 2-D sample points, written as (x, y) pairs.
samples = [(1, 2), (5, 8), (1.5, 1.8), (8, 8), (1, 0.6), (9, 11)]

# Split the pairs into the parallel coordinate lists used by later cells.
x = [px for px, _ in samples]
y = [py for _, py in samples]

# Quick visual check of the raw, unclustered points.
plt.scatter(x, y)









    Out[2]:





<matplotlib.collections.PathCollection at 0x11330a438>



In [20]:

    
# Pair the coordinate lists into a 2-D array with one [x, y] row per point.
# Equivalent literal: np.array([[1,2], [5,8], [1.5,1.8], [8,8], [1,0.6], [9,11]])
X = np.array(list(zip(x, y)))



In [27]:

    
# Flat clustering into 2 clusters.
# K-means starts from randomly chosen centroids, so random_state is pinned to
# make the centroids and the label numbering reproducible on every re-run.
# (Which cluster ends up numbered 0 may differ from earlier, unseeded runs.)
# n_init is stated explicitly because its default differs between
# scikit-learn releases.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=0)
# Fit the model to the sample array X (one row per point); the trailing
# semicolon suppresses the estimator repr in the cell output.
kmeans.fit(X);

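K-means assigns each point to the cluster whose centroid is nearest, choosing the centroids so that the variance within each cluster is as small as possible (it works best when the clusters have roughly equal variance). A centroid is the center of a cluster, i.e. the mean of the points assigned to it.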



In [28]:

    
# Read the fitted results off the model in one step:
#   cluster_centers_ -> the centroids (the μ's)
#   labels_          -> the cluster labels the KMeans algorithm supplies for the points in X
centroids, labels = kmeans.cluster_centers_, kmeans.labels_



In [29]:

    
# Show the centroid coordinates, then the per-point labels, on separate lines.
print(centroids, labels, sep="\n")









    



[[ 7.33333333  9.        ]
 [ 1.16666667  1.46666667]]
[1 0 1 0 1 0]



In [32]:

    
# matplotlib format strings, indexed by cluster label: green dot, red dot
colors = ["g.", "r."]

# Draw every data point in the colour of the cluster it was assigned to,
# echoing each coordinate/label pair as it is plotted.
for point, label in zip(X, labels):
    print("coordinate:", point, "label:", label)
    plt.plot(point[0], point[1], colors[label], markersize=10)

# Overlay the centroids as large "x" markers.
plt.scatter(
    centroids[:, 0],
    centroids[:, 1],
    marker="x",
    s=150,
    linewidths=5,
    zorder=10,
)
plt.show()









    



coordinate: [ 1.  2.] label: 1
coordinate: [ 5.  8.] label: 0
coordinate: [ 1.5  1.8] label: 1
coordinate: [ 8.  8.] label: 0
coordinate: [ 1.   0.6] label: 1
coordinate: [  9.  11.] label: 0

Special syntax for indexing NumPy arrays: centroids[:, n] selects the nth column of all (:) rows of centroids. s is the marker size. zorder is the background/foreground drawing order: the higher the value, the greater the foreground priority.

Now, what if we force the algorithm to have 3 clusters?



In [34]:

    
# Repeat the clustering and the plot with a configurable number of clusters.
n_clusters = 3

# random_state pins the random centroid initialisation so re-running the
# notebook reproduces the same clusters; n_init is stated explicitly because
# its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0)
kmeans.fit(X)
centroids = kmeans.cluster_centers_
labels    = kmeans.labels_

# One matplotlib format string per cluster label. The palette is cycled so
# that raising n_clusters past the number of listed colours does not raise
# an IndexError; for n_clusters = 3 this yields ["g.", "r.", "c."].
palette = ["g.", "r.", "c.", "m.", "y.", "k.", "b."]
colors = [palette[k % len(palette)] for k in range(n_clusters)]

# Draw every data point in the colour of the cluster it was assigned to.
for point, label in zip(X, labels):
    print("coordinate:", point, "label:", label)
    plt.plot(point[0], point[1], colors[label], markersize = 10)

# zorder=10 matches the 2-cluster plot and keeps the centroid markers in the
# foreground; a centroid can coincide with a data point when a cluster holds
# a single point, and a low zorder would draw it underneath that point.
plt.scatter(centroids[:,0], centroids[:,1], marker = "x", s=150, linewidths=5, zorder=10)
plt.show()









    



coordinate: [ 1.  2.] label: 1
coordinate: [ 5.  8.] label: 0
coordinate: [ 1.5  1.8] label: 1
coordinate: [ 8.  8.] label: 0
coordinate: [ 1.   0.6] label: 1
coordinate: [  9.  11.] label: 2



In [ ]:



In [ ]:



In [ ]:



In [ ]:



In [ ]: