In [1]:

    
%matplotlib inline
import matplotlib
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans

Disjoint clusters

Generate random data



In [2]:

    
np.random.seed(1)  # fixed seed so the generated blobs are reproducible
N = 100  # points drawn per cluster
k = 4    # number of clusters

# Shared isotropic covariance: 0.1 on the diagonal, 0 elsewhere.
cov_mat = np.eye(2) / 10

# One Gaussian blob per center. The centers are visited in this exact order so
# the global random stream is consumed the same way on every run.
centers = [[-1, -1], [-1, +1], [+1, -1], [+1, +1]]
all_samples1 = np.vstack(
    [np.random.multivariate_normal(center, cov_mat, N) for center in centers]
)

# Generating-cluster index (0..3) for every row of all_samples1, N per cluster.
cols1 = [label for label in range(len(centers)) for _ in range(N)]

Plot the data with the original cluster colors



In [3]:

    
# Scatter the blob data, colored by the entries of cols1.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(
    all_samples1[:, 0],
    all_samples1[:, 1],
    c=cols1,
    s=40,
    alpha=1,
    linewidth=0,
)
plt.show()

Performing K-means with K=4



In [4]:

    
# Fit K-means on the blob data, using the cluster count `k` defined alongside
# the data instead of repeating the literal 4.
# n_init is pinned to 10 because scikit-learn changed its default from 10 to
# 'auto' in version 1.4; an explicit value keeps the fit the same across
# versions and avoids the FutureWarning emitted by 1.2/1.3.
kmeans_model1 = KMeans(n_clusters=k, n_init=10, random_state=1).fit(all_samples1)

# Cluster index assigned to each row of all_samples1.
labels1 = kmeans_model1.labels_

Scatter plot of the clustering results



In [5]:

    
# Scatter the blob data colored by the K-means assignment. `labels1` is
# already set in the cell that fits `kmeans_model1`, so it is not re-read here.
# K-means cluster indices are arbitrary, so the colors need not match the
# ground-truth plot even when the partition is the same.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(
    all_samples1[:, 0],
    all_samples1[:, 1],
    c=labels1,
    s=40,
    alpha=1,
    linewidth=0,
)
plt.show()

Example with two concentric rings



In [6]:

    
np.random.seed(1)  # fixed seed so the rings are reproducible
N = 200  # points per ring

# Draw order is angles first, then inner radii, then outer radii; keeping it
# fixed keeps the global random stream (and therefore the data) unchanged.
theta = np.random.uniform(size=(2 * N, 1)) * 2 * np.pi
inner_radii = np.random.uniform(low=0.8, high=1.2, size=(N, 1))
outer_radii = np.random.uniform(low=1.8, high=2.2, size=(N, 1))
r = np.vstack([inner_radii, outer_radii])

# Polar -> Cartesian; each of x and y is a (2N, 1) column.
x = r * np.cos(theta)
y = r * np.sin(theta)
all_samples2 = np.hstack([x, y])

# One RGB row per point: first N rows (inner ring) reddish, last N rows
# (outer ring) bluish.
inner_rgb = (1.0, 0.2, 0.2)
outer_rgb = (0.5, 0.5, 1)
cols2 = np.array([inner_rgb] * N + [outer_rgb] * N)



In [7]:

    
# Scatter the ring data, colored by the per-point RGB rows in cols2.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(
    all_samples2[:, 0],
    all_samples2[:, 1],
    c=cols2,
    s=40,
    alpha=1,
    linewidth=0,
)
plt.show()

Performing K-means with K=2



In [8]:

    
# Fit K-means with two clusters on the ring data.
# n_init is pinned to 10 because scikit-learn changed its default from 10 to
# 'auto' in version 1.4; an explicit value keeps the fit the same across
# versions and avoids the FutureWarning emitted by 1.2/1.3.
kmeans_model2 = KMeans(n_clusters=2, n_init=10, random_state=1).fit(all_samples2)

# Cluster index assigned to each row of all_samples2.
labels2 = kmeans_model2.labels_

Scatter plot of the clustering results



In [9]:

    
# Scatter the ring data colored by the K-means assignment in labels2.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(
    all_samples2[:, 0],
    all_samples2[:, 1],
    c=labels2,
    s=40,
    alpha=1,
    linewidth=0,
)
plt.show()