In [1]:
%matplotlib inline
import matplotlib
import numpy as np
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
In [2]:
# Synthetic data set 1: four Gaussian blobs, one centred in each quadrant.
np.random.seed(1)  # fix the global RNG so the samples are reproducible
N = 100  # points drawn per blob
k = 4  # number of blobs
cov_mat = np.eye(2) / 10  # shared covariance: 0.1 * identity

# Blob centres, listed in the order the samples are drawn and stacked.
blob_means = [[-1, -1], [-1, +1], [+1, -1], [+1, +1]]
all_samples1 = np.vstack(
    [np.random.multivariate_normal(mean, cov_mat, N) for mean in blob_means]
)

# Ground-truth blob index (0..3) for every row of all_samples1.
cols1 = [blob for blob in range(len(blob_means)) for _ in range(N)]
In [3]:
# Scatter the blob data, coloured by the ground-truth blob index in cols1.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(
    *all_samples1.T,  # row 0 -> x coordinates, row 1 -> y coordinates
    c=cols1,
    s=40,
    alpha=1,
    linewidth=0,
)
plt.show()
In [4]:
# Fit k-means to the blob data, using the cluster count `k` defined with the data.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' (and warns when it is left unset in the transition releases), so
# leaving it implicit makes the result depend on the installed version.
kmeans_model1 = KMeans(n_clusters=k, n_init=10, random_state=1).fit(all_samples1)
labels1 = kmeans_model1.labels_  # cluster index assigned to each sample
In [5]:
# Scatter the blob data, coloured by the k-means assignment. labels1 is
# already set by the cell that fits kmeans_model1, so it is not re-read here.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(
    all_samples1[:, 0],
    all_samples1[:, 1],
    c=labels1,
    s=40,
    alpha=1,
    linewidth=0,
)
plt.show()
In [6]:
# Synthetic data set 2: two concentric rings of N points each.
np.random.seed(1)  # reseed so this cell is reproducible on its own
N = 200  # points per ring

# One uniformly random angle per point (both rings together), scaled to radians.
theta = np.random.uniform(size=(2 * N, 1)) * 2 * np.pi

# Radii: the first N rows are drawn from 0.8..1.2, the last N from 1.8..2.2.
ring_bounds = [(0.8, 1.2), (1.8, 2.2)]
r = np.vstack(
    [np.random.uniform(low=lo, high=hi, size=(N, 1)) for lo, hi in ring_bounds]
)

# Polar -> Cartesian, then place x and y side by side as a (2N, 2) array.
x = r * np.cos(theta)
y = r * np.sin(theta)
all_samples2 = np.hstack((x, y))

# One RGB colour per point: reddish for the inner ring, bluish for the outer.
cols2 = np.repeat(np.array([(1.0, 0.2, 0.2), (0.5, 0.5, 1)]), N, axis=0)
In [7]:
# Scatter the ring data, coloured by the per-point RGB values in cols2.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(
    *all_samples2.T,  # row 0 -> x coordinates, row 1 -> y coordinates
    c=cols2,
    s=40,
    alpha=1,
    linewidth=0,
)
plt.show()
In [8]:
# Fit k-means with two clusters to the ring data.
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# 'auto' (and warns when it is left unset in the transition releases), so
# leaving it implicit makes the result depend on the installed version.
kmeans_model2 = KMeans(n_clusters=2, n_init=10, random_state=1).fit(all_samples2)
labels2 = kmeans_model2.labels_  # cluster index assigned to each sample
In [9]:
# Scatter the ring data, coloured by the k-means assignment in labels2.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(
    *all_samples2.T,  # row 0 -> x coordinates, row 1 -> y coordinates
    c=labels2,
    s=40,
    alpha=1,
    linewidth=0,
)
plt.show()