In [22]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [23]:
# Display numpy arrays with 3 decimal places and without scientific notation.
np.set_printoptions(precision=3, suppress=True)

K-Means Clustering

Importing the dataset


In [24]:
# Load the mall-customer data; assumes Mall_Customers.csv is in the working directory.
dataset = pd.read_csv('Mall_Customers.csv')

In [25]:
# Cluster on columns 3 and 4 only — presumably annual income and spending
# score, judging by the axis labels used in the plots below; verify against
# the CSV header before changing these indices.
X = dataset.iloc[:, [3, 4]].values

Using the elbow method to find an optimal number of clusters


In [26]:
from sklearn.cluster import KMeans

In [27]:
# Shared hyperparameters for every KMeans fit in this notebook; the fixed
# random_state keeps the clustering reproducible across runs.
kmeans_params = {
    'init': 'k-means++',
    'max_iter': 300,
    'n_init': 10,
    'random_state': 0,
}

# Within-cluster sum of squares (inertia) for k = 1..10; the "elbow" in this
# curve suggests how many clusters to use.
wcss = []
for n_clusters in range(1, 11):
    model = KMeans(n_clusters=n_clusters, **kmeans_params)
    model.fit(X)
    wcss.append(model.inertia_)

In [28]:
# Elbow plot: inertia drops steeply until the "elbow", then flattens.
# Uses the explicit Axes interface rather than the pyplot state machine.
fig, ax = plt.subplots()
ax.plot(range(1, 11), wcss)
ax.set_title('The Elbow Method')
ax.set_xlabel('Number of clusters')
ax.set_ylabel('WCSS')
plt.show()


Applying K-Means to the dataset


In [29]:
# Five clusters, chosen from the elbow in the WCSS curve plotted above;
# reuses the same hyperparameters as the elbow-method fits.
kmeans = KMeans(n_clusters=5, **kmeans_params)

In [40]:
# Fit the model on X and get one cluster label per row (indices 0-4).
y_kmeans = kmeans.fit_predict(X)

Visualising the clusters


In [52]:
# Scatter each cluster in its own colour with a descriptive segment name,
# then overlay the fitted centroids. The five near-identical scatter calls
# are replaced by a single data-driven loop; cluster ids (0-4) come from
# fit_predict above, and the segment names are manual interpretations of
# each cluster's income/spending profile.
cluster_styles = [
    ('red', 'Careful'),
    ('blue', 'Standard'),
    ('green', 'Target'),
    ('cyan', 'Careless'),
    ('magenta', 'Sensible'),
]
for cluster_id, (color, label) in enumerate(cluster_styles):
    members = y_kmeans == cluster_id
    plt.scatter(X[members, 0], X[members, 1], s=100, c=color, label=label)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=300, c='yellow', label='Centroids')
plt.title('Clusters of clients')
plt.xlabel('Annual income (K$)')
plt.ylabel('Spending score (1-100)')
plt.legend()
plt.show()