In [16]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [17]:
# Compact NumPy array printing for the rest of the notebook: three decimal
# places, and small floats shown in fixed-point instead of scientific notation.
np.set_printoptions(
    suppress=True,
    precision=3,
)

Hierarchical Clustering

Importing a dataset


In [18]:
# Read the mall customer table into a DataFrame. The path is relative, so the
# CSV must sit in the notebook's working directory.
dataset = pd.read_csv('Mall_Customers.csv')

In [19]:
# Feature matrix for clustering: all rows, the columns at positions 3 and 4,
# extracted as a NumPy array.
# NOTE(review): the scatter-plot axis labels suggest these are annual income
# (K$) and spending score (1-100) — confirm against the CSV header.
X = dataset.iloc[:, [3, 4]].values

Using a dendrogram to find the optimal number of clusters


In [20]:
import scipy.cluster.hierarchy as sch

In [21]:
# Draw the dendrogram and decorate it in the SAME cell. With the inline
# backend a figure is rendered and closed when its cell finishes, so setting
# the title and axis labels in a later cell would attach them to a new, empty
# figure instead of this one.
# Ward linkage merges, at each step, the pair of clusters that gives the
# smallest increase in within-cluster variance.
dendrogram = sch.dendrogram(sch.linkage(X, method='ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean distances')
plt.show()


Fitting hierarchical clustering to the mall dataset


In [23]:
from sklearn.cluster import AgglomerativeClustering

In [24]:
# Agglomerative clustering into 5 groups with Ward linkage.
# The distance argument is deliberately omitted: Ward linkage only supports
# Euclidean distance, which is also the estimator's default. The former
# `affinity=` keyword was deprecated in scikit-learn 1.2 and removed in 1.4
# (renamed to `metric=`), so leaving it out keeps this cell working on both
# old and new releases with identical results.
hc = AgglomerativeClustering(n_clusters=5, linkage='ward')
# fit_predict fits the model on X and returns one integer cluster label per row.
y_hc = hc.fit_predict(X)

Visualising the clusters


In [25]:
# One scatter layer per cluster label. The position in this list is the
# cluster label (0..4), so the layers and legend entries are drawn in label
# order.
cluster_styles = [
    ('red', 'Careful'),
    ('blue', 'Standard'),
    ('green', 'Target'),
    ('cyan', 'Careless'),
    ('magenta', 'Sensible'),
]
for cluster_id, (colour, name) in enumerate(cluster_styles):
    # Boolean mask selecting the rows assigned to this cluster.
    members = y_hc == cluster_id
    plt.scatter(X[members, 0], X[members, 1], s=100, c=colour, label=name)
plt.title('Clusters of clients')
plt.xlabel('Annual income (K$)')
plt.ylabel('Spending score (1-100)')
plt.legend()
plt.show()