In [16]:

    
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd



In [17]:

    
# Print NumPy floats with 3 digits of precision and suppress scientific
# notation for small values.
np.set_printoptions(suppress=True, precision=3)

Hierarchical Clustering

Importing a dataset



In [18]:

    
# Load the customer table; the relative path is resolved against the
# notebook's current working directory.
csv_path = 'Mall_Customers.csv'
dataset = pd.read_csv(csv_path)



In [19]:

    
# Feature matrix: every row, positional columns 3 and 4 only.
# NOTE(review): the later scatter plot labels these two columns as annual
# income and spending score -- confirm against the CSV header.
feature_columns = [3, 4]
X = dataset.iloc[:, feature_columns].values

Using a dendrogram to find the optimal number of clusters



In [20]:

    
import scipy.cluster.hierarchy as sch



In [21]:

    
# Build the Ward-linkage hierarchy over X and draw its dendrogram.
# The plot, its title/labels and plt.show() must live in the SAME cell:
# the inline backend renders and closes the current figure at the end of
# each cell, so decorating it from a later cell would only affect a new,
# empty figure.
dendrogram = sch.dendrogram(sch.linkage(X, method='ward'))
plt.title('Dendrogram')
plt.xlabel('Customers')
plt.ylabel('Euclidean distances')
plt.show()

Fitting hierarchical clustering to the mall dataset



In [23]:

    
from sklearn.cluster import AgglomerativeClustering



In [24]:

    
# Agglomerative clustering into 5 groups using Ward linkage.
# The distance keyword is intentionally omitted: `affinity=` was deprecated
# in scikit-learn 1.2 and removed in 1.4 (renamed to `metric=`), and Ward
# linkage only supports Euclidean distance, which is what the default gives.
# Leaving it out therefore behaves the same on old and new versions.
hc = AgglomerativeClustering(n_clusters=5, linkage='ward')
# y_hc holds one integer cluster label per row of X.
y_hc = hc.fit_predict(X)

Visualising the clusters



In [25]:

    
# (colour, legend label) for each cluster id; list position == cluster id.
# NOTE(review): the names are tied to the integer labels in y_hc, so they
# need re-checking if the clustering or its parameters change.
cluster_styles = [
    ('red', 'Careful'),
    ('blue', 'Standard'),
    ('green', 'Target'),
    ('cyan', 'Careless'),
    ('magenta', 'Sensible'),
]

# Draw one scatter series per cluster: column 0 on the x axis, column 1 on y.
for cluster_id, (colour, cluster_name) in enumerate(cluster_styles):
    members = X[y_hc == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=100, c=colour, label=cluster_name)

plt.title('Clusters of clients')
plt.xlabel('Annual income (K$)')
plt.ylabel('Spending score (1-100)')
plt.legend()
plt.show()