In [3]:
# Customer segmentation problem
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist

In [4]:
# Data source path
data_path = os.path.join("book_code", "Section 5", "customers.csv")

In [5]:
data = pd.read_csv(data_path)
print("---> Data loaded, shape {}".format(data.shape))


---> Data loaded, shape (440, 8)

In [6]:
# Display a description of the dataset
display(data.describe())


Channel Region Fresh Milk Grocery Frozen Detergents_Paper Delicassen
count 440.000000 440.000000 440.000000 440.000000 440.000000 440.000000 440.000000 440.000000
mean 1.322727 2.543182 12000.297727 5796.265909 7951.277273 3071.931818 2881.493182 1524.870455
std 0.468052 0.774272 12647.328865 7380.377175 9503.162829 4854.673333 4767.854448 2820.105937
min 1.000000 1.000000 3.000000 55.000000 3.000000 25.000000 3.000000 3.000000
25% 1.000000 2.000000 3127.750000 1533.000000 2153.000000 742.250000 256.750000 408.250000
50% 1.000000 3.000000 8504.000000 3627.000000 4755.500000 1526.000000 816.500000 965.500000
75% 2.000000 3.000000 16933.750000 7190.250000 10655.750000 3554.250000 3922.000000 1820.250000
max 2.000000 3.000000 112151.000000 73498.000000 92780.000000 60869.000000 40827.000000 47943.000000

In [7]:
print("---> Data sample\n{}".format(data[:10]))


---> Data sample
   Channel  Region  Fresh   Milk  Grocery  Frozen  Detergents_Paper  \
0        2       3  12669   9656     7561     214              2674   
1        2       3   7057   9810     9568    1762              3293   
2        2       3   6353   8808     7684    2405              3516   
3        1       3  13265   1196     4221    6404               507   
4        2       3  22615   5410     7198    3915              1777   
5        2       3   9413   8259     5126     666              1795   
6        2       3  12126   3199     6975     480              3140   
7        2       3   7579   4956     9426    1669              3321   
8        1       3   5963   3648     6192     425              1716   
9        2       3   6006  11093    18881    1159              7425   

   Delicassen  
0        1338  
1        1776  
2        7844  
3        1788  
4        5185  
5        1451  
6         545  
7        2566  
8         750  
9        2098  

In [8]:
# Scatter plot of all the features, this is still black magic for me...
pd.plotting.scatter_matrix(data, figsize = (16, 10))


Out[8]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x116f95d68>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1192eb4a8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1193147b8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11933fac8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119368da0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119368dd8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1193ab400>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1193d5a90>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x119405160>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11942b7f0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119456e80>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119484550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1194acbe0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1194de2b0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119506940>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11952efd0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x11955e6a0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119586d30>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1195b7400>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1195dea90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119610160>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1196397f0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119660e80>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119690550>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1196b7be0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1196ea2b0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119710940>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119739fd0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11976a6a0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119791d30>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1197c2400>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1197e8a90>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x11981d160>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1198437f0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11986be80>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11989c550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1198c1be0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1198f62b0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11991d940>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119946fd0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x1199756a0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x11999dd30>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1199ce400>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1199f5a90>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119a26160>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119a4f7f0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119a77e80>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119aa7550>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x119ad0be0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119aff2b0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119b2a940>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119b53fd0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119b816a0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119ba9d30>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119bdb400>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119c00a90>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x119c33160>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119c5a7f0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119c82e80>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119cb2550>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119cd9be0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119d0e2b0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119d33940>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x119d5efd0>]],
      dtype=object)

In [ ]: