In [3]:
# Customer segmentation problem
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from scipy.spatial.distance import cdist
In [4]:
# Data source path
data_path = os.path.join("book_code", "Section 5", "customers.csv")
In [5]:
data = pd.read_csv(data_path)
print("---> Data loaded, shape {}".format(data.shape))
---> Data loaded, shape (440, 8)
In [6]:
# Display a description of the dataset
display(data.describe())
Channel
Region
Fresh
Milk
Grocery
Frozen
Detergents_Paper
Delicassen
count
440.000000
440.000000
440.000000
440.000000
440.000000
440.000000
440.000000
440.000000
mean
1.322727
2.543182
12000.297727
5796.265909
7951.277273
3071.931818
2881.493182
1524.870455
std
0.468052
0.774272
12647.328865
7380.377175
9503.162829
4854.673333
4767.854448
2820.105937
min
1.000000
1.000000
3.000000
55.000000
3.000000
25.000000
3.000000
3.000000
25%
1.000000
2.000000
3127.750000
1533.000000
2153.000000
742.250000
256.750000
408.250000
50%
1.000000
3.000000
8504.000000
3627.000000
4755.500000
1526.000000
816.500000
965.500000
75%
2.000000
3.000000
16933.750000
7190.250000
10655.750000
3554.250000
3922.000000
1820.250000
max
2.000000
3.000000
112151.000000
73498.000000
92780.000000
60869.000000
40827.000000
47943.000000
In [7]:
print("---> Data sample\n{}".format(data[:10]))
---> Data sample
Channel Region Fresh Milk Grocery Frozen Detergents_Paper \
0 2 3 12669 9656 7561 214 2674
1 2 3 7057 9810 9568 1762 3293
2 2 3 6353 8808 7684 2405 3516
3 1 3 13265 1196 4221 6404 507
4 2 3 22615 5410 7198 3915 1777
5 2 3 9413 8259 5126 666 1795
6 2 3 12126 3199 6975 480 3140
7 2 3 7579 4956 9426 1669 3321
8 1 3 5963 3648 6192 425 1716
9 2 3 6006 11093 18881 1159 7425
Delicassen
0 1338
1 1776
2 7844
3 1788
4 5185
5 1451
6 545
7 2566
8 750
9 2098
In [8]:
# Scatter plot of all the features, this is still black magic for me...
pd.plotting.scatter_matrix(data, figsize = (16, 10))
Out[8]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x116f95d68>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1192eb4a8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1193147b8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11933fac8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119368da0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119368dd8>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1193ab400>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1193d5a90>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x119405160>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11942b7f0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119456e80>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119484550>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1194acbe0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1194de2b0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119506940>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11952efd0>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x11955e6a0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119586d30>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1195b7400>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1195dea90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119610160>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1196397f0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119660e80>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119690550>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x1196b7be0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1196ea2b0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119710940>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119739fd0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11976a6a0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119791d30>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1197c2400>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1197e8a90>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x11981d160>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1198437f0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11986be80>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11989c550>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1198c1be0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1198f62b0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11991d940>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119946fd0>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x1199756a0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x11999dd30>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1199ce400>,
<matplotlib.axes._subplots.AxesSubplot object at 0x1199f5a90>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119a26160>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119a4f7f0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119a77e80>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119aa7550>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x119ad0be0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119aff2b0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119b2a940>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119b53fd0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119b816a0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119ba9d30>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119bdb400>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119c00a90>],
[<matplotlib.axes._subplots.AxesSubplot object at 0x119c33160>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119c5a7f0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119c82e80>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119cb2550>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119cd9be0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119d0e2b0>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119d33940>,
<matplotlib.axes._subplots.AxesSubplot object at 0x119d5efd0>]],
dtype=object)
In [ ]:
Content source: mbdebian/ml-playground
Similar notebooks: