In [2]:
    
%matplotlib inline
import numpy as np
import pandas as pd
import scipy
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.mlab import PCA as mlabPCA
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA 
from sklearn import preprocessing
from scipy.spatial.distance import cdist
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.cluster import AffinityPropagation
from sklearn.cluster import KMeans
from sklearn.preprocessing import normalize
from sklearn.decomposition import PCA
from sklearn.cluster import MiniBatchKMeans
from sklearn.cluster import SpectralClustering
from sklearn.metrics import silhouette_samples, silhouette_score
    
In [3]:
    
# Load the 2013 Boston Marathon results and preview the first rows.
boston_marathon_results = pd.read_csv(filepath_or_buffer='results 2013.csv')
boston_marathon_results.head(n=5)
    
    Out[3]:
  
    
       
      25k 
      age 
      name 
      division 
      10k 
      gender 
      half 
      official 
      bib 
      ctz 
      ... 
      overall 
      pace 
      state 
      30k 
      5k 
      genderdiv 
      20k 
      35k 
      city 
      40k 
     
  
  
    
      0 
      49.87 
      28 
      Cassidy, Josh R. 
      9 
      18.18 
      M 
      40.93 
      90.90 
      W1 
      NaN 
      ... 
      9 
      3.47 
      ON 
      62.07 
      8.90 
      9 
      38.80 
      74.73 
      Toronto 
      85.55 
     
    
      1 
      77.27 
      30 
      Korir, Wesley 
      5 
      30.90 
      M 
      64.90 
      132.50 
      1 
      NaN 
      ... 
      5 
      5.07 
      NaN 
      92.97 
      15.90 
      5 
      61.52 
      108.78 
      Kenya 
      124.77 
     
    
      2 
      77.23 
      23 
      Desisa, Lelisa 
      1 
      30.90 
      M 
      64.92 
      130.37 
      2 
      NaN 
      ... 
      1 
      4.98 
      NaN 
      92.72 
      15.93 
      1 
      61.53 
      108.68 
      Ambo 
      123.78 
     
    
      3 
      50.50 
      32 
      Fearnley, Kurt H. 
      5 
      18.73 
      M 
      42.00 
      88.43 
      W2 
      NaN 
      ... 
      5 
      3.38 
      NaN 
      61.35 
      8.98 
      5 
      39.88 
      73.00 
      Hamilton 
      83.43 
     
    
      4 
      48.75 
      39 
      Hokinoue, Kota 
      3 
      18.18 
      M 
      40.57 
      87.22 
      W3 
      NaN 
      ... 
      3 
      3.33 
      NaN 
      59.92 
      8.92 
      3 
      38.55 
      71.68 
      Iizuka 
      81.88 
     
  
5 rows × 21 columns
In [4]:
    
# List every column available in the raw results table.
boston_marathon_results.columns
    
    Out[4]:
Index(['25k', 'age', 'name', 'division', '10k', 'gender', 'half', 'official',
       'bib', 'ctz', 'country', 'overall', 'pace', 'state', '30k', '5k',
       'genderdiv', '20k', '35k', 'city', '40k'],
      dtype='object')
In [5]:
    
# Discard the columns that are not used in the clustering analysis below.
boston_marathon_scores = boston_marathon_results.drop(
    columns=['city', 'country', 'genderdiv', 'bib', 'ctz', 'state', 'name', 'division']
)
    
In [6]:
    
# Swap the '-' placeholder for 0 (reassigning rather than mutating in place).
boston_marathon_scores = boston_marathon_scores.replace(to_replace='-', value=0)
# Encode gender numerically: F -> 0, M -> 1.
boston_marathon_scores['gender'] = boston_marathon_scores['gender'].map({'F': 0, 'M': 1})
print(boston_marathon_scores.columns.unique())
boston_marathon_scores.head()
    
    
Index(['25k', 'age', '10k', 'gender', 'half', 'official', 'overall', 'pace',
       '30k', '5k', '20k', '35k', '40k'],
      dtype='object')
    Out[6]:
  
    
       
      25k 
      age 
      10k 
      gender 
      half 
      official 
      overall 
      pace 
      30k 
      5k 
      20k 
      35k 
      40k 
     
  
  
    
      0 
      49.87 
      28 
      18.18 
      1 
      40.93 
      90.90 
      9 
      3.47 
      62.07 
      8.90 
      38.80 
      74.73 
      85.55 
     
    
      1 
      77.27 
      30 
      30.90 
      1 
      64.90 
      132.50 
      5 
      5.07 
      92.97 
      15.90 
      61.52 
      108.78 
      124.77 
     
    
      2 
      77.23 
      23 
      30.90 
      1 
      64.92 
      130.37 
      1 
      4.98 
      92.72 
      15.93 
      61.53 
      108.68 
      123.78 
     
    
      3 
      50.50 
      32 
      18.73 
      1 
      42.00 
      88.43 
      5 
      3.38 
      61.35 
      8.98 
      39.88 
      73.00 
      83.43 
     
    
      4 
      48.75 
      39 
      18.18 
      1 
      40.57 
      87.22 
      3 
      3.33 
      59.92 
      8.92 
      38.55 
      71.68 
      81.88 
     
  
In [7]:
    
# Cast every remaining column to float64, then report dtypes and non-null counts.
boston_marathon_scores = boston_marathon_scores.astype('float64')
boston_marathon_scores.info()
    
    
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16164 entries, 0 to 16163
Data columns (total 13 columns):
25k         16164 non-null float64
age         16164 non-null float64
10k         16164 non-null float64
gender      16164 non-null float64
half        16164 non-null float64
official    16164 non-null float64
overall     16164 non-null float64
pace        16164 non-null float64
30k         16164 non-null float64
5k          16164 non-null float64
20k         16164 non-null float64
35k         16164 non-null float64
40k         16164 non-null float64
dtypes: float64(13)
memory usage: 1.6 MB
In [8]:
    
# Work on an independent copy: plain assignment would only alias the frame, so
# columns added to X_tr later (cluster labels, etc.) would also be written into
# boston_marathon_scores.
X_tr = boston_marathon_scores.copy()
# Features used for clustering.
clmns = ['age', 'official','40k', '35k', '30k', '25k', 'half', '20k', '10k', '5k', 'pace']
# NOTE: sklearn's normalize() rescales each *row* (one runner) to unit L2 norm;
# it does not standardize the columns to zero mean / unit variance.
X_tr_std = normalize(X_tr[clmns])
    
Compare Spectral Clustering against k-means using the silhouette score
As there is no ground truth, the criterion used to evaluate the clusters produced by Spectral Clustering and k-means is the silhouette coefficient. From the results obtained, it can be seen that Spectral Clustering requires 6 clusters to reach a silhouette score similar to the one obtained with 3 clusters using k-means.
In [19]:
    
# Compare k-means against Spectral Clustering from a silhouette_score perspective.
# The silhouette_score gives the average value over all the samples, which
# gives a perspective into the density and separation of the formed clusters.
range_n_clusters = np.arange(10) + 2
for n_clusters in range_n_clusters:
    # Initialize each clusterer with the n_clusters value and a random
    # generator seed of 10 for reproducibility.
    spec_clust = SpectralClustering(n_clusters=n_clusters, random_state=10)
    cluster_labels1 = spec_clust.fit_predict(X_tr_std)
    silhouette_avg1 = silhouette_score(X_tr_std, cluster_labels1)

    # fit_predict() fits the model itself, so a separate .fit() call beforehand
    # would only repeat the same work.
    kmeans = KMeans(n_clusters=n_clusters, init='k-means++', n_init=10, random_state=10)
    cluster_labels2 = kmeans.fit_predict(X_tr_std)
    silhouette_avg2 = silhouette_score(X_tr_std, cluster_labels2)

    print("For n_clusters =", n_clusters,
          "av. sil_score for Spec. clust is :", silhouette_avg1,
         "av. sil_score for kmeans is :",silhouette_avg2 )
    
    
For n_clusters = 2 av. sil_score for Spec. clust is : 0.961171331526 av. sil_score for kmeans is : 0.960595115442
For n_clusters = 3 av. sil_score for Spec. clust is : 0.387076810597 av. sil_score for kmeans is : 0.388209250616
For n_clusters = 4 av. sil_score for Spec. clust is : 0.388415111093 av. sil_score for kmeans is : 0.396003681008
For n_clusters = 5 av. sil_score for Spec. clust is : 0.389380079665 av. sil_score for kmeans is : 0.397697024637
For n_clusters = 6 av. sil_score for Spec. clust is : 0.390484776573 av. sil_score for kmeans is : 0.329749119438
For n_clusters = 7 av. sil_score for Spec. clust is : 0.385483467461 av. sil_score for kmeans is : 0.332703384807
For n_clusters = 8 av. sil_score for Spec. clust is : 0.384559772179 av. sil_score for kmeans is : 0.335751771772
For n_clusters = 9 av. sil_score for Spec. clust is : 0.386544891208 av. sil_score for kmeans is : 0.336687926662
For n_clusters = 10 av. sil_score for Spec. clust is : 0.379711180133 av. sil_score for kmeans is : 0.292708178952
For n_clusters = 11 av. sil_score for Spec. clust is : 0.380947772569 av. sil_score for kmeans is : 0.293759032687
The optimal number of k-means clusters will be determined using the elbow method. Once the number of k-means clusters is set, the number of clusters for spectral clustering will be chosen so that its silhouette score matches the one obtained with k-means.
K-Means
In [9]:
    
# Use the elbow method to determine the number of clusters for k-means.
# Distortion = mean Euclidean distance from each sample to its nearest centroid.
# The normalized feature matrix X_tr_std is used here because it is the data
# the final k-means model is fitted on; the raw frame also carries columns
# (e.g. the overall rank) that are not clustering features.
distortions = []
K = range(1, 10)
for k in K:
    # Fit exactly once per k; the seed keeps the curve reproducible.
    kmeanModel = KMeans(n_clusters=k, random_state=42).fit(X_tr_std)
    distortions.append(
        cdist(X_tr_std, kmeanModel.cluster_centers_, 'euclidean').min(axis=1).mean()
    )

# Plot the elbow
plt.plot(K, distortions, 'bx-')
plt.xlabel('k')
plt.ylabel('Distortion')
plt.title('The Elbow Method showing the optimal k')
plt.show()
    
    
 
The elbow method shows that the optimal number of clusters to be used in the k-means method is 3, based on the mean Euclidean distance from each point to its nearest cluster center. From an analytical perspective, the inertia function shows the same result: 3 clusters, since the decrease in inertia becomes smaller when moving from 3 to 4 clusters.
In [10]:
    
# Print the k-means inertia for k = 1..9 to cross-check the elbow plot.
for i in range(1, 10):
    km = KMeans(n_clusters=i, init='k-means++', n_init=10)
    km.fit(X_tr_std)
    print(i, km.inertia_)
    
    
1 30.1263346974
2 19.0457804842
3 13.188426726
4 11.2720106651
5 9.93710134099
6 8.54109431453
7 7.47659624147
8 6.8543306694
9 6.17804693014
In [59]:
    
# Cluster the data with k-means (k=3); the seed makes the label assignment reproducible.
kmeans = KMeans(n_clusters=3, init='k-means++', n_init=10, random_state=42).fit(X_tr_std)
labels = kmeans.labels_
# Glue the labels back onto the original data
X_tr['clusters'] = labels
X_tr['Gender'] = boston_marathon_scores.gender
X_tr['Overall'] = boston_marathon_scores.overall
# Add the new columns to the column list, skipping names that are already
# there: a plain extend() appends duplicates every time a clustering cell runs.
clmns.extend(col for col in ['clusters', 'Gender', 'Overall'] if col not in clmns)
# Mean of every column per cluster (groupby().mean() already returns a DataFrame)
X_tr.groupby(['clusters']).mean()
    
    Out[59]:
  
    
       
      25k 
      age 
      10k 
      gender 
      half 
      official 
      overall 
      pace 
      30k 
      5k 
      20k 
      35k 
      40k 
      Gender 
      Overall 
     
    
      clusters 
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
     
  
  
    
      0 
      118.229587 
      33.293590 
      46.701476 
      0.386324 
      99.217164 
      208.662765 
      8523.313127 
      7.966590 
      143.787377 
      23.352465 
      94.031163 
      170.262460 
      197.110309 
      0.386324 
      8523.313127 
     
    
      1 
      0.000000 
      41.857143 
      14.568571 
      0.500000 
      14.730000 
      220.382143 
      11327.642857 
      8.412857 
      10.755714 
      9.150714 
      13.962143 
      0.000000 
      14.455000 
      0.500000 
      11327.642857 
     
    
      2 
      118.059351 
      48.824824 
      46.667129 
      0.782068 
      99.197408 
      207.706291 
      8343.792325 
      7.930118 
      143.326589 
      23.332466 
      94.015637 
      169.582073 
      195.992275 
      0.782068 
      8343.792325 
     
  
In [60]:
    
# Descriptive statistics per cluster, transposed so that each cluster is a column.
clusters_summary = X_tr.groupby(by=['clusters']).describe()
clusters_summary_transposed = clusters_summary.T
clusters_summary_transposed
    
    Out[60]:
  
    
       
      clusters 
      0 
      1 
      2 
     
  
  
    
      10k 
      count 
      7473.000000 
      14.000000 
      8677.000000 
     
    
      mean 
      46.701476 
      14.568571 
      46.667129 
     
    
      std 
      5.086413 
      23.947241 
      5.160402 
     
    
      min 
      0.000000 
      0.000000 
      0.000000 
     
    
      25% 
      42.880000 
      0.000000 
      43.470000 
     
    
      50% 
      47.420000 
      0.000000 
      46.950000 
     
    
      75% 
      50.400000 
      36.090000 
      50.120000 
     
    
      max 
      66.680000 
      53.570000 
      61.580000 
     
    
      20k 
      count 
      7473.000000 
      14.000000 
      8677.000000 
     
    
      mean 
      94.031163 
      13.962143 
      94.015637 
     
    
      std 
      10.168686 
      35.492962 
      10.030560 
     
    
      min 
      0.000000 
      0.000000 
      0.000000 
     
    
      25% 
      85.950000 
      0.000000 
      87.320000 
     
    
      50% 
      95.430000 
      0.000000 
      94.450000 
     
    
      75% 
      101.480000 
      0.000000 
      101.000000 
     
    
      max 
      131.720000 
      98.650000 
      123.180000 
     
    
      25k 
      count 
      7473.000000 
      14.000000 
      8677.000000 
     
    
      mean 
      118.229587 
      0.000000 
      118.059351 
     
    
      std 
      13.036739 
      0.000000 
      12.914815 
     
    
      min 
      0.000000 
      0.000000 
      0.000000 
     
    
      25% 
      108.130000 
      0.000000 
      109.600000 
     
    
      50% 
      120.100000 
      0.000000 
      118.570000 
     
    
      75% 
      127.580000 
      0.000000 
      127.050000 
     
    
      max 
      163.620000 
      0.000000 
      158.550000 
     
    
      30k 
      count 
      7473.000000 
      14.000000 
      8677.000000 
     
    
      mean 
      143.787377 
      10.755714 
      143.326589 
     
    
      std 
      15.833160 
      40.244198 
      15.838780 
     
    
      min 
      68.220000 
      0.000000 
      0.000000 
     
    
      25% 
      131.230000 
      0.000000 
      132.920000 
     
    
      50% 
      145.900000 
      0.000000 
      143.820000 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      half 
      std 
      10.737139 
      37.444634 
      10.539430 
     
    
      min 
      0.000000 
      0.000000 
      0.000000 
     
    
      25% 
      90.680000 
      0.000000 
      92.130000 
     
    
      50% 
      100.750000 
      0.000000 
      99.650000 
     
    
      75% 
      107.100000 
      0.000000 
      106.580000 
     
    
      max 
      138.670000 
      104.000000 
      129.880000 
     
    
      official 
      count 
      7473.000000 
      14.000000 
      8677.000000 
     
    
      mean 
      208.662765 
      220.382143 
      207.706291 
     
    
      std 
      23.933593 
      18.144768 
      23.574875 
     
    
      min 
      97.580000 
      173.580000 
      85.530000 
     
    
      25% 
      191.530000 
      214.842500 
      191.780000 
     
    
      50% 
      210.420000 
      224.450000 
      208.170000 
     
    
      75% 
      225.220000 
      230.722500 
      225.200000 
     
    
      max 
      284.230000 
      245.450000 
      281.600000 
     
    
      overall 
      count 
      7473.000000 
      14.000000 
      8677.000000 
     
    
      mean 
      8523.313127 
      11327.642857 
      8343.792325 
     
    
      std 
      5116.560194 
      4217.833651 
      4994.673736 
     
    
      min 
      1.000000 
      1114.000000 
      1.000000 
     
    
      25% 
      4030.000000 
      9781.250000 
      4071.000000 
     
    
      50% 
      8581.000000 
      12472.500000 
      7924.000000 
     
    
      75% 
      12661.000000 
      14006.250000 
      12655.000000 
     
    
      max 
      17598.000000 
      16825.000000 
      17596.000000 
     
    
      pace 
      count 
      7473.000000 
      14.000000 
      8677.000000 
     
    
      mean 
      7.966590 
      8.412857 
      7.930118 
     
    
      std 
      0.912951 
      0.690935 
      0.899336 
     
    
      min 
      3.730000 
      6.630000 
      3.270000 
     
    
      25% 
      7.320000 
      8.197500 
      7.320000 
     
    
      50% 
      8.030000 
      8.565000 
      7.950000 
     
    
      75% 
      8.600000 
      8.802500 
      8.600000 
     
    
      max 
      10.850000 
      9.370000 
      10.750000 
     
  
120 rows × 3 columns
In [61]:
    
# Project the normalized features onto their first two principal components.
X_pca = PCA(n_components=2).fit_transform(X_tr_std)
# NOTE: k-means is re-fitted here on the 2-D projection, so these labels are
# not guaranteed to match the `clusters` column computed on X_tr_std.
y_pred = KMeans(n_clusters=3, random_state=42).fit_predict(X_pca)
# Plot the solution with labelled axes so the figure stands on its own.
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y_pred)
plt.xlabel('Principal component 1')
plt.ylabel('Principal component 2')
plt.title('K-Means (k=3) on the first two principal components')
plt.show()
    
    
 
In [62]:
    
# Average official time, pace and age for every (cluster, gender) pair
# (pivot_table aggregates with the mean by default).
Graph_kmeans_official = pd.pivot_table(X_tr, values='official', index=['clusters', 'gender'])
Graph_kmeans_pace = pd.pivot_table(X_tr, values='pace', index=['clusters', 'gender'])
Graph_kmeans_age = pd.pivot_table(X_tr, values='age', index=['clusters', 'gender'])
# Separate the tables with a blank line: print()'s default single-space
# separator glues the header of one table onto the last row of the previous one.
print(Graph_kmeans_official, Graph_kmeans_pace, Graph_kmeans_age, sep='\n\n')
    
    
                   official
clusters gender            
0        0.0     218.637580
         1.0     192.817769
1        0.0     223.532857
         1.0     217.231429
2        0.0     222.032644
         1.0     203.714082                      pace
clusters gender          
0        0.0     8.347072
         1.0     7.362196
1        0.0     8.534286
         1.0     8.291429
2        0.0     8.476526
         1.0     7.777854                        age
clusters gender           
0        0.0     34.193197
         1.0     31.864565
1        0.0     40.142857
         1.0     43.571429
2        0.0     48.960338
         1.0     48.787062
Spectral Clustering
In [15]:
    
# We know we're looking for 6 clusters from the comparison with the kmeans.
n_clusters=6
# Declare and fit the model; the seed makes the label assignment reproducible.
sc = SpectralClustering(n_clusters=n_clusters, random_state=42).fit(X_tr_std)
# Extract cluster assignments for each data point.
labels = sc.labels_
# Glue the labels back onto the original data
X_tr['clusters'] = labels
X_tr['Gender'] = boston_marathon_scores.gender
X_tr['Overall'] = boston_marathon_scores.overall
# Add the new columns to the column list, skipping names that are already
# there: a plain extend() appends duplicates every time a clustering cell runs.
clmns.extend(col for col in ['clusters', 'Gender', 'Overall'] if col not in clmns)
# Mean of every column per cluster (groupby().mean() already returns a DataFrame)
X_tr.groupby(['clusters']).mean()
    
    Out[15]:
  
    
       
      25k 
      age 
      10k 
      gender 
      half 
      official 
      overall 
      pace 
      30k 
      5k 
      20k 
      35k 
      40k 
      Gender 
      Overall 
     
    
      clusters 
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
     
  
  
    
      0 
      117.848578 
      49.104459 
      46.705808 
      0.777035 
      99.082917 
      205.460250 
      7864.603982 
      7.844473 
      142.773846 
      23.347735 
      93.929491 
      168.457354 
      194.105159 
      0.777035 
      7864.603982 
     
    
      1 
      118.205056 
      33.151903 
      46.951070 
      0.348409 
      99.416279 
      205.273537 
      7829.390674 
      7.837257 
      143.102483 
      23.508974 
      94.253910 
      168.663509 
      194.152764 
      0.348409 
      7829.390674 
     
    
      2 
      0.000000 
      40.416667 
      12.936667 
      0.416667 
      8.518333 
      220.857500 
      11460.166667 
      8.430833 
      0.000000 
      8.661667 
      8.068333 
      0.000000 
      0.000000 
      0.416667 
      11460.166667 
     
    
      3 
      31.868571 
      39.928571 
      42.200000 
      0.785714 
      89.550714 
      204.827143 
      7637.428571 
      7.820000 
      141.996429 
      22.919286 
      84.792143 
      155.258571 
      126.517143 
      0.785714 
      7637.428571 
     
    
      4 
      130.341429 
      45.428571 
      50.031429 
      0.571429 
      108.240000 
      236.570000 
      14046.285714 
      9.030000 
      161.281429 
      21.010000 
      102.022857 
      0.000000 
      223.585714 
      0.571429 
      14046.285714 
     
    
      5 
      119.219489 
      41.503353 
      45.890427 
      0.724225 
      99.013487 
      224.082938 
      11750.388935 
      8.554849 
      146.968906 
      22.873650 
      93.681865 
      178.077615 
      210.484363 
      0.724225 
      11750.388935 
     
  
In [16]:
    
# Descriptive statistics per cluster, transposed so that each cluster is a column.
clusters_summary = X_tr.groupby(by=['clusters']).describe()
clusters_summary_transposed = clusters_summary.T
clusters_summary_transposed
    
    Out[16]:
  
    
       
      clusters 
      0 
      1 
      2 
      3 
      4 
      5 
     
  
  
    
      10k 
      count 
      7333.000000 
      6412.000000 
      12.000000 
      14.000000 
      7.000000 
      2386.000000 
     
    
      mean 
      46.705808 
      46.951070 
      12.936667 
      42.200000 
      50.031429 
      45.890427 
     
    
      std 
      5.067477 
      4.974319 
      23.441598 
      12.654792 
      2.584411 
      5.614510 
     
    
      min 
      0.000000 
      21.400000 
      0.000000 
      0.000000 
      46.270000 
      0.000000 
     
    
      25% 
      43.420000 
      42.977500 
      0.000000 
      43.435000 
      48.075000 
      43.020000 
     
    
      50% 
      46.980000 
      47.750000 
      0.000000 
      45.020000 
      51.180000 
      46.260000 
     
    
      75% 
      50.180000 
      50.600000 
      12.030000 
      47.632500 
      51.685000 
      49.430000 
     
    
      max 
      61.580000 
      66.680000 
      53.570000 
      51.800000 
      53.250000 
      57.880000 
     
    
      20k 
      count 
      7333.000000 
      6412.000000 
      12.000000 
      14.000000 
      7.000000 
      2386.000000 
     
    
      mean 
      93.929491 
      94.253910 
      8.068333 
      84.792143 
      102.022857 
      93.681865 
     
    
      std 
      10.114733 
      10.079694 
      27.949527 
      25.409862 
      5.972176 
      10.072741 
     
    
      min 
      38.230000 
      44.380000 
      0.000000 
      0.000000 
      92.820000 
      0.000000 
     
    
      25% 
      87.120000 
      85.950000 
      0.000000 
      87.247500 
      98.875000 
      87.412500 
     
    
      50% 
      94.370000 
      95.890000 
      0.000000 
      90.400000 
      102.270000 
      94.230000 
     
    
      75% 
      101.030000 
      101.600000 
      0.000000 
      95.192500 
      105.610000 
      100.845000 
     
    
      max 
      123.180000 
      131.720000 
      96.820000 
      103.920000 
      110.100000 
      121.070000 
     
    
      25k 
      count 
      7333.000000 
      6412.000000 
      12.000000 
      14.000000 
      7.000000 
      2386.000000 
     
    
      mean 
      117.848578 
      118.205056 
      0.000000 
      31.868571 
      130.341429 
      119.219489 
     
    
      std 
      12.791647 
      12.783019 
      0.000000 
      52.296443 
      9.249017 
      12.165837 
     
    
      min 
      48.320000 
      56.050000 
      0.000000 
      0.000000 
      115.920000 
      0.000000 
     
    
      25% 
      109.220000 
      107.715000 
      0.000000 
      0.000000 
      126.725000 
      110.872500 
     
    
      50% 
      118.320000 
      120.260000 
      0.000000 
      0.000000 
      129.100000 
      119.680000 
     
    
      75% 
      126.850000 
      127.470000 
      0.000000 
      82.575000 
      134.945000 
      128.300000 
     
    
      max 
      153.480000 
      163.620000 
      0.000000 
      113.350000 
      144.030000 
      158.550000 
     
    
      30k 
      count 
      7333.000000 
      6412.000000 
      12.000000 
      14.000000 
      7.000000 
      2386.000000 
     
    
      mean 
      142.773846 
      143.102483 
      0.000000 
      141.996429 
      161.281429 
      146.968906 
     
    
      std 
      15.661068 
      15.694454 
      0.000000 
      13.742250 
      13.225840 
      16.564537 
     
    
      min 
      59.450000 
      68.220000 
      0.000000 
      113.600000 
      139.300000 
      0.000000 
     
    
      25% 
      132.220000 
      130.420000 
      0.000000 
      134.082500 
      156.000000 
      136.330000 
     
    
      50% 
      143.230000 
      145.520000 
      0.000000 
      139.625000 
      161.870000 
      147.915000 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      half 
      std 
      10.678470 
      10.639831 
      29.508372 
      26.845829 
      7.034143 
      10.464197 
     
    
      min 
      40.270000 
      46.720000 
      0.000000 
      0.000000 
      97.920000 
      0.000000 
     
    
      25% 
      91.900000 
      90.645000 
      0.000000 
      92.120000 
      104.490000 
      92.305000 
     
    
      50% 
      99.530000 
      101.180000 
      0.000000 
      95.275000 
      107.750000 
      99.560000 
     
    
      75% 
      106.600000 
      107.180000 
      0.000000 
      100.762500 
      112.740000 
      106.520000 
     
    
      max 
      129.880000 
      138.670000 
      102.220000 
      109.670000 
      117.550000 
      127.050000 
     
    
      official 
      count 
      7333.000000 
      6412.000000 
      12.000000 
      14.000000 
      7.000000 
      2386.000000 
     
    
      mean 
      205.460250 
      205.273537 
      220.857500 
      204.827143 
      236.570000 
      224.082938 
     
    
      std 
      22.892748 
      22.731726 
      19.680258 
      20.528396 
      23.676290 
      22.654757 
     
    
      min 
      85.530000 
      97.580000 
      173.580000 
      162.000000 
      195.030000 
      114.830000 
     
    
      25% 
      190.280000 
      187.837500 
      212.997500 
      191.492500 
      225.100000 
      207.672500 
     
    
      50% 
      205.850000 
      208.210000 
      225.400000 
      205.635000 
      242.100000 
      226.035000 
     
    
      75% 
      222.080000 
      221.600000 
      231.710000 
      219.295000 
      251.140000 
      240.082500 
     
    
      max 
      269.550000 
      278.000000 
      245.450000 
      238.220000 
      266.380000 
      284.230000 
     
    
      overall 
      count 
      7333.000000 
      6412.000000 
      12.000000 
      14.000000 
      7.000000 
      2386.000000 
     
    
      mean 
      7864.603982 
      7829.390674 
      11460.166667 
      7637.428571 
      14046.285714 
      11750.388935 
     
    
      std 
      4834.454210 
      4911.403729 
      4570.114716 
      4612.962167 
      4648.163498 
      4783.808730 
     
    
      min 
      1.000000 
      1.000000 
      1114.000000 
      346.000000 
      4739.000000 
      34.000000 
     
    
      25% 
      3790.000000 
      3326.000000 
      9272.750000 
      4024.000000 
      12607.000000 
      7786.000000 
     
    
      50% 
      7326.000000 
      7938.000000 
      12716.000000 
      7252.000000 
      16397.000000 
      12868.000000 
     
    
      75% 
      11858.000000 
      11740.250000 
      14245.000000 
      11069.250000 
      17210.000000 
      16096.250000 
     
    
      max 
      17568.000000 
      17589.000000 
      16825.000000 
      15721.000000 
      17554.000000 
      17598.000000 
     
    
      pace 
      count 
      7333.000000 
      6412.000000 
      12.000000 
      14.000000 
      7.000000 
      2386.000000 
     
    
      mean 
      7.844473 
      7.837257 
      8.430833 
      7.820000 
      9.030000 
      8.554849 
     
    
      std 
      0.873348 
      0.867035 
      0.749405 
      0.783768 
      0.901924 
      0.864270 
     
    
      min 
      3.270000 
      3.730000 
      6.630000 
      6.180000 
      7.450000 
      4.380000 
     
    
      25% 
      7.270000 
      7.170000 
      8.127500 
      7.320000 
      8.590000 
      7.930000 
     
    
      50% 
      7.870000 
      7.950000 
      8.600000 
      7.850000 
      9.250000 
      8.630000 
     
    
      75% 
      8.480000 
      8.470000 
      8.840000 
      8.367500 
      9.580000 
      9.170000 
     
    
      max 
      10.280000 
      10.620000 
      9.370000 
      9.100000 
      10.170000 
      10.850000 
     
  
120 rows × 6 columns
In [63]:
    
# Project the normalized features onto their first two principal components.
X_pca = PCA(n_components=2).fit_transform(X_tr_std)
# Use 6 clusters, the number chosen for Spectral Clustering in this section
# (the plot previously used 3, the k-means value). The literal is used instead
# of the `n_clusters` variable because other cells reassign that name.
# NOTE: the model is re-fitted on the 2-D projection, so these labels are not
# guaranteed to match the `clusters` column computed on X_tr_std.
y_pred = SpectralClustering(n_clusters=6, random_state=42).fit_predict(X_pca)
# Plot the solution with labelled axes so the figure stands on its own.
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y_pred)
plt.xlabel('Principal component 1')
plt.ylabel('Principal component 2')
plt.title('Spectral Clustering (6 clusters) on the first two principal components')
plt.show()
    
    
 
Mean Shift
In [24]:
    
# Here we set the bandwidth. This function automatically derives a bandwidth
# number based on an inspection of the distances among points in the data.
bandwidth = estimate_bandwidth(X_tr_std, quantile=0.9)
# Declare and fit the model.
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit(X_tr_std)
# Extract cluster assignments for each data point.
labels = ms.labels_
# Coordinates of the cluster centers.
cluster_centers = ms.cluster_centers_
# Count our clusters.
n_clusters_ = len(np.unique(labels))
# Glue the labels back onto the original data
X_tr['clusters'] = labels
X_tr['Gender'] = boston_marathon_scores.gender
X_tr['Overall'] = boston_marathon_scores.overall
# Add the new columns to the column list, skipping names that are already
# there: a plain extend() appends duplicates every time a clustering cell runs.
clmns.extend(col for col in ['clusters', 'Gender', 'Overall'] if col not in clmns)
# Report the cluster count, then the mean of every column per cluster
# (groupby().mean() already returns a DataFrame).
print("Number of estimated clusters: {}".format(n_clusters_))
X_tr.groupby(['clusters']).mean()
    
    
Number of estimated clusters: 18
    Out[24]:
  
    
       
      25k 
      age 
      10k 
      gender 
      half 
      official 
      overall 
      pace 
      30k 
      5k 
      20k 
      35k 
      40k 
      Gender 
      Overall 
     
    
      clusters 
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
     
  
  
    
      0 
      118.195629 
      41.634089 
      46.729067 
      0.599007 
      99.225182 
      208.131799 
      8423.401304 
      7.946343 
      143.564142 
      23.349711 
      94.047516 
      169.965941 
      196.538315 
      0.599007 
      8423.401304 
     
    
      1 
      123.384286 
      45.357143 
      0.000000 
      0.571429 
      103.810000 
      216.680000 
      10269.571429 
      8.272857 
      149.630714 
      17.516429 
      98.415000 
      177.007857 
      204.820000 
      0.571429 
      10269.571429 
     
    
      2 
      0.000000 
      39.500000 
      0.000000 
      0.500000 
      0.000000 
      219.632500 
      11138.875000 
      8.385000 
      0.000000 
      0.000000 
      0.000000 
      0.000000 
      0.000000 
      0.500000 
      11138.875000 
     
    
      3 
      0.000000 
      41.750000 
      45.547500 
      0.750000 
      96.765000 
      205.925000 
      8088.750000 
      7.860000 
      142.660000 
      22.890000 
      91.538750 
      168.860000 
      194.555000 
      0.750000 
      8088.750000 
     
    
      4 
      130.341429 
      45.428571 
      50.031429 
      0.571429 
      108.240000 
      236.570000 
      14046.285714 
      9.030000 
      161.281429 
      21.010000 
      102.022857 
      0.000000 
      223.585714 
      0.571429 
      14046.285714 
     
    
      5 
      111.540000 
      39.500000 
      44.425000 
      0.750000 
      93.897500 
      193.430000 
      4521.000000 
      7.387500 
      135.355000 
      22.262500 
      89.032500 
      159.577500 
      0.000000 
      0.750000 
      4521.000000 
     
    
      6 
      119.052500 
      46.750000 
      46.847500 
      0.500000 
      99.985000 
      214.370000 
      9688.500000 
      8.185000 
      0.000000 
      23.267500 
      94.575000 
      173.530000 
      202.502500 
      0.500000 
      9688.500000 
     
    
      7 
      115.956667 
      41.000000 
      45.010000 
      0.333333 
      0.000000 
      205.583333 
      8294.333333 
      7.856667 
      141.570000 
      22.523333 
      91.533333 
      167.580000 
      193.983333 
      0.333333 
      8294.333333 
     
    
      8 
      121.625000 
      41.500000 
      48.775000 
      0.500000 
      102.750000 
      205.265000 
      8105.500000 
      7.835000 
      145.960000 
      24.650000 
      0.000000 
      170.645000 
      194.890000 
      0.500000 
      8105.500000 
     
    
      9 
      0.000000 
      31.000000 
      0.000000 
      1.000000 
      0.000000 
      228.380000 
      13438.000000 
      8.720000 
      154.670000 
      24.530000 
      0.000000 
      184.430000 
      214.800000 
      1.000000 
      13438.000000 
     
    
      10 
      0.000000 
      36.000000 
      48.720000 
      1.000000 
      104.000000 
      218.080000 
      10692.000000 
      8.330000 
      150.580000 
      24.170000 
      98.650000 
      0.000000 
      0.000000 
      1.000000 
      10692.000000 
     
    
      11 
      115.100000 
      41.000000 
      0.000000 
      0.000000 
      97.100000 
      196.080000 
      4939.000000 
      7.480000 
      138.730000 
      22.750000 
      0.000000 
      162.550000 
      185.830000 
      0.000000 
      4939.000000 
     
    
      12 
      0.000000 
      23.000000 
      53.570000 
      0.000000 
      0.000000 
      225.400000 
      12717.000000 
      8.600000 
      0.000000 
      26.870000 
      0.000000 
      0.000000 
      0.000000 
      0.000000 
      12717.000000 
     
    
      13 
      0.000000 
      65.000000 
      0.000000 
      1.000000 
      0.000000 
      216.980000 
      10373.000000 
      8.280000 
      0.000000 
      0.000000 
      0.000000 
      0.000000 
      202.370000 
      1.000000 
      10373.000000 
     
    
      14 
      0.000000 
      43.000000 
      48.120000 
      0.000000 
      102.220000 
      214.130000 
      9584.000000 
      8.170000 
      0.000000 
      24.200000 
      96.820000 
      0.000000 
      0.000000 
      0.000000 
      9584.000000 
     
    
      15 
      0.000000 
      57.000000 
      53.550000 
      1.000000 
      0.000000 
      225.400000 
      12715.000000 
      8.600000 
      0.000000 
      26.870000 
      0.000000 
      0.000000 
      0.000000 
      1.000000 
      12715.000000 
     
    
      16 
      121.720000 
      29.000000 
      46.920000 
      0.000000 
      100.950000 
      221.850000 
      11810.000000 
      8.470000 
      150.850000 
      0.000000 
      0.000000 
      181.100000 
      209.350000 
      0.000000 
      11810.000000 
     
    
      17 
      0.000000 
      46.000000 
      0.000000 
      0.000000 
      0.000000 
      228.300000 
      13395.000000 
      8.720000 
      0.000000 
      26.000000 
      0.000000 
      0.000000 
      0.000000 
      0.000000 
      13395.000000 
     
  
In [21]:
    
# Descriptive statistics (count, mean, std, min, quartiles, max) of the
# columns of X_tr, computed separately for each value of 'clusters'.
clusters_summary = X_tr.groupby(by=['clusters']).describe()
# Flip the table so the clusters run across the columns and every
# (column, statistic) pair becomes a row.
clusters_summary_transposed = clusters_summary.T
clusters_summary_transposed
    
    Out[21]:
  
    
       
      clusters 
      0 
      1 
      2 
      3 
      4 
      5 
      6 
      7 
      8 
      9 
      ... 
      12 
      13 
      14 
      15 
      16 
      17 
      18 
      19 
      20 
      21 
     
  
  
    
      10k 
      count 
      16105.000000 
      10.000000 
      8.000000 
      7.000000 
      4.000000 
      4.000000 
      4.000000 
      3.000000 
      3.000000 
      3.000000 
      ... 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
     
    
      mean 
      46.729067 
      0.000000 
      0.000000 
      45.501429 
      44.425000 
      46.847500 
      0.000000 
      45.010000 
      50.280000 
      49.400000 
      ... 
      0.00 
      48.72 
      0.00 
      51.18 
      53.57 
      0.00 
      48.12 
      53.55 
      46.92 
      0.00 
     
    
      std 
      4.914671 
      0.000000 
      0.000000 
      4.923442 
      1.081558 
      2.355184 
      0.000000 
      6.085294 
      3.604345 
      2.386608 
      ... 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      min 
      18.030000 
      0.000000 
      0.000000 
      36.900000 
      43.170000 
      43.580000 
      0.000000 
      38.630000 
      46.270000 
      47.420000 
      ... 
      0.00 
      48.72 
      0.00 
      51.18 
      53.57 
      0.00 
      48.12 
      53.55 
      46.92 
      0.00 
     
    
      25% 
      43.220000 
      0.000000 
      0.000000 
      43.485000 
      43.965000 
      45.905000 
      0.000000 
      42.140000 
      48.795000 
      48.075000 
      ... 
      0.00 
      48.72 
      0.00 
      51.18 
      53.57 
      0.00 
      48.12 
      53.55 
      46.92 
      0.00 
     
    
      50% 
      47.170000 
      0.000000 
      0.000000 
      45.420000 
      44.365000 
      47.555000 
      0.000000 
      45.650000 
      51.320000 
      48.730000 
      ... 
      0.00 
      48.72 
      0.00 
      51.18 
      53.57 
      0.00 
      48.12 
      53.55 
      46.92 
      0.00 
     
    
      75% 
      50.280000 
      0.000000 
      0.000000 
      48.710000 
      44.825000 
      48.497500 
      0.000000 
      48.200000 
      52.285000 
      50.390000 
      ... 
      0.00 
      48.72 
      0.00 
      51.18 
      53.57 
      0.00 
      48.12 
      53.55 
      46.92 
      0.00 
     
    
      max 
      66.680000 
      0.000000 
      0.000000 
      51.800000 
      45.800000 
      48.700000 
      0.000000 
      50.750000 
      53.250000 
      52.050000 
      ... 
      0.00 
      48.72 
      0.00 
      51.18 
      53.57 
      0.00 
      48.12 
      53.55 
      46.92 
      0.00 
     
    
      20k 
      count 
      16105.000000 
      10.000000 
      8.000000 
      7.000000 
      4.000000 
      4.000000 
      4.000000 
      3.000000 
      3.000000 
      3.000000 
      ... 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
     
    
      mean 
      94.047516 
      98.301000 
      0.000000 
      91.482857 
      89.032500 
      94.575000 
      98.700000 
      91.533333 
      101.730000 
      101.850000 
      ... 
      0.00 
      98.65 
      0.00 
      103.42 
      0.00 
      0.00 
      96.82 
      0.00 
      0.00 
      0.00 
     
    
      std 
      9.963129 
      7.606121 
      0.000000 
      9.712080 
      1.795223 
      4.755562 
      5.907137 
      14.224072 
      8.652647 
      5.566229 
      ... 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      min 
      38.230000 
      81.900000 
      0.000000 
      74.980000 
      86.830000 
      87.920000 
      91.350000 
      76.570000 
      92.820000 
      96.770000 
      ... 
      0.00 
      98.65 
      0.00 
      103.42 
      0.00 
      0.00 
      96.82 
      0.00 
      0.00 
      0.00 
     
    
      25% 
      86.770000 
      94.292500 
      0.000000 
      86.975000 
      88.082500 
      92.877500 
      95.700000 
      84.860000 
      97.545000 
      98.875000 
      ... 
      0.00 
      98.65 
      0.00 
      103.42 
      0.00 
      0.00 
      96.82 
      0.00 
      0.00 
      0.00 
     
    
      50% 
      94.870000 
      98.775000 
      0.000000 
      91.630000 
      89.135000 
      95.855000 
      99.110000 
      93.150000 
      102.270000 
      100.980000 
      ... 
      0.00 
      98.65 
      0.00 
      103.42 
      0.00 
      0.00 
      96.82 
      0.00 
      0.00 
      0.00 
     
    
      75% 
      101.270000 
      104.217500 
      0.000000 
      97.950000 
      90.085000 
      97.552500 
      102.110000 
      99.015000 
      106.185000 
      104.390000 
      ... 
      0.00 
      98.65 
      0.00 
      103.42 
      0.00 
      0.00 
      96.82 
      0.00 
      0.00 
      0.00 
     
    
      max 
      131.720000 
      106.870000 
      0.000000 
      103.920000 
      91.030000 
      98.670000 
      105.230000 
      104.880000 
      110.100000 
      107.800000 
      ... 
      0.00 
      98.65 
      0.00 
      103.42 
      0.00 
      0.00 
      96.82 
      0.00 
      0.00 
      0.00 
     
    
      25k 
      count 
      16105.000000 
      10.000000 
      8.000000 
      7.000000 
      4.000000 
      4.000000 
      4.000000 
      3.000000 
      3.000000 
      3.000000 
      ... 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
     
    
      mean 
      118.195629 
      123.299000 
      0.000000 
      0.000000 
      111.540000 
      119.052500 
      123.597500 
      115.956667 
      127.806667 
      132.933333 
      ... 
      0.00 
      0.00 
      115.10 
      130.17 
      0.00 
      0.00 
      0.00 
      0.00 
      121.72 
      0.00 
     
    
      std 
      12.674378 
      9.617818 
      0.000000 
      0.000000 
      1.605138 
      6.217853 
      7.188743 
      19.538911 
      11.900022 
      9.761825 
      ... 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      min 
      48.320000 
      102.830000 
      0.000000 
      0.000000 
      110.100000 
      110.420000 
      114.680000 
      95.570000 
      115.920000 
      125.670000 
      ... 
      0.00 
      0.00 
      115.10 
      130.17 
      0.00 
      0.00 
      0.00 
      0.00 
      121.72 
      0.00 
     
    
      25% 
      108.920000 
      118.290000 
      0.000000 
      0.000000 
      110.235000 
      116.802500 
      119.885000 
      106.675000 
      121.850000 
      127.385000 
      ... 
      0.00 
      0.00 
      115.10 
      130.17 
      0.00 
      0.00 
      0.00 
      0.00 
      121.72 
      0.00 
     
    
      50% 
      119.180000 
      123.420000 
      0.000000 
      0.000000 
      111.355000 
      120.555000 
      124.095000 
      117.780000 
      127.780000 
      129.100000 
      ... 
      0.00 
      0.00 
      115.10 
      130.17 
      0.00 
      0.00 
      0.00 
      0.00 
      121.72 
      0.00 
     
    
      75% 
      127.330000 
      131.315000 
      0.000000 
      0.000000 
      112.660000 
      122.805000 
      127.807500 
      126.150000 
      133.750000 
      136.565000 
      ... 
      0.00 
      0.00 
      115.10 
      130.17 
      0.00 
      0.00 
      0.00 
      0.00 
      121.72 
      0.00 
     
    
      max 
      163.620000 
      134.000000 
      0.000000 
      0.000000 
      113.350000 
      124.680000 
      131.520000 
      134.520000 
      139.720000 
      144.030000 
      ... 
      0.00 
      0.00 
      115.10 
      130.17 
      0.00 
      0.00 
      0.00 
      0.00 
      121.72 
      0.00 
     
    
      30k 
      count 
      16105.000000 
      10.000000 
      8.000000 
      7.000000 
      4.000000 
      4.000000 
      4.000000 
      3.000000 
      3.000000 
      3.000000 
      ... 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
     
    
      mean 
      143.564142 
      149.453000 
      0.000000 
      139.261429 
      135.355000 
      0.000000 
      150.075000 
      141.570000 
      154.596667 
      169.233333 
      ... 
      154.67 
      150.58 
      138.73 
      157.48 
      0.00 
      0.00 
      0.00 
      0.00 
      150.85 
      0.00 
     
    
      std 
      15.677899 
      11.785741 
      0.000000 
      15.166610 
      2.146012 
      0.000000 
      8.655736 
      25.491828 
      15.335144 
      10.977169 
      ... 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      min 
      59.450000 
      125.330000 
      0.000000 
      113.600000 
      132.180000 
      0.000000 
      139.150000 
      115.080000 
      139.300000 
      161.870000 
      ... 
      154.67 
      150.58 
      138.73 
      157.48 
      0.00 
      0.00 
      0.00 
      0.00 
      150.85 
      0.00 
     
    
      25% 
      132.200000 
      142.905000 
      0.000000 
      131.375000 
      135.097500 
      0.000000 
      145.285000 
      129.390000 
      146.910000 
      162.925000 
      ... 
      154.67 
      150.58 
      138.73 
      157.48 
      0.00 
      0.00 
      0.00 
      0.00 
      150.85 
      0.00 
     
    
      50% 
      144.730000 
      149.165000 
      0.000000 
      142.350000 
      136.170000 
      0.000000 
      151.375000 
      143.700000 
      154.520000 
      163.980000 
      ... 
      154.67 
      150.58 
      138.73 
      157.48 
      0.00 
      0.00 
      0.00 
      0.00 
      150.85 
      0.00 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      half 
      std 
      10.521256 
      8.028551 
      0.000000 
      10.297575 
      1.800748 
      5.022838 
      6.131479 
      0.000000 
      9.187667 
      7.857394 
      ... 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      min 
      40.270000 
      86.330000 
      0.000000 
      79.100000 
      91.720000 
      92.820000 
      96.480000 
      0.000000 
      97.920000 
      102.280000 
      ... 
      0.00 
      104.00 
      97.10 
      109.20 
      0.00 
      0.00 
      102.22 
      0.00 
      100.95 
      0.00 
     
    
      25% 
      91.550000 
      99.520000 
      0.000000 
      91.685000 
      92.920000 
      98.520000 
      100.980000 
      0.000000 
      102.835000 
      104.490000 
      ... 
      0.00 
      104.00 
      97.10 
      109.20 
      0.00 
      0.00 
      102.22 
      0.00 
      100.95 
      0.00 
     
    
      50% 
      100.100000 
      104.130000 
      0.000000 
      96.680000 
      93.970000 
      101.450000 
      104.500000 
      0.000000 
      107.750000 
      106.700000 
      ... 
      0.00 
      104.00 
      97.10 
      109.20 
      0.00 
      0.00 
      102.22 
      0.00 
      100.95 
      0.00 
     
    
      75% 
      106.850000 
      109.995000 
      0.000000 
      103.450000 
      94.947500 
      102.915000 
      107.615000 
      0.000000 
      112.015000 
      112.125000 
      ... 
      0.00 
      104.00 
      97.10 
      109.20 
      0.00 
      0.00 
      102.22 
      0.00 
      100.95 
      0.00 
     
    
      max 
      138.670000 
      112.700000 
      0.000000 
      109.670000 
      95.930000 
      104.220000 
      110.900000 
      0.000000 
      116.280000 
      117.550000 
      ... 
      0.00 
      104.00 
      97.10 
      109.20 
      0.00 
      0.00 
      102.22 
      0.00 
      100.95 
      0.00 
     
    
      official 
      count 
      16105.000000 
      10.000000 
      8.000000 
      7.000000 
      4.000000 
      4.000000 
      4.000000 
      3.000000 
      3.000000 
      3.000000 
      ... 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
     
    
      mean 
      208.131799 
      217.303000 
      219.632500 
      201.311429 
      193.430000 
      214.370000 
      215.122500 
      205.583333 
      221.903333 
      254.120000 
      ... 
      228.38 
      218.08 
      196.08 
      227.92 
      225.40 
      216.98 
      214.13 
      225.40 
      221.85 
      228.30 
     
    
      std 
      23.747320 
      15.328319 
      24.220754 
      21.732860 
      8.019406 
      19.050727 
      10.866865 
      37.232860 
      26.686994 
      12.141779 
      ... 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      min 
      85.530000 
      194.170000 
      173.580000 
      162.000000 
      184.400000 
      194.050000 
      199.900000 
      167.000000 
      195.030000 
      242.100000 
      ... 
      228.38 
      218.08 
      196.08 
      227.92 
      225.40 
      216.98 
      214.13 
      225.40 
      221.85 
      228.30 
     
    
      25% 
      191.700000 
      205.922500 
      207.037500 
      191.105000 
      189.672500 
      202.090000 
      211.112500 
      187.725000 
      208.655000 
      247.990000 
      ... 
      228.38 
      218.08 
      196.08 
      227.92 
      225.40 
      216.98 
      214.13 
      225.40 
      221.85 
      228.30 
     
    
      50% 
      209.220000 
      215.690000 
      227.515000 
      207.500000 
      192.775000 
      212.925000 
      218.310000 
      208.450000 
      222.280000 
      253.880000 
      ... 
      228.38 
      218.08 
      196.08 
      227.92 
      225.40 
      216.98 
      214.13 
      225.40 
      221.85 
      228.30 
     
    
      75% 
      225.200000 
      226.117500 
      234.637500 
      216.025000 
      196.532500 
      225.205000 
      222.320000 
      224.875000 
      235.340000 
      260.130000 
      ... 
      228.38 
      218.08 
      196.08 
      227.92 
      225.40 
      216.98 
      214.13 
      225.40 
      221.85 
      228.30 
     
    
      max 
      284.230000 
      242.780000 
      245.450000 
      225.420000 
      203.770000 
      237.580000 
      223.970000 
      241.300000 
      248.400000 
      266.380000 
      ... 
      228.38 
      218.08 
      196.08 
      227.92 
      225.40 
      216.98 
      214.13 
      225.40 
      221.85 
      228.30 
     
    
      overall 
      count 
      16105.000000 
      10.000000 
      8.000000 
      7.000000 
      4.000000 
      4.000000 
      4.000000 
      3.000000 
      3.000000 
      3.000000 
      ... 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
     
    
      mean 
      8423.401304 
      10399.700000 
      11138.875000 
      6998.428571 
      4521.000000 
      9688.500000 
      9944.250000 
      8294.333333 
      11245.333333 
      17092.666667 
      ... 
      13438.00 
      10692.00 
      4939.00 
      13310.00 
      12717.00 
      10373.00 
      9584.00 
      12715.00 
      11810.00 
      13395.00 
     
    
      std 
      5052.028950 
      3920.898283 
      5586.997518 
      4438.780270 
      1672.116423 
      4895.439272 
      2959.093484 
      7857.198250 
      6203.282223 
      613.063075 
      ... 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      min 
      1.000000 
      4559.000000 
      1114.000000 
      346.000000 
      2759.000000 
      4534.000000 
      5825.000000 
      580.000000 
      4739.000000 
      16397.000000 
      ... 
      13438.00 
      10692.00 
      4939.00 
      13310.00 
      12717.00 
      10373.00 
      9584.00 
      12715.00 
      11810.00 
      13395.00 
     
    
      25% 
      4055.000000 
      7360.250000 
      7676.500000 
      3945.500000 
      3701.000000 
      6421.000000 
      8801.750000 
      4298.000000 
      8321.500000 
      16862.000000 
      ... 
      13438.00 
      10692.00 
      4939.00 
      13310.00 
      12717.00 
      10373.00 
      9584.00 
      12715.00 
      11810.00 
      13395.00 
     
    
      50% 
      8241.000000 
      10063.000000 
      13220.000000 
      7741.000000 
      4281.000000 
      9317.500000 
      10793.500000 
      8016.000000 
      11904.000000 
      17327.000000 
      ... 
      13438.00 
      10692.00 
      4939.00 
      13310.00 
      12717.00 
      10373.00 
      9584.00 
      12715.00 
      11810.00 
      13395.00 
     
    
      75% 
      12655.000000 
      12885.500000 
      14851.000000 
      10145.500000 
      5101.000000 
      12585.000000 
      11936.000000 
      12151.500000 
      14498.500000 
      17440.500000 
      ... 
      13438.00 
      10692.00 
      4939.00 
      13310.00 
      12717.00 
      10373.00 
      9584.00 
      12715.00 
      11810.00 
      13395.00 
     
    
      max 
      17598.000000 
      16503.000000 
      16825.000000 
      12720.000000 
      6763.000000 
      15585.000000 
      12365.000000 
      16287.000000 
      17093.000000 
      17554.000000 
      ... 
      13438.00 
      10692.00 
      4939.00 
      13310.00 
      12717.00 
      10373.00 
      9584.00 
      12715.00 
      11810.00 
      13395.00 
     
    
      pace 
      count 
      16105.000000 
      10.000000 
      8.000000 
      7.000000 
      4.000000 
      4.000000 
      4.000000 
      3.000000 
      3.000000 
      3.000000 
      ... 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
      1.00 
     
    
      mean 
      7.946343 
      8.297000 
      8.385000 
      7.682857 
      7.387500 
      8.185000 
      8.212500 
      7.856667 
      8.470000 
      9.700000 
      ... 
      8.72 
      8.33 
      7.48 
      8.70 
      8.60 
      8.28 
      8.17 
      8.60 
      8.47 
      8.72 
     
    
      std 
      0.905882 
      0.584524 
      0.922109 
      0.827642 
      0.309556 
      0.721503 
      0.416203 
      1.423388 
      1.015037 
      0.460326 
      ... 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
      NaN 
     
    
      min 
      3.270000 
      7.420000 
      6.630000 
      6.180000 
      7.030000 
      7.420000 
      7.630000 
      6.380000 
      7.450000 
      9.250000 
      ... 
      8.72 
      8.33 
      7.48 
      8.70 
      8.60 
      8.28 
      8.17 
      8.60 
      8.47 
      8.72 
     
    
      25% 
      7.320000 
      7.860000 
      7.905000 
      7.300000 
      7.247500 
      7.720000 
      8.057500 
      7.175000 
      7.965000 
      9.465000 
      ... 
      8.72 
      8.33 
      7.48 
      8.70 
      8.60 
      8.28 
      8.17 
      8.60 
      8.47 
      8.72 
     
    
      50% 
      7.980000 
      8.230000 
      8.680000 
      7.920000 
      7.370000 
      8.125000 
      8.335000 
      7.970000 
      8.480000 
      9.680000 
      ... 
      8.72 
      8.33 
      7.48 
      8.70 
      8.60 
      8.28 
      8.17 
      8.60 
      8.47 
      8.72 
     
    
      75% 
      8.600000 
      8.640000 
      8.960000 
      8.240000 
      7.510000 
      8.590000 
      8.490000 
      8.595000 
      8.980000 
      9.925000 
      ... 
      8.72 
      8.33 
      7.48 
      8.70 
      8.60 
      8.28 
      8.17 
      8.60 
      8.47 
      8.72 
     
    
      max 
      10.850000 
      9.270000 
      9.370000 
      8.600000 
      7.780000 
      9.070000 
      8.550000 
      9.220000 
      9.480000 
      10.170000 
      ... 
      8.72 
      8.33 
      7.48 
      8.70 
      8.60 
      8.28 
      8.17 
      8.60 
      8.47 
      8.72 
     
  
120 rows × 22 columns
In [25]:
    
# Reduce it to two components.
X_pca = PCA(2).fit_transform(X_tr_std)
# Estimate the bandwidth from the 2-D projection itself: the kernel width has
# to reflect distances in the space that is actually being clustered, not in
# the full-dimensional X_tr_std. A separate name is used so the bandwidth of
# the full-feature Mean Shift model is not overwritten.
bandwidth_pca = estimate_bandwidth(X_pca, quantile=0.9)
# Calculate predicted values.
y_pred = MeanShift(bandwidth=bandwidth_pca, bin_seeding=True).fit_predict(X_pca)
# Plot the solution, coloured by cluster id; title and axis labels let the
# figure stand on its own when the notebook is skimmed.
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y_pred)
plt.title('Mean Shift clusters on the first two principal components')
plt.xlabel('Principal component 1')
plt.ylabel('Principal component 2')
plt.show()
    
    
 
In [17]:
    
# Declare the model and fit it in one statement.
# Note that you can provide arguments to the model, but we didn't.
af = AffinityPropagation().fit(X_tr_std)
# Progress marker: the fit above is the slow step of this cell.
print('Done')
# Pull the number of clusters and cluster assignments for each data point.
cluster_centers_indices = af.cluster_centers_indices_
n_clusters_ = len(cluster_centers_indices)
labels = af.labels_
# Glue the assignments back onto the original (unscaled) data.
# NOTE: this overwrites any 'clusters' column left by a previously run
# clustering cell, so later cells see whichever model was fitted last.
X_tr['clusters'] = labels
# NOTE(review): in the displayed output the 'Gender'/'Overall' means equal the
# existing 'gender'/'overall' means for every cluster - confirm whether these
# capitalised copies are still needed.
X_tr['Gender'] = boston_marathon_scores.gender
X_tr['Overall'] = boston_marathon_scores.overall
# Register the extra columns in our column list, skipping names that are
# already there so re-running this cell (or another clustering cell that does
# the same) does not pile up duplicates.
clmns.extend([col for col in ['clusters', 'Gender', 'Overall'] if col not in clmns])
# Let's analyze the clusters: per-cluster mean of every column.
print("Number of estimated clusters: {}".format(n_clusters_))
# groupby(...).mean() already returns a DataFrame, so no extra wrapper is needed.
X_tr.groupby(['clusters']).mean()
    
    
Done
Number of estimated clusters: 251
    Out[17]:
  
    
       
      25k 
      age 
      10k 
      gender 
      half 
      official 
      overall 
      pace 
      30k 
      5k 
      20k 
      35k 
      40k 
      Gender 
      Overall 
     
    
      clusters 
       
       
       
       
       
       
       
       
       
       
       
       
       
       
       
     
  
  
    
      0 
      51.880000 
      43.000000 
      19.148000 
      1.000000 
      43.080000 
      92.754000 
      9.400000 
      3.542000 
      63.870000 
      9.226000 
      40.898000 
      76.494000 
      87.242000 
      1.000000 
      9.400000 
     
    
      1 
      58.814286 
      38.142857 
      21.591429 
      0.857143 
      48.728571 
      107.681429 
      22.428571 
      4.115714 
      73.342857 
      10.270000 
      46.268571 
      88.742857 
      101.232857 
      0.857143 
      22.428571 
     
    
      2 
      79.596667 
      42.555556 
      29.865556 
      0.888889 
      65.995556 
      144.394444 
      3486.333333 
      5.512222 
      99.325556 
      14.605556 
      62.567778 
      119.272222 
      136.260000 
      0.888889 
      3486.333333 
     
    
      3 
      51.017500 
      49.500000 
      19.212500 
      1.000000 
      42.605000 
      90.327500 
      7.750000 
      3.452500 
      62.542500 
      9.290000 
      40.477500 
      74.807500 
      85.082500 
      1.000000 
      7.750000 
     
    
      4 
      57.156000 
      41.000000 
      21.306000 
      0.800000 
      47.652000 
      101.820000 
      20.400000 
      3.888000 
      70.392000 
      10.136000 
      45.276000 
      84.248000 
      95.932000 
      0.800000 
      20.400000 
     
    
      5 
      131.848333 
      45.000000 
      44.376667 
      0.833333 
      99.755000 
      233.056667 
      13954.666667 
      8.898333 
      164.263333 
      22.131667 
      93.786667 
      193.041667 
      221.220000 
      0.833333 
      13954.666667 
     
    
      6 
      0.000000 
      37.000000 
      43.683333 
      0.666667 
      92.746667 
      192.650000 
      5635.333333 
      7.353333 
      133.676667 
      21.993333 
      87.916667 
      157.470000 
      181.750000 
      0.666667 
      5635.333333 
     
    
      7 
      105.182083 
      60.541667 
      41.791667 
      0.958333 
      88.628333 
      180.174167 
      2933.500000 
      6.880833 
      126.957083 
      20.949583 
      84.047500 
      148.961667 
      170.547500 
      0.958333 
      2933.500000 
     
    
      8 
      110.972500 
      58.535714 
      44.063214 
      0.964286 
      93.403929 
      191.054107 
      4668.089286 
      7.294643 
      134.142857 
      22.101607 
      88.561964 
      157.621964 
      180.764821 
      0.964286 
      4668.089286 
     
    
      9 
      122.701395 
      46.829457 
      48.009457 
      0.550388 
      102.830310 
      216.295736 
      10391.356589 
      8.258295 
      149.411240 
      23.974109 
      97.425504 
      177.058992 
      204.317364 
      0.550388 
      10391.356589 
     
    
      10 
      114.659000 
      44.500000 
      46.056000 
      1.000000 
      96.658000 
      220.956000 
      11392.900000 
      8.436000 
      139.021000 
      23.197000 
      91.650000 
      165.798000 
      202.483000 
      1.000000 
      11392.900000 
     
    
      11 
      116.124444 
      57.444444 
      45.626905 
      0.968254 
      97.453889 
      203.575079 
      7514.071429 
      7.771667 
      141.069206 
      22.821667 
      92.328492 
      166.788968 
      192.265635 
      0.968254 
      7514.071429 
     
    
      12 
      116.609249 
      41.680751 
      46.546667 
      0.553991 
      98.295634 
      199.267042 
      6533.812207 
      7.608404 
      140.593568 
      23.372300 
      93.217887 
      164.852676 
      188.750423 
      0.553991 
      6533.812207 
     
    
      13 
      116.692391 
      50.347826 
      46.594420 
      0.891304 
      98.382826 
      198.714420 
      6150.724638 
      7.587681 
      140.623551 
      23.361957 
      93.301739 
      164.740580 
      188.360217 
      0.891304 
      6150.724638 
     
    
      14 
      110.578000 
      26.800000 
      42.462000 
      0.800000 
      91.928000 
      248.208000 
      16283.800000 
      9.476000 
      138.808000 
      21.234000 
      87.094000 
      178.784000 
      229.872000 
      0.800000 
      16283.800000 
     
    
      15 
      107.839615 
      33.692308 
      42.428846 
      0.923077 
      90.321538 
      215.145769 
      9724.230769 
      8.215769 
      132.647308 
      21.296154 
      85.590769 
      164.268846 
      201.421154 
      0.923077 
      9724.230769 
     
    
      16 
      106.675000 
      32.000000 
      42.140000 
      0.500000 
      0.000000 
      187.725000 
      4298.000000 
      7.175000 
      129.390000 
      21.320000 
      84.860000 
      153.185000 
      176.985000 
      0.500000 
      4298.000000 
     
    
      17 
      117.098605 
      51.232558 
      46.693256 
      1.000000 
      98.542326 
      216.117209 
      10320.046512 
      8.252093 
      142.068140 
      23.506977 
      93.430698 
      169.548837 
      201.450465 
      1.000000 
      10320.046512 
     
    
      18 
      115.245541 
      32.254777 
      46.116561 
      0.375796 
      97.101783 
      198.944331 
      6562.611465 
      7.595096 
      139.224395 
      23.185987 
      92.081783 
      163.764140 
      188.216815 
      0.375796 
      6562.611465 
     
    
      19 
      119.309057 
      58.490566 
      46.152642 
      0.962264 
      99.569434 
      217.951887 
      10739.622642 
      8.322075 
      146.331698 
      22.955849 
      94.247736 
      175.279057 
      204.732830 
      0.962264 
      10739.622642 
     
    
      20 
      113.050000 
      27.666667 
      43.616667 
      0.333333 
      92.996667 
      236.006667 
      14524.666667 
      9.003333 
      138.673333 
      22.046667 
      87.990000 
      195.016667 
      224.593333 
      0.333333 
      14524.666667 
     
    
      21 
      122.548047 
      35.875000 
      48.223516 
      0.171875 
      102.752734 
      213.003359 
      9596.625000 
      8.132891 
      148.813125 
      24.104688 
      97.376250 
      175.530859 
      201.691875 
      0.171875 
      9596.625000 
     
    
      22 
      132.750000 
      36.000000 
      45.300000 
      1.000000 
      94.820000 
      229.080000 
      13647.000000 
      8.750000 
      174.520000 
      22.450000 
      89.570000 
      197.070000 
      219.620000 
      1.000000 
      13647.000000 
     
    
      23 
      97.650000 
      24.000000 
      39.220000 
      1.000000 
      82.270000 
      241.620000 
      16324.000000 
      9.220000 
      119.330000 
      19.830000 
      78.030000 
      161.770000 
      230.050000 
      1.000000 
      16324.000000 
     
    
      24 
      117.728293 
      23.341463 
      46.793415 
      0.365854 
      98.858293 
      214.841463 
      9810.097561 
      8.201463 
      143.700976 
      23.520976 
      93.726585 
      171.802927 
      201.884146 
      0.365854 
      9810.097561 
     
    
      25 
      0.000000 
      38.000000 
      45.870000 
      1.000000 
      98.400000 
      238.220000 
      15721.000000 
      9.100000 
      166.450000 
      23.130000 
      91.930000 
      200.020000 
      227.600000 
      1.000000 
      15721.000000 
     
    
      26 
      112.653333 
      24.666667 
      44.510000 
      0.333333 
      94.826667 
      194.580000 
      5858.666667 
      7.433333 
      136.340000 
      0.000000 
      89.806667 
      160.466667 
      184.140000 
      0.333333 
      5858.666667 
     
    
      27 
      117.573675 
      38.524096 
      46.917771 
      0.415663 
      99.012952 
      205.115301 
      7771.156627 
      7.830783 
      142.267048 
      23.590542 
      93.907169 
      167.800482 
      193.653735 
      0.415663 
      7771.156627 
     
    
      28 
      120.913656 
      53.193548 
      47.172366 
      0.827957 
      100.997742 
      215.927527 
      10345.354839 
      8.243441 
      147.719462 
      23.581398 
      95.652043 
      175.846129 
      203.689140 
      0.827957 
      10345.354839 
     
    
      29 
      116.074000 
      34.800000 
      43.846000 
      0.800000 
      95.202000 
      236.736000 
      13539.300000 
      9.036000 
      148.700000 
      21.918000 
      89.999000 
      182.708000 
      221.378000 
      0.800000 
      13539.300000 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      221 
      115.874184 
      42.673469 
      45.857653 
      0.755102 
      97.253163 
      208.741327 
      8321.234694 
      7.969082 
      141.165918 
      23.038878 
      92.159898 
      168.365306 
      196.411327 
      0.755102 
      8321.234694 
     
    
      222 
      119.938173 
      39.512690 
      47.774467 
      0.340102 
      101.015178 
      206.156599 
      8130.954315 
      7.871168 
      144.909239 
      24.011320 
      95.792183 
      170.282081 
      195.252843 
      0.340102 
      8130.954315 
     
    
      223 
      116.287197 
      33.560510 
      46.810127 
      0.433121 
      98.273376 
      195.656051 
      5794.585987 
      7.471210 
      139.674395 
      23.551911 
      93.252484 
      163.004713 
      185.785287 
      0.433121 
      5794.585987 
     
    
      224 
      121.720000 
      29.000000 
      46.920000 
      0.000000 
      100.950000 
      221.850000 
      11810.000000 
      8.470000 
      150.850000 
      0.000000 
      0.000000 
      181.100000 
      209.350000 
      0.000000 
      11810.000000 
     
    
      225 
      120.766744 
      53.534884 
      47.531395 
      0.976744 
      101.120698 
      228.733256 
      12893.976744 
      8.733256 
      148.155116 
      23.808140 
      95.796047 
      179.166047 
      213.889767 
      0.976744 
      12893.976744 
     
    
      226 
      121.458417 
      50.891667 
      47.504750 
      0.800000 
      101.566167 
      216.118167 
      10132.900000 
      8.250083 
      148.209250 
      23.753583 
      96.198333 
      176.190750 
      203.928000 
      0.800000 
      10132.900000 
     
    
      227 
      117.549773 
      56.363636 
      47.297955 
      0.931818 
      99.253864 
      198.214091 
      6252.522727 
      7.567045 
      141.340682 
      23.769318 
      94.167500 
      165.119773 
      188.182727 
      0.931818 
      6252.522727 
     
    
      228 
      117.201768 
      42.939024 
      47.097378 
      0.615854 
      98.972073 
      198.559817 
      6428.896341 
      7.581159 
      140.981585 
      23.648780 
      93.886341 
      164.873598 
      188.298110 
      0.615854 
      6428.896341 
     
    
      229 
      114.603256 
      33.511628 
      45.149535 
      0.627907 
      95.881395 
      215.327209 
      9778.116279 
      8.221395 
      140.828372 
      22.634419 
      90.840233 
      170.989535 
      202.097442 
      0.627907 
      9778.116279 
     
    
      230 
      120.292027 
      35.040541 
      47.345811 
      0.364865 
      100.658784 
      218.332568 
      10687.702703 
      8.336351 
      147.319459 
      23.700811 
      95.357973 
      176.417568 
      205.741081 
      0.364865 
      10687.702703 
     
    
      231 
      115.883310 
      46.151724 
      46.502207 
      0.779310 
      97.809379 
      197.544069 
      6012.165517 
      7.542138 
      139.571517 
      23.396552 
      92.789172 
      163.532000 
      187.127586 
      0.779310 
      6012.165517 
     
    
      232 
      123.013571 
      42.959184 
      47.685510 
      0.357143 
      102.650102 
      219.902347 
      11153.693878 
      8.394592 
      150.351633 
      23.767551 
      97.196939 
      178.971122 
      207.417449 
      0.357143 
      11153.693878 
     
    
      233 
      117.870411 
      49.013699 
      46.665753 
      0.787671 
      99.012329 
      207.639795 
      8161.253425 
      7.927740 
      143.132055 
      23.396027 
      93.852260 
      169.464932 
      195.891507 
      0.787671 
      8161.253425 
     
    
      234 
      119.457500 
      56.125000 
      45.772083 
      1.000000 
      98.970000 
      228.516667 
      12796.125000 
      8.722917 
      148.905417 
      22.817917 
      93.666667 
      182.617500 
      214.803333 
      1.000000 
      12796.125000 
     
    
      235 
      123.151373 
      37.441176 
      47.796176 
      0.235294 
      102.759804 
      218.981765 
      11053.411765 
      8.360294 
      150.522647 
      23.818333 
      97.319314 
      179.113529 
      206.917059 
      0.235294 
      11053.411765 
     
    
      236 
      114.700556 
      56.611111 
      45.436667 
      1.000000 
      96.422778 
      215.272778 
      10009.222222 
      8.219444 
      139.328889 
      22.772778 
      91.368889 
      166.620556 
      200.129444 
      1.000000 
      10009.222222 
     
    
      237 
      0.000000 
      46.000000 
      0.000000 
      0.000000 
      0.000000 
      228.300000 
      13395.000000 
      8.720000 
      0.000000 
      26.000000 
      0.000000 
      0.000000 
      0.000000 
      0.000000 
      13395.000000 
     
    
      238 
      120.149014 
      40.183099 
      46.546479 
      0.577465 
      99.976620 
      222.242394 
      11449.352113 
      8.484789 
      148.167606 
      23.214225 
      94.603099 
      178.832254 
      209.181972 
      0.577465 
      11449.352113 
     
    
      239 
      120.591377 
      38.224638 
      47.550217 
      0.326087 
      101.219928 
      209.267536 
      8897.311594 
      7.989783 
      146.235797 
      23.767101 
      95.937899 
      172.488841 
      198.126594 
      0.326087 
      8897.311594 
     
    
      240 
      117.789706 
      53.529412 
      46.660000 
      0.931373 
      98.959314 
      208.923824 
      8509.313725 
      7.976176 
      143.038824 
      23.407255 
      93.793725 
      169.537647 
      196.661961 
      0.931373 
      8509.313725 
     
    
      241 
      121.667470 
      57.385542 
      47.188554 
      0.891566 
      101.581446 
      216.733373 
      10764.903614 
      8.274578 
      148.893494 
      23.555542 
      96.194096 
      177.106747 
      204.573614 
      0.891566 
      10764.903614 
     
    
      242 
      121.539621 
      47.992424 
      47.755152 
      0.568182 
      101.896212 
      212.162879 
      9277.303030 
      8.100530 
      147.643333 
      23.879167 
      96.577803 
      174.401212 
      200.674167 
      0.568182 
      9277.303030 
     
    
      243 
      122.611000 
      46.266667 
      46.400667 
      0.900000 
      101.238667 
      237.035000 
      14403.333333 
      9.047667 
      153.571000 
      23.088000 
      95.542000 
      187.626667 
      222.534000 
      0.900000 
      14403.333333 
     
    
      244 
      110.205588 
      57.647059 
      43.587059 
      0.970588 
      92.563529 
      197.367647 
      6159.823529 
      7.535000 
      133.829118 
      21.824412 
      87.695000 
      158.894706 
      185.215882 
      0.970588 
      6159.823529 
     
    
      245 
      126.345634 
      43.119718 
      49.374577 
      0.204225 
      105.745141 
      220.125986 
      11368.014085 
      8.404225 
      153.622254 
      24.638451 
      100.189718 
      181.463310 
      208.426056 
      0.204225 
      11368.014085 
     
    
      246 
      118.554913 
      40.861272 
      47.136185 
      0.456647 
      99.796994 
      203.899538 
      7458.953757 
      7.785202 
      143.264509 
      23.621098 
      94.629422 
      168.359884 
      193.076243 
      0.456647 
      7458.953757 
     
    
      247 
      118.770707 
      46.555556 
      46.697778 
      0.787879 
      99.523131 
      214.228586 
      9672.101010 
      8.178182 
      145.048687 
      23.409798 
      94.291313 
      173.045960 
      201.635152 
      0.787879 
      9672.101010 
     
    
      248 
      134.520000 
      59.000000 
      50.750000 
      0.000000 
      0.000000 
      241.300000 
      16287.000000 
      9.220000 
      165.930000 
      24.930000 
      104.880000 
      196.370000 
      227.980000 
      0.000000 
      16287.000000 
     
    
      249 
      113.978571 
      52.714286 
      45.228571 
      0.857143 
      95.975714 
      197.030000 
      5447.285714 
      7.524286 
      137.797143 
      0.000000 
      90.990000 
      161.990000 
      186.204286 
      0.857143 
      5447.285714 
     
    
      250 
      121.839065 
      53.582734 
      47.941583 
      0.863309 
      102.317410 
      211.594604 
      9116.100719 
      8.079353 
      147.807842 
      23.948417 
      96.948921 
      174.247194 
      200.225755 
      0.863309 
      9116.100719 
     
  
251 rows × 15 columns
In [18]:
    
# Descriptive statistics of every column within each cluster, then flipped so
# that clusters become the columns and (feature, statistic) pairs the rows.
clusters_summary = X_tr.groupby(['clusters']).describe()
clusters_summary_transposed = clusters_summary.T
clusters_summary_transposed
    
    Out[18]:
  
    
       
      clusters 
      0 
      1 
      2 
      3 
      4 
      5 
      6 
      7 
      8 
      9 
      ... 
      241 
      242 
      243 
      244 
      245 
      246 
      247 
      248 
      249 
      250 
     
  
  
    
      10k 
      count 
      5.000000 
      7.000000 
      9.000000 
      4.000000 
      5.000000 
      6.000000 
      3.000000 
      24.000000 
      56.000000 
      129.000000 
      ... 
      83.000000 
      132.000000 
      30.000000 
      34.000000 
      142.000000 
      173.000000 
      99.000000 
      1.00 
      7.000000 
      139.000000 
     
    
      mean 
      19.148000 
      21.591429 
      29.865556 
      19.212500 
      21.306000 
      44.376667 
      43.683333 
      41.791667 
      44.063214 
      48.009457 
      ... 
      47.188554 
      47.755152 
      46.400667 
      43.587059 
      49.374577 
      47.136185 
      46.697778 
      50.75 
      45.228571 
      47.941583 
     
    
      std 
      1.170543 
      3.465484 
      12.856572 
      1.027696 
      1.654004 
      4.063942 
      7.538954 
      4.458106 
      4.551551 
      5.290160 
      ... 
      5.457879 
      4.973370 
      3.627409 
      4.724065 
      4.781201 
      4.948415 
      4.171106 
      NaN 
      3.022590 
      4.054450 
     
    
      min 
      18.180000 
      18.200000 
      18.180000 
      18.030000 
      19.770000 
      37.730000 
      36.900000 
      32.980000 
      34.700000 
      23.570000 
      ... 
      20.150000 
      34.420000 
      39.230000 
      31.500000 
      32.370000 
      35.800000 
      31.630000 
      50.75 
      41.900000 
      35.620000 
     
    
      25% 
      18.180000 
      19.250000 
      19.780000 
      18.555000 
      19.770000 
      42.217500 
      39.625000 
      39.555000 
      40.620000 
      45.330000 
      ... 
      45.275000 
      44.565000 
      43.235000 
      40.682500 
      47.990000 
      42.820000 
      44.840000 
      50.75 
      43.605000 
      45.775000 
     
    
      50% 
      18.730000 
      20.780000 
      23.580000 
      19.250000 
      20.900000 
      46.300000 
      42.350000 
      41.060000 
      43.355000 
      48.250000 
      ... 
      48.520000 
      47.740000 
      47.125000 
      44.625000 
      50.255000 
      47.650000 
      46.670000 
      50.75 
      44.430000 
      48.200000 
     
    
      75% 
      19.750000 
      22.655000 
      41.500000 
      19.907500 
      22.970000 
      46.970000 
      47.075000 
      43.975000 
      47.327500 
      52.170000 
      ... 
      50.720000 
      52.127500 
      48.690000 
      46.482500 
      52.695000 
      50.980000 
      49.015000 
      50.75 
      45.890000 
      50.775000 
     
    
      max 
      20.900000 
      28.350000 
      52.350000 
      20.320000 
      23.120000 
      47.880000 
      51.800000 
      53.130000 
      54.970000 
      58.530000 
      ... 
      53.850000 
      57.720000 
      52.820000 
      53.020000 
      58.570000 
      57.180000 
      57.520000 
      50.75 
      51.280000 
      56.180000 
     
    
      20k 
      count 
      5.000000 
      7.000000 
      9.000000 
      4.000000 
      5.000000 
      6.000000 
      3.000000 
      24.000000 
      56.000000 
      129.000000 
      ... 
      83.000000 
      132.000000 
      30.000000 
      34.000000 
      142.000000 
      173.000000 
      99.000000 
      1.00 
      7.000000 
      139.000000 
     
    
      mean 
      40.898000 
      46.268571 
      62.567778 
      40.477500 
      45.276000 
      93.786667 
      87.916667 
      84.047500 
      88.561964 
      97.425504 
      ... 
      96.194096 
      96.577803 
      95.542000 
      87.695000 
      100.189718 
      94.629422 
      94.291313 
      104.88 
      90.990000 
      96.948921 
     
    
      std 
      2.917074 
      8.611826 
      26.296880 
      1.778490 
      3.058648 
      9.439968 
      14.711704 
      9.085377 
      9.056660 
      10.828335 
      ... 
      10.803554 
      10.081225 
      6.874044 
      9.533775 
      9.702004 
      9.943687 
      8.389691 
      NaN 
      6.049554 
      8.271588 
     
    
      min 
      38.550000 
      38.800000 
      38.550000 
      38.230000 
      41.700000 
      77.970000 
      74.980000 
      66.630000 
      70.430000 
      47.930000 
      ... 
      43.880000 
      69.550000 
      84.430000 
      63.550000 
      65.520000 
      71.820000 
      63.920000 
      104.88 
      84.680000 
      72.320000 
     
    
      25% 
      38.570000 
      40.825000 
      42.020000 
      39.467500 
      43.870000 
      90.165000 
      79.915000 
      79.437500 
      82.132500 
      91.870000 
      ... 
      92.365000 
      89.860000 
      89.267500 
      81.367500 
      96.770000 
      85.950000 
      90.505000 
      104.88 
      87.405000 
      92.340000 
     
    
      50% 
      39.870000 
      43.900000 
      50.870000 
      40.825000 
      43.900000 
      96.315000 
      84.850000 
      82.325000 
      86.720000 
      98.430000 
      ... 
      98.450000 
      97.125000 
      97.185000 
      88.925000 
      101.775000 
      95.650000 
      93.930000 
      104.88 
      90.930000 
      97.150000 
     
    
      75% 
      42.050000 
      47.725000 
      84.580000 
      41.835000 
      47.930000 
      98.310000 
      94.385000 
      88.660000 
      94.820000 
      105.300000 
      ... 
      103.465000 
      105.085000 
      100.917500 
      93.815000 
      107.395000 
      102.680000 
      99.075000 
      104.88 
      91.720000 
      102.885000 
     
    
      max 
      45.450000 
      64.080000 
      110.870000 
      42.030000 
      48.980000 
      105.030000 
      103.920000 
      107.850000 
      111.450000 
      120.700000 
      ... 
      111.530000 
      116.230000 
      107.050000 
      107.180000 
      118.920000 
      114.650000 
      116.420000 
      104.88 
      103.070000 
      113.930000 
     
    
      25k 
      count 
      5.000000 
      7.000000 
      9.000000 
      4.000000 
      5.000000 
      6.000000 
      3.000000 
      24.000000 
      56.000000 
      129.000000 
      ... 
      83.000000 
      132.000000 
      30.000000 
      34.000000 
      142.000000 
      173.000000 
      99.000000 
      1.00 
      7.000000 
      139.000000 
     
    
      mean 
      51.880000 
      58.814286 
      79.596667 
      51.017500 
      57.156000 
      131.848333 
      0.000000 
      105.182083 
      110.972500 
      122.701395 
      ... 
      121.667470 
      121.539621 
      122.611000 
      110.205588 
      126.345634 
      118.554913 
      118.770707 
      134.52 
      113.978571 
      121.839065 
     
    
      std 
      4.016118 
      11.045453 
      33.893913 
      2.058517 
      3.769235 
      12.763898 
      0.000000 
      11.447189 
      11.345336 
      13.686596 
      ... 
      13.691701 
      12.751737 
      8.995522 
      12.038437 
      12.342555 
      12.530305 
      10.600679 
      NaN 
      7.683835 
      10.436578 
     
    
      min 
      48.750000 
      49.870000 
      48.750000 
      48.320000 
      52.620000 
      110.950000 
      0.000000 
      83.470000 
      88.470000 
      59.800000 
      ... 
      55.600000 
      87.630000 
      107.850000 
      80.380000 
      82.180000 
      89.780000 
      80.380000 
      134.52 
      106.220000 
      91.020000 
     
    
      25% 
      48.750000 
      51.565000 
      53.080000 
      49.955000 
      55.600000 
      125.967500 
      0.000000 
      99.322500 
      103.230000 
      115.450000 
      ... 
      116.755000 
      113.032500 
      113.882500 
      102.190000 
      121.632500 
      107.530000 
      114.025000 
      134.52 
      109.435000 
      116.000000 
     
    
      50% 
      50.500000 
      55.600000 
      63.830000 
      51.560000 
      55.630000 
      135.205000 
      0.000000 
      103.090000 
      108.550000 
      124.330000 
      ... 
      124.200000 
      122.215000 
      124.415000 
      111.065000 
      128.290000 
      119.650000 
      118.300000 
      134.52 
      113.350000 
      121.930000 
     
    
      75% 
      53.070000 
      60.690000 
      107.280000 
      52.622500 
      59.850000 
      139.072500 
      0.000000 
      111.090000 
      119.062500 
      132.670000 
      ... 
      130.400000 
      132.077500 
      128.597500 
      117.840000 
      135.377500 
      128.500000 
      124.800000 
      134.52 
      115.065000 
      129.300000 
     
    
      max 
      58.330000 
      81.720000 
      142.680000 
      52.630000 
      62.080000 
      146.480000 
      0.000000 
      135.380000 
      139.830000 
      152.270000 
      ... 
      140.670000 
      146.370000 
      138.980000 
      134.420000 
      149.020000 
      144.330000 
      145.700000 
      134.52 
      129.280000 
      143.330000 
     
    
      30k 
      count 
      5.000000 
      7.000000 
      9.000000 
      4.000000 
      5.000000 
      6.000000 
      3.000000 
      24.000000 
      56.000000 
      129.000000 
      ... 
      83.000000 
      132.000000 
      30.000000 
      34.000000 
      142.000000 
      173.000000 
      99.000000 
      1.00 
      7.000000 
      139.000000 
     
    
      mean 
      63.870000 
      73.342857 
      99.325556 
      62.542500 
      70.392000 
      164.263333 
      133.676667 
      126.957083 
      134.142857 
      149.411240 
      ... 
      148.893494 
      147.643333 
      153.571000 
      133.829118 
      153.622254 
      143.264509 
      145.048687 
      165.93 
      137.797143 
      147.807842 
     
    
      std 
      5.217461 
      14.766886 
      42.514617 
      2.234925 
      4.935693 
      14.671503 
      22.566183 
      14.021694 
      13.756113 
      16.710898 
      ... 
      16.851564 
      15.550967 
      11.652216 
      14.830801 
      14.983018 
      15.214577 
      13.007040 
      NaN 
      9.392784 
      12.755483 
     
    
      min 
      59.920000 
      61.350000 
      60.100000 
      59.450000 
      63.980000 
      137.020000 
      113.600000 
      100.530000 
      107.050000 
      73.300000 
      ... 
      68.180000 
      106.520000 
      133.630000 
      98.070000 
      100.000000 
      108.250000 
      98.370000 
      165.93 
      128.070000 
      110.350000 
     
    
      25% 
      59.920000 
      63.875000 
      65.970000 
      61.640000 
      68.000000 
      161.610000 
      121.465000 
      119.522500 
      125.080000 
      140.600000 
      ... 
      142.750000 
      136.942500 
      142.957500 
      123.702500 
      147.820000 
      129.750000 
      139.350000 
      165.93 
      132.535000 
      140.705000 
     
    
      50% 
      61.630000 
      68.180000 
      78.900000 
      63.185000 
      69.530000 
      168.615000 
      129.330000 
      124.540000 
      131.335000 
      151.420000 
      ... 
      152.100000 
      148.315000 
      154.650000 
      134.645000 
      156.040000 
      144.270000 
      144.270000 
      165.93 
      136.670000 
      148.230000 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      half 
      std 
      3.164830 
      8.989719 
      27.897963 
      1.840480 
      3.168465 
      9.724984 
      15.546177 
      9.582230 
      9.547910 
      11.433770 
      ... 
      11.458470 
      10.606785 
      7.256935 
      10.051022 
      10.237196 
      10.476615 
      8.844753 
      NaN 
      6.376530 
      8.737190 
     
    
      min 
      40.570000 
      40.930000 
      40.570000 
      40.270000 
      43.970000 
      82.530000 
      79.100000 
      70.300000 
      74.320000 
      50.280000 
      ... 
      46.180000 
      73.420000 
      89.370000 
      67.200000 
      69.100000 
      75.820000 
      67.500000 
      0.00 
      89.420000 
      76.350000 
     
    
      25% 
      40.570000 
      43.065000 
      44.180000 
      41.567500 
      46.200000 
      97.702500 
      84.285000 
      83.752500 
      86.692500 
      96.970000 
      ... 
      97.510000 
      94.867500 
      94.365000 
      85.845000 
      102.100000 
      90.630000 
      95.505000 
      0.00 
      92.150000 
      97.435000 
     
    
      50% 
      41.980000 
      46.180000 
      53.370000 
      42.985000 
      46.220000 
      102.050000 
      89.470000 
      86.815000 
      91.450000 
      104.000000 
      ... 
      103.930000 
      102.600000 
      102.725000 
      93.680000 
      107.375000 
      100.850000 
      99.030000 
      0.00 
      95.820000 
      102.480000 
     
    
      75% 
      44.200000 
      50.280000 
      89.320000 
      44.022500 
      50.270000 
      103.712500 
      99.570000 
      93.555000 
      100.055000 
      111.120000 
      ... 
      109.360000 
      110.877500 
      106.725000 
      98.972500 
      113.547500 
      108.300000 
      104.580000 
      0.00 
      96.785000 
      108.665000 
     
    
      max 
      48.080000 
      67.300000 
      117.320000 
      44.180000 
      51.600000 
      111.380000 
      109.670000 
      113.800000 
      117.580000 
      127.300000 
      ... 
      117.580000 
      122.580000 
      113.330000 
      113.070000 
      125.300000 
      120.950000 
      122.770000 
      0.00 
      108.720000 
      120.320000 
     
    
      official 
      count 
      5.000000 
      7.000000 
      9.000000 
      4.000000 
      5.000000 
      6.000000 
      3.000000 
      24.000000 
      56.000000 
      129.000000 
      ... 
      83.000000 
      132.000000 
      30.000000 
      34.000000 
      142.000000 
      173.000000 
      99.000000 
      1.00 
      7.000000 
      139.000000 
     
    
      mean 
      92.754000 
      107.681429 
      144.394444 
      90.327500 
      101.820000 
      233.056667 
      192.650000 
      180.174167 
      191.054107 
      216.295736 
      ... 
      216.733373 
      212.162879 
      237.035000 
      197.367647 
      220.125986 
      203.899538 
      214.228586 
      241.30 
      197.030000 
      211.594604 
     
    
      std 
      7.062208 
      22.301293 
      60.885144 
      3.328116 
      7.491559 
      19.637942 
      31.763106 
      20.247115 
      19.496418 
      23.989609 
      ... 
      24.611155 
      22.066124 
      17.385564 
      21.897770 
      21.434793 
      21.522678 
      19.128276 
      NaN 
      12.999608 
      18.065614 
     
    
      min 
      87.200000 
      88.430000 
      88.320000 
      85.530000 
      91.780000 
      196.830000 
      162.000000 
      142.450000 
      152.370000 
      105.400000 
      ... 
      99.220000 
      153.130000 
      209.280000 
      144.150000 
      144.080000 
      153.820000 
      145.720000 
      241.30 
      183.650000 
      158.400000 
     
    
      25% 
      87.220000 
      93.185000 
      95.980000 
      89.542500 
      97.570000 
      229.877500 
      176.265000 
      168.720000 
      178.120000 
      203.680000 
      ... 
      207.010000 
      197.675000 
      219.907500 
      182.645000 
      211.797500 
      184.820000 
      205.300000 
      241.30 
      189.740000 
      200.880000 
     
    
      50% 
      90.020000 
      101.070000 
      114.520000 
      91.330000 
      102.100000 
      238.300000 
      190.530000 
      176.470000 
      186.600000 
      217.500000 
      ... 
      222.020000 
      213.035000 
      239.545000 
      199.660000 
      223.700000 
      205.550000 
      213.550000 
      241.30 
      194.670000 
      212.150000 
     
    
      75% 
      95.480000 
      112.785000 
      194.730000 
      92.115000 
      106.900000 
      243.295000 
      207.975000 
      191.415000 
      205.380000 
      233.470000 
      ... 
      232.150000 
      229.995000 
      249.167500 
      212.777500 
      236.080000 
      220.700000 
      225.400000 
      241.30 
      198.955000 
      224.515000 
     
    
      max 
      103.850000 
      152.330000 
      254.320000 
      93.120000 
      110.750000 
      252.880000 
      225.420000 
      233.900000 
      239.970000 
      269.550000 
      ... 
      248.580000 
      252.950000 
      266.120000 
      240.600000 
      260.750000 
      248.200000 
      262.570000 
      241.30 
      223.500000 
      248.680000 
     
    
      overall 
      count 
      5.000000 
      7.000000 
      9.000000 
      4.000000 
      5.000000 
      6.000000 
      3.000000 
      24.000000 
      56.000000 
      129.000000 
      ... 
      83.000000 
      132.000000 
      30.000000 
      34.000000 
      142.000000 
      173.000000 
      99.000000 
      1.00 
      7.000000 
      139.000000 
     
    
      mean 
      9.400000 
      22.428571 
      3486.333333 
      7.750000 
      20.400000 
      13954.666667 
      5635.333333 
      2933.500000 
      4668.089286 
      10391.356589 
      ... 
      10764.903614 
      9277.303030 
      14403.333333 
      6159.823529 
      11368.014085 
      7458.953757 
      9672.101010 
      16287.00 
      5447.285714 
      9116.100719 
     
    
      std 
      8.473488 
      15.977663 
      6105.646260 
      4.991660 
      7.635444 
      4557.433123 
      6379.371860 
      3414.465774 
      4037.332361 
      4992.539485 
      ... 
      4655.468169 
      5046.808356 
      3247.004289 
      4242.884342 
      4650.684574 
      4859.636492 
      4176.880201 
      NaN 
      3199.037966 
      4228.294858 
     
    
      min 
      2.000000 
      5.000000 
      4.000000 
      1.000000 
      10.000000 
      5100.000000 
      346.000000 
      31.000000 
      103.000000 
      24.000000 
      ... 
      19.000000 
      110.000000 
      8261.000000 
      38.000000 
      37.000000 
      118.000000 
      45.000000 
      16287.00 
      2647.000000 
      236.000000 
     
    
      25% 
      3.000000 
      10.500000 
      15.000000 
      5.500000 
      16.000000 
      13803.000000 
      2093.000000 
      708.250000 
      1731.250000 
      6728.000000 
      ... 
      7646.000000 
      5291.000000 
      11226.000000 
      2510.500000 
      8929.500000 
      2812.000000 
      7180.500000 
      16287.00 
      3692.500000 
      6064.500000 
     
    
      50% 
      6.000000 
      20.000000 
      33.000000 
      9.000000 
      21.000000 
      15625.500000 
      3840.000000 
      1467.500000 
      3107.000000 
      10508.000000 
      ... 
      11844.000000 
      9269.000000 
      15993.500000 
      5772.000000 
      12270.500000 
      7243.000000 
      9421.000000 
      16287.00 
      4660.000000 
      9039.000000 
     
    
      75% 
      14.000000 
      31.000000 
      4673.000000 
      11.250000 
      26.000000 
      16563.750000 
      8280.000000 
      4011.750000 
      7200.750000 
      14654.000000 
      ... 
      14343.500000 
      13851.250000 
      17141.000000 
      9203.750000 
      15257.500000 
      11482.000000 
      12717.000000 
      16287.00 
      5602.500000 
      12505.500000 
     
    
      max 
      22.000000 
      49.000000 
      17338.000000 
      12.000000 
      29.000000 
      17305.000000 
      12720.000000 
      14768.000000 
      16083.000000 
      17568.000000 
      ... 
      17109.000000 
      17309.000000 
      17550.000000 
      16185.000000 
      17479.000000 
      17076.000000 
      17510.000000 
      16287.00 
      12234.000000 
      17115.000000 
     
    
      pace 
      count 
      5.000000 
      7.000000 
      9.000000 
      4.000000 
      5.000000 
      6.000000 
      3.000000 
      24.000000 
      56.000000 
      129.000000 
      ... 
      83.000000 
      132.000000 
      30.000000 
      34.000000 
      142.000000 
      173.000000 
      99.000000 
      1.00 
      7.000000 
      139.000000 
     
    
      mean 
      3.542000 
      4.115714 
      5.512222 
      3.452500 
      3.888000 
      8.898333 
      7.353333 
      6.880833 
      7.294643 
      8.258295 
      ... 
      8.274578 
      8.100530 
      9.047667 
      7.535000 
      8.404225 
      7.785202 
      8.178182 
      9.22 
      7.524286 
      8.079353 
     
    
      std 
      0.272617 
      0.851545 
      2.321432 
      0.128679 
      0.287002 
      0.747942 
      1.211666 
      0.773844 
      0.742960 
      0.914664 
      ... 
      0.940157 
      0.840754 
      0.662777 
      0.836465 
      0.817842 
      0.820805 
      0.730255 
      NaN 
      0.493824 
      0.688615 
     
    
      min 
      3.330000 
      3.380000 
      3.380000 
      3.270000 
      3.500000 
      7.520000 
      6.180000 
      5.430000 
      5.820000 
      4.030000 
      ... 
      3.780000 
      5.850000 
      7.980000 
      5.500000 
      5.500000 
      5.880000 
      5.570000 
      9.22 
      7.020000 
      6.050000 
     
    
      25% 
      3.330000 
      3.560000 
      3.670000 
      3.420000 
      3.730000 
      8.772500 
      6.730000 
      6.447500 
      6.807500 
      7.780000 
      ... 
      7.905000 
      7.545000 
      8.392500 
      6.972500 
      8.090000 
      7.050000 
      7.830000 
      9.22 
      7.245000 
      7.670000 
     
    
      50% 
      3.430000 
      3.870000 
      4.370000 
      3.485000 
      3.900000 
      9.100000 
      7.280000 
      6.740000 
      7.125000 
      8.300000 
      ... 
      8.480000 
      8.135000 
      9.145000 
      7.620000 
      8.530000 
      7.850000 
      8.150000 
      9.22 
      7.430000 
      8.100000 
     
    
      75% 
      3.650000 
      4.310000 
      7.430000 
      3.517500 
      4.080000 
      9.292500 
      7.940000 
      7.307500 
      7.835000 
      8.920000 
      ... 
      8.865000 
      8.782500 
      9.515000 
      8.122500 
      9.020000 
      8.430000 
      8.610000 
      9.22 
      7.600000 
      8.575000 
     
    
      max 
      3.970000 
      5.820000 
      9.700000 
      3.570000 
      4.230000 
      9.650000 
      8.600000 
      8.930000 
      9.170000 
      10.280000 
      ... 
      9.480000 
      9.650000 
      10.150000 
      9.180000 
      9.950000 
      9.470000 
      10.020000 
      9.22 
      8.530000 
      9.500000 
     
  
120 rows × 251 columns
Of all the clustering techniques that have been used (k-means, spectral, mean shift and affinity propagation), the ones that show the most stability in terms of the variance within the clusters are k-means and spectral clustering. When the bandwidth is estimated with a quantile close to 1 (0.9), the number of clusters obtained with mean shift is reduced to 18 (from 58 when using quantile 0.25). Even in this case most of the clusters are empty, which is why this clustering method has been discarded. The same applies to affinity propagation, as it produces 251 clusters with less than 1% of the data in each of them.
With k-means and spectral clustering, each cluster contains between 1% and 43% of the data points. Of these two, the better one from a similarity analysis perspective is k-means, as it achieves similar silhouette values with fewer clusters. For k-means clustering, the best solution according to the elbow method is 3 clusters, although the last cluster contains less than 1% of the data, so 2 clusters should also be considered.
From the 3 clusters we can see that in the first cluster men finished the marathon quicker than women, with an average pace of 7.36, the official timings being lower in all cases. Additionally, in cluster one we see that men are younger than women, finishing in position 192 versus position 218 for women. In the third cluster, men and women have a similar age, and there is also a difference of 20 positions from the first to the last person in the cluster, whose members are on average 48 years old.
Content source: borja876/Thinkful-DataScience-Borja
Similar notebooks: