In [1]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer, SilhouetteVisualizer
import yellowbrick.datasets as ybdata
%matplotlib inline
In [2]:
# Load the NFL dataset bundled with yellowbrick, convert it to a DataFrame,
# and keep only the rows whose `Tgt` column is at least 20.
nfl_dataset = ybdata.load_nfl(return_dataset=True)
nfl_df = nfl_dataset.to_dataframe().query('Tgt >= 20')
In [3]:
# Preview the first five rows of the filtered frame.
nfl_df.head(n=5)
Out[3]:
In [4]:
# Columns used as clustering inputs.
features = ['Rec', 'Yds', 'TD', 'Fmb', 'Ctch_Rate']
# Plain NumPy matrix of the selected columns, one row per row of nfl_df.
X = nfl_df.loc[:, features].to_numpy()
In [5]:
# Elbow analysis: fit KMeans over the candidate cluster counts given by
# k=(2, 10) and plot the resulting score curve (fit timings are not drawn).
elbow_estimator = KMeans(random_state=42)  # fixed seed for reproducible runs
elbow_visualizer = KElbowVisualizer(
    elbow_estimator,
    k=(2, 10),
    timings=False,
)
elbow_visualizer.fit(X)
# Resize the figure before rendering it.
elbow_visualizer.size = (600, 400)
elbow_visualizer.show()
In [6]:
# Silhouette analysis for a 4-cluster KMeans model on the same feature matrix.
# NOTE(review): 4 is presumably the elbow read off the previous plot — confirm.
model = KMeans(n_clusters=4, random_state=42)  # fixed seed for reproducible runs
silhouette_visualizer = SilhouetteVisualizer(model, colors='yellowbrick')
silhouette_visualizer.fit(X)
# Resize THIS visualizer's figure. The original line resized `elbow_visualizer`
# (copy-paste slip), so the silhouette plot never received the intended size.
silhouette_visualizer.size = (600, 400)
silhouette_visualizer.show()
In [ ]: