In [1]:

    
import numpy as np

from sklearn.base import clone
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LogisticRegression, Lasso, LassoCV

from yellowbrick.cluster import *
from yellowbrick.features import FeatureImportances
from yellowbrick.classifier import ROCAUC, DiscriminationThreshold
from yellowbrick.classifier import ClassPredictionError, ConfusionMatrix
from yellowbrick.datasets import load_occupancy, load_energy, load_credit
from yellowbrick.classifier import ClassificationReport, PrecisionRecallCurve
from yellowbrick.regressor import PredictionError, ResidualsPlot, AlphaSelection

Check if fitted on Classifiers



In [2]:

    
# Load the occupancy classification data as arrays and hold out 20% of the
# rows for scoring.
X, y = load_occupancy(return_dataset=True).to_numpy()
# A fixed seed keeps the split identical across kernel restarts, so the plots
# produced from it can be compared from one run to the next.
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.20, random_state=42)



In [3]:

    
# Template classifier that is deliberately never fitted in this cell.
unfitted_model = LogisticRegression(solver='lbfgs')
# `fit` returns the estimator itself, so calling `unfitted_model.fit(...)`
# would leave both names bound to one and the same fitted object. Fit an
# independent clone instead so the two variables really differ in state.
fitted_model = clone(unfitted_model).fit(X_train, y_train)
# NOTE(review): visualizers presumably fit the estimator they wrap in place --
# pass `clone(unfitted_model)` wherever a genuinely unfitted model is required.



In [4]:

    
# Class prediction error for the classifier that was trained ahead of time.
error_viz = ClassPredictionError(fitted_model)
error_viz.fit(X_train, y_train)
error_viz.score(X_test, y_test)
error_viz.show()



In [5]:

    
# clone() yields a fresh estimator with the same hyperparameters and no
# learned state, so the visualizer is guaranteed to receive an unfitted model
# even if `unfitted_model` has already been fitted earlier in the session.
oz = ClassPredictionError(clone(unfitted_model))
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [6]:

    
# Classification report for the classifier that was trained ahead of time.
report_viz = ClassificationReport(fitted_model)
report_viz.fit(X_train, y_train)
report_viz.score(X_test, y_test)
report_viz.show()



In [7]:

    
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_model`; clone() copies hyperparameters but no learned state.
oz = ClassificationReport(clone(unfitted_model))
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [8]:

    
# Confusion matrix for the classifier that was trained ahead of time.
matrix_viz = ConfusionMatrix(fitted_model)
matrix_viz.fit(X_train, y_train)
matrix_viz.score(X_test, y_test)
matrix_viz.show()



In [9]:

    
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_model`; clone() copies hyperparameters but no learned state.
oz = ConfusionMatrix(clone(unfitted_model))
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [10]:

    
# Precision-recall curve for the classifier that was trained ahead of time.
pr_viz = PrecisionRecallCurve(fitted_model)
pr_viz.fit(X_train, y_train)
pr_viz.score(X_test, y_test)
pr_viz.show()



In [11]:

    
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_model`; clone() copies hyperparameters but no learned state.
oz = PrecisionRecallCurve(clone(unfitted_model))
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [12]:

    
# ROC/AUC curves for the classifier that was trained ahead of time.
roc_viz = ROCAUC(fitted_model)
roc_viz.fit(X_train, y_train)
roc_viz.score(X_test, y_test)
roc_viz.show()



In [13]:

    
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_model`; clone() copies hyperparameters but no learned state.
oz = ROCAUC(clone(unfitted_model))
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [14]:

    
# Discrimination threshold plot for the classifier trained ahead of time.
# This visualizer is fitted on the full X, y and has no separate score step.
threshold_viz = DiscriminationThreshold(fitted_model)
threshold_viz.fit(X, y)
threshold_viz.show()



In [15]:

    
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_model`; clone() copies hyperparameters but no learned state.
oz = DiscriminationThreshold(clone(unfitted_model))
oz.fit(X, y)
oz.show()

Check if fitted on Feature Visualizers*

Only the feature visualizers that inherit from ModelVisualizer are covered here.



In [16]:

    
# Feature importances taken from the classifier trained ahead of time.
importances_viz = FeatureImportances(fitted_model)
importances_viz.fit(X, y)
importances_viz.show()









    



/Users/rbilbro/pyjects/my_yb/yellowbrick/features/importances.py:159: YellowbrickWarning: detected multi-dimensional feature importances but stack=False, using mean to aggregate them.
  ), YellowbrickWarning)



In [17]:

    
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_model`; clone() copies hyperparameters but no learned state.
viz = FeatureImportances(clone(unfitted_model))
viz.fit(X, y)
viz.show()









    



/Users/rbilbro/pyjects/my_yb/yellowbrick/features/importances.py:159: YellowbrickWarning: detected multi-dimensional feature importances but stack=False, using mean to aggregate them.
  ), YellowbrickWarning)



In [18]:

    
# TODO: Decide how the fitted check should work for Recursive Feature Elimination.

Check if fitted on Regressors



In [19]:

    
# Load the energy regression data as arrays and hold out 20% of the rows for
# scoring.
X, y = load_energy(return_dataset=True).to_numpy()
# A fixed seed keeps the split identical across kernel restarts, so the plots
# produced from it can be compared from one run to the next.
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.20, random_state=42)



In [20]:

    
# Template regressor that is deliberately never fitted in this cell. The seed
# makes a model fitted from this template reproducible, so a pre-fitted model
# and one fitted later on the same data yield comparable plots.
unfitted_nonlinear_model = RandomForestRegressor(n_estimators=10, random_state=42)
# `fit` returns the estimator itself, so fitting the template directly would
# leave both names bound to the same fitted object. Fit an independent clone.
fitted_nonlinear_model = clone(unfitted_nonlinear_model).fit(X_train, y_train)



In [21]:

    
# Template regressor that is deliberately never fitted in this cell.
unfitted_linear_model = Lasso()
# `fit` returns the estimator itself, so fitting the template directly would
# leave both names bound to the same fitted object. Fit an independent clone.
fitted_linear_model = clone(unfitted_linear_model).fit(X_train, y_train)



In [22]:

    
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_linear_model`; clone() copies hyperparameters but no
# learned state.
oz = PredictionError(clone(unfitted_linear_model))
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [23]:

    
# Prediction error plot for the linear regressor trained ahead of time.
prediction_viz = PredictionError(fitted_linear_model)
prediction_viz.fit(X_train, y_train)
prediction_viz.score(X_test, y_test)
prediction_viz.show()



In [24]:

    
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_linear_model`; clone() copies hyperparameters but no
# learned state.
oz = ResidualsPlot(clone(unfitted_linear_model))
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [25]:

    
# Residuals plot for the linear regressor trained ahead of time.
linear_residuals_viz = ResidualsPlot(fitted_linear_model)
linear_residuals_viz.fit(X_train, y_train)
linear_residuals_viz.score(X_test, y_test)
linear_residuals_viz.show()



In [26]:

    
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_nonlinear_model`; clone() copies hyperparameters but no
# learned state.
oz = ResidualsPlot(clone(unfitted_nonlinear_model))
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [27]:

    
# Residuals plot for the random-forest regressor trained ahead of time.
forest_residuals_viz = ResidualsPlot(fitted_nonlinear_model)
forest_residuals_viz.fit(X_train, y_train)
forest_residuals_viz.score(X_test, y_test)
forest_residuals_viz.show()



In [28]:

    
# Template cross-validated regressor that is deliberately never fitted in
# this cell.
unfitted_cv_model = LassoCV(alphas=[.01,1,10], cv=3)
# `fit` returns the estimator itself, so fitting the template directly would
# leave both names bound to the same fitted object. Fit an independent clone.
fitted_cv_model = clone(unfitted_cv_model).fit(X, y)



In [29]:

    
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_cv_model`; clone() copies hyperparameters but no learned
# state.
oz = AlphaSelection(clone(unfitted_cv_model))
oz.fit(X, y)
oz.show()



In [30]:

    
# Alpha selection plot for the cross-validated regressor trained ahead of time.
alpha_viz = AlphaSelection(fitted_cv_model)
alpha_viz.fit(X, y)
alpha_viz.show()

Check if fitted on Clusterers



In [31]:

    
# Load the credit data as arrays; only the feature matrix is kept and the
# second returned array is discarded.
credit_dataset = load_credit(return_dataset=True)
X, _ = credit_dataset.to_numpy()



In [32]:

    
# Template clusterer that is deliberately never fitted in this cell. The seed
# makes a model fitted from this template reproducible, so a pre-fitted model
# and one fitted later on the same data yield comparable plots.
unfitted_cluster_model = KMeans(6, random_state=42)
# `fit` returns the estimator itself, so fitting the template directly would
# leave both names bound to the same fitted object. Fit an independent clone.
fitted_cluster_model = clone(unfitted_cluster_model).fit(X)



In [33]:

    
# TODO: Decide how K-Elbow should handle prefitted models; the cells below stay disabled until then.

# visualizer = KElbowVisualizer(unfitted_cluster_model, k=(4,12))
# visualizer.fit(X)
# visualizer.show()



In [34]:

    
# visualizer = KElbowVisualizer(fitted_cluster_model, k=(4,12))
# visualizer.fit(X)
# visualizer.show()



In [35]:

    
# NOTE: Silhouette Scores does not have a quick method
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_cluster_model`; clone() copies hyperparameters but no
# learned state.
visualizer = SilhouetteVisualizer(clone(unfitted_cluster_model))
visualizer.fit(X)
visualizer.show()



In [36]:

    
# Silhouette plot for the clusterer trained ahead of time.
silhouette_viz = SilhouetteVisualizer(fitted_cluster_model)
silhouette_viz.fit(X)
silhouette_viz.show()



In [37]:

    
# Wrap a clone so the estimator is unfitted regardless of what earlier cells
# did to `unfitted_cluster_model`; clone() copies hyperparameters but no
# learned state.
visualizer = InterclusterDistance(clone(unfitted_cluster_model))
visualizer.fit(X)
visualizer.show()



In [38]:

    
# Intercluster distance map for the clusterer trained ahead of time.
distance_viz = InterclusterDistance(fitted_cluster_model)
distance_viz.fit(X)
distance_viz.show()

Check if fitted on Model Selection Visualizers

NOTE: Not sure how to proceed with multi-model visualizers -- is passing an already-fitted estimator a real use case here?