In [1]:
import numpy as np

from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LogisticRegression, Lasso, LassoCV

from yellowbrick.cluster import *
from yellowbrick.features import FeatureImportances
from yellowbrick.classifier import ROCAUC, DiscriminationThreshold
from yellowbrick.classifier import ClassPredictionError, ConfusionMatrix
from yellowbrick.datasets import load_occupancy, load_energy, load_credit
from yellowbrick.classifier import ClassificationReport, PrecisionRecallCurve
from yellowbrick.regressor import PredictionError, ResidualsPlot, AlphaSelection

Check if fitted on Classifiers


In [2]:
X, y = load_occupancy(return_dataset=True).to_numpy()
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.20)

In [3]:
unfitted_model = LogisticRegression(solver='lbfgs')
fitted_model = unfitted_model.fit(X_train, y_train)

In [4]:
oz = ClassPredictionError(fitted_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [5]:
oz = ClassPredictionError(unfitted_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [6]:
oz = ClassificationReport(fitted_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [7]:
oz = ClassificationReport(unfitted_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [8]:
oz = ConfusionMatrix(fitted_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [9]:
oz = ConfusionMatrix(unfitted_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [10]:
oz = PrecisionRecallCurve(fitted_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [11]:
oz = PrecisionRecallCurve(unfitted_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [12]:
oz = ROCAUC(fitted_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [13]:
oz = ROCAUC(unfitted_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [14]:
oz = DiscriminationThreshold(fitted_model)
oz.fit(X, y)
oz.show()



In [15]:
oz = DiscriminationThreshold(unfitted_model)
oz.fit(X, y)
oz.show()


Check if fitted on Feature Visualizers*

Just the ones that inherit from ModelVisualizer


In [16]:
viz = FeatureImportances(fitted_model)
viz.fit(X, y)
viz.show()


/Users/rbilbro/pyjects/my_yb/yellowbrick/features/importances.py:159: YellowbrickWarning: detected multi-dimensional feature importances but stack=False, using mean to aggregate them.
  ), YellowbrickWarning)

In [17]:
viz = FeatureImportances(unfitted_model)
viz.fit(X, y)
viz.show()


/Users/rbilbro/pyjects/my_yb/yellowbrick/features/importances.py:159: YellowbrickWarning: detected multi-dimensional feature importances but stack=False, using mean to aggregate them.
  ), YellowbrickWarning)

In [18]:
# NOTE: Not sure how to deal with Recursive Feature Elimination

Check if fitted on Regressors


In [19]:
X, y = load_energy(return_dataset=True).to_numpy()
X_train, X_test, y_train, y_test = tts(X, y, test_size=0.20)

In [20]:
unfitted_nonlinear_model = RandomForestRegressor(n_estimators=10)
fitted_nonlinear_model = unfitted_nonlinear_model.fit(X_train, y_train)

In [21]:
unfitted_linear_model = Lasso()
fitted_linear_model = unfitted_linear_model.fit(X_train, y_train)

In [22]:
oz = PredictionError(unfitted_linear_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [23]:
oz = PredictionError(fitted_linear_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [24]:
oz = ResidualsPlot(unfitted_linear_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [25]:
oz = ResidualsPlot(fitted_linear_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [26]:
oz = ResidualsPlot(unfitted_nonlinear_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [27]:
oz = ResidualsPlot(fitted_nonlinear_model)
oz.fit(X_train, y_train)
oz.score(X_test, y_test)
oz.show()



In [28]:
unfitted_cv_model = LassoCV(alphas=[.01,1,10], cv=3)
fitted_cv_model = unfitted_cv_model.fit(X, y)

In [29]:
oz = AlphaSelection(unfitted_cv_model)
oz.fit(X, y)
oz.show()



In [30]:
oz = AlphaSelection(fitted_cv_model)
oz.fit(X, y)
oz.show()


Check if fitted on Clusterers


In [31]:
X, _ = load_credit(return_dataset=True).to_numpy()

In [32]:
unfitted_cluster_model = KMeans(6)
fitted_cluster_model = unfitted_cluster_model.fit(X)

In [33]:
# NOTE: Not sure how to deal with K-Elbow and prefitted models...

# visualizer = KElbowVisualizer(unfitted_cluster_model, k=(4,12))
# visualizer.fit(X)
# visualizer.show()

In [34]:
# visualizer = KElbowVisualizer(fitted_cluster_model, k=(4,12))
# visualizer.fit(X)
# visualizer.show()

In [35]:
# NOTE: Silhouette Scores doesn't have a quick method
visualizer = SilhouetteVisualizer(unfitted_cluster_model)
visualizer.fit(X)
visualizer.show()



In [36]:
visualizer = SilhouetteVisualizer(fitted_cluster_model)
visualizer.fit(X)
visualizer.show()



In [37]:
visualizer = InterclusterDistance(unfitted_cluster_model)
visualizer.fit(X)
visualizer.show()



In [38]:
visualizer = InterclusterDistance(fitted_cluster_model)
visualizer.fit(X)
visualizer.show()


Check if fitted on Model Selection Visualizers

NOTE: Not sure how to proceed with multi-model visualizers -- is already fitted a real use case here?