In [1]:
# Render matplotlib figures inline in the notebook's cell output.
%matplotlib inline

Yellowbrick Feature Importance Examples

This notebook demonstrates the feature importance visualizations that Yellowbrick provides.


In [2]:
import os
import sys
# Put the directory two levels above this notebook at the front of the import
# path. This has to run before the yellowbrick imports below.
# NOTE(review): presumably "../.." is the repository root, so the local
# yellowbrick checkout is imported instead of an installed copy — confirm.
sys.path.insert(0, "../..")

import importlib
import numpy as np
import pandas as pd
# `yellowbrick` (unaliased) is the name used by the importlib.reload call in
# the Playground section; `yb` is an alias for the same package.
import yellowbrick
import yellowbrick as yb
from yellowbrick.features.importances import FeatureImportances
import matplotlib as mpl
import matplotlib.pyplot as plt

from sklearn import manifold, datasets
from sklearn.linear_model import LogisticRegression, LinearRegression

# Default figure size applied to every plot created in this notebook.
mpl.rcParams["figure.figsize"] = (9,6)

Load Iris Datasets for Example Code


In [106]:
# Load the iris features and targets as a pair of arrays.
# `return_X_y` is passed by keyword: recent scikit-learn releases make it
# keyword-only, so the positional form `load_iris(True)` raises a TypeError.
X_iris, y_iris = datasets.load_iris(return_X_y=True)

# DataFrame copy of the same features with short column names, for cells that
# want to pass a DataFrame to the visualizer instead of a bare array.
X_iris_pd = pd.DataFrame(X_iris, columns=['f1', 'f2', 'f3', 'f4'])

Logistic Regression with Mean of Feature Importances

Should we normalize relative to maximum value or maximum absolute value?


In [126]:
# Baseline for this section: logistic regression on iris with the
# visualizer's default options (no stack / relative / absolute overrides).
viz = FeatureImportances(
    LogisticRegression(),
)
viz.fit(X_iris, y_iris)
viz.show()



In [113]:
# Same model and data as the baseline cell, with relative scaling switched off.
# NOTE(review): presumably this plots raw importance values instead of
# normalized ones — confirm against the FeatureImportances documentation.
viz = FeatureImportances(
    LogisticRegression(),
    relative=False,
)
viz.fit(X_iris, y_iris)
viz.show()



In [114]:
# Same model and data as the baseline cell, with absolute=True.
# NOTE(review): presumably importances are shown as absolute values, so
# negative coefficients no longer point left — confirm against the docs.
viz = FeatureImportances(
    LogisticRegression(),
    absolute=True,
)
viz.fit(X_iris, y_iris)
viz.show()



In [116]:
# Combine both overrides from the two previous cells: absolute=True together
# with relative=False.
viz = FeatureImportances(
    LogisticRegression(),
    absolute=True,
    relative=False,
)
viz.fit(X_iris, y_iris)
viz.show()


Logistic Regression with Stacked Feature Importances

Need to decide how to scale feature importances when relative=True.


In [127]:
# Stacked variant of the baseline: stack=True with every other option left at
# its default.
viz = FeatureImportances(
    LogisticRegression(),
    stack=True,
)
viz.fit(X_iris, y_iris)
viz.show()



In [128]:
# Stacked importances with relative scaling switched off.
viz = FeatureImportances(
    LogisticRegression(),
    relative=False,
    stack=True,
)
viz.fit(X_iris, y_iris)
viz.show()



In [129]:
# Stacked importances with absolute=True (relative scaling left at its default).
viz = FeatureImportances(
    LogisticRegression(),
    absolute=True,
    stack=True,
)
viz.fit(X_iris, y_iris)
viz.show()



In [130]:
# Stacked importances with both overrides: absolute=True and relative=False.
viz = FeatureImportances(
    LogisticRegression(),
    absolute=True,
    relative=False,
    stack=True,
)
viz.fit(X_iris, y_iris)
viz.show()


Load Digits Datasets for Example Code

Should we add an option to show only top n features?


In [121]:
# Load the digits features and targets as a pair; this dataset has many more
# features than iris, which is what prompts the "top n features" question above.
X_digits, y_digits = datasets.load_digits(return_X_y=True)

In [124]:
# Stacked importances on the digits data, with relative=True spelled out
# explicitly rather than relying on the default.
viz = FeatureImportances(
    LogisticRegression(),
    relative=True,
    stack=True,
)
viz.fit(X_digits, y_digits)
viz.show()


Linear Regression


In [131]:
# Linear regression with the visualizer's default options.
# NOTE(review): y_iris presumably holds integer class labels, which are
# treated here as a numeric regression target — fine for a demo, confirm intent.
viz = FeatureImportances(
    LinearRegression(),
)
viz.fit(X_iris, y_iris)
viz.show()



In [132]:
# Linear regression again, this time requesting stack=True, to see how the
# visualizer handles that option for a regressor.
viz = FeatureImportances(
    LinearRegression(),
    stack=True,
)
viz.fit(X_iris, y_iris)
viz.show()


Playground


In [102]:
# Development helper: re-execute the yellowbrick.features.importances module
# so edits to its source are picked up without restarting the kernel.
importlib.reload(yellowbrick.features.importances)
# Re-import after the reload so the notebook-level name FeatureImportances
# is bound to the class object from the freshly reloaded module.
from yellowbrick.features.importances import FeatureImportances

In [103]:
# Scratch cell for trying option combinations against the reloaded class.
# It fits on X_iris_pd / y_iris from the "Load Iris" cell. The names used
# before (X_pd, y) are not defined anywhere in this notebook, so the cell
# raised NameError on a fresh kernel.
viz = FeatureImportances(
    LogisticRegression(),
    relative=False,
    absolute=False,
    stack=True,
)
viz.fit(X_iris_pd, y_iris)
viz.show()