In [128]:
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn import partial_dependence
import numpy as np
from scipy.stats import norm
X = np.random.normal(0, 5, size= (1000 , 3))
B= np.array([1.22, -6.2, 0.2])
y = np.dot(X, B) + norm(0, 5).rvs(1000)
y_binary = y > 0.
rf_r_model = RandomForestRegressor()
rf_r_model.fit(X, y)
svm_model = SVC()
svm_model.fit(X, y_binary)
logit_model = LogisticRegression()
logit_model.fit(X, y_binary)
Out[128]:
In [97]:
def f(array):
return (np.sum(X, axis=1) > 0.1).astype(float)
from sklearn.dummy import DummyRegressor
c = DummyRegressor()
c.fit(X, y)
c.predict = f
pdp, axes = partial_dependence.partial_dependence(c, [0, 1], X = X, method='exact')
In [104]:
pdp, axes = partial_dependence.partial_dependence(logit_model, [0, 1], X = X, method='exact')
In [109]:
pdp, axes = partial_dependence.partial_dependence(logit_model, [0, 1], X = X, method='estimated')
In [124]:
pdp_exact, axes_exact = partial_dependence.partial_dependence(rf_r_model, [0], X = X, method='exact')
pdp_est, axes_est = partial_dependence.partial_dependence(rf_r_model, [0], X = X, method='estimated')
pdp_rec, axes_rec = partial_dependence.partial_dependence(rf_r_model, [0], X = X, method='recursion')
ax = pd.DataFrame(pdp_exact[0], index = axes_exact[0], columns=['exact']).plot()
pd.DataFrame(pdp_est[0], index = axes_est[0], columns=['estimated']).plot(ax=ax)
pd.DataFrame(pdp_rec[0], index = axes_rec[0], columns=['recursion']).plot(ax=ax)
Out[124]:
In [127]:
grid = [i / 10. for i in range(-70, 90)]
pdp_exact, axes_exact = partial_dependence.partial_dependence(rf_r_model, [0], grid = grid, X=X, method='exact')
pdp_est, axes_est = partial_dependence.partial_dependence(rf_r_model, [0], grid = grid, X=X, method='estimated')
pdp_rec, axes_rec = partial_dependence.partial_dependence(rf_r_model, [0], grid = grid, X=X, method='recursion')
ax = pd.DataFrame(pdp_exact[0], index = axes_exact[0], columns=['exact']).plot()
pd.DataFrame(pdp_est[0], index = axes_est[0], columns=['estimated']).plot(ax=ax)
pd.DataFrame(pdp_rec[0], index = axes_rec[0], columns=['recursion']).plot(ax=ax)
In [132]:
def get_multiclass(x, Y):
if x > np.percentile(Y, .66):
return 1
elif x < np.percentile(Y, .33):
return -1
else:
return 0
y_multiclass = np.array(map(lambda x: get_multiclass(x, y), y))
rf_multi_model = RandomForestClassifier()
rf_multi_model.fit(X, y_multiclass)
pdp_exact, axes_exact = partial_dependence.partial_dependence(rf_multi_model, [0], X=X, method='exact')
In [ ]:
In [ ]: