In [1]:
from __future__ import print_function

import numpy as np
import sklearn.cross_validation  # deprecated; sklearn.model_selection in scikit-learn >= 0.20
import sklearn.ensemble
from sklearn.datasets import load_boston

In [2]:
# Load the Boston housing dataset (13 features, target = home price).
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- this notebook targets an older scikit-learn.
boston = load_boston()

In [3]:
# Random forest regressor with 1000 trees; every other hyperparameter is left
# at its default (see the estimator repr echoed in the fit cell below).
rf = sklearn.ensemble.RandomForestRegressor(n_estimators=1000)

In [4]:
# 80/20 train/test split of features and targets.
# NOTE(review): sklearn.cross_validation was renamed sklearn.model_selection in
# scikit-learn >= 0.20 -- switch to sklearn.model_selection.train_test_split on
# modern versions.  No random_state is set, so the split (and every number
# shown below) changes on each run.
train, test, labels_train, labels_test = sklearn.cross_validation.train_test_split(boston.data, boston.target, train_size=0.80)

In [5]:
# Fit the forest on the training split; the fitted estimator is the cell's output.
rf.fit(train, labels_train)


Out[5]:
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=1000, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False)

In [6]:
# Mean squared error of the forest on the held-out test split.
rf_mse = np.mean((rf.predict(test) - labels_test) ** 2)
print('Random Forest MSError', rf_mse)


Random Forest MSError 14.9115438028

In [7]:
# Baseline: MSE of always predicting the training-set mean.  The forest above
# should beat this by a wide margin.
# Fix: the message read "the mean mean" (duplicated word).
baseline_mse = np.mean((labels_train.mean() - labels_test) ** 2)
print('MSError when predicting the mean', baseline_mse)


MSError when predicting the mean mean 89.7125582736

In [8]:
# The 13 feature names of the dataset (LSTAT, RM, etc. are referenced below).
boston.feature_names


Out[8]:
array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
       'TAX', 'PTRATIO', 'B', 'LSTAT'], 
      dtype='|S7')

In [9]:
# Treat any column with at most 10 distinct values as categorical.
n_cols = boston.data.shape[1]
distinct_counts = np.array([len(set(boston.data[:, col])) for col in range(n_cols)])
categorical_features = np.argwhere(distinct_counts <= 10).flatten()

In [10]:
import lime
import lime.lime_tabular

In [11]:
# Build a LIME tabular explainer over the TRAINING data.  verbose=True makes
# explain_instance print the local model's intercept and prediction (visible
# in the output of the explain cell below).  'price' labels the single
# regression output.
explainer = lime.lime_tabular.LimeTabularExplainer(train, feature_names=boston.feature_names, class_names=['price'], categorical_features=categorical_features, verbose=True)

In [12]:
# PEP 8 (E731): use a def instead of assigning a lambda to a name.
def predict_fn(x):
    """Predict with the forest and reshape the flat output to (n_samples, 1).

    The column shape is presumably what the LIME explainer expects from the
    prediction function -- TODO confirm against lime_tabular's interface.
    """
    return rf.predict(x).reshape(-1, 1)

In [13]:
# Index of the test sample with the lowest predicted price.
np.argmin(predict_fn(test))


Out[13]:
11

In [14]:
# Explain a single test instance; keep the top-5 features of the local model.
# labels=[0] selects column 0 of predict_fn's output (its only column).
i = 8
exp = explainer.explain_instance(test[i], predict_fn, labels=[0], num_features=5)


Intercept 23.705428679
Prediction_local [ 19.44210311]
Right: 12.4991

In [15]:
# (feature, weight) pairs of the local explanation for output 0 -- shown in
# the output below in decreasing absolute weight, LSTAT dominating.
exp.as_list(0)


Out[15]:
[('LSTAT', -3.8488800215287866),
 ('RM', 2.72730759682283),
 ('DIS', -1.8314574638891676),
 ('PTRATIO', -0.4657069420491039),
 ('NOX', -0.41191353773578032)]

In [16]:
# Compare the model's prediction for sample i with the ground-truth label.
predicted = predict_fn(test[i].reshape(1, -1))[0, 0]
print('Prediction', predicted)
print('True', labels_test[i])


Prediction 12.4991
True 11.0

In [17]:
# Render the explanation inline; predict_proba=False suppresses the class
# probability panel (this is a regression, not a classification).
exp.show_in_notebook(predict_proba=False)



In [18]:
# Column index of LSTAT, the strongest feature in the explanation above (== 12).
list(boston.feature_names).index('LSTAT')


Out[18]:
12

In [19]:
# Per-feature scale of the explainer's internal scaler at index 12 (LSTAT,
# from the previous cell) -- presumably the training-data standard deviation
# if the scaler is a StandardScaler; TODO confirm.
explainer.scaler.scale_[12]


Out[19]:
7.2090353371977915

In [21]:
# Sanity-check the explanation: shift LSTAT on a copy of the instance and see
# how the prediction moves.  Named the magic numbers: 12 duplicated the index
# computed from boston.feature_names above, and 1.22 is the perturbation size
# in scaler units (TODO confirm where 1.22 originally came from).
lstat_idx = list(boston.feature_names).index('LSTAT')  # == 12
lstat_shift = 1.22  # perturbation magnitude, in units of the scaler's scale

x = test[i].copy()
before = predict_fn(x.reshape(1, -1))
x[lstat_idx] = x[lstat_idx] - lstat_shift * explainer.scaler.scale_[lstat_idx]
after = predict_fn(x.reshape(1, -1))
print('Before', before)
print('After', after)
print('Difference', after - before)


Before [[ 12.4991]]
After [[ 18.2741]]
Difference [[ 5.775]]