In [2]:
import pandas as pd
df = pd.read_csv('titanic_train_new.csv')
display(df.head())
X = df.drop(['Survived', 'PassengerId'], axis=1)
y = df['Survived']
from xtoy import Toy
toy = Toy(use_lightgbm=False, use_xgboost=True)
toy.fit(X ,y)


PassengerId Survived Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 1 0 3 Braund, Mr. Owen Harris male 22.0 1 0 A/5 21171 7.2500 NaN S
1 2 1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 0 PC 17599 71.2833 C85 C
2 3 1 3 Heikkinen, Miss. Laina female 26.0 0 0 STON/O2. 3101282 7.9250 NaN S
3 4 1 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 0 113803 53.1000 C123 S
4 5 0 3 Allen, Mr. William Henry male 35.0 0 0 373450 8.0500 NaN S
/Users/sethuiyer/Documents/Workspace/interpret/venv/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.
  from numpy.core.umath_tests import inner1d
make_scorer(f1_weighted_score)
estimator: ridge
unique_combinations 16
Types [2] and maxint [15] detected
--- Evolve in 16 possible combinations ---
gen	nevals	avg     	min     	max     
0  	5     	0.689742	0.654157	0.733195
1  	2     	0.679021	0.640221	0.702697
2  	2     	0.689675	0.6583  	0.702697
3  	4     	0.682846	0.659591	0.702697
4  	3     	0.674335	0.621703	0.702697
5  	5     	0.68524 	0.665538	0.695859
6  	2     	0.701537	0.686516	0.73597 
7  	4     	0.694487	0.671264	0.714945
8  	3     	0.696822	0.67901 	0.714945
9  	3     	0.690318	0.623581	0.714945
10 	2     	0.715361	0.70439 	0.721945
(0.7359696065779963,)
cbp {'estimator__alpha': 0.001}
Best individual is: {'estimator__alpha': 0.001}
with fitness: 0.7359696065779963
estimator: rf
unique_combinations 216
Types [1, 1, 1, 1, 1, 1] and maxint [5, 3, 2, 2, 0, 0] detected
--- Evolve in 216 possible combinations ---
gen	nevals	avg     	min    	max    
0  	5     	0.795542	0.77349	0.81893
1  	2     	0.80894 	0.788244	0.822922
2  	3     	0.797709	0.763932	0.81893 
3  	4     	0.800394	0.785012	0.824188
4  	1     	0.805025	0.779771	0.824188
5  	5     	0.787438	0.762702	0.840506
6  	4     	0.789424	0.764625	0.840506
7  	3     	0.805538	0.791489	0.840506
8  	4     	0.788507	0.77228 	0.800128
9  	4     	0.773438	0.757836	0.785949
10 	4     	0.790818	0.768333	0.803311
11 	0     	0.803311	0.803311	0.803311
12 	2     	0.800764	0.776111	0.817775
13 	0     	0.809096	0.803311	0.817775
14 	4     	0.804443	0.788986	0.817775
15 	2     	0.805595	0.785612	0.817775
16 	4     	0.787949	0.767265	0.817775
17 	4     	0.800028	0.774016	0.817775
18 	4     	0.794458	0.747996	0.808771
19 	1     	0.801319	0.777041	0.808771
20 	4     	0.78454 	0.772483	0.800024
21 	3     	0.794575	0.762681	0.819709
(0.8241883798898725,)
cbp {'estimator__max_features': 0.5, 'estimator__max_depth': 10, 'estimator__min_samples_leaf': 5, 'estimator__min_samples_split': 20, 'estimator__n_estimators': 1000, 'estimator__class_weight': 'balanced'}
Best individual is: {'estimator__max_features': 0.5, 'estimator__max_depth': 10, 'estimator__min_samples_leaf': 5, 'estimator__min_samples_split': 20, 'estimator__n_estimators': 1000, 'estimator__class_weight': 'balanced'}
with fitness: 0.8241883798898725
estimator: knn
unique_combinations 336
Types [1, 1, 1, 1] and maxint [5, 6, 3, 1] detected
--- Evolve in 336 possible combinations ---
gen	nevals	avg     	min     	max     
0  	5     	0.728349	0.699398	0.753189
1  	5     	0.733158	0.703808	0.78842 
2  	4     	0.739315	0.678993	0.775224
3  	5     	0.732318	0.713896	0.754946
4  	4     	0.723504	0.663968	0.754946
5  	2     	0.7392  	0.71354 	0.754946
6  	2     	0.738477	0.695622	0.754946
7  	4     	0.737678	0.701343	0.777205
8  	3     	0.734933	0.699171	0.777205
9  	1     	0.75768 	0.701839	0.777205
10 	0     	0.762132	0.701839	0.777205
11 	2     	0.763111	0.72634 	0.777205
12 	4     	0.726617	0.67593 	0.777205
13 	4     	0.735894	0.711275	0.777205
14 	2     	0.728609	0.711275	0.749758
15 	4     	0.726591	0.691991	0.749758
16 	4     	0.701094	0.650055	0.742242
17 	2     	0.733319	0.704893	0.742242
18 	2     	0.722204	0.642465	0.742242
19 	2     	0.729732	0.710871	0.742242
20 	1     	0.735558	0.708823	0.742242
21 	4     	0.718573	0.681926	0.742818
22 	4     	0.73579 	0.684558	0.752722
23 	3     	0.72    	0.676035	0.752202
24 	2     	0.750753	0.747225	0.758339
25 	3     	0.732015	0.702391	0.748774
26 	2     	0.735641	0.710112	0.748774
27 	4     	0.718904	0.69773 	0.748774
28 	3     	0.734688	0.689374	0.748774
29 	2     	0.753876	0.748197	0.769271
30 	2     	0.738207	0.709012	0.769271
31 	3     	0.719145	0.684383	0.754364
32 	1     	0.741529	0.696356	0.754364
33 	5     	0.72871 	0.71174 	0.737569
(0.7884199758303595,)
cbp {'estimator__n_neighbors': 5, 'estimator__leaf_size': 3, 'estimator__p': 2, 'estimator__weights': 'distance'}
Best individual is: {'estimator__n_neighbors': 5, 'estimator__leaf_size': 3, 'estimator__p': 2, 'estimator__weights': 'distance'}
with fitness: 0.7884199758303595
estimator: xgb
unique_combinations 270
Types [1, 1, 2, 1] and maxint [4, 5, 8, 0] detected
--- Evolve in 270 possible combinations ---
gen	nevals	avg     	min     	max     
0  	5     	0.808367	0.791833	0.820858
1  	3     	0.797774	0.774765	0.816199
2  	2     	0.810423	0.801415	0.816199
3  	4     	0.796619	0.756186	0.843127
4  	3     	0.817328	0.788144	0.843127
5  	1     	0.809282	0.747912	0.843127
6  	2     	0.81195 	0.798454	0.843127
7  	1     	0.818529	0.790227	0.843127
8  	2     	0.820652	0.764773	0.843127
9  	4     	0.797062	0.737579	0.843127
10 	2     	0.813633	0.791774	0.843127
11 	5     	0.800168	0.77124 	0.851061
12 	1     	0.818479	0.78652 	0.851061
13 	0     	0.831387	0.789613	0.851061
14 	2     	0.832532	0.791779	0.851061
15 	2     	0.838702	0.804821	0.851061
16 	3     	0.828386	0.796409	0.851061
17 	2     	0.833577	0.812072	0.851061
18 	1     	0.834974	0.819057	0.851061
19 	3     	0.820793	0.795979	0.851061
20 	2     	0.83296 	0.808224	0.851061
21 	4     	0.81702 	0.782231	0.844766
22 	3     	0.82765 	0.80363 	0.844766
23 	2     	0.833485	0.825278	0.844766
24 	3     	0.827005	0.802584	0.853273
25 	1     	0.835164	0.822451	0.853273
26 	2     	0.820604	0.776367	0.853273
27 	3     	0.814249	0.79489 	0.833364
(0.8510609205371467,)
cbp {'estimator__max_depth': 20, 'estimator__min_child_weight': 15, 'estimator__learning_rate': 0.3, 'estimator__n_estimators': 100}
Best individual is: {'estimator__max_depth': 20, 'estimator__min_child_weight': 15, 'estimator__learning_rate': 0.3, 'estimator__n_estimators': 100}
with fitness: 0.8510609205371467
best: Pipeline(memory=None,
     steps=[('scaler', Normalizer(copy=True, norm='l2')), ('estimator', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.3, max_delta_step=0,
       max_depth=20, min_child_weight=15, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1))])
Out[2]:
Pipeline(memory=None,
     steps=[('scaler', Normalizer(copy=True, norm='l2')), ('estimator', XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.3, max_delta_step=0,
       max_depth=20, min_child_weight=15, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1))])

In [3]:
df_test = pd.read_csv('titanic_test.csv')
X_test = df_test.drop(['Survived','PassengerId'], axis=1)
X_test = toy.featurizer.imputer.transform(X_test)
y_test = df_test['Survived']

In [9]:
X_test


Out[9]:
Pclass Name Sex Age SibSp Parch Ticket Fare Cabin Embarked
0 3 Braund, Mr. Owen Harris male 22.000000 1 0 A/5 21171 7.2500 B96 B98 S
1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.000000 1 0 PC 17599 71.2833 C85 C
2 3 Heikkinen, Miss. Laina female 26.000000 0 0 STON/O2. 3101282 7.9250 B96 B98 S
3 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.000000 1 0 113803 53.1000 C123 S
4 3 Allen, Mr. William Henry male 35.000000 0 0 373450 8.0500 B96 B98 S
... ... ... ... ... ... ... ... ... ... ...
886 2 Montvila, Rev. Juozas male 27.000000 0 0 211536 13.0000 B96 B98 S
887 1 Graham, Miss. Margaret Edith female 19.000000 0 0 112053 30.0000 B42 S
888 3 Johnston, Miss. Catherine Helen "Carrie" female 29.699118 1 2 W./C. 6607 23.4500 B96 B98 S
889 1 Behr, Mr. Karl Howell male 26.000000 0 0 111369 30.0000 C148 C
890 3 Dooley, Mr. Patrick male 32.000000 0 0 370376 7.7500 B96 B98 Q

891 rows × 10 columns


In [5]:
from interpret import show
from interpret.perf import ROC

blackbox_perf = ROC(toy.predict_proba).explain_perf(X_test, y_test, name='Blackbox')


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/Documents/Workspace/interpret/venv/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2896             try:
-> 2897                 return self._engine.get_loc(key)
   2898             except KeyError:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index_class_helper.pxi in pandas._libs.index.Int64Engine._check_type()

KeyError: 'Age'

During handling of the above exception, another exception occurred:

KeyError                                  Traceback (most recent call last)
<ipython-input-5-9263b995b073> in <module>
      2 from interpret.perf import ROC
      3 
----> 4 blackbox_perf = ROC(toy.predict_proba).explain_perf(X_test, y_test, name='Blackbox')

~/Documents/Workspace/interpret/venv/lib/python3.6/site-packages/interpret/perf/curve.py in explain_perf(self, X, y, name)
     73             X, y, self.feature_names, self.feature_types
     74         )
---> 75         predict_fn = unify_predict_fn(self.predict_fn, X)
     76         scores = predict_fn(X)
     77 

~/Documents/Workspace/interpret/venv/lib/python3.6/site-packages/interpret/utils/all.py in unify_predict_fn(predict_fn, X)
    188 
    189 def unify_predict_fn(predict_fn, X):
--> 190     predictions = predict_fn(X[:1])
    191     if predictions.ndim == 2:
    192         new_predict_fn = lambda x: predict_fn(x)[:, 1]  # noqa: E731

~/Documents/Workspace/interpret/venv/lib/python3.6/site-packages/xtoy/toys.py in predict_proba(self, X)
    136 
    137     def predict_proba(self, X):
--> 138         X = self.featurizer.transform(X).A
    139         return self.best_evo.predict_proba(X)
    140 

~/Documents/Workspace/interpret/venv/lib/python3.6/site-packages/xtoy/prep.py in transform(self, X, y)
    218         date_groups = []
    219         for col in self.missing_col_names:
--> 220             res.append(pd.isnull(X[col]))
    221         X = self.imputer.transform(X)
    222         for i, col in enumerate(X):

~/Documents/Workspace/interpret/venv/lib/python3.6/site-packages/pandas/core/frame.py in __getitem__(self, key)
   2993             if self.columns.nlevels > 1:
   2994                 return self._getitem_multilevel(key)
-> 2995             indexer = self.columns.get_loc(key)
   2996             if is_integer(indexer):
   2997                 indexer = [indexer]

~/Documents/Workspace/interpret/venv/lib/python3.6/site-packages/pandas/core/indexes/range.py in get_loc(self, key, method, tolerance)
    377             except ValueError:
    378                 raise KeyError(key)
--> 379         return super().get_loc(key, method=method, tolerance=tolerance)
    380 
    381     @Appender(_index_shared_docs["get_indexer"])

~/Documents/Workspace/interpret/venv/lib/python3.6/site-packages/pandas/core/indexes/base.py in get_loc(self, key, method, tolerance)
   2897                 return self._engine.get_loc(key)
   2898             except KeyError:
-> 2899                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2900         indexer = self.get_indexer([key], method=method, tolerance=tolerance)
   2901         if indexer.ndim > 1 or indexer.size > 1:

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index.pyx in pandas._libs.index.IndexEngine.get_loc()

pandas/_libs/index_class_helper.pxi in pandas._libs.index.Int64Engine._check_type()

KeyError: 'Age'

In [ ]: