In [2]:
import pandas as pd

%load_ext autoreload
%autoreload 2

import sys
import os
sys.path.append(os.path.abspath('..'))
from category_encoder_comparison import titanic_experiment, beeline_experiment


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [3]:
# Run the Titanic experiment with the 'kfold_mean' category encoding.
# NOTE(review): presumably this records a result row that the next cell reads
# back from titanic.json -- confirm in category_encoder_comparison.
titanic_experiment(dict(category_encoding='kfold_mean'))

In [5]:
# Load the recorded Titanic results, ordered by mean score (ascending).
# NOTE(review): titanic.json is presumably produced by titanic_experiment --
# confirm before relying on it after a fresh checkout.
stats = pd.read_json('titanic.json').sort_values('score-mean')

# Display only the columns that differ between runs. dropna=False makes NaN
# count as a value of its own, so a parameter that is set for some runs and
# missing for others is still shown.
# Columns are selected with .loc instead of the .T[mask].T round-trip:
# transposing a mixed-dtype frame casts every column to object, which loses
# the numeric dtypes and produces uneven float formatting in the output.
stats.loc[:, stats.nunique(dropna=False) > 1]


Out[5]:
category_encoding exec-time-sec imputation multi_class score-mean score-std
3 empirical_bayes_vibrant 2.04532 NaN NaN 0.866682 0.0178235
0 onehot 0.687949 NaN NaN 0.86683 0.02215
2 empirical_bayes 0.989245 NaN NaN 0.867268 0.0264805
5 noisy_mean 0.67812 most_frequent 0 0.867468 0.0244902
4 yandex_mean 0.99721 most_frequent 0 0.867509 0.026371
1 count 0.504651 NaN NaN 0.867574 0.0229623
6 kfold_mean 1.44304 most_frequent 0 0.867599 0.027625

In [3]:
# Run the Beeline experiment with the 'kfold_mean' category encoding.
# NOTE(review): presumably this records a result row that the next cell reads
# back from beeline.json -- confirm in category_encoder_comparison.
beeline_experiment(dict(category_encoding='kfold_mean'))

In [4]:
# Load the recorded Beeline results, ordered by mean score (ascending).
# NOTE(review): beeline.json is presumably produced by beeline_experiment --
# confirm before relying on it after a fresh checkout.
stats = pd.read_json('beeline.json').sort_values('score-mean')

# Display only the columns that differ between runs. dropna=False matches the
# Titanic summary cell: NaN counts as a value of its own, so a parameter that
# is set for some runs and missing for others is not silently hidden.
# Columns are selected with .loc instead of the .T[mask].T round-trip:
# transposing a mixed-dtype frame casts every column to object, which loses
# the numeric dtypes and produces uneven float formatting in the output.
stats.loc[:, stats.nunique(dropna=False) > 1]


Out[4]:
category_encoding exec-time-sec score-mean score-std
1 onehot 49.0752 0.451683 0.0200957
4 yandex_mean 20.4326 0.5209 0.00292436
2 empirical_bayes 20.4211 0.524902 0.0057843
5 noisy_mean 24.7269 0.566999 0.0133952
6 kfold_mean 33.6657 0.568895 0.0134582
3 empirical_bayes_vibrant 1007.79 0.571096 0.00725394
0 count 13.0555 0.581793 0.0112694

In [ ]: