In [16]:
%pylab inline
from sklearn.metrics import mean_squared_error
import pandas as pd
import xgboost as xgb
from soln import expert_params
from soln.dataset import AllCategoricalsFeaturizer
from soln.dataset import generate_xv_splits
from soln.dataset import get_augmented_train_and_test_set
from soln.experts import get_predictions
from soln.experts import train_and_save_expert
from soln.experts import xv_eval_experts
from soln.utils import eval_model
from soln.utils import train_model
# Lift pandas' cap on displayed columns so wide frames are shown in full.
pd.options.display.max_columns = None
In [17]:
# Build the two datasets the rest of the notebook works on: aug_train_set feeds
# every training / evaluation call below, aug_test_set is only inspected later.
# %time reports how long the call takes.
%time aug_train_set, aug_test_set = get_augmented_train_and_test_set()
In [18]:
# Train the 'base' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
%time train_and_save_expert('base', aug_train_set)
In [19]:
%time tmp = xv_eval_experts(['base'], expert_params.base_get_indices, aug_train_set)
print "base on base_get_indices:"
print "test RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std'])
In [24]:
# Train the 'supplier66' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
# NOTE(review): this cell's execution count (24) is higher than that of the
# cells that follow it (21, 22), so it was last run after them; re-run the
# notebook top to bottom before trusting the numbers printed below.
%time train_and_save_expert('supplier66', aug_train_set)
In [21]:
# Evaluate ['base'] and then ['supplier66'] with the same
# supplier66_get_indices selector so the two RMSLE summaries are comparable.
for expert_names, heading in (
        (['base'], "base on supplier66_get_indices:"),
        (['supplier66'], "supplier66 on supplier66_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.supplier66_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [22]:
# Evaluate with base_get_indices twice: 'base' alone, then 'base' together
# with 'supplier66', to see what adding that expert changes.
for expert_names, heading in (
        (['base'], "base on base_get_indices:"),
        (['base', 'supplier66'], "base + supplier66 on base_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.base_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [25]:
# Train the 'supplier41' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
%time train_and_save_expert('supplier41', aug_train_set)
In [26]:
# Evaluate ['base'] and then ['supplier41'] with the same
# supplier41_get_indices selector so the two RMSLE summaries are comparable.
for expert_names, heading in (
        (['base'], "base on supplier41_get_indices:"),
        (['supplier41'], "supplier41 on supplier41_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.supplier41_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [27]:
# Evaluate with base_get_indices twice: 'base' alone, then 'base' together
# with 'supplier41', to see what adding that expert changes.
for expert_names, heading in (
        (['base'], "base on base_get_indices:"),
        (['base', 'supplier41'], "base + supplier41 on base_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.base_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [28]:
# Train the 'supplier72' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
%time train_and_save_expert('supplier72', aug_train_set)
In [29]:
# Evaluate ['base'] and then ['supplier72'] with the same
# supplier72_get_indices selector so the two RMSLE summaries are comparable.
for expert_names, heading in (
        (['base'], "base on supplier72_get_indices:"),
        (['supplier72'], "supplier72 on supplier72_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.supplier72_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [30]:
# Evaluate with base_get_indices twice: 'base' alone, then 'base' together
# with 'supplier72', to see what adding that expert changes.
for expert_names, heading in (
        (['base'], "base on base_get_indices:"),
        (['base', 'supplier72'], "base + supplier72 on base_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.base_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [31]:
# Train the 'supplier54' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
%time train_and_save_expert('supplier54', aug_train_set)
In [32]:
# Evaluate ['base'] and then ['supplier54'] with the same
# supplier54_get_indices selector so the two RMSLE summaries are comparable.
for expert_names, heading in (
        (['base'], "base on supplier54_get_indices:"),
        (['supplier54'], "supplier54 on supplier54_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.supplier54_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [33]:
# Evaluate with base_get_indices twice: 'base' alone, then 'base' together
# with 'supplier54', to see what adding that expert changes.
for expert_names, heading in (
        (['base'], "base on base_get_indices:"),
        (['base', 'supplier54'], "base + supplier54 on base_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.base_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [34]:
# Train the 'supplier26' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
%time train_and_save_expert('supplier26', aug_train_set)
In [35]:
# Evaluate ['base'] and then ['supplier26'] with the same
# supplier26_get_indices selector so the two RMSLE summaries are comparable.
for expert_names, heading in (
        (['base'], "base on supplier26_get_indices:"),
        (['supplier26'], "supplier26 on supplier26_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.supplier26_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [36]:
# Evaluate with base_get_indices twice: 'base' alone, then 'base' together
# with 'supplier26', to see what adding that expert changes.
for expert_names, heading in (
        (['base'], "base on base_get_indices:"),
        (['base', 'supplier26'], "base + supplier26 on base_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.base_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [37]:
# Train the 'supplier13' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
%time train_and_save_expert('supplier13', aug_train_set)
In [38]:
# Evaluate ['base'] and then ['supplier13'] with the same
# supplier13_get_indices selector so the two RMSLE summaries are comparable.
for expert_names, heading in (
        (['base'], "base on supplier13_get_indices:"),
        (['supplier13'], "supplier13 on supplier13_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.supplier13_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [39]:
# Evaluate with base_get_indices twice: 'base' alone, then 'base' together
# with 'supplier13', to see what adding that expert changes.
for expert_names, heading in (
        (['base'], "base on base_get_indices:"),
        (['base', 'supplier13'], "base + supplier13 on base_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.base_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [40]:
# Train the 'supplier58' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
%time train_and_save_expert('supplier58', aug_train_set)
In [41]:
# Evaluate ['base'] and then ['supplier58'] with the same
# supplier58_get_indices selector so the two RMSLE summaries are comparable.
for expert_names, heading in (
        (['base'], "base on supplier58_get_indices:"),
        (['supplier58'], "supplier58 on supplier58_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.supplier58_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [42]:
# Evaluate with base_get_indices twice: 'base' alone, then 'base' together
# with 'supplier58', to see what adding that expert changes.
for expert_names, heading in (
        (['base'], "base on base_get_indices:"),
        (['base', 'supplier58'], "base + supplier58 on base_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.base_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [43]:
# Train the 'supplier64' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
%time train_and_save_expert('supplier64', aug_train_set)
In [44]:
# Evaluate ['base'] and then ['supplier64'] with the same
# supplier64_get_indices selector so the two RMSLE summaries are comparable.
for expert_names, heading in (
        (['base'], "base on supplier64_get_indices:"),
        (['supplier64'], "supplier64 on supplier64_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.supplier64_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [45]:
# Evaluate with base_get_indices twice: 'base' alone, then 'base' together
# with 'supplier64', to see what adding that expert changes.
for expert_names, heading in (
        (['base'], "base on base_get_indices:"),
        (['base', 'supplier64'], "base + supplier64 on base_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.base_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [47]:
# Train the 'supplier62' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
%time train_and_save_expert('supplier62', aug_train_set)
In [48]:
# Evaluate ['base'] and then ['supplier62'] with the same
# supplier62_get_indices selector so the two RMSLE summaries are comparable.
for expert_names, heading in (
        (['base'], "base on supplier62_get_indices:"),
        (['supplier62'], "supplier62 on supplier62_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.supplier62_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [49]:
# Evaluate with base_get_indices twice: 'base' alone, then 'base' together
# with 'supplier62', to see what adding that expert changes.
for expert_names, heading in (
        (['base'], "base on base_get_indices:"),
        (['base', 'supplier62'], "base + supplier62 on base_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.base_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [50]:
# Train the 'uncommon_suppliers_1' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
%time train_and_save_expert('uncommon_suppliers_1', aug_train_set)
In [51]:
# Evaluate ['base'] and then ['uncommon_suppliers_1'] with the same
# uncommon_suppliers_1_get_indices selector so the two RMSLE summaries are
# comparable.
for expert_names, heading in (
        (['base'], "base on uncommon_suppliers_1_get_indices:"),
        (['uncommon_suppliers_1'], "uncommon_suppliers_1 on uncommon_suppliers_1_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.uncommon_suppliers_1_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [52]:
# Evaluate with base_get_indices twice: 'base' alone, then 'base' together
# with 'uncommon_suppliers_1', to see what adding that expert changes.
for expert_names, heading in (
        (['base'], "base on base_get_indices:"),
        (['base', 'uncommon_suppliers_1'], "base + uncommon_suppliers_1 on base_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.base_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [54]:
# Display the mean of the uncommon_suppliers_1 selector applied to the test set.
# NOTE(review): if the selector returns a boolean mask, this is the fraction of
# test rows the expert would cover -- confirm in soln.expert_params.
expert_params.uncommon_suppliers_1_get_indices(aug_test_set).mean()
Out[54]:
In [55]:
# Display the mean of the uncommon_suppliers_2 selector applied to the test set.
# NOTE(review): if the selector returns a boolean mask, this is the fraction of
# test rows the expert would cover -- confirm in soln.expert_params.
expert_params.uncommon_suppliers_2_get_indices(aug_test_set).mean()
Out[55]:
In [56]:
# Train the 'uncommon_suppliers_2' expert on the augmented training set (timed).
# NOTE(review): going by its name, train_and_save_expert presumably also
# persists the fitted model -- confirm in soln.experts.
%time train_and_save_expert('uncommon_suppliers_2', aug_train_set)
In [57]:
# Evaluate ['base'] and then ['uncommon_suppliers_2'] with the same
# uncommon_suppliers_2_get_indices selector so the two RMSLE summaries are
# comparable.
for expert_names, heading in (
        (['base'], "base on uncommon_suppliers_2_get_indices:"),
        (['uncommon_suppliers_2'], "uncommon_suppliers_2 on uncommon_suppliers_2_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.uncommon_suppliers_2_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [58]:
# Evaluate with base_get_indices twice: 'base' alone, then 'base' together
# with 'uncommon_suppliers_2', to see what adding that expert changes.
for expert_names, heading in (
        (['base'], "base on base_get_indices:"),
        (['base', 'uncommon_suppliers_2'], "base + uncommon_suppliers_2 on base_get_indices:"),
):
    tmp = xv_eval_experts(expert_names, expert_params.base_get_indices, aug_train_set)
    print(heading)
    print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))
In [ ]: