In [16]:
%pylab inline

from sklearn.metrics import mean_squared_error
import pandas as pd
import xgboost as xgb

from soln import expert_params
from soln.dataset import AllCategoricalsFeaturizer
from soln.dataset import generate_xv_splits
from soln.dataset import get_augmented_train_and_test_set
from soln.experts import get_predictions
from soln.experts import train_and_save_expert
from soln.experts import xv_eval_experts
from soln.utils import eval_model
from soln.utils import train_model

# Show every column when a DataFrame is displayed (None removes pandas' column limit).
pd.set_option('display.max_columns', None)


Populating the interactive namespace from numpy and matplotlib

In [17]:
# Build the augmented train and test sets that the cells below operate on;
# %time reports how long the call takes.
%time aug_train_set, aug_test_set = get_augmented_train_and_test_set()


CPU times: user 13.4 s, sys: 8 ms, total: 13.4 s
Wall time: 13.5 s

In [18]:
# Train the 'base' expert on the augmented training set. The recorded output
# shows 10 folds (0-9), each saved under experts/base/<fold>.
%time train_and_save_expert('base', aug_train_set)


Training base...
fold 0...
  -> saving to experts/base/0
fold 1...
  -> saving to experts/base/1
fold 2...
  -> saving to experts/base/2
fold 3...
  -> saving to experts/base/3
fold 4...
  -> saving to experts/base/4
fold 5...
  -> saving to experts/base/5
fold 6...
  -> saving to experts/base/6
fold 7...
  -> saving to experts/base/7
fold 8...
  -> saving to experts/base/8
fold 9...
  -> saving to experts/base/9
CPU times: user 25min 55s, sys: 8.58 s, total: 26min 4s
Wall time: 14min 50s

In [19]:
%time tmp = xv_eval_experts(['base'], expert_params.base_get_indices, aug_train_set)
print "base on base_get_indices:"
print "test RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std'])


CPU times: user 10.6 s, sys: 88 ms, total: 10.6 s
Wall time: 7.75 s
base on base_get_indices:
test RMSLE avg 0.223523998772 std 0.0161170838101

In [24]:
# Train the 'supplier66' expert. In the recorded run every fold (0-9) was
# skipped because experts/supplier66/<fold> already existed, so nothing was
# retrained and the timing below does not reflect training cost.
%time train_and_save_expert('supplier66', aug_train_set)


Training supplier66...
fold 0...
  -> skipping because experts/supplier66/0 exists
fold 1...
  -> skipping because experts/supplier66/1 exists
fold 2...
  -> skipping because experts/supplier66/2 exists
fold 3...
  -> skipping because experts/supplier66/3 exists
fold 4...
  -> skipping because experts/supplier66/4 exists
fold 5...
  -> skipping because experts/supplier66/5 exists
fold 6...
  -> skipping because experts/supplier66/6 exists
fold 7...
  -> skipping because experts/supplier66/7 exists
fold 8...
  -> skipping because experts/supplier66/8 exists
fold 9...
  -> skipping because experts/supplier66/9 exists
CPU times: user 732 ms, sys: 12 ms, total: 744 ms
Wall time: 809 ms

In [21]:
# Score the 'base' expert and the dedicated 'supplier66' expert on the same
# subset, the one given by expert_params.supplier66_get_indices (presumably a
# row selector -- confirm in soln.expert_params).
tmp = xv_eval_experts(
    ['base'],
    expert_params.supplier66_get_indices,
    aug_train_set)
print("base on supplier66_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['supplier66'],
    expert_params.supplier66_get_indices,
    aug_train_set)
print("supplier66 on supplier66_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on supplier66_get_indices:
RMSLE avg 0.178588691492 std 0.0194095090042
supplier66 on supplier66_get_indices:
RMSLE avg 0.174437162019 std 0.0186045491317

In [22]:
# Reference run with 'base' alone, then 'base' together with 'supplier66',
# both on base_get_indices. How the two experts are combined is decided
# inside xv_eval_experts (not visible in this notebook).
tmp = xv_eval_experts(
    ['base'],
    expert_params.base_get_indices,
    aug_train_set)
print("base on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['base', 'supplier66'],
    expert_params.base_get_indices,
    aug_train_set)
print("base + supplier66 on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on base_get_indices:
RMSLE avg 0.223523998772 std 0.0161170838101
base + supplier66 on base_get_indices:
RMSLE avg 0.22134058227 std 0.0147758417817

In [25]:
# Train the 'supplier41' expert. In the recorded run every fold (0-9) was
# skipped because experts/supplier41/<fold> already existed, so nothing was
# retrained and the timing below does not reflect training cost.
%time train_and_save_expert('supplier41', aug_train_set)


Training supplier41...
fold 0...
  -> skipping because experts/supplier41/0 exists
fold 1...
  -> skipping because experts/supplier41/1 exists
fold 2...
  -> skipping because experts/supplier41/2 exists
fold 3...
  -> skipping because experts/supplier41/3 exists
fold 4...
  -> skipping because experts/supplier41/4 exists
fold 5...
  -> skipping because experts/supplier41/5 exists
fold 6...
  -> skipping because experts/supplier41/6 exists
fold 7...
  -> skipping because experts/supplier41/7 exists
fold 8...
  -> skipping because experts/supplier41/8 exists
fold 9...
  -> skipping because experts/supplier41/9 exists
CPU times: user 732 ms, sys: 0 ns, total: 732 ms
Wall time: 754 ms

In [26]:
# Score the 'base' expert and the dedicated 'supplier41' expert on the same
# subset, the one given by expert_params.supplier41_get_indices (presumably a
# row selector -- confirm in soln.expert_params).
tmp = xv_eval_experts(
    ['base'],
    expert_params.supplier41_get_indices,
    aug_train_set)
print("base on supplier41_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['supplier41'],
    expert_params.supplier41_get_indices,
    aug_train_set)
print("supplier41 on supplier41_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on supplier41_get_indices:
RMSLE avg 0.163396060187 std 0.0134416351528
supplier41 on supplier41_get_indices:
RMSLE avg 0.143444397918 std 0.0149992580948

In [27]:
# Reference run with 'base' alone, then 'base' together with 'supplier41',
# both on base_get_indices. How the two experts are combined is decided
# inside xv_eval_experts (not visible in this notebook).
tmp = xv_eval_experts(
    ['base'],
    expert_params.base_get_indices,
    aug_train_set)
print("base on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['base', 'supplier41'],
    expert_params.base_get_indices,
    aug_train_set)
print("base + supplier41 on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on base_get_indices:
RMSLE avg 0.223523998772 std 0.0161170838101
base + supplier41 on base_get_indices:
RMSLE avg 0.222003568033 std 0.0161722028712

In [28]:
# Train the 'supplier72' expert on the augmented training set. The recorded
# output shows 10 folds (0-9), each saved under experts/supplier72/<fold>.
%time train_and_save_expert('supplier72', aug_train_set)


Training supplier72...
fold 0...
  -> saving to experts/supplier72/0
fold 1...
  -> saving to experts/supplier72/1
fold 2...
  -> saving to experts/supplier72/2
fold 3...
  -> saving to experts/supplier72/3
fold 4...
  -> saving to experts/supplier72/4
fold 5...
  -> saving to experts/supplier72/5
fold 6...
  -> saving to experts/supplier72/6
fold 7...
  -> saving to experts/supplier72/7
fold 8...
  -> saving to experts/supplier72/8
fold 9...
  -> saving to experts/supplier72/9
CPU times: user 2min 33s, sys: 700 ms, total: 2min 34s
Wall time: 1min 34s

In [29]:
# Score the 'base' expert and the dedicated 'supplier72' expert on the same
# subset, the one given by expert_params.supplier72_get_indices (presumably a
# row selector -- confirm in soln.expert_params).
tmp = xv_eval_experts(
    ['base'],
    expert_params.supplier72_get_indices,
    aug_train_set)
print("base on supplier72_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['supplier72'],
    expert_params.supplier72_get_indices,
    aug_train_set)
print("supplier72 on supplier72_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on supplier72_get_indices:
RMSLE avg 0.281759076081 std 0.0171937181317
supplier72 on supplier72_get_indices:
RMSLE avg 0.267424776038 std 0.0184438028293

In [30]:
# Reference run with 'base' alone, then 'base' together with 'supplier72',
# both on base_get_indices. How the two experts are combined is decided
# inside xv_eval_experts (not visible in this notebook).
tmp = xv_eval_experts(
    ['base'],
    expert_params.base_get_indices,
    aug_train_set)
print("base on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['base', 'supplier72'],
    expert_params.base_get_indices,
    aug_train_set)
print("base + supplier72 on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on base_get_indices:
RMSLE avg 0.223523998772 std 0.0161170838101
base + supplier72 on base_get_indices:
RMSLE avg 0.22220707573 std 0.0156347836814

In [31]:
# Train the 'supplier54' expert on the augmented training set. The recorded
# output shows 10 folds (0-9), each saved under experts/supplier54/<fold>.
%time train_and_save_expert('supplier54', aug_train_set)


Training supplier54...
fold 0...
  -> saving to experts/supplier54/0
fold 1...
  -> saving to experts/supplier54/1
fold 2...
  -> saving to experts/supplier54/2
fold 3...
  -> saving to experts/supplier54/3
fold 4...
  -> saving to experts/supplier54/4
fold 5...
  -> saving to experts/supplier54/5
fold 6...
  -> saving to experts/supplier54/6
fold 7...
  -> saving to experts/supplier54/7
fold 8...
  -> saving to experts/supplier54/8
fold 9...
  -> saving to experts/supplier54/9
CPU times: user 54.1 s, sys: 328 ms, total: 54.4 s
Wall time: 33.9 s

In [32]:
# Score the 'base' expert and the dedicated 'supplier54' expert on the same
# subset, the one given by expert_params.supplier54_get_indices (presumably a
# row selector -- confirm in soln.expert_params).
tmp = xv_eval_experts(
    ['base'],
    expert_params.supplier54_get_indices,
    aug_train_set)
print("base on supplier54_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['supplier54'],
    expert_params.supplier54_get_indices,
    aug_train_set)
print("supplier54 on supplier54_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on supplier54_get_indices:
RMSLE avg 0.196001976694 std 0.0288157042557
supplier54 on supplier54_get_indices:
RMSLE avg 0.174941158883 std 0.043814579223

In [33]:
# Reference run with 'base' alone, then 'base' together with 'supplier54',
# both on base_get_indices. How the two experts are combined is decided
# inside xv_eval_experts (not visible in this notebook).
tmp = xv_eval_experts(
    ['base'],
    expert_params.base_get_indices,
    aug_train_set)
print("base on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['base', 'supplier54'],
    expert_params.base_get_indices,
    aug_train_set)
print("base + supplier54 on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on base_get_indices:
RMSLE avg 0.223523998772 std 0.0161170838101
base + supplier54 on base_get_indices:
RMSLE avg 0.223053005283 std 0.0166112166009

In [34]:
# Train the 'supplier26' expert on the augmented training set. The recorded
# output shows 10 folds (0-9), each saved under experts/supplier26/<fold>.
%time train_and_save_expert('supplier26', aug_train_set)


Training supplier26...
fold 0...
  -> saving to experts/supplier26/0
fold 1...
  -> saving to experts/supplier26/1
fold 2...
  -> saving to experts/supplier26/2
fold 3...
  -> saving to experts/supplier26/3
fold 4...
  -> saving to experts/supplier26/4
fold 5...
  -> saving to experts/supplier26/5
fold 6...
  -> saving to experts/supplier26/6
fold 7...
  -> saving to experts/supplier26/7
fold 8...
  -> saving to experts/supplier26/8
fold 9...
  -> saving to experts/supplier26/9
CPU times: user 51.8 s, sys: 404 ms, total: 52.2 s
Wall time: 31.3 s

In [35]:
# Score the 'base' expert and the dedicated 'supplier26' expert on the same
# subset, the one given by expert_params.supplier26_get_indices (presumably a
# row selector -- confirm in soln.expert_params).
tmp = xv_eval_experts(
    ['base'],
    expert_params.supplier26_get_indices,
    aug_train_set)
print("base on supplier26_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['supplier26'],
    expert_params.supplier26_get_indices,
    aug_train_set)
print("supplier26 on supplier26_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on supplier26_get_indices:
RMSLE avg 0.452715240586 std 0.0607581643206
supplier26 on supplier26_get_indices:
RMSLE avg 0.472837940468 std 0.0922499875918

In [36]:
# Reference run with 'base' alone, then 'base' together with 'supplier26',
# both on base_get_indices. How the two experts are combined is decided
# inside xv_eval_experts (not visible in this notebook).
tmp = xv_eval_experts(
    ['base'],
    expert_params.base_get_indices,
    aug_train_set)
print("base on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['base', 'supplier26'],
    expert_params.base_get_indices,
    aug_train_set)
print("base + supplier26 on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on base_get_indices:
RMSLE avg 0.223523998772 std 0.0161170838101
base + supplier26 on base_get_indices:
RMSLE avg 0.22489079128 std 0.0173128430875

In [37]:
# Train the 'supplier13' expert on the augmented training set. The recorded
# output shows 10 folds (0-9), each saved under experts/supplier13/<fold>.
%time train_and_save_expert('supplier13', aug_train_set)


Training supplier13...
fold 0...
  -> saving to experts/supplier13/0
fold 1...
  -> saving to experts/supplier13/1
fold 2...
  -> saving to experts/supplier13/2
fold 3...
  -> saving to experts/supplier13/3
fold 4...
  -> saving to experts/supplier13/4
fold 5...
  -> saving to experts/supplier13/5
fold 6...
  -> saving to experts/supplier13/6
fold 7...
  -> saving to experts/supplier13/7
fold 8...
  -> saving to experts/supplier13/8
fold 9...
  -> saving to experts/supplier13/9
CPU times: user 42.4 s, sys: 356 ms, total: 42.7 s
Wall time: 26 s

In [38]:
# Score the 'base' expert and the dedicated 'supplier13' expert on the same
# subset, the one given by expert_params.supplier13_get_indices (presumably a
# row selector -- confirm in soln.expert_params).
tmp = xv_eval_experts(
    ['base'],
    expert_params.supplier13_get_indices,
    aug_train_set)
print("base on supplier13_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['supplier13'],
    expert_params.supplier13_get_indices,
    aug_train_set)
print("supplier13 on supplier13_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on supplier13_get_indices:
RMSLE avg 0.316476428517 std 0.0607323134667
supplier13 on supplier13_get_indices:
RMSLE avg 0.299190107795 std 0.0657662232869

In [39]:
# Reference run with 'base' alone, then 'base' together with 'supplier13',
# both on base_get_indices. How the two experts are combined is decided
# inside xv_eval_experts (not visible in this notebook).
tmp = xv_eval_experts(
    ['base'],
    expert_params.base_get_indices,
    aug_train_set)
print("base on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['base', 'supplier13'],
    expert_params.base_get_indices,
    aug_train_set)
print("base + supplier13 on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on base_get_indices:
RMSLE avg 0.223523998772 std 0.0161170838101
base + supplier13 on base_get_indices:
RMSLE avg 0.223086949093 std 0.0165452595306

In [40]:
# Train the 'supplier58' expert on the augmented training set. The recorded
# output shows 10 folds (0-9), each saved under experts/supplier58/<fold>.
%time train_and_save_expert('supplier58', aug_train_set)


Training supplier58...
fold 0...
  -> saving to experts/supplier58/0
fold 1...
  -> saving to experts/supplier58/1
fold 2...
  -> saving to experts/supplier58/2
fold 3...
  -> saving to experts/supplier58/3
fold 4...
  -> saving to experts/supplier58/4
fold 5...
  -> saving to experts/supplier58/5
fold 6...
  -> saving to experts/supplier58/6
fold 7...
  -> saving to experts/supplier58/7
fold 8...
  -> saving to experts/supplier58/8
fold 9...
  -> saving to experts/supplier58/9
CPU times: user 42 s, sys: 352 ms, total: 42.4 s
Wall time: 26.4 s

In [41]:
# Score the 'base' expert and the dedicated 'supplier58' expert on the same
# subset, the one given by expert_params.supplier58_get_indices (presumably a
# row selector -- confirm in soln.expert_params).
tmp = xv_eval_experts(
    ['base'],
    expert_params.supplier58_get_indices,
    aug_train_set)
print("base on supplier58_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['supplier58'],
    expert_params.supplier58_get_indices,
    aug_train_set)
print("supplier58 on supplier58_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on supplier58_get_indices:
RMSLE avg 0.322744111228 std 0.101481522894
supplier58 on supplier58_get_indices:
RMSLE avg 0.316461749096 std 0.131210662746

In [42]:
# Reference run with 'base' alone, then 'base' together with 'supplier58',
# both on base_get_indices. How the two experts are combined is decided
# inside xv_eval_experts (not visible in this notebook).
tmp = xv_eval_experts(
    ['base'],
    expert_params.base_get_indices,
    aug_train_set)
print("base on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['base', 'supplier58'],
    expert_params.base_get_indices,
    aug_train_set)
print("base + supplier58 on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on base_get_indices:
RMSLE avg 0.223523998772 std 0.0161170838101
base + supplier58 on base_get_indices:
RMSLE avg 0.223558939634 std 0.0171433485861

In [43]:
# Train the 'supplier64' expert on the augmented training set. The recorded
# output shows 10 folds (0-9), each saved under experts/supplier64/<fold>.
%time train_and_save_expert('supplier64', aug_train_set)


Training supplier64...
fold 0...
  -> saving to experts/supplier64/0
fold 1...
  -> saving to experts/supplier64/1
fold 2...
  -> saving to experts/supplier64/2
fold 3...
  -> saving to experts/supplier64/3
fold 4...
  -> saving to experts/supplier64/4
fold 5...
  -> saving to experts/supplier64/5
fold 6...
  -> saving to experts/supplier64/6
fold 7...
  -> saving to experts/supplier64/7
fold 8...
  -> saving to experts/supplier64/8
fold 9...
  -> saving to experts/supplier64/9
CPU times: user 36.1 s, sys: 296 ms, total: 36.4 s
Wall time: 22.5 s

In [44]:
# Score the 'base' expert and the dedicated 'supplier64' expert on the same
# subset, the one given by expert_params.supplier64_get_indices (presumably a
# row selector -- confirm in soln.expert_params).
tmp = xv_eval_experts(
    ['base'],
    expert_params.supplier64_get_indices,
    aug_train_set)
print("base on supplier64_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['supplier64'],
    expert_params.supplier64_get_indices,
    aug_train_set)
print("supplier64 on supplier64_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on supplier64_get_indices:
RMSLE avg 0.376089336046 std 0.142983989166
supplier64 on supplier64_get_indices:
RMSLE avg 0.396000073661 std 0.133008152011

In [45]:
# Reference run with 'base' alone, then 'base' together with 'supplier64',
# both on base_get_indices. How the two experts are combined is decided
# inside xv_eval_experts (not visible in this notebook).
tmp = xv_eval_experts(
    ['base'],
    expert_params.base_get_indices,
    aug_train_set)
print("base on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['base', 'supplier64'],
    expert_params.base_get_indices,
    aug_train_set)
print("base + supplier64 on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on base_get_indices:
RMSLE avg 0.223523998772 std 0.0161170838101
base + supplier64 on base_get_indices:
RMSLE avg 0.223941029907 std 0.0162087745856

In [47]:
# Train the 'supplier62' expert on the augmented training set. The recorded
# output shows 10 folds (0-9), each saved under experts/supplier62/<fold>.
%time train_and_save_expert('supplier62', aug_train_set)


Training supplier62...
fold 0...
  -> saving to experts/supplier62/0
fold 1...
  -> saving to experts/supplier62/1
fold 2...
  -> saving to experts/supplier62/2
fold 3...
  -> saving to experts/supplier62/3
fold 4...
  -> saving to experts/supplier62/4
fold 5...
  -> saving to experts/supplier62/5
fold 6...
  -> saving to experts/supplier62/6
fold 7...
  -> saving to experts/supplier62/7
fold 8...
  -> saving to experts/supplier62/8
fold 9...
  -> saving to experts/supplier62/9
CPU times: user 22.5 s, sys: 336 ms, total: 22.8 s
Wall time: 14.4 s

In [48]:
# Score the 'base' expert and the dedicated 'supplier62' expert on the same
# subset, the one given by expert_params.supplier62_get_indices (presumably a
# row selector -- confirm in soln.expert_params).
tmp = xv_eval_experts(
    ['base'],
    expert_params.supplier62_get_indices,
    aug_train_set)
print("base on supplier62_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['supplier62'],
    expert_params.supplier62_get_indices,
    aug_train_set)
print("supplier62 on supplier62_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on supplier62_get_indices:
RMSLE avg 0.475676609874 std 0.0754847461796
supplier62 on supplier62_get_indices:
RMSLE avg 0.561581751886 std 0.120933813703

In [49]:
# Reference run with 'base' alone, then 'base' together with 'supplier62',
# both on base_get_indices. How the two experts are combined is decided
# inside xv_eval_experts (not visible in this notebook).
tmp = xv_eval_experts(
    ['base'],
    expert_params.base_get_indices,
    aug_train_set)
print("base on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['base', 'supplier62'],
    expert_params.base_get_indices,
    aug_train_set)
print("base + supplier62 on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on base_get_indices:
RMSLE avg 0.223523998772 std 0.0161170838101
base + supplier62 on base_get_indices:
RMSLE avg 0.22545096456 std 0.0170530155014

In [50]:
# Train the 'uncommon_suppliers_1' expert on the augmented training set. The
# recorded output shows 10 folds (0-9), each saved under
# experts/uncommon_suppliers_1/<fold>.
%time train_and_save_expert('uncommon_suppliers_1', aug_train_set)


Training uncommon_suppliers_1...
fold 0...
  -> saving to experts/uncommon_suppliers_1/0
fold 1...
  -> saving to experts/uncommon_suppliers_1/1
fold 2...
  -> saving to experts/uncommon_suppliers_1/2
fold 3...
  -> saving to experts/uncommon_suppliers_1/3
fold 4...
  -> saving to experts/uncommon_suppliers_1/4
fold 5...
  -> saving to experts/uncommon_suppliers_1/5
fold 6...
  -> saving to experts/uncommon_suppliers_1/6
fold 7...
  -> saving to experts/uncommon_suppliers_1/7
fold 8...
  -> saving to experts/uncommon_suppliers_1/8
fold 9...
  -> saving to experts/uncommon_suppliers_1/9
CPU times: user 3min 27s, sys: 844 ms, total: 3min 28s
Wall time: 2min 5s

In [51]:
# Score the 'base' expert and the dedicated 'uncommon_suppliers_1' expert on
# the same subset, the one given by
# expert_params.uncommon_suppliers_1_get_indices (presumably a row selector --
# confirm in soln.expert_params).
tmp = xv_eval_experts(
    ['base'],
    expert_params.uncommon_suppliers_1_get_indices,
    aug_train_set)
print("base on uncommon_suppliers_1_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['uncommon_suppliers_1'],
    expert_params.uncommon_suppliers_1_get_indices,
    aug_train_set)
print("uncommon_suppliers_1 on uncommon_suppliers_1_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on uncommon_suppliers_1_get_indices:
RMSLE avg 0.413210373315 std 0.0374128227102
uncommon_suppliers_1 on uncommon_suppliers_1_get_indices:
RMSLE avg 0.414015992514 std 0.0366362355741

In [52]:
# Reference run with 'base' alone, then 'base' together with
# 'uncommon_suppliers_1', both on base_get_indices. How the two experts are
# combined is decided inside xv_eval_experts (not visible in this notebook).
tmp = xv_eval_experts(
    ['base'],
    expert_params.base_get_indices,
    aug_train_set)
print("base on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['base', 'uncommon_suppliers_1'],
    expert_params.base_get_indices,
    aug_train_set)
print("base + uncommon_suppliers_1 on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on base_get_indices:
RMSLE avg 0.223523998772 std 0.0161170838101
base + uncommon_suppliers_1 on base_get_indices:
RMSLE avg 0.223712620133 std 0.0158303194415

In [54]:
# Mean of the uncommon_suppliers_1 selector applied to the test set.
# NOTE(review): presumably the selector is a boolean mask, which would make
# this the fraction of test rows the expert covers -- confirm in
# soln.expert_params.
expert_params.uncommon_suppliers_1_get_indices(aug_test_set).mean()


Out[54]:
0.10792128328096577

In [55]:
# Mean of the uncommon_suppliers_2 selector applied to the test set.
# NOTE(review): presumably the selector is a boolean mask, which would make
# this the fraction of test rows the expert covers -- confirm in
# soln.expert_params.
expert_params.uncommon_suppliers_2_get_indices(aug_test_set).mean()


Out[55]:
0.044451794278154454

In [56]:
# Train the 'uncommon_suppliers_2' expert on the augmented training set. The
# recorded output shows 10 folds (0-9), each saved under
# experts/uncommon_suppliers_2/<fold>.
%time train_and_save_expert('uncommon_suppliers_2', aug_train_set)


Training uncommon_suppliers_2...
fold 0...
  -> saving to experts/uncommon_suppliers_2/0
fold 1...
  -> saving to experts/uncommon_suppliers_2/1
fold 2...
  -> saving to experts/uncommon_suppliers_2/2
fold 3...
  -> saving to experts/uncommon_suppliers_2/3
fold 4...
  -> saving to experts/uncommon_suppliers_2/4
fold 5...
  -> saving to experts/uncommon_suppliers_2/5
fold 6...
  -> saving to experts/uncommon_suppliers_2/6
fold 7...
  -> saving to experts/uncommon_suppliers_2/7
fold 8...
  -> saving to experts/uncommon_suppliers_2/8
fold 9...
  -> saving to experts/uncommon_suppliers_2/9
CPU times: user 1min 32s, sys: 468 ms, total: 1min 32s
Wall time: 54.5 s

In [57]:
# Score the 'base' expert and the dedicated 'uncommon_suppliers_2' expert on
# the same subset, the one given by
# expert_params.uncommon_suppliers_2_get_indices (presumably a row selector --
# confirm in soln.expert_params).
tmp = xv_eval_experts(
    ['base'],
    expert_params.uncommon_suppliers_2_get_indices,
    aug_train_set)
print("base on uncommon_suppliers_2_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['uncommon_suppliers_2'],
    expert_params.uncommon_suppliers_2_get_indices,
    aug_train_set)
print("uncommon_suppliers_2 on uncommon_suppliers_2_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on uncommon_suppliers_2_get_indices:
RMSLE avg 0.446594856972 std 0.0583481619269
uncommon_suppliers_2 on uncommon_suppliers_2_get_indices:
RMSLE avg 0.468371800266 std 0.0639988972849

In [58]:
# Reference run with 'base' alone, then 'base' together with
# 'uncommon_suppliers_2', both on base_get_indices. How the two experts are
# combined is decided inside xv_eval_experts (not visible in this notebook).
tmp = xv_eval_experts(
    ['base'],
    expert_params.base_get_indices,
    aug_train_set)
print("base on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))

tmp = xv_eval_experts(
    ['base', 'uncommon_suppliers_2'],
    expert_params.base_get_indices,
    aug_train_set)
print("base + uncommon_suppliers_2 on base_get_indices:")
print("RMSLE avg {} std {}".format(tmp['rmsle_avg'], tmp['rmsle_std']))


base on base_get_indices:
RMSLE avg 0.223523998772 std 0.0161170838101
base + uncommon_suppliers_2 on base_get_indices:
RMSLE avg 0.225722687125 std 0.0155113430071

In [ ]: