In [15]:
%pylab inline
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import pandas as pd
from soln.dataset import AllCategoricalsFeaturizer
from soln.dataset import generate_xv_splits
from soln.dataset import get_augmented_train_and_test_set
from soln.utils import dump_decision_tree
from soln.utils import eval_regressor
from soln.utils import print_feature_importances
Populating the interactive namespace from numpy and matplotlib
In [2]:
%time aug_train_set, aug_test_set = get_augmented_train_and_test_set()
CPU times: user 13.7 s, sys: 148 ms, total: 13.8 s
Wall time: 14 s
In [3]:
from itertools import islice
fold_number = 0
%time X_train, y_train, X_test, y_test = next(islice(generate_xv_splits(aug_train_set), fold_number, None))
print X_train.shape, y_train.shape, X_test.shape, y_test.shape
CPU times: user 104 ms, sys: 24 ms, total: 128 ms
Wall time: 132 ms
(27270, 53) (27270,) (2943, 53) (2943,)
In [4]:
featurizer = AllCategoricalsFeaturizer()
%time featurizer.fit(X_train)
%time X_train_feats = featurizer.transform(X_train)
%time X_test_feats = featurizer.transform(X_test)
X_train_feats.info(verbose=True)
%time X_train_np = X_train_feats.astype(np.float).values
y_train_np = y_train.values
%time X_test_np = X_test_feats.astype(np.float).values
y_test_np = y_test.values
CPU times: user 1.58 s, sys: 0 ns, total: 1.58 s
Wall time: 1.6 s
CPU times: user 1.34 s, sys: 300 ms, total: 1.64 s
Wall time: 1.65 s
CPU times: user 172 ms, sys: 0 ns, total: 172 ms
Wall time: 176 ms
<class 'pandas.core.frame.DataFrame'>
Int64Index: 27270 entries, 0 to 27269
Data columns (total 599 columns):
annual_usage int64
min_order_quantity int64
bracket_pricing bool
quantity int64
diameter float64
wall_thickness float64
length float64
num_bends int64
bend_radius float64
end_a_1x bool
end_a_2x bool
end_x_1x bool
end_x_2x bool
num_boss int64
num_bracket int64
num_other int64
quote_age float64
adj_quantity int64
adj_bracketing bool
physical_volume float64
inner_radius float64
material_volume float64
end_a_forming bool
end_1x_count int64
end_x_forming bool
end_2x_count int64
end_forming_count int64
unique_feature_count float64
orientation_count float64
groove_count float64
total_component_weight float64
component_max_length float64
component_max_overall_length float64
component_max_bolt_pattern_wide float64
component_max_bolt_pattern_long float64
component_max_thickness float64
component_min_thread_pitch float64
component_min_thread_size float64
supplier XXX_other float64
supplier S-0042 float64
supplier S-0005 float64
supplier S-0026 float64
supplier S-0027 float64
supplier S-0072 float64
supplier S-0062 float64
supplier S-0064 float64
supplier S-0043 float64
supplier S-0066 float64
supplier S-0041 float64
supplier S-0105 float64
supplier S-0080 float64
supplier S-0081 float64
supplier S-0104 float64
supplier S-0013 float64
supplier S-0014 float64
supplier S-0070 float64
supplier S-0031 float64
supplier S-0030 float64
supplier S-0058 float64
supplier S-0054 float64
material_id XXX_other float64
material_id nan float64
material_id SP-0046 float64
material_id SP-0041 float64
material_id SP-0033 float64
material_id SP-0048 float64
material_id SP-0034 float64
material_id SP-0035 float64
material_id SP-0036 float64
material_id SP-0037 float64
material_id SP-0030 float64
material_id SP-0019 float64
material_id SP-0008 float64
material_id SP-0038 float64
material_id SP-0039 float64
material_id SP-0029 float64
material_id SP-0028 float64
end_a XXX_other float64
end_a EF-005 float64
end_a NONE float64
end_a EF-002 float64
end_a EF-003 float64
end_a EF-008 float64
end_a EF-009 float64
end_a EF-023 float64
end_a EF-021 float64
end_a EF-013 float64
end_a EF-012 float64
end_a EF-017 float64
end_a EF-016 float64
end_a EF-015 float64
end_a EF-019 float64
end_a EF-018 float64
end_x XXX_other float64
end_x NONE float64
end_x EF-002 float64
end_x EF-003 float64
end_x EF-008 float64
end_x EF-009 float64
end_x EF-023 float64
end_x EF-021 float64
end_x EF-006 float64
end_x EF-013 float64
end_x EF-012 float64
end_x EF-010 float64
end_x EF-017 float64
end_x EF-016 float64
end_x EF-015 float64
end_x EF-019 float64
end_x EF-018 float64
specs XXX_other float64
specs SP-0065 float64
specs SP-0002 float64
specs SP-0050 float64
specs SP-0051 float64
specs SP-0057 float64
specs SP-0025 float64
specs SP-0058 float64
specs SP-0079 float64
specs SP-0024 float64
specs SP-0070 float64
specs SP-0017 float64
specs SP-0072 float64
specs SP-0016 float64
specs SP-0012 float64
specs SP-0013 float64
specs SP-0076 float64
specs SP-0022 float64
specs SP-0021 float64
specs SP-0063 float64
specs SP-0071 float64
specs SP-0088 float64
specs SP-0080 float64
specs SP-0082 float64
specs SP-0062 float64
specs SP-0010 float64
specs SP-0075 float64
specs SP-0026 float64
specs SP-0069 float64
specs SP-0068 float64
specs SP-0005 float64
specs SP-0004 float64
specs SP-0007 float64
specs SP-0009 float64
specs SP-0061 float64
specs SP-0067 float64
specs SP-0029 float64
components XXX_other float64
components C-1653 float64
components C-0218 float64
components C-0217 float64
components C-0215 float64
components C-0214 float64
components C-0211 float64
components C-0210 float64
components C-1867 float64
components C-1860 float64
components C-1869 float64
components C-0063 float64
components C-1229 float64
components C-1677 float64
components C-1898 float64
components C-1355 float64
components C-1354 float64
components C-1352 float64
components C-1670 float64
components C-0122 float64
components C-0071 float64
components C-1017 float64
components C-0250 float64
components C-0318 float64
components C-1779 float64
components C-0422 float64
components C-1821 float64
components C-0855 float64
components C-0550 float64
components C-0494 float64
components C-0228 float64
components C-0051 float64
components C-1914 float64
components C-1910 float64
components C-1672 float64
components C-1850 float64
components C-1313 float64
components C-1312 float64
components C-1619 float64
components C-0577 float64
components C-1533 float64
components C-1536 float64
components C-0133 float64
components C-0134 float64
components C-1405 float64
components C-1625 float64
components C-1624 float64
components C-1627 float64
components C-1626 float64
components C-1621 float64
components C-1620 float64
components C-1623 float64
components C-1622 float64
components C-1743 float64
components C-1629 float64
components C-1628 float64
components C-1745 float64
components C-2030 float64
components C-0048 float64
components C-1650 float64
components C-0045 float64
components C-1663 float64
components C-0599 float64
components C-0616 float64
components C-1817 float64
components C-1956 float64
components C-1954 float64
components C-1218 float64
components C-1889 float64
components C-1445 float64
components C-1881 float64
components C-1880 float64
components C-1885 float64
components C-1369 float64
components C-1541 float64
components C-1547 float64
components C-1661 float64
components C-1660 float64
components C-0095 float64
components C-0002 float64
components C-0003 float64
components C-0001 float64
components C-0007 float64
components C-0004 float64
components C-0165 float64
components C-1714 float64
components C-1716 float64
components C-1711 float64
components C-1718 float64
components C-1866 float64
components C-2043 float64
components C-1781 float64
components C-1715 float64
components C-0434 float64
components C-0539 float64
components C-0544 float64
components C-0548 float64
components C-1963 float64
components C-1848 float64
components C-1845 float64
components C-1846 float64
components C-1244 float64
components C-1243 float64
components C-1242 float64
components C-0579 float64
components C-0102 float64
components C-1459 float64
components C-0120 float64
components C-1183 float64
components C-1439 float64
components C-1430 float64
components C-1434 float64
components C-1435 float64
components C-1758 float64
components C-1614 float64
components C-1615 float64
components C-1750 float64
components C-2008 float64
components C-2004 float64
components C-2005 float64
components C-2006 float64
components C-2001 float64
components C-2002 float64
components C-2003 float64
components C-0699 float64
components C-0751 float64
components C-1505 float64
components C-2032 float64
components C-0058 float64
components C-1502 float64
components C-0057 float64
components C-0199 float64
components C-0052 float64
components C-0674 float64
components C-1577 float64
components C-0826 float64
components C-0823 float64
components C-1873 float64
components C-0580 float64
components C-0401 float64
components C-0275 float64
components C-1808 float64
components C-0409 float64
components C-1209 float64
components C-1208 float64
components C-1203 float64
components C-1200 float64
components C-1206 float64
components C-1205 float64
components C-0473 float64
components C-1386 float64
components C-1385 float64
components C-1936 float64
components C-1375 float64
components C-1374 float64
components C-1373 float64
components C-1475 float64
components C-1476 float64
components C-1477 float64
components C-1555 float64
components C-1554 float64
components C-0389 float64
components C-0388 float64
components C-1724 float64
components C-1725 float64
components C-1727 float64
components C-1728 float64
components C-0333 float64
components C-1654 float64
components C-1655 float64
components C-1651 float64
components C-1652 float64
components C-1658 float64
components C-1659 float64
components C-0703 float64
components C-0448 float64
components C-0449 float64
components C-0520 float64
components C-0444 float64
components C-0445 float64
components C-0208 float64
components C-0209 float64
components C-1970 float64
components C-1976 float64
components C-1877 float64
components C-1233 float64
components C-1230 float64
components C-1231 float64
components C-1235 float64
components C-1332 float64
components C-0227 float64
components C-1425 float64
components C-1421 float64
components C-1420 float64
components C-1586 float64
components C-1428 float64
components C-1348 float64
components C-1349 float64
components C-1344 float64
components C-1345 float64
components C-1343 float64
components C-1565 float64
components C-2019 float64
components C-2017 float64
components C-1630 float64
components C-0062 float64
components C-0562 float64
components C-0244 float64
components C-1761 float64
components C-0369 float64
components C-1768 float64
components C-0844 float64
components C-1841 float64
components C-1398 float64
components C-1908 float64
components C-1909 float64
components C-1901 float64
components C-1906 float64
components C-1907 float64
components C-1998 float64
components C-1995 float64
components C-1994 float64
components C-1417 float64
components C-1411 float64
components C-1637 float64
components C-1635 float64
components C-1632 float64
components C-1633 float64
components C-1739 float64
components C-1631 float64
components C-1638 float64
components C-1639 float64
components C-2026 float64
components C-2027 float64
components C-2028 float64
components C-2029 float64
components C-1643 float64
components C-1642 float64
components C-1641 float64
components C-1640 float64
components C-1647 float64
components C-1646 float64
components C-1645 float64
components C-1644 float64
components C-1649 float64
components C-1648 float64
bracketing_pattern XXX_other float64
bracketing_pattern (10, 15, 20, 25, 30) float64
bracketing_pattern (5, 10, 20, 50, 100) float64
bracketing_pattern (25, 50, 75, 100, 290, 325, 350) float64
bracketing_pattern (1, 2, 3, 5, 10, 20, 50) float64
bracketing_pattern (1, 3, 5, 7, 10) float64
bracketing_pattern (1, 2, 3, 4, 6) float64
bracketing_pattern (5, 19, 20) float64
bracketing_pattern (1, 3, 5, 7, 9) float64
bracketing_pattern (1, 15) float64
bracketing_pattern (10, 15, 20, 30) float64
bracketing_pattern (1, 3, 5, 10, 25) float64
bracketing_pattern (30, 60, 90, 120) float64
bracketing_pattern (5, 10) float64
bracketing_pattern (1, 2, 4, 9, 19) float64
bracketing_pattern (5, 20) float64
bracketing_pattern (1, 3, 5, 10, 20) float64
bracketing_pattern (2, 3, 4, 6) float64
bracketing_pattern (15, 25, 35) float64
bracketing_pattern (1, 5, 10, 20) float64
bracketing_pattern (1, 5, 10, 20, 50) float64
bracketing_pattern (1, 3, 5, 10, 15, 25) float64
bracketing_pattern (25, 50, 75, 100) float64
bracketing_pattern (3, 5, 7, 9) float64
bracketing_pattern (1, 2, 5, 10, 25, 50, 100, 250) float64
bracketing_pattern (1, 6) float64
bracketing_pattern (2, 5) float64
bracketing_pattern (1, 2, 3, 5, 10) float64
bracketing_pattern (1, 3, 5) float64
bracketing_pattern (1, 2, 5, 10) float64
bracketing_pattern (1, 10) float64
bracketing_pattern (1, 2, 3, 5, 7) float64
bracketing_pattern (1, 3) float64
bracketing_pattern (5, 10, 15) float64
bracketing_pattern (6, 12, 18, 24) float64
bracketing_pattern (10, 15, 20) float64
bracketing_pattern (1, 4) float64
bracketing_pattern () float64
bracketing_pattern (1, 3, 5, 10, 15) float64
bracketing_pattern (1, 8) float64
bracketing_pattern (10, 20, 30, 40) float64
bracketing_pattern (5, 10, 15, 20, 25) float64
bracketing_pattern (10, 15, 20, 25) float64
bracketing_pattern (50, 50) float64
bracketing_pattern (20, 40, 60, 80) float64
bracketing_pattern (4, 10) float64
bracketing_pattern (1, 2, 4) float64
bracketing_pattern (10, 25, 40, 55, 70) float64
bracketing_pattern (5, 10, 25) float64
bracketing_pattern (1, 2, 3, 4, 5) float64
bracketing_pattern (2, 10, 25, 50, 100) float64
bracketing_pattern (1, 20, 50) float64
bracketing_pattern (1, 2, 3, 5, 10, 20) float64
bracketing_pattern (3, 5, 10) float64
bracketing_pattern (1, 2, 4, 8, 16) float64
bracketing_pattern (2, 3, 4, 5) float64
bracketing_pattern (1, 5, 10, 15, 20) float64
bracketing_pattern (1, 2, 5) float64
bracketing_pattern (8, 16, 24, 32) float64
bracketing_pattern (2, 4, 6, 8) float64
bracketing_pattern (1, 2, 5, 10, 25, 50) float64
bracketing_pattern (1, 2, 3, 4) float64
bracketing_pattern (5, 10, 15, 20) float64
bracketing_pattern (4, 6, 8, 10) float64
bracketing_pattern (1, 12) float64
bracketing_pattern (4, 15) float64
bracketing_pattern (2, 4, 6, 8, 10) float64
bracketing_pattern (2, 3, 4) float64
bracketing_pattern (1, 3, 5, 10) float64
bracketing_pattern (8, 16, 24, 32, 48) float64
bracketing_pattern (1, 5) float64
bracketing_pattern (3, 4, 5, 6) float64
bracketing_pattern (1, 3, 5, 10, 20, 30) float64
bracketing_pattern (1, 2, 5, 10, 25) float64
bracketing_pattern (1, 30) float64
bracketing_pattern (1, 2, 5, 10, 25, 50, 100) float64
bracketing_pattern (1, 6, 20) float64
bracketing_pattern (3, 6, 9, 12) float64
bracketing_pattern (1, 2, 3, 5, 10, 20, 50, 100) float64
bracketing_pattern (3, 5, 10, 20) float64
bracketing_pattern (10, 20, 30) float64
bracketing_pattern (1, 2, 5, 25, 35) float64
bracketing_pattern (1, 2) float64
bracketing_pattern (5, 10, 20) float64
bracketing_pattern (4, 8, 12, 16) float64
bracketing_pattern (10, 15, 25) float64
ends XXX_other float64
ends EF-005 float64
ends NONE float64
ends EF-001 float64
ends EF-002 float64
ends EF-003 float64
ends EF-008 float64
ends EF-009 float64
ends EF-023 float64
ends EF-021 float64
ends EF-006 float64
ends EF-013 float64
ends EF-012 float64
ends EF-011 float64
ends EF-010 float64
ends EF-017 float64
ends EF-016 float64
ends EF-015 float64
ends EF-019 float64
ends EF-018 float64
component_groups XXX_other float64
component_groups threaded float64
component_groups sleeve float64
component_groups adaptor float64
component_groups nut float64
component_groups float float64
component_groups boss float64
component_groups other float64
component_groups hfl float64
component_groups elbow float64
component_groups straight float64
component_types XXX_other float64
component_types CP-004 float64
component_types CP-006 float64
component_types CP-007 float64
component_types CP-001 float64
component_types CP-002 float64
component_types CP-003 float64
component_types CP-008 float64
component_types CP-009 float64
component_types CP-028 float64
component_types CP-022 float64
component_types CP-023 float64
component_types CP-026 float64
component_types CP-027 float64
component_types CP-024 float64
component_types CP-025 float64
component_types other float64
component_types CP-012 float64
component_types CP-011 float64
component_types CP-010 float64
component_types CP-016 float64
component_types CP-015 float64
component_types CP-014 float64
component_types CP-019 float64
component_types CP-018 float64
component_end_forms XXX_other float64
component_end_forms A-007 float64
component_end_forms A-006 float64
component_end_forms A-005 float64
component_end_forms A-004 float64
component_end_forms A-003 float64
component_end_forms A-002 float64
component_end_forms A-001 float64
component_end_forms 9999 float64
component_connection_types XXX_other float64
component_connection_types 9999 float64
component_connection_types B-012 float64
component_connection_types B-011 float64
component_connection_types B-004 float64
component_connection_types B-005 float64
component_connection_types B-006 float64
component_connection_types B-007 float64
component_connection_types B-001 float64
component_connection_types B-002 float64
component_part_names XXX_other float64
component_part_names ADAPTER float64
component_part_names LINK float64
component_part_names ELBOW float64
component_part_names CONNECTOR-WELD float64
component_part_names SLEEVE-CRIMP float64
component_part_names FITTING-NUT float64
component_part_names HEAD-FLANGED float64
component_part_names WASHER-FUEL INJ float64
component_part_names SLEEVE-FLARED float64
component_part_names CONNECTOR-BHD float64
component_part_names BOSS float64
component_part_names NUT-ORFS float64
component_part_names CAP-A/C float64
component_part_names PLATE float64
component_part_names NUT-A/C float64
component_part_names ADAPTER-OIL LIN float64
component_part_names NUT-FUEL LINE float64
component_part_names FLANGE float64
component_part_names WASHER-FUEL LIN float64
component_part_names BLOCK float64
component_part_names NUT-FUEL INJ float64
component_part_names TUBE float64
component_part_names NUT-FLARED float64
component_part_names STUD-WELD float64
component_part_names SEAL-O-RING-ORFS float64
component_part_names CLIP float64
component_part_names BRACKET float64
component_part_names VALVE AS.-A/C float64
component_part_names LUG float64
component_part_names NUT float64
component_part_names PIPE float64
component_part_names FITTING float64
component_part_names NUT-FITTING float64
component_part_names ADAPTER-A/C float64
component_part_names NUT-SWIVEL float64
component_part_names NUT-INJ LINE float64
component_part_names ADAPTER-EXH PIP float64
component_part_names COLLAR float64
component_part_names SLEEVE-FITTING float64
component_part_names SLEEVE float64
component_part_names TUBE AS float64
component_part_names NUT-WELD float64
component_part_names SEAL-O-RING float64
component_part_names WASHER float64
dtypes: bool(8), float64(580), int64(11)
memory usage: 123.4 MB
CPU times: user 256 ms, sys: 312 ms, total: 568 ms
Wall time: 610 ms
CPU times: user 24 ms, sys: 0 ns, total: 24 ms
Wall time: 24.6 ms
In [19]:
regressors = [
# DummyRegressor(strategy='constant', constant=0.0),
# DummyRegressor(strategy='mean'),
# RandomForestRegressor(n_estimators=20),
# RandomForestRegressor(n_estimators=100, max_features=0.4),
# RandomForestRegressor(n_estimators=100),
ExtraTreesRegressor(n_estimators=100),
]
for reg in regressors:
%time train_rmsle, test_rmsle = eval_regressor(reg, X_train_np, y_train_np, X_test_np, y_test_np)
print "{}:".format(reg)
print " train RMSLE {}".format(train_rmsle)
print " test RMSLE {}".format(test_rmsle)
print
CPU times: user 7min 26s, sys: 608 ms, total: 7min 27s
Wall time: 7min 28s
ExtraTreesRegressor(bootstrap=False, criterion='mse', max_depth=None,
max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
min_samples_split=2, min_weight_fraction_leaf=0.0,
n_estimators=100, n_jobs=1, oob_score=False, random_state=None,
verbose=0, warm_start=False):
train RMSLE 0.00978763787673
test RMSLE 0.260257047736
In [236]:
print_feature_importances(X_train_feats, reg);
adj_quantity 0.445669887953
total_component_weight 0.129993589942
annual_usage 0.0690799475355
diameter 0.0552006095061
min_order_quantity 0.033580347257
quantity 0.0319416827581
length 0.0265664541496
quote_age 0.0231251850538
bend_radius 0.0138240302887
component_groups threaded 0.0115562530966
components XXX_other 0.00988552095144
orientation_count 0.00820385925362
supplier S-0041 0.00761775022894
supplier S-0072 0.00726716360845
wall_thickness 0.007091839631
supplier S-0054 0.00646627057058
ends EF-003 0.00618274011057
supplier S-0026 0.00595234159366
num_bends 0.00581594864658
supplier S-0064 0.003764934154
bracketing_pattern (1, 3, 5, 7, 9) 0.0037302183229
bracketing_pattern (1, 2, 5, 10, 25, 50, 100, 250) 0.0036370632174
component_types CP-014 0.00307274905
supplier S-0066 0.00295439535234
bracketing_pattern (1, 6, 20) 0.00255389340859
bracketing_pattern () 0.00246319428133
end_2x_count 0.00231656186072
components C-0063 0.00214565288374
bracketing_pattern (1, 2, 3, 4, 5) 0.0017749989302
end_a EF-003 0.00158911897425
bracketing_pattern (5, 19, 20) 0.0015875510113
ends EF-012 0.00148512198588
end_forming_count 0.00147952706007
component_groups straight 0.00138844820558
bracketing_pattern XXX_other 0.00134946131696
components C-1727 0.00118391744879
material_id SP-0038 0.00114986905479
supplier S-0058 0.00108059718185
end_x_2x 0.0010645946194
adj_bracketing 0.00105846154923
material_id SP-0029 0.00102584043988
component_types CP-026 0.00102371363503
unique_feature_count 0.00101628167104
supplier S-0013 0.000988803757284
material_id SP-0008 0.000929772701772
material_id SP-0028 0.000883573874581
component_groups other 0.000865137170779
num_boss 0.000843330043807
specs SP-0080 0.000821682728451
end_x EF-009 0.000793449335683
bracket_pricing 0.000737434668668
supplier S-0062 0.000718437489413
component_types OTHER 0.000707153542157
specs SP-0058 0.00069266963423
specs SP-0070 0.0006902356587
end_a_2x 0.000669674893803
component_groups nut 0.000658371396798
end_x EF-003 0.000648245229808
end_1x_count 0.000617332590151
component_types CP-008 0.000616786768867
component_groups sleeve 0.000600935563491
ends EF-017 0.000596603108998
component_types CP-024 0.000586401074915
bracketing_pattern (1, 2, 3, 4) 0.000583177995305
ends EF-018 0.000581579833981
material_id SP-0035 0.000562048151469
component_groups boss 0.000551718826655
end_a_forming 0.000543852486388
components C-1475 0.00053852740729
ends EF-009 0.000532983550914
component_types CP-025 0.000524119346142
specs SP-0004 0.000484964866687
specs SP-0012 0.000475727193595
specs SP-0069 0.000475217220298
end_x NONE 0.000472200372962
end_x_forming 0.000469192921589
end_a_1x 0.000426969481655
ends NONE 0.00042138805886
specs SP-0026 0.000405004023068
end_x_1x 0.000384578858696
components C-1621 0.000378814589587
supplier XXX_other 0.00037030056523
specs SP-0007 0.00035574417851
component_types CP-004 0.000350733773593
components C-0494 0.000348861488913
ends EF-023 0.000335424419078
bracketing_pattern (5, 10, 15, 20) 0.000322368708267
components C-0211 0.000319064723164
specs SP-0063 0.000318849160018
specs XXX_other 0.000314746093678
component_groups float 0.000310926831613
components C-0388 0.000302278406629
components C-1660 0.000301596569586
components C-1841 0.000292925823786
material_id SP-0037 0.000285887774193
components C-1420 0.000279613221122
component_types CP-022 0.00025549801054
end_a EF-009 0.000252238007571
specs SP-0009 0.000245710348514
component_types CP-006 0.000241945593798
components C-1623 0.00023667153738
num_other 0.000236171443118
components C-1352 0.000234926250705
specs SP-0082 0.000233135750895
specs SP-0068 0.000230188500353
bracketing_pattern (1, 3, 5, 10) 0.000228629101789
end_a NONE 0.000225440109426
specs SP-0024 0.000225079091768
components C-1421 0.000224909925009
ends EF-015 0.000224893601183
supplier S-0030 0.000221546827393
ends EF-021 0.000217283682829
end_a EF-018 0.000216713673082
end_a EF-023 0.000215192396623
end_x EF-017 0.000212236964845
component_types CP-018 0.000210014683481
components C-1624 0.000207964221124
specs SP-0067 0.000205438133653
components C-1477 0.000202425697757
component_types CP-015 0.00020091370904
end_x EF-018 0.000197402454039
end_a EF-017 0.00019734695511
components C-1630 0.000194171285957
supplier S-0070 0.000184619548642
components C-1629 0.000184151890598
components C-0318 0.000179677659614
components C-1628 0.000178318698203
components C-1846 0.000174960195123
components C-1620 0.000174621607547
components C-1622 0.000167417472555
specs SP-0057 0.000165852446567
components C-1631 0.000160765476401
components C-1243 0.000158186336866
supplier S-0081 0.000157078170046
component_types CP-002 0.000157069474538
end_a EF-021 0.000153133839288
bracketing_pattern (1, 2, 3, 5, 10, 20) 0.000150080674443
specs SP-0016 0.000149213289195
components C-1627 0.000149202442291
component_types CP-003 0.000147526465488
bracketing_pattern (3, 5, 7, 9) 0.000147156131163
components C-1332 0.000144051838362
components C-1661 0.000143578629324
specs SP-0061 0.000143299983178
end_a EF-015 0.000141683337143
components C-1344 0.000139285044509
specs SP-0022 0.000133241984543
material_id SP-0019 0.000128828541049
components C-2005 0.000126543787982
bracketing_pattern (3, 6, 9, 12) 0.000126161186173
components C-0218 0.000125239689218
end_x EF-012 0.000124740070055
components C-1369 0.000124726553744
components C-1641 0.000123351286297
end_x EF-023 0.000122732454987
material_id nan 0.000122412940539
components C-1625 0.000122132961337
supplier S-0104 0.000121832481046
end_a EF-012 0.000121610238732
components C-1761 0.000121038707027
components C-1374 0.000118811999634
components C-1206 0.000117423321264
component_groups elbow 0.000114112084562
components C-1244 0.000112151867102
components C-1781 0.000111932618304
components C-1845 0.000111623984398
bracketing_pattern (1, 2) 0.000110656911661
components C-1743 0.000110066235116
supplier S-0014 0.00010947081804
num_bracket 0.000106723321456
components C-2028 0.000106337913465
bracketing_pattern (1, 3, 5, 10, 15) 0.000102278671221
ends EF-002 0.000101515878433
ends EF-019 0.00010046899181
components C-1906 0.000100275675672
material_id SP-0048 9.98245027806e-05
material_id SP-0041 9.16355646926e-05
components C-1873 8.88076977229e-05
components C-0449 8.80252279275e-05
components C-1655 8.74049592549e-05
material_id SP-0046 8.66593449846e-05
components C-0448 8.62551503146e-05
components C-1632 8.45001118816e-05
components C-0001 8.35301791216e-05
bracketing_pattern (1, 3, 5, 7, 10) 8.0705988663e-05
components C-2030 8.05666686674e-05
bracketing_pattern (3, 5, 10) 7.78528423667e-05
bracketing_pattern (10, 15, 20, 25, 30) 7.78194589155e-05
components C-0444 7.66681560261e-05
end_a EF-002 7.61449754127e-05
specs SP-0079 7.37692527884e-05
components C-1654 7.23672534558e-05
bracketing_pattern (1, 3, 5) 7.13336410784e-05
bracketing_pattern (5, 10, 15) 6.98010563729e-05
ends EF-010 6.84411586774e-05
bracketing_pattern (10, 25, 40, 55, 70) 6.81958088958e-05
specs SP-0013 6.81585211016e-05
components C-1860 6.81552331803e-05
components C-1649 6.78224483628e-05
bracketing_pattern (1, 6) 6.77584812443e-05
bracketing_pattern (1, 2, 3, 5, 10) 6.75769474194e-05
ends EF-008 6.74735523004e-05
specs SP-0021 6.63862586721e-05
bracketing_pattern (1, 3, 5, 10, 20, 30) 6.59810251604e-05
bracketing_pattern (1, 2, 5, 10, 25, 50, 100) 6.59412342324e-05
components C-1848 6.59134312896e-05
components C-1638 6.57260692215e-05
end_x EF-015 6.51793111003e-05
components C-0227 6.49307119815e-05
bracketing_pattern (4, 8, 12, 16) 6.4625572095e-05
components C-1639 6.34625628477e-05
bracketing_pattern (1, 5, 10, 15, 20) 6.30275074496e-05
components C-1205 6.25079690917e-05
components C-0409 6.23639541786e-05
components C-1643 6.19386330228e-05
components C-1640 6.13637271779e-05
bracketing_pattern (2, 5) 6.06917499177e-05
components C-1312 6.04549303952e-05
bracketing_pattern (2, 4, 6, 8) 5.9704906945e-05
component_groups adaptor 5.91878098136e-05
material_id SP-0033 5.91578147664e-05
component_types CP-027 5.79498688457e-05
components C-1728 5.70956745968e-05
end_a XXX_other 5.69844108862e-05
component_types CP-010 5.6914224535e-05
components C-1646 5.67361901848e-05
components C-1642 5.52233426553e-05
specs SP-0017 5.47705872318e-05
components C-0562 5.41599956672e-05
specs SP-0005 5.36119663976e-05
components C-2043 5.32189754561e-05
components C-1375 5.26578914515e-05
bracketing_pattern (2, 3, 4, 5) 5.17117471216e-05
component_types CP-028 5.16714611672e-05
components C-0210 4.96615735391e-05
supplier S-0027 4.92960625473e-05
components C-1434 4.92401588998e-05
end_x EF-008 4.79653899644e-05
supplier S-0105 4.78478459061e-05
bracketing_pattern (1, 30) 4.77197416834e-05
end_a EF-008 4.71755994897e-05
components C-0823 4.68311204977e-05
supplier S-0005 4.65959608236e-05
end_x EF-021 4.51880034761e-05
components C-1637 4.42636838435e-05
components C-1476 4.39019661298e-05
bracketing_pattern (1, 3) 4.38449804946e-05
bracketing_pattern (10, 20, 30) 4.26666610726e-05
bracketing_pattern (1, 2, 3, 5, 10, 20, 50) 4.18971983243e-05
components C-1994 4.16140681201e-05
components C-1716 4.12398445971e-05
ends EF-005 3.98406873526e-05
components C-0579 3.76880167459e-05
components C-1645 3.70674971802e-05
bracketing_pattern (2, 4, 6, 8, 10) 3.69935659299e-05
bracketing_pattern (1, 5) 3.69419380658e-05
components C-0580 3.61783664231e-05
components C-1644 3.61336573655e-05
components C-1914 3.59233600435e-05
components C-1714 3.57689452143e-05
components C-1439 3.51261277719e-05
components C-1885 3.41602795397e-05
components C-1635 3.40494843054e-05
components C-1411 3.40337617048e-05
bracketing_pattern (10, 15, 20, 25) 3.36146221918e-05
component_types CP-007 3.34232928364e-05
components C-0007 3.3219241815e-05
components C-1633 3.31345082134e-05
specs SP-0050 3.26372226167e-05
components C-1650 3.25429692057e-05
components C-1866 3.22612030971e-05
specs SP-0029 3.1969773083e-05
components C-1425 3.13253481451e-05
material_id SP-0039 3.08920273007e-05
bracketing_pattern (1, 20, 50) 2.95282167946e-05
components C-1715 2.95140785679e-05
end_x EF-010 2.87600553945e-05
bracketing_pattern (1, 2, 4) 2.84927664615e-05
components C-1647 2.78896247343e-05
specs SP-0025 2.76718036019e-05
component_types CP-016 2.74808984204e-05
bracketing_pattern (1, 3, 5, 10, 20) 2.73253504975e-05
specs SP-0076 2.69730565731e-05
bracketing_pattern (1, 2, 5) 2.67505215525e-05
components C-1229 2.67019187217e-05
ends EF-013 2.66679086365e-05
components C-0228 2.66610374587e-05
components C-1908 2.66106158807e-05
bracketing_pattern (1, 10) 2.64582736945e-05
components C-0445 2.63140297114e-05
components C-1821 2.58221514342e-05
material_id XXX_other 2.56635724412e-05
ends XXX_other 2.54715837113e-05
components C-1209 2.52463521878e-05
specs SP-0088 2.5198598288e-05
bracketing_pattern (5, 10, 20) 2.51546377071e-05
components C-1555 2.468217296e-05
component_types CP-012 2.42581425772e-05
components C-0122 2.41358398817e-05
components C-0048 2.27629717756e-05
component_types CP-001 2.25380533392e-05
components C-1758 2.19335577904e-05
components C-1398 2.13271141308e-05
components C-0004 2.1099415625e-05
components C-0002 2.0867365055e-05
components C-0214 2.08251205393e-05
bracketing_pattern (5, 10, 25) 2.06767912519e-05
bracketing_pattern (1, 4) 2.04724367565e-05
bracketing_pattern (5, 20) 1.98323783791e-05
components C-0199 1.97383075672e-05
components C-0244 1.87982649164e-05
components C-1718 1.87288657612e-05
components C-1907 1.86362141201e-05
bracketing_pattern (1, 2, 5, 10, 25) 1.83260549657e-05
bracketing_pattern (1, 2, 5, 25, 35) 1.8269094164e-05
components C-1445 1.71056483256e-05
components C-1817 1.69585358271e-05
specs SP-0051 1.69404156167e-05
supplier S-0043 1.69304909649e-05
end_a EF-013 1.68044034864e-05
component_types CP-023 1.6785194587e-05
components C-1373 1.66789011576e-05
components C-1910 1.61015029809e-05
bracketing_pattern (5, 10) 1.5877449527e-05
bracketing_pattern (1, 2, 4, 8, 16) 1.54474091609e-05
end_x XXX_other 1.54004606185e-05
bracketing_pattern (1, 2, 3, 5, 7) 1.50464045378e-05
bracketing_pattern (10, 15, 25) 1.47172088736e-05
bracketing_pattern (1, 2, 5, 10, 25, 50) 1.43043446026e-05
components C-1652 1.42203723511e-05
material_id SP-0034 1.4059549659e-05
bracketing_pattern (10, 20, 30, 40) 1.40522811266e-05
components C-1850 1.36417533668e-05
bracketing_pattern (2, 10, 25, 50, 100) 1.34598777078e-05
components C-0751 1.33967612039e-05
components C-0599 1.33865522771e-05
bracketing_pattern (1, 12) 1.3165845777e-05
components C-2004 1.30782815485e-05
bracketing_pattern (5, 10, 15, 20, 25) 1.27823321131e-05
specs SP-0075 1.27190428573e-05
components C-1672 1.26988235051e-05
components C-2027 1.25432801895e-05
bracketing_pattern (1, 3, 5, 10, 15, 25) 1.25365112202e-05
components C-1354 1.2427974104e-05
components C-0550 1.23301683238e-05
component_groups XXX_other 1.22619347135e-05
specs SP-0065 1.22511688427e-05
components C-1355 1.20704158602e-05
components C-1218 1.17079010619e-05
components C-0674 1.13707969193e-05
bracketing_pattern (20, 40, 60, 80) 1.13707013241e-05
components C-1348 1.04493797229e-05
components C-1626 1.03271908662e-05
components C-1417 1.03232750996e-05
component_types CP-009 1.02697965219e-05
components C-1867 1.02450483439e-05
bracketing_pattern (10, 15, 20) 1.01447621441e-05
bracketing_pattern (10, 15, 20, 30) 1.01241157971e-05
components C-1877 1.01061364401e-05
ends EF-011 9.99978593796e-06
components C-1505 9.75158987263e-06
components C-0520 9.68734430525e-06
components C-1619 9.57617580088e-06
specs SP-0062 9.51276762214e-06
components C-2001 9.32808626692e-06
components C-2032 9.27870852225e-06
end_a EF-016 9.22560456961e-06
specs SP-0002 8.93333982699e-06
components C-1677 8.82204399857e-06
bracketing_pattern (4, 10) 8.75373438531e-06
components C-1881 8.64284100586e-06
components C-0539 8.58019785365e-06
bracketing_pattern (1, 3, 5, 10, 25) 8.54966400711e-06
components C-1233 8.51077832382e-06
components C-1235 8.40985698755e-06
components C-1653 8.39388268416e-06
components C-1203 8.35274636061e-06
components C-1577 8.34486399671e-06
bracketing_pattern (8, 16, 24, 32, 48) 8.26359412459e-06
components C-0422 8.21169051539e-06
end_a EF-019 8.11135879727e-06
components C-2006 7.92717020306e-06
components C-0062 7.92145426339e-06
components C-2029 7.91611183633e-06
components C-1711 7.56277397214e-06
bracketing_pattern (3, 4, 5, 6) 7.27805027407e-06
end_x EF-002 7.18876310032e-06
components C-1343 7.15906261445e-06
components C-0401 6.89428540374e-06
end_a EF-005 6.85562009538e-06
components C-0165 6.78825689733e-06
bracketing_pattern (1, 2, 5, 10) 6.778094396e-06
components C-1386 6.62576767073e-06
component_types CP-011 6.53815148452e-06
components C-1385 6.50572258956e-06
components C-1909 6.25020583616e-06
components C-1231 6.04841733843e-06
components C-0102 5.99490483439e-06
component_types XXX_other 5.97279272698e-06
components C-1889 5.91862183882e-06
components C-1648 5.75165182322e-06
bracketing_pattern (6, 12, 18, 24) 5.64250688919e-06
bracketing_pattern (25, 50, 75, 100) 5.62471640837e-06
bracketing_pattern (1, 8) 5.62371586614e-06
components C-1663 5.58949667706e-06
supplier S-0042 5.43757871768e-06
components C-0209 5.37695619636e-06
bracketing_pattern (1, 2, 3, 4, 6) 5.36489054993e-06
components C-1659 5.18456667804e-06
components C-0003 5.08859937885e-06
components C-1658 5.08537351715e-06
components C-1750 4.96944650118e-06
bracketing_pattern (1, 2, 3, 5, 10, 20, 50, 100) 4.96714917057e-06
component_types CP-019 4.8724329727e-06
components C-1435 4.8710757468e-06
supplier S-0080 4.83971910841e-06
specs SP-0010 4.78729290872e-06
components C-1936 4.76115385044e-06
end_x EF-016 4.75760731378e-06
ends EF-016 4.6964228908e-06
components C-2003 4.66937473994e-06
components C-0215 4.64093594543e-06
components C-0133 4.63177556745e-06
ends EF-001 4.5776952742e-06
components C-2002 4.52195155952e-06
components C-0389 4.51805357423e-06
components C-0826 4.24607982843e-06
material_id SP-0036 4.23621410707e-06
specs SP-0072 4.19693038141e-06
bracketing_pattern (8, 16, 24, 32) 4.05716647957e-06
components C-1615 3.96908139709e-06
components C-0052 3.94183801325e-06
components C-1898 3.81253545581e-06
components C-0275 3.79783807643e-06
components C-1745 3.71041108598e-06
components C-0208 3.68745135759e-06
components C-1954 3.57120469616e-06
supplier S-0031 3.43775764052e-06
components C-1880 3.41074715248e-06
bracketing_pattern (1, 5, 10, 20) 3.39875597325e-06
components C-1242 3.37713074278e-06
components C-1779 3.18681945168e-06
bracketing_pattern (3, 5, 10, 20) 3.17301046442e-06
bracketing_pattern (50, 50) 3.15558297937e-06
components C-1670 3.15147739535e-06
components C-1345 3.13155808556e-06
components C-1724 3.00997535693e-06
components C-1554 2.9372211182e-06
components C-1536 2.83792726925e-06
components C-0250 2.72616310495e-06
specs SP-0071 2.56963970547e-06
components C-1970 2.53058124816e-06
components C-1547 2.52951687681e-06
components C-1313 2.51052864946e-06
bracketing_pattern (2, 3, 4, 6) 2.50747293013e-06
end_x EF-006 2.4327474239e-06
components C-1995 2.20049675715e-06
bracketing_pattern (15, 25, 35) 2.01320998601e-06
components C-0616 1.97257090863e-06
bracketing_pattern (5, 10, 20, 50, 100) 1.93054763385e-06
ends EF-006 1.90161104972e-06
components C-1739 1.89264686974e-06
components C-1349 1.81155548825e-06
components C-1901 1.81029091704e-06
components C-1565 1.78699748823e-06
bracketing_pattern (4, 6, 8, 10) 1.76608557563e-06
end_x EF-019 1.62703897381e-06
material_id SP-0030 1.62529958459e-06
components C-0333 1.6158719998e-06
components C-1208 1.4621328468e-06
components C-0051 1.40066871802e-06
components C-1963 1.32719465938e-06
components C-1869 1.31909512111e-06
components C-1586 1.31595010516e-06
bracketing_pattern (1, 5, 10, 20, 50) 1.27424580508e-06
component_groups hfl 1.23668688538e-06
components C-2008 1.2029247874e-06
components C-0473 1.15378375234e-06
components C-0071 1.14807133696e-06
components C-2026 1.13411584041e-06
components C-1405 1.12120590629e-06
components C-1017 1.11198821162e-06
components C-0045 1.05552220215e-06
end_x EF-013 1.03127088356e-06
components C-0120 1.0065910816e-06
components C-1430 9.20207877068e-07
components C-0703 9.11274508229e-07
components C-1502 8.96585932467e-07
bracketing_pattern (4, 15) 8.44596942868e-07
components C-0369 8.44476865999e-07
components C-1533 8.24030991611e-07
components C-0134 7.10079639155e-07
components C-1459 6.46251194746e-07
bracketing_pattern (2, 3, 4) 6.43517156386e-07
components C-2019 6.33373335128e-07
components C-1230 5.92438499702e-07
bracketing_pattern (25, 50, 75, 100, 290, 325, 350) 5.23895151422e-07
components C-1200 4.77132052577e-07
components C-1614 4.70891711452e-07
bracketing_pattern (1, 2, 4, 9, 19) 4.32796095079e-07
bracketing_pattern (1, 15) 4.02967113652e-07
components C-0095 3.67201891047e-07
components C-1428 3.28002742163e-07
components C-0577 3.17016715166e-07
components C-0217 2.33048239881e-07
components C-1956 2.27639437564e-07
components C-1768 2.20931635616e-07
components C-1808 2.20021084155e-07
bracketing_pattern (30, 60, 90, 120) 1.64480313901e-07
components C-1998 1.51853876196e-07
components C-0434 1.31564039263e-07
components C-0548 1.09294312076e-07
components C-1651 8.11810472781e-08
components C-0058 7.46247038508e-08
components C-1725 5.88220384433e-08
components C-1976 4.30401428608e-08
components C-1183 2.88954807013e-08
components C-0844 1.19714040755e-08
components C-0544 3.39478619444e-09
components C-0699 2.4604254084e-09
components C-0855 1.15644465546e-09
components C-0057 6.6522980638e-11
components C-1541 0.0
components C-2017 0.0
In [41]:
print_feature_importances(X_train_feats, reg.estimators_[0])
adj_quantity 0.383636015766
diameter 0.11265538548
quantity 0.102695954927
annual_usage 0.0695016293123
min_order_quantity 0.0562030720638
length 0.0381054124711
components other 0.0315695194441
quote_date_days_since_1900 0.0254373647876
end_a EF-003 0.0230288972112
supplier S-0054 0.0135606124729
bend_radius 0.0120611480668
num_bends 0.00732055109556
wall_thickness 0.00693447963585
bracketing (1, 3, 5, 7, 9) 0.00655966182841
supplier S-0026 0.00653500089742
supplier S-0041 0.00649436623324
end_x EF-003 0.00597924355396
supplier S-0066 0.00425901069624
bracketing other 0.00362096411591
supplier S-0072 0.00351878035727
end_a_2x 0.00322958809483
bracketing (1, 6, 20) 0.00272968076017
supplier S-0064 0.00271985531768
components C-1727 0.00266456998671
specs SP-0061 0.00201049974401
end_x EF-023 0.0019811121209
end_x_2x 0.0018908052284
bracketing (1, 2, 3, 4, 5) 0.00179992430602
components C-1629 0.00175922658999
bracketing (5, 19, 20) 0.00175655502861
components C-1445 0.00169516258566
material_id SP-0038 0.00163143076106
components C-1620 0.00155105265457
components C-1628 0.00151325950589
end_x_1x 0.00149259819424
supplier S-0058 0.00148633126539
specs SP-0080 0.00141176718609
bracketing (1, 2, 5, 10, 25, 50, 100, 250) 0.00138330011763
components C-1631 0.00135927604582
components C-1625 0.00117895468146
components C-1630 0.00117411707906
num_boss 0.00117243981416
material_id SP-0008 0.00116095880197
components C-0494 0.00114248312637
components C-1632 0.00109589686566
specs SP-0013 0.00108569527849
material_id SP-0029 0.00103764825364
components C-1621 0.00102050301895
material_id SP-0035 0.000811347907018
material_id SP-0028 0.000750775056743
end_x EF-009 0.000716770235415
supplier other 0.000708245282395
num_other 0.000677569478728
end_x NONE 0.000670026892566
supplier S-0070 0.000667505136866
end_x EF-017 0.000632267774812
components C-0388 0.0005921997119
components C-0211 0.000571766546675
bracketing (3, 6, 9, 12) 0.000548738341382
specs SP-0004 0.000547858607559
components C-0318 0.000532190637419
bracket_pricing 0.000524308375894
components C-1374 0.000521688947929
components C-1420 0.000516932776341
specs SP-0026 0.000507378840347
components C-1348 0.000505770085087
end_a_1x 0.00049439351786
components C-1642 0.000488114677672
components C-1624 0.000482120119519
components C-1627 0.000473911779081
end_a EF-012 0.000465729893854
specs SP-0012 0.000465232397152
bracketing (5, 10, 15, 20) 0.000463203262157
material_id nan 0.000441544155973
end_x EF-015 0.000429855492134
specs SP-0024 0.000428366074738
material_id SP-0030 0.000413055334099
bracketing (1, 2) 0.000405244200577
end_x EF-018 0.00038643375626
specs SP-0070 0.000369730895964
components C-0214 0.000369159864048
components C-1622 0.000342248013436
components C-1728 0.000339032633253
components C-2005 0.000336874329654
components C-1206 0.000334466267271
end_x EF-010 0.000331669412969
supplier S-0013 0.000329021057539
components C-0007 0.000328949646538
specs SP-0058 0.000319889437619
components C-1633 0.000317508709672
components C-1538 0.000300508463423
components C-1344 0.000298549046394
supplier S-0081 0.000291926723492
components C-1623 0.000291712701129
supplier S-0027 0.000284070729856
specs SP-0069 0.000275461405875
components C-0444 0.000270851172738
specs SP-0082 0.000266782158924
components C-1244 0.000259197043884
components C-1715 0.000246427408757
components C-1994 0.000233236817211
adj_bracketing 0.000230158677171
end_a EF-018 0.000227300393389
components C-1647 0.000222678761935
specs SP-0063 0.000219077848609
components C-1845 0.0002166661071
end_a NONE 0.000211561268169
bracketing (10, 15, 25) 0.000209025800261
components C-1476 0.000204997500582
components C-1761 0.00020241719235
components C-1312 0.000197951714539
end_a EF-017 0.000197072988464
components C-1641 0.000190618686701
specs SP-0007 0.000188878614156
components C-0448 0.000181812230456
components C-0218 0.000181782237602
specs SP-0072 0.000180886449167
bracketing (5, 10, 15) 0.000175899339024
components C-0699 0.000157119091731
material_id SP-0037 0.000151328909491
components C-1988 0.000148460742909
components C-1637 0.000143957561893
components C-0449 0.000138582283306
supplier S-0062 0.000138044091798
components C-1644 0.00013529158223
components C-0199 0.000135042254606
components C-2028 0.000133854275112
bracketing (4, 8, 12, 16) 0.000131422757824
supplier S-0104 0.000116556269951
components C-1332 0.000111880235614
components C-1578 0.000111663633782
components C-2004 0.000110424940103
components C-1714 0.000107088456679
end_a EF-023 0.000106034166815
specs SP-0002 0.000104916105956
components C-1758 0.00010472121398
specs SP-0068 0.000102611421903
components C-1638 0.000101823221063
components C-1649 0.000100476611072
components C-0562 9.92544215329e-05
components C-1639 9.69995245315e-05
components C-0001 9.59954712466e-05
specs SP-0022 9.34276477631e-05
components C-1643 9.18138295022e-05
components C-1475 9.08724576333e-05
components C-1243 8.784350496e-05
components C-1385 8.75250637801e-05
components C-1645 8.63750320936e-05
components C-1846 8.48626085218e-05
components C-1229 8.18437632439e-05
components C-0215 7.78873782512e-05
bracketing (2, 4, 6, 8) 7.64379159596e-05
components C-1640 6.89054470563e-05
bracketing (1, 2, 3, 4) 6.77419289155e-05
components C-1652 6.74438869018e-05
supplier S-0030 6.63294594847e-05
bracketing (1, 1) 6.62146954212e-05
components C-2030 6.5977869423e-05
num_bracket 6.23285184964e-05
components C-0401 6.01990487372e-05
components C-1386 5.90394717556e-05
bracketing (5, 20) 5.86368175499e-05
components C-0369 5.77981712087e-05
components C-0209 5.76716838244e-05
components C-2008 5.7364530528e-05
components C-0228 5.71560273149e-05
components C-0823 5.62869206668e-05
components C-0210 5.22349887708e-05
bracketing (1, 2, 3, 5, 10, 20) 5.12315032196e-05
components C-1421 5.10993528026e-05
components C-1654 5.05708755733e-05
components C-1910 4.98112136935e-05
components C-0616 4.90234617458e-05
components C-1428 4.82279513977e-05
specs SP-0067 4.62225000829e-05
components C-0002 4.59296154156e-05
end_a EF-009 4.57196762653e-05
specs other 4.54241477038e-05
material_id SP-0019 4.29684988937e-05
material_id SP-0033 4.26299145413e-05
material_id SP-0048 4.17607772931e-05
components C-0703 4.16021438175e-05
components C-1209 4.15561108514e-05
components C-0826 4.0761928162e-05
components C-0751 3.97016634044e-05
components C-1411 3.96028499753e-05
components C-1653 3.94804611359e-05
components C-1661 3.82966975231e-05
components C-0674 3.79040818256e-05
components C-1743 3.73130733148e-05
end_x EF-008 3.60616827439e-05
supplier S-0014 3.53314339139e-05
components C-1660 3.42220208744e-05
specs SP-0017 3.40170770736e-05
components C-0208 3.35926198656e-05
components C-1739 3.13372266334e-05
components C-2032 2.98302041225e-05
specs SP-0071 2.98082497328e-05
components C-0389 2.97119256948e-05
supplier S-0042 2.89996188934e-05
components C-1936 2.86199437474e-05
end_a EF-005 2.74271490009e-05
bracketing (1, 2, 5, 10, 25, 50, 100) 2.72839215646e-05
end_x EF-012 2.71983164177e-05
bracketing (10, 20, 30, 40) 2.57954162805e-05
components C-1218 2.55690602847e-05
components C-1375 2.55666826596e-05
components C-1655 2.50971210066e-05
end_x other 2.50834403973e-05
components C-1646 2.42271107274e-05
bracketing (5, 10, 20) 2.38957548547e-05
end_a EF-002 2.38020288622e-05
components C-1547 2.35451379929e-05
bracketing (1, 20, 50) 2.33661768344e-05
components C-1711 2.32432683637e-05
specs SP-0029 2.28986789597e-05
components C-0579 2.16116956201e-05
components C-1821 2.12694962824e-05
bracketing (1, 2, 3, 5, 10) 2.09338083093e-05
components C-1369 2.04286477172e-05
bracketing (10, 20, 30) 2.04147350776e-05
components C-1354 2.00438288676e-05
bracketing (2, 3, 4, 5) 1.90384844659e-05
components C-1848 1.72630540276e-05
components C-1648 1.71875318313e-05
components C-1349 1.70841117094e-05
components C-1841 1.70611766983e-05
components C-0599 1.69053358801e-05
end_a EF-008 1.6396332103e-05
components C-1860 1.6190525044e-05
specs SP-0062 1.59255513044e-05
material_id SP-0039 1.56349077215e-05
bracketing (2, 4, 6, 8, 10) 1.53668312728e-05
components C-1425 1.53451528214e-05
components C-1345 1.53310210595e-05
components C-1718 1.49635114696e-05
end_a other 1.45764598148e-05
supplier S-0105 1.43780989265e-05
supplier S-0031 1.35059395344e-05
components C-1373 1.30821925073e-05
bracketing (1, 2, 5, 10, 25, 50) 1.26868813621e-05
components C-1352 1.22936513956e-05
components C-1355 1.19354887843e-05
specs SP-0088 1.19077441236e-05
components C-1817 1.18295832541e-05
components C-0071 1.16808312004e-05
components C-1907 1.00175013924e-05
end_a EF-015 9.0446731003e-06
components C-2026 8.9964923745e-06
components C-1889 8.92583580494e-06
components C-0520 8.63346069474e-06
material_id SP-0046 8.52064383904e-06
components C-0133 8.32951752599e-06
components C-1635 7.98100532983e-06
components C-1555 7.6699999422e-06
components C-1435 7.46719595893e-06
components C-0004 7.4368328476e-06
specs SP-0075 7.29707174103e-06
components C-1808 6.68963056967e-06
components C-1909 6.58630733314e-06
components C-1502 6.44834882603e-06
components C-0445 6.2963898138e-06
components C-1577 6.14850242092e-06
components C-1716 6.1108696049e-06
components C-1781 6.07760955754e-06
components C-1343 6.0621257779e-06
components C-1200 5.60838120741e-06
components C-1619 5.3414653536e-06
components C-1203 5.33771728596e-06
components C-0120 5.08578872808e-06
specs SP-0016 5.00750718903e-06
bracketing (1, 2, 3, 4, 6) 4.81395754809e-06
components C-0051 4.70336003006e-06
end_a EF-019 4.69028078163e-06
end_x EF-021 4.61472574765e-06
components C-2002 4.52927159263e-06
components C-0580 4.37367008084e-06
components C-2006 4.37249878699e-06
components C-0165 4.18563490484e-06
components C-1998 4.1552887729e-06
components C-1359 3.94586583282e-06
bracketing (1, 3, 5, 10, 20, 30) 3.88733815672e-06
components C-0539 3.34396154182e-06
components C-1205 3.30239416478e-06
components C-1651 2.98401014072e-06
components C-0003 2.72275087761e-06
components C-1995 2.30832995648e-06
components C-1885 2.30266879945e-06
components C-2029 2.1909166846e-06
end_a EF-021 1.99546862239e-06
material_id SP-0036 1.99144350476e-06
components C-1866 1.76173124487e-06
components C-0052 1.56267948326e-06
components C-1906 1.47574336792e-06
components C-1880 1.27404592074e-06
specs SP-0021 1.2637868979e-06
specs SP-0050 1.14342565426e-06
end_a EF-016 1.13547056713e-06
end_x EF-006 1.13302762778e-06
components C-2001 1.13053793398e-06
components C-1658 1.05280596547e-06
end_x EF-005 9.7616460776e-07
components C-1439 9.30722729541e-07
bracketing (10, 25, 40, 55, 70) 9.26520744767e-07
components C-1908 9.01901089776e-07
components C-1745 8.31579829721e-07
components C-0244 7.66989407518e-07
specs SP-0079 7.39455301378e-07
components C-1313 7.09511113469e-07
components C-1430 6.58127795915e-07
end_x EF-002 6.48692433138e-07
components C-0544 5.48127738543e-07
specs SP-0009 5.45082839386e-07
components C-1230 2.99891004885e-07
bracketing (5, 10) 2.7844640635e-07
components C-0250 2.70879707541e-07
components C-2027 2.65268140311e-07
components C-0095 1.74853153489e-07
components C-1233 1.69612173499e-07
components C-0679 1.69026275083e-07
bracketing (1, 3, 5) 1.49497883727e-07
components C-1626 1.35488180424e-07
specs SP-0057 1.34866505179e-07
components C-1873 1.2875652428e-07
components C-1533 1.13859136542e-07
supplier S-0043 1.00844925296e-07
components C-1867 6.49978607155e-08
components C-0409 6.02359440954e-08
components C-0134 3.71968832268e-08
specs SP-0025 1.10235814372e-08
components C-2003 5.99359438464e-09
bracketing (4, 10) 2.56866652346e-10
supplier S-0005 0.0
supplier S-0080 0.0
supplier S-0092 0.0
material_id other 0.0
material_id SP-0041 0.0
material_id SP-0034 0.0
end_x EF-019 0.0
specs SP-0065 0.0
specs SP-0076 0.0
specs SP-0010 0.0
specs SP-0005 0.0
components C-0217 0.0
components C-1869 0.0
components C-1898 0.0
components C-0102 0.0
components C-0122 0.0
components C-1779 0.0
components C-0422 0.0
components C-0227 0.0
components C-0550 0.0
components C-1850 0.0
components C-0577 0.0
components C-1536 0.0
components C-1405 0.0
components C-1663 0.0
components C-1954 0.0
components C-1881 0.0
components C-0434 0.0
components C-1963 0.0
components C-1242 0.0
components C-1902 0.0
components C-1183 0.0
components C-1614 0.0
components C-1615 0.0
components C-1750 0.0
components C-0275 0.0
components C-1723 0.0
components C-0333 0.0
components C-1650 0.0
components C-1970 0.0
components C-1235 0.0
components C-1565 0.0
components C-1672 0.0
Out[41]:
[('adj_quantity', 0.38363601576561601),
('diameter', 0.11265538548010036),
('quantity', 0.10269595492660942),
('annual_usage', 0.06950162931234817),
('min_order_quantity', 0.056203072063815751),
('length', 0.03810541247114544),
('components other', 0.031569519444086327),
('quote_date_days_since_1900', 0.02543736478764887),
('end_a EF-003', 0.0230288972111805),
('supplier S-0054', 0.013560612472883808),
('bend_radius', 0.01206114806682235),
('num_bends', 0.0073205510955589494),
('wall_thickness', 0.0069344796358468477),
('bracketing (1, 3, 5, 7, 9)', 0.0065596618284128761),
('supplier S-0026', 0.0065350008974187574),
('supplier S-0041', 0.0064943662332401595),
('end_x EF-003', 0.0059792435539641866),
('supplier S-0066', 0.004259010696238426),
('bracketing other', 0.0036209641159054905),
('supplier S-0072', 0.0035187803572677184),
('end_a_2x', 0.0032295880948316454),
('bracketing (1, 6, 20)', 0.0027296807601701685),
('supplier S-0064', 0.0027198553176849168),
('components C-1727', 0.0026645699867110899),
('specs SP-0061', 0.0020104997440077992),
('end_x EF-023', 0.0019811121208955433),
('end_x_2x', 0.001890805228403195),
('bracketing (1, 2, 3, 4, 5)', 0.0017999243060214489),
('components C-1629', 0.0017592265899859208),
('bracketing (5, 19, 20)', 0.0017565550286144176),
('components C-1445', 0.0016951625856636871),
('material_id SP-0038', 0.0016314307610550136),
('components C-1620', 0.0015510526545683026),
('components C-1628', 0.0015132595058863689),
('end_x_1x', 0.001492598194244137),
('supplier S-0058', 0.0014863312653891935),
('specs SP-0080', 0.0014117671860884029),
('bracketing (1, 2, 5, 10, 25, 50, 100, 250)', 0.0013833001176273923),
('components C-1631', 0.001359276045822396),
('components C-1625', 0.0011789546814565536),
('components C-1630', 0.0011741170790557848),
('num_boss', 0.0011724398141590591),
('material_id SP-0008', 0.0011609588019658511),
('components C-0494', 0.0011424831263704492),
('components C-1632', 0.0010958968656606582),
('specs SP-0013', 0.001085695278486161),
('material_id SP-0029', 0.0010376482536365375),
('components C-1621', 0.0010205030189489029),
('material_id SP-0035', 0.00081134790701762965),
('material_id SP-0028', 0.0007507750567432701),
('end_x EF-009', 0.00071677023541510108),
('supplier other', 0.00070824528239519255),
('num_other', 0.00067756947872756284),
('end_x NONE', 0.00067002689256649803),
('supplier S-0070', 0.00066750513686611695),
('end_x EF-017', 0.00063226777481218495),
('components C-0388', 0.00059219971189985034),
('components C-0211', 0.00057176654667459787),
('bracketing (3, 6, 9, 12)', 0.0005487383413816377),
('specs SP-0004', 0.00054785860755876851),
('components C-0318', 0.00053219063741945325),
('bracket_pricing', 0.00052430837589377002),
('components C-1374', 0.00052168894792893128),
('components C-1420', 0.000516932776341325),
('specs SP-0026', 0.00050737884034682333),
('components C-1348', 0.00050577008508694888),
('end_a_1x', 0.00049439351786015209),
('components C-1642', 0.00048811467767163224),
('components C-1624', 0.00048212011951929669),
('components C-1627', 0.00047391177908093706),
('end_a EF-012', 0.00046572989385357396),
('specs SP-0012', 0.00046523239715237392),
('bracketing (5, 10, 15, 20)', 0.00046320326215673656),
('material_id nan', 0.00044154415597349192),
('end_x EF-015', 0.00042985549213350461),
('specs SP-0024', 0.00042836607473802416),
('material_id SP-0030', 0.00041305533409860599),
('bracketing (1, 2)', 0.00040524420057737694),
('end_x EF-018', 0.00038643375625969348),
('specs SP-0070', 0.00036973089596437401),
('components C-0214', 0.00036915986404806902),
('components C-1622', 0.00034224801343574787),
('components C-1728', 0.00033903263325254818),
('components C-2005', 0.00033687432965375593),
('components C-1206', 0.00033446626727052153),
('end_x EF-010', 0.00033166941296939044),
('supplier S-0013', 0.00032902105753861637),
('components C-0007', 0.00032894964653796746),
('specs SP-0058', 0.00031988943761883273),
('components C-1633', 0.00031750870967241983),
('components C-1538', 0.00030050846342315673),
('components C-1344', 0.00029854904639384824),
('supplier S-0081', 0.00029192672349205123),
('components C-1623', 0.00029171270112901765),
('supplier S-0027', 0.00028407072985561435),
('specs SP-0069', 0.00027546140587531557),
('components C-0444', 0.00027085117273753783),
('specs SP-0082', 0.000266782158924063),
('components C-1244', 0.00025919704388446287),
('components C-1715', 0.00024642740875693045),
('components C-1994', 0.00023323681721065486),
('adj_bracketing', 0.00023015867717103363),
('end_a EF-018', 0.00022730039338910861),
('components C-1647', 0.0002226787619345109),
('specs SP-0063', 0.00021907784860865947),
('components C-1845', 0.00021666610710039126),
('end_a NONE', 0.00021156126816906468),
('bracketing (10, 15, 25)', 0.00020902580026056791),
('components C-1476', 0.00020499750058238625),
('components C-1761', 0.00020241719235002841),
('components C-1312', 0.00019795171453905819),
('end_a EF-017', 0.00019707298846416597),
('components C-1641', 0.00019061868670103741),
('specs SP-0007', 0.00018887861415579739),
('components C-0448', 0.00018181223045589742),
('components C-0218', 0.00018178223760177958),
('specs SP-0072', 0.00018088644916745448),
('bracketing (5, 10, 15)', 0.00017589933902448471),
('components C-0699', 0.00015711909173146376),
('material_id SP-0037', 0.00015132890949106444),
('components C-1988', 0.00014846074290853441),
('components C-1637', 0.00014395756189296884),
('components C-0449', 0.00013858228330635821),
('supplier S-0062', 0.00013804409179775961),
('components C-1644', 0.0001352915822297109),
('components C-0199', 0.00013504225460629372),
('components C-2028', 0.00013385427511210623),
('bracketing (4, 8, 12, 16)', 0.0001314227578237464),
('supplier S-0104', 0.00011655626995126669),
('components C-1332', 0.00011188023561388369),
('components C-1578', 0.00011166363378198718),
('components C-2004', 0.00011042494010274742),
('components C-1714', 0.00010708845667857778),
('end_a EF-023', 0.00010603416681452122),
('specs SP-0002', 0.00010491610595568139),
('components C-1758', 0.0001047212139798022),
('specs SP-0068', 0.00010261142190343071),
('components C-1638', 0.00010182322106275377),
('components C-1649', 0.00010047661107218406),
('components C-0562', 9.9254421532945928e-05),
('components C-1639', 9.6999524531546938e-05),
('components C-0001', 9.5995471246596885e-05),
('specs SP-0022', 9.3427647763082348e-05),
('components C-1643', 9.1813829502182705e-05),
('components C-1475', 9.0872457633317397e-05),
('components C-1243', 8.7843504959966262e-05),
('components C-1385', 8.7525063780126633e-05),
('components C-1645', 8.6375032093622531e-05),
('components C-1846', 8.4862608521835462e-05),
('components C-1229', 8.1843763243920101e-05),
('components C-0215', 7.7887378251209605e-05),
('bracketing (2, 4, 6, 8)', 7.6437915959587718e-05),
('components C-1640', 6.8905447056347404e-05),
('bracketing (1, 2, 3, 4)', 6.7741928915451355e-05),
('components C-1652', 6.7443886901812663e-05),
('supplier S-0030', 6.6329459484680957e-05),
('bracketing (1, 1)', 6.6214695421222737e-05),
('components C-2030', 6.5977869423018316e-05),
('num_bracket', 6.2328518496429728e-05),
('components C-0401', 6.0199048737159793e-05),
('components C-1386', 5.9039471755595263e-05),
('bracketing (5, 20)', 5.863681754990062e-05),
('components C-0369', 5.7798171208651739e-05),
('components C-0209', 5.7671683824353138e-05),
('components C-2008', 5.7364530527953927e-05),
('components C-0228', 5.7156027314945498e-05),
('components C-0823', 5.6286920666834521e-05),
('components C-0210', 5.2234988770796883e-05),
('bracketing (1, 2, 3, 5, 10, 20)', 5.123150321964883e-05),
('components C-1421', 5.1099352802577456e-05),
('components C-1654', 5.0570875573319772e-05),
('components C-1910', 4.9811213693465316e-05),
('components C-0616', 4.9023461745767274e-05),
('components C-1428', 4.8227951397670014e-05),
('specs SP-0067', 4.6222500082881652e-05),
('components C-0002', 4.5929615415566347e-05),
('end_a EF-009', 4.5719676265270576e-05),
('specs other', 4.5424147703838558e-05),
('material_id SP-0019', 4.2968498893669546e-05),
('material_id SP-0033', 4.2629914541344416e-05),
('material_id SP-0048', 4.1760777293056824e-05),
('components C-0703', 4.1602143817510593e-05),
('components C-1209', 4.1556110851442574e-05),
('components C-0826', 4.0761928161967502e-05),
('components C-0751', 3.9701663404402049e-05),
('components C-1411', 3.9602849975349284e-05),
('components C-1653', 3.9480461135871229e-05),
('components C-1661', 3.8296697523131059e-05),
('components C-0674', 3.7904081825617037e-05),
('components C-1743', 3.7313073314827171e-05),
('end_x EF-008', 3.6061682743945512e-05),
('supplier S-0014', 3.5331433913911906e-05),
('components C-1660', 3.4222020874365969e-05),
('specs SP-0017', 3.4017077073567963e-05),
('components C-0208', 3.3592619865611666e-05),
('components C-1739', 3.1337226633445844e-05),
('components C-2032', 2.9830204122516084e-05),
('specs SP-0071', 2.9808249732775237e-05),
('components C-0389', 2.9711925694797658e-05),
('supplier S-0042', 2.8999618893412895e-05),
('components C-1936', 2.8619943747400447e-05),
('end_a EF-005', 2.7427149000859729e-05),
('bracketing (1, 2, 5, 10, 25, 50, 100)', 2.7283921564600774e-05),
('end_x EF-012', 2.7198316417671747e-05),
('bracketing (10, 20, 30, 40)', 2.5795416280484644e-05),
('components C-1218', 2.5569060284738155e-05),
('components C-1375', 2.5566682659585367e-05),
('components C-1655', 2.5097121006605537e-05),
('end_x other', 2.508344039725272e-05),
('components C-1646', 2.4227110727400143e-05),
('bracketing (5, 10, 20)', 2.3895754854671795e-05),
('end_a EF-002', 2.3802028862154762e-05),
('components C-1547', 2.3545137992938845e-05),
('bracketing (1, 20, 50)', 2.336617683436327e-05),
('components C-1711', 2.3243268363688979e-05),
('specs SP-0029', 2.2898678959665925e-05),
('components C-0579', 2.1611695620057748e-05),
('components C-1821', 2.1269496282435841e-05),
('bracketing (1, 2, 3, 5, 10)', 2.0933808309277324e-05),
('components C-1369', 2.0428647717195669e-05),
('bracketing (10, 20, 30)', 2.0414735077576392e-05),
('components C-1354', 2.0043828867604565e-05),
('bracketing (2, 3, 4, 5)', 1.9038484465894237e-05),
('components C-1848', 1.7263054027566445e-05),
('components C-1648', 1.7187531831338436e-05),
('components C-1349', 1.7084111709426927e-05),
('components C-1841', 1.7061176698274984e-05),
('components C-0599', 1.6905335880127749e-05),
('end_a EF-008', 1.6396332103048729e-05),
('components C-1860', 1.6190525043995357e-05),
('specs SP-0062', 1.592555130437708e-05),
('material_id SP-0039', 1.5634907721467473e-05),
('bracketing (2, 4, 6, 8, 10)', 1.5366831272796879e-05),
('components C-1425', 1.5345152821372032e-05),
('components C-1345', 1.5331021059451152e-05),
('components C-1718', 1.496351146964932e-05),
('end_a other', 1.4576459814811637e-05),
('supplier S-0105', 1.4378098926481268e-05),
('supplier S-0031', 1.3505939534414885e-05),
('components C-1373', 1.3082192507318994e-05),
('bracketing (1, 2, 5, 10, 25, 50)', 1.2686881362126799e-05),
('components C-1352', 1.2293651395585172e-05),
('components C-1355', 1.1935488784326575e-05),
('specs SP-0088', 1.190774412361568e-05),
('components C-1817', 1.1829583254057329e-05),
('components C-0071', 1.1680831200447311e-05),
('components C-1907', 1.0017501392399161e-05),
('end_a EF-015', 9.0446731003030746e-06),
('components C-2026', 8.9964923744964179e-06),
('components C-1889', 8.9258358049411519e-06),
('components C-0520', 8.6334606947359238e-06),
('material_id SP-0046', 8.5206438390382949e-06),
('components C-0133', 8.3295175259919886e-06),
('components C-1635', 7.9810053298269579e-06),
('components C-1555', 7.6699999422009563e-06),
('components C-1435', 7.4671959589332027e-06),
('components C-0004', 7.436832847595492e-06),
('specs SP-0075', 7.2970717410274319e-06),
('components C-1808', 6.6896305696710039e-06),
('components C-1909', 6.5863073331379632e-06),
('components C-1502', 6.4483488260255326e-06),
('components C-0445', 6.2963898137993625e-06),
('components C-1577', 6.1485024209218179e-06),
('components C-1716', 6.1108696048994063e-06),
('components C-1781', 6.077609557539631e-06),
('components C-1343', 6.0621257778970703e-06),
('components C-1200', 5.6083812074147587e-06),
('components C-1619', 5.3414653536029719e-06),
('components C-1203', 5.3377172859598218e-06),
('components C-0120', 5.0857887280759995e-06),
('specs SP-0016', 5.0075071890349423e-06),
('bracketing (1, 2, 3, 4, 6)', 4.8139575480933123e-06),
('components C-0051', 4.7033600300590812e-06),
('end_a EF-019', 4.6902807816288858e-06),
('end_x EF-021', 4.6147257476458062e-06),
('components C-2002', 4.5292715926343441e-06),
('components C-0580', 4.3736700808449129e-06),
('components C-2006', 4.372498786987314e-06),
('components C-0165', 4.185634904838714e-06),
('components C-1998', 4.1552887728990096e-06),
('components C-1359', 3.9458658328242837e-06),
('bracketing (1, 3, 5, 10, 20, 30)', 3.8873381567206547e-06),
('components C-0539', 3.3439615418217404e-06),
('components C-1205', 3.3023941647830721e-06),
('components C-1651', 2.9840101407192924e-06),
('components C-0003', 2.7227508776080257e-06),
('components C-1995', 2.3083299564772169e-06),
('components C-1885', 2.3026687994495322e-06),
('components C-2029', 2.1909166846021083e-06),
('end_a EF-021', 1.995468622385381e-06),
('material_id SP-0036', 1.9914435047555165e-06),
('components C-1866', 1.7617312448668904e-06),
('components C-0052', 1.5626794832614705e-06),
('components C-1906', 1.4757433679219562e-06),
('components C-1880', 1.2740459207425666e-06),
('specs SP-0021', 1.2637868979024802e-06),
('specs SP-0050', 1.1434256542606045e-06),
('end_a EF-016', 1.1354705671299537e-06),
('end_x EF-006', 1.1330276277826296e-06),
('components C-2001', 1.1305379339795579e-06),
('components C-1658', 1.0528059654701323e-06),
('end_x EF-005', 9.7616460775972684e-07),
('components C-1439', 9.3072272954114681e-07),
('bracketing (10, 25, 40, 55, 70)', 9.2652074476708618e-07),
('components C-1908', 9.0190108977616808e-07),
('components C-1745', 8.3157982972105035e-07),
('components C-0244', 7.6698940751803596e-07),
('specs SP-0079', 7.3945530137802046e-07),
('components C-1313', 7.0951111346868505e-07),
('components C-1430', 6.5812779591509811e-07),
('end_x EF-002', 6.4869243313769082e-07),
('components C-0544', 5.4812773854314667e-07),
('specs SP-0009', 5.4508283938590375e-07),
('components C-1230', 2.9989100488473757e-07),
('bracketing (5, 10)', 2.7844640635017077e-07),
('components C-0250', 2.7087970754067563e-07),
('components C-2027', 2.6526814031147468e-07),
('components C-0095', 1.7485315348910783e-07),
('components C-1233', 1.6961217349867145e-07),
('components C-0679', 1.6902627508296353e-07),
('bracketing (1, 3, 5)', 1.4949788372660477e-07),
('components C-1626', 1.3548818042421604e-07),
('specs SP-0057', 1.3486650517932968e-07),
('components C-1873', 1.2875652428046763e-07),
('components C-1533', 1.1385913654231195e-07),
('supplier S-0043', 1.008449252956597e-07),
('components C-1867', 6.4997860715485083e-08),
('components C-0409', 6.0235944095357083e-08),
('components C-0134', 3.7196883226819314e-08),
('specs SP-0025', 1.1023581437243009e-08),
('components C-2003', 5.9935943846444304e-09),
('bracketing (4, 10)', 2.5686665234648127e-10),
('supplier S-0005', 0.0),
('supplier S-0080', 0.0),
('supplier S-0092', 0.0),
('material_id other', 0.0),
('material_id SP-0041', 0.0),
('material_id SP-0034', 0.0),
('end_x EF-019', 0.0),
('specs SP-0065', 0.0),
('specs SP-0076', 0.0),
('specs SP-0010', 0.0),
('specs SP-0005', 0.0),
('components C-0217', 0.0),
('components C-1869', 0.0),
('components C-1898', 0.0),
('components C-0102', 0.0),
('components C-0122', 0.0),
('components C-1779', 0.0),
('components C-0422', 0.0),
('components C-0227', 0.0),
('components C-0550', 0.0),
('components C-1850', 0.0),
('components C-0577', 0.0),
('components C-1536', 0.0),
('components C-1405', 0.0),
('components C-1663', 0.0),
('components C-1954', 0.0),
('components C-1881', 0.0),
('components C-0434', 0.0),
('components C-1963', 0.0),
('components C-1242', 0.0),
('components C-1902', 0.0),
('components C-1183', 0.0),
('components C-1614', 0.0),
('components C-1615', 0.0),
('components C-1750', 0.0),
('components C-0275', 0.0),
('components C-1723', 0.0),
('components C-0333', 0.0),
('components C-1650', 0.0),
('components C-1970', 0.0),
('components C-1235', 0.0),
('components C-1565', 0.0),
('components C-1672', 0.0)]
In [42]:
dump_decision_tree("tree0.pdf", X_train_feats, reg.estimators_[0], max_depth=5)
In [64]:
y_train.hist(bins=100)
y_train.shape
Out[64]:
(27270,)
In [65]:
y_train_pred = reg.predict(X_train_np)
hist(y_train_pred, bins=100)
y_train_pred.shape
Out[65]:
(27270,)
In [66]:
y_test.hist(bins=100)
y_test.shape
Out[66]:
(2943,)
In [67]:
y_test_pred = reg.predict(X_test_np)
hist(y_test_pred, bins=100);
y_test_pred.shape
Out[67]:
(2943,)
In [122]:
df = X_test.copy()
df['log_cost'] = y_test
df['pred_log_cost'] = y_test_pred
df['err2'] = (df['log_cost'] - df['pred_log_cost']) ** 2
df[7:15]
Out[122]:
tube_assembly_id
supplier
quote_date
annual_usage
min_order_quantity
bracket_pricing
quantity
material_id
diameter
wall_thickness
length
num_bends
bend_radius
end_a_1x
end_a_2x
end_x_1x
end_x_2x
end_a
end_x
num_boss
num_bracket
num_other
specs
components
quote_age
adj_quantity
adj_bracketing
bracketing_pattern
log_cost
pred_log_cost
err2
7
TA-00093
S-0066
2013-06-01
0
0
True
1
SP-0039
6.35
0.71
132
4
19.05
False
False
False
False
EF-008
EF-008
0
0
0
[]
[C-1312, C-1312]
41424
1
True
(1, 2, 5, 10, 25, 50, 100, 250)
3.117823
3.117429
1.552754e-07
8
TA-00093
S-0066
2013-06-01
0
0
True
2
SP-0039
6.35
0.71
132
4
19.05
False
False
False
False
EF-008
EF-008
0
0
0
[]
[C-1312, C-1312]
41424
2
True
(1, 2, 5, 10, 25, 50, 100, 250)
2.567439
2.567596
2.467026e-08
9
TA-00093
S-0066
2013-06-01
0
0
True
5
SP-0039
6.35
0.71
132
4
19.05
False
False
False
False
EF-008
EF-008
0
0
0
[]
[C-1312, C-1312]
41424
5
True
(1, 2, 5, 10, 25, 50, 100, 250)
1.986917
1.984847
4.286038e-06
10
TA-00093
S-0066
2013-06-01
0
0
True
10
SP-0039
6.35
0.71
132
4
19.05
False
False
False
False
EF-008
EF-008
0
0
0
[]
[C-1312, C-1312]
41424
10
True
(1, 2, 5, 10, 25, 50, 100, 250)
1.682495
1.679511
8.904909e-06
11
TA-00093
S-0066
2013-06-01
0
0
True
25
SP-0039
6.35
0.71
132
4
19.05
False
False
False
False
EF-008
EF-008
0
0
0
[]
[C-1312, C-1312]
41424
25
True
(1, 2, 5, 10, 25, 50, 100, 250)
1.442853
1.439277
1.278186e-05
12
TA-00093
S-0066
2013-06-01
0
0
True
50
SP-0039
6.35
0.71
132
4
19.05
False
False
False
False
EF-008
EF-008
0
0
0
[]
[C-1312, C-1312]
41424
50
True
(1, 2, 5, 10, 25, 50, 100, 250)
1.364968
1.361431
1.250912e-05
13
TA-00093
S-0066
2013-06-01
0
0
True
100
SP-0039
6.35
0.71
132
4
19.05
False
False
False
False
EF-008
EF-008
0
0
0
[]
[C-1312, C-1312]
41424
100
True
(1, 2, 5, 10, 25, 50, 100, 250)
1.328059
1.322067
3.590655e-05
14
TA-00093
S-0066
2013-06-01
0
0
True
250
SP-0039
6.35
0.71
132
4
19.05
False
False
False
False
EF-008
EF-008
0
0
0
[]
[C-1312, C-1312]
41424
250
True
(1, 2, 5, 10, 25, 50, 100, 250)
1.305695
1.304968
5.279536e-07
In [43]:
taids = ['TA-00084', 'TA-00093', 'TA-04118', 'TA-04375', 'TA-20290']
pd.set_option('display.max_columns', None)
aug_train_set[aug_train_set.tube_assembly_id.isin(taids) & (aug_train_set.quantity == 1)]
Out[43]:
tube_assembly_id
supplier
quote_date
annual_usage
min_order_quantity
bracket_pricing
quantity
log_cost
material_id
diameter
wall_thickness
length
num_bends
bend_radius
end_a_1x
end_a_2x
end_x_1x
end_x_2x
end_a
end_x
num_boss
num_bracket
num_other
specs
components
quote_age
adj_quantity
adj_bracketing
bracketing_pattern
dev_fold
160
TA-00084
S-0066
2013-08-11
0
0
True
1
3.117823
SP-0039
6.35
0.71
132
4
19.05
False
False
False
False
EF-008
EF-008
0
0
0
[]
[C-1312, C-1312]
41495
1
True
(1, 2, 5, 10, 25, 50, 100, 250)
5
209
TA-00093
S-0066
2013-06-01
0
0
True
1
3.117823
SP-0039
6.35
0.71
132
4
19.05
False
False
False
False
EF-008
EF-008
0
0
0
[]
[C-1312, C-1312]
41424
1
True
(1, 2, 5, 10, 25, 50, 100, 250)
0
7181
TA-04118
S-0066
2013-06-01
0
0
True
1
3.117823
SP-0028
9.52
0.89
56
3
19.05
False
False
False
False
EF-003
EF-003
0
0
0
[SP-0007, SP-0024, SP-0026, SP-0069, SP-0080, ...
[C-1621, C-1621, C-1628, C-1628]
41424
1
True
(1, 2, 5, 10, 25, 50, 100, 250)
1
7730
TA-04375
S-0066
2014-02-01
1
0
True
1
3.117823
SP-0029
19.05
2.11
130
4
50.80
False
False
False
False
NONE
EF-003
0
0
0
[]
[C-1624, C-1631]
41669
1
True
(1, 2, 5, 10, 25, 50, 100)
6
27791
TA-20290
S-0066
2013-07-21
0
0
True
1
3.117823
SP-0019
6.35
0.71
116
6
19.05
False
False
False
False
EF-008
EF-008
0
0
0
[]
[C-1312, C-1312]
41474
1
True
(1, 2, 5, 10, 25, 50, 100, 250)
1
In [90]:
df.err2.hist(bins=100)
Out[90]:
<matplotlib.axes._subplots.AxesSubplot at 0xcf97710>
In [123]:
df['orig_index'] = df.index
df.sort('err2', ascending=False, inplace=True)
df.reset_index(drop=True, inplace=True)
df['cum_err2'] = df.err2.cumsum()
df['cum_err2_frac'] = df.cum_err2 / df.err2.sum()
df[:5]
Out[123]:
tube_assembly_id
supplier
quote_date
annual_usage
min_order_quantity
bracket_pricing
quantity
material_id
diameter
wall_thickness
length
num_bends
bend_radius
end_a_1x
end_a_2x
end_x_1x
end_x_2x
end_a
end_x
num_boss
num_bracket
num_other
specs
components
quote_age
adj_quantity
adj_bracketing
bracketing_pattern
log_cost
pred_log_cost
err2
orig_index
cum_err2
cum_err2_frac
0
TA-12043
S-0058
2014-08-20
1
50
False
1
SP-0029
9.52
0.89
55
3
31.75
False
False
False
False
EF-003
EF-003
0
0
0
[SP-0007, SP-0012, SP-0024, SP-0026, SP-0080, ...
[C-1621, C-1621, C-1628, C-1628]
41869
50
False
()
3.964544
1.508582
6.031747
1836
6.031747
0.025769
1
TA-05245
S-0090
2005-02-23
0
1
True
1
SP-0035
19.05
1.24
25
1
44.45
False
False
False
False
EF-003
EF-017
0
0
0
[]
[C-0443, C-1486]
38404
1
False
()
0.909402
3.057529
4.614448
1024
10.646195
0.045484
2
TA-01630
S-0072
2013-04-30
5
1
True
1
SP-0035
12.70
1.65
66
4
38.10
False
False
False
False
EF-018
EF-018
0
0
0
[]
[C-0448, C-0448, C-0449, C-0449]
41392
1
False
()
0.800605
2.596843
3.226470
298
13.872665
0.059268
3
TA-20766
S-0066
2013-11-02
1
0
True
250
SP-0029
12.70
0.89
34
3
25.40
False
False
False
True
EF-017
EF-003
0
0
0
[]
[C-1475, C-1476]
41578
250
True
(1, 2, 5, 10, 25, 50, 100, 250)
2.979539
1.185228
3.219551
2853
17.092216
0.073023
4
TA-20766
S-0066
2013-11-02
1
0
True
100
SP-0029
12.70
0.89
34
3
25.40
False
False
False
True
EF-017
EF-003
0
0
0
[]
[C-1475, C-1476]
41578
100
True
(1, 2, 5, 10, 25, 50, 100, 250)
2.985178
1.193087
3.211590
2852
20.303806
0.086744
In [97]:
df.cum_err2_frac.plot()
df.cum_err2_frac[500]
Out[97]:
0.92429426948043092
In [99]:
500. / len(X_test)
Out[99]:
0.16989466530750935
In [104]:
leaves = reg.apply(X_test_np)
print leaves.shape
leaves[:3, :]
(2943, 20)
Out[104]:
array([[11852, 11175, 6808, 11443, 11766, 11186, 11463, 11363, 11621,
11278, 12062, 11921, 11061, 11304, 11812, 11866, 11277, 10961,
11139, 11297],
[30515, 30383, 30573, 28863, 27159, 27289, 27696, 30797, 30418,
28828, 28775, 30492, 30543, 30556, 27405, 30269, 27471, 27666,
27768, 27291],
[ 6300, 6467, 3794, 6338, 3901, 3763, 6504, 6492, 6256,
6307, 6352, 6333, 6315, 6373, 6198, 6235, 6467, 6384,
6206, 6165]])
In [111]:
tree0 = reg.estimators_[0]
tree0.tree_.value[11852]
Out[111]:
array([[ 2.61841793]])
In [128]:
# An example with a good prediction:
sorted_index = 2000
taid = df.tube_assembly_id[sorted_index]
orig_index = df.orig_index[sorted_index]
print taid, orig_index
X = X_test_np[orig_index, :]
df[sorted_index:sorted_index+1]
TA-02848 533
Out[128]:
tube_assembly_id
supplier
quote_date
annual_usage
min_order_quantity
bracket_pricing
quantity
material_id
diameter
wall_thickness
length
num_bends
bend_radius
end_a_1x
end_a_2x
end_x_1x
end_x_2x
end_a
end_x
num_boss
num_bracket
num_other
specs
components
quote_age
adj_quantity
adj_bracketing
bracketing_pattern
log_cost
pred_log_cost
err2
orig_index
cum_err2
cum_err2_frac
2000
TA-02848
S-0066
2013-06-16
0
0
True
1
SP-0029
12.7
2.11
194
3
38.1
False
False
False
False
EF-003
EF-003
0
0
0
[]
[C-1622, C-1622, C-1629, C-1629]
41439
1
True
(1, 2, 5, 10, 25, 50, 100, 250)
3.185876
3.198435
0.000158
533
234.038015
0.999876
In [135]:
preds = [tree.predict([X])[0] for tree in reg.estimators_]
plt.hist(preds, bins=20)
print "true value:", df.log_cost[sorted_index]
print "predicted value: ", df.pred_log_cost[sorted_index]
print "mean prediction:", np.mean(preds)
print "std prediction:", np.std(preds)
print "median prediction:", np.median(preds)
list(sorted(preds))
true value: 3.18587575272
predicted value: 3.19843466946
mean prediction: 3.19843466946
std prediction: 0.0525703175091
median prediction: 3.19090993474
Out[135]:
[3.1402231616168312,
3.1669501882068678,
3.1669501882068678,
3.1697593609672392,
3.1730267862956474,
3.1730267862956474,
3.1730267862956474,
3.1730267862956474,
3.1730655913324717,
3.1732015107511389,
3.208618358723752,
3.208618358723752,
3.208618358723752,
3.208618358723752,
3.208618358723752,
3.208618358723752,
3.208618358723752,
3.208618358723752,
3.208618358723752,
3.4088710144764689]
In [199]:
# An example with a bad prediction:
sorted_index = 3
taid = df.tube_assembly_id[sorted_index]
orig_index = df.orig_index[sorted_index]
print taid, orig_index
X = X_test_np[orig_index, :]
df[sorted_index:sorted_index+1]
TA-20766 2853
Out[199]:
tube_assembly_id
supplier
quote_date
annual_usage
min_order_quantity
bracket_pricing
quantity
material_id
diameter
wall_thickness
length
num_bends
bend_radius
end_a_1x
end_a_2x
end_x_1x
end_x_2x
end_a
end_x
num_boss
num_bracket
num_other
specs
components
quote_age
adj_quantity
adj_bracketing
bracketing_pattern
log_cost
pred_log_cost
err2
orig_index
cum_err2
cum_err2_frac
3
TA-20766
S-0066
2013-11-02
1
0
True
250
SP-0029
12.7
0.89
34
3
25.4
False
False
False
True
EF-017
EF-003
0
0
0
[]
[C-1475, C-1476]
41578
250
True
(1, 2, 5, 10, 25, 50, 100, 250)
2.979539
1.185228
3.219551
2853
17.092216
0.073023
In [200]:
preds = [tree.predict([X])[0] for tree in reg.estimators_]
plt.hist(preds, bins=20)
print "true value:", df.log_cost[sorted_index]
print "predicted value: ", df.pred_log_cost[sorted_index]
print "mean prediction:", np.mean(preds)
print "std prediction:", np.std(preds)
print "median prediction:", np.median(preds)
list(sorted(preds))
true value: 2.97953909449
predicted value: 1.18522842511
mean prediction: 1.18522842511
std prediction: 0.168485067327
median prediction: 1.15848801051
Out[200]:
[1.0085303733153852,
1.0256343351377077,
1.0325929872389481,
1.0325929872389481,
1.0325929872389481,
1.0325929872389481,
1.040486887399783,
1.040486887399783,
1.1152653617836521,
1.1152653617836521,
1.2017106592374496,
1.2017106592374496,
1.2657028961062973,
1.2882028483323655,
1.2882028483323655,
1.3019179132978025,
1.3079536358293984,
1.3243664520578047,
1.3513764578455651,
1.6973829760988586]
In [165]:
# Error by supplier:
tmp = df[['supplier', 'err2']]
supplier_errs = {}
for supplier, indices in tmp.groupby('supplier').groups.iteritems():
supplier_errs[supplier] = tmp.err2[indices].values
tmp2 = pd.DataFrame(tmp.supplier.value_counts(), columns=['test_count'])
tmp2['train_count'] = aug_train_set.supplier.value_counts()
tmp2['avg_err'] = [np.mean(supplier_errs[s]) for s in tmp2.index]
tmp2['std_err'] = [np.std(supplier_errs[s]) for s in tmp2.index]
tmp2
Out[165]:
test_count
train_count
avg_err
std_err
S-0066
1963
20553
0.052389
0.231572
S-0041
331
3323
0.035265
0.100797
S-0072
235
2317
0.103101
0.281039
S-0054
89
838
0.186193
0.540001
S-0026
66
727
0.327643
0.423543
S-0013
56
554
0.116684
0.236197
S-0058
52
516
0.276922
0.905321
S-0064
49
446
0.191596
0.323623
S-0062
23
264
0.200803
0.210129
S-0014
16
139
0.107703
0.121820
S-0030
14
111
0.321913
0.377729
S-0104
9
90
0.058832
0.094781
S-0081
8
64
0.282328
0.285143
S-0105
4
33
0.161739
0.208492
S-0005
3
24
0.154184
0.126919
S-0027
3
20
0.257985
0.364597
S-0090
3
3
1.580293
2.145723
S-0042
3
19
0.820279
1.081185
S-0031
3
21
0.087334
0.089615
S-0018
2
10
0.011786
0.003316
S-0070
2
18
0.031476
0.030841
S-0043
2
14
0.100296
0.055897
S-0060
1
3
0.020633
0.000000
S-0108
1
1
0.827935
0.000000
S-0092
1
10
1.022800
0.000000
S-0059
1
4
0.040013
0.000000
S-0111
1
2
0.221840
0.000000
S-0009
1
4
1.404222
0.000000
S-0107
1
2
0.000239
0.000000
In [163]:
tmp2.avg_err.plot()
Out[163]:
<matplotlib.axes._subplots.AxesSubplot at 0xed5a5d0>
In [189]:
# Error by bracketing_pattern:
tmp = df[['bracketing_pattern', 'err2']]
supplier_errs = {}
for supplier, indices in tmp.groupby('bracketing_pattern').groups.iteritems():
supplier_errs[supplier] = tmp.err2[indices].values
tmp2 = pd.DataFrame(tmp.bracketing_pattern.value_counts(), columns=['test_count'])
tmp2['train_count'] = aug_train_set.bracketing_pattern.value_counts()
tmp2['avg_err'] = [np.mean(supplier_errs[s]) for s in tmp2.index]
tmp2['std_err'] = [np.std(supplier_errs[s]) for s in tmp2.index]
tmp2
Out[189]:
test_count
train_count
avg_err
std_err
(1, 2, 5, 10, 25, 50, 100, 250)
1648
17640
0.022776
0.174391
()
493
4742
0.228562
0.511073
(1, 6, 20)
210
2022
0.019721
0.079115
(1, 2, 3, 5, 10, 20)
54
516
0.029540
0.046008
(1, 2, 5, 10, 25, 50)
30
186
0.006778
0.017881
(1, 3, 5, 7, 9)
30
175
0.099165
0.152343
(5, 19, 20)
30
330
0.000445
0.001012
(1, 2, 3, 4)
20
120
0.104339
0.173503
(5, 10, 15, 20)
20
136
0.072327
0.101721
(1, 3, 5, 10, 20, 30)
18
60
0.256981
0.387375
(8, 16, 24, 32)
16
28
0.013001
0.016008
(1, 2, 5, 10, 25, 50, 100)
14
497
0.000206
0.000215
(4, 10)
10
36
0.017218
0.014437
(1, 5)
10
38
0.085447
0.090503
(2, 5, 10, 15, 20)
10
10
0.553939
0.400946
(1, 2, 3, 5, 10)
10
100
0.012244
0.014006
(10, 15, 25)
9
63
0.016103
0.022121
(10, 20, 30, 40)
8
44
0.048567
0.052577
(4, 8, 12, 16)
8
84
0.021947
0.020074
(15, 30, 50, 60, 100, 200, 300, 400)
8
8
0.157400
0.127607
(4, 6, 8, 12)
8
12
0.006613
0.007463
(2, 4, 10, 20, 30, 40, 60, 120)
8
8
0.020654
0.014236
(2, 3, 4, 5)
8
108
0.045536
0.029376
(1, 2)
8
32
0.124233
0.199902
(2, 4, 6, 8)
8
140
0.087071
0.065799
(1, 2, 3, 5, 10, 20, 50)
7
63
1.903072
0.671554
(2, 3, 5, 10, 20, 50, 100)
7
14
0.083622
0.069043
(5, 20)
6
40
0.425893
0.884674
(1, 3, 5, 10, 15, 20)
6
6
0.236029
0.314758
(1, 3, 5)
6
39
0.775450
0.814073
...
...
...
...
...
(24, 36, 48)
3
6
0.452870
0.098456
(1, 20, 50)
3
51
0.062258
0.045777
(5, 8, 25)
3
3
0.111653
0.064003
(2, 5, 10)
3
3
0.733939
0.356436
(10, 15, 20)
3
27
0.006270
0.008671
(150, 200, 250)
3
6
0.018716
0.010475
(10, 25, 50)
3
6
0.092934
0.128661
(12, 34)
2
2
0.014541
0.002379
(4, 20)
2
8
0.201888
0.183252
(50, 50)
2
14
0.055851
0.019788
(5, 5)
2
2
0.155732
0.022023
(2, 10)
2
4
0.062378
0.035518
(50, 100)
2
2
0.063215
0.040215
(1, 46)
2
2
0.140241
0.020219
(1, 236)
2
2
0.130659
0.071168
(10, 100)
2
2
0.000176
0.000175
(1, 7)
2
8
0.010982
0.001652
(1, 18)
2
4
0.202517
0.091270
(2, 12)
2
2
0.020890
0.012808
(75, 100)
2
2
0.059882
0.046640
(1, 33)
2
4
0.095490
0.050908
(1, 16)
2
2
0.137253
0.012813
(1, 70)
2
4
0.076254
0.076161
(25, 205)
2
2
0.127125
0.057401
(5, 10)
2
30
0.020345
0.003985
(2, 50)
2
2
0.545023
0.478044
(2, 20)
2
4
0.018351
0.006631
(1, 47)
2
4
0.058113
0.009561
(1, 30)
2
18
0.251660
0.156888
(1, 6)
2
20
0.072902
0.025474
94 rows × 4 columns
In [195]:
xs = np.arange(len(tmp2))
ys = tmp2.avg_err.values
ybars = tmp2.std_err.values
plt.errorbar(xs, ys, yerr=ybars)
plt.ylim(-1, 1)
plt.xlim(-1, 10)
Out[195]:
(-1, 10)
In [198]:
brapa = (1, 2, 5, 10, 25, 50, 100, 250)
hist(supplier_errs[brapa], bins=100, log=True);
In [ ]:
In [181]:
X_actual_test_feats = featurizer.transform(aug_test_set)
In [226]:
col_name = 'component_types CP-015'
print "train:"
print X_train_feats[col_name].value_counts()
print X_train_feats[col_name].value_counts(normalize=True)
print
print "test:"
print X_test_feats[col_name].value_counts()
print X_test_feats[col_name].value_counts(normalize=True)
print
print "actual test:"
print X_actual_test_feats[col_name].value_counts()
print X_actual_test_feats[col_name].value_counts(normalize=True)
train:
0 26355
1 856
2 59
dtype: int64
0 0.966447
1 0.031390
2 0.002164
dtype: float64
test:
0 2884
1 59
dtype: int64
0 0.979952
1 0.020048
dtype: float64
actual test:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-226-d1347d4a8df4> in <module>()
9 print
10 print "actual test:"
---> 11 print X_actual_test_feats[col_name].value_counts()
12 print X_actual_test_feats[col_name].value_counts(normalize=True)
/home/cberzan/src/virtualenvs/kaggle/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
1795 return self._getitem_multilevel(key)
1796 else:
-> 1797 return self._getitem_column(key)
1798
1799 def _getitem_column(self, key):
/home/cberzan/src/virtualenvs/kaggle/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_column(self, key)
1802 # get column
1803 if self.columns.is_unique:
-> 1804 return self._get_item_cache(key)
1805
1806 # duplicate columns & possible reduce dimensionaility
/home/cberzan/src/virtualenvs/kaggle/lib/python2.7/site-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
1082 res = cache.get(item)
1083 if res is None:
-> 1084 values = self._data.get(item)
1085 res = self._box_item_values(item, values)
1086 cache[item] = res
/home/cberzan/src/virtualenvs/kaggle/lib/python2.7/site-packages/pandas/core/internals.pyc in get(self, item, fastpath)
2849
2850 if not isnull(item):
-> 2851 loc = self.items.get_loc(item)
2852 else:
2853 indexer = np.arange(len(self.items))[isnull(self.items)]
/home/cberzan/src/virtualenvs/kaggle/lib/python2.7/site-packages/pandas/core/index.pyc in get_loc(self, key, method)
1570 """
1571 if method is None:
-> 1572 return self._engine.get_loc(_values_from_object(key))
1573
1574 indexer = self.get_indexer([key], method=method)
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3824)()
pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3704)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12280)()
pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12231)()
KeyError: 'component_types CP-015'
In [ ]:
Content source: arorahardeep/kaggle-caterpillar
Similar notebooks: