In [15]:
%pylab inline

from sklearn.dummy import DummyRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import pandas as pd

from soln.dataset import AllCategoricalsFeaturizer
from soln.dataset import generate_xv_splits
from soln.dataset import get_augmented_train_and_test_set
from soln.utils import dump_decision_tree
from soln.utils import eval_regressor
from soln.utils import print_feature_importances


Populating the interactive namespace from numpy and matplotlib

In [2]:
%time aug_train_set, aug_test_set = get_augmented_train_and_test_set()


CPU times: user 13.7 s, sys: 148 ms, total: 13.8 s
Wall time: 14 s

In [3]:
from itertools import islice
fold_number = 0
%time X_train, y_train, X_test, y_test = next(islice(generate_xv_splits(aug_train_set), fold_number, None))
print X_train.shape, y_train.shape, X_test.shape, y_test.shape


CPU times: user 104 ms, sys: 24 ms, total: 128 ms
Wall time: 132 ms
(27270, 53) (27270,) (2943, 53) (2943,)

In [4]:
featurizer = AllCategoricalsFeaturizer()
%time featurizer.fit(X_train)
%time X_train_feats = featurizer.transform(X_train)
%time X_test_feats = featurizer.transform(X_test)
X_train_feats.info(verbose=True)

%time X_train_np = X_train_feats.astype(np.float).values
y_train_np = y_train.values
%time X_test_np = X_test_feats.astype(np.float).values
y_test_np = y_test.values


CPU times: user 1.58 s, sys: 0 ns, total: 1.58 s
Wall time: 1.6 s
CPU times: user 1.34 s, sys: 300 ms, total: 1.64 s
Wall time: 1.65 s
CPU times: user 172 ms, sys: 0 ns, total: 172 ms
Wall time: 176 ms
<class 'pandas.core.frame.DataFrame'>
Int64Index: 27270 entries, 0 to 27269
Data columns (total 599 columns):
annual_usage                                           int64
min_order_quantity                                     int64
bracket_pricing                                        bool
quantity                                               int64
diameter                                               float64
wall_thickness                                         float64
length                                                 float64
num_bends                                              int64
bend_radius                                            float64
end_a_1x                                               bool
end_a_2x                                               bool
end_x_1x                                               bool
end_x_2x                                               bool
num_boss                                               int64
num_bracket                                            int64
num_other                                              int64
quote_age                                              float64
adj_quantity                                           int64
adj_bracketing                                         bool
physical_volume                                        float64
inner_radius                                           float64
material_volume                                        float64
end_a_forming                                          bool
end_1x_count                                           int64
end_x_forming                                          bool
end_2x_count                                           int64
end_forming_count                                      int64
unique_feature_count                                   float64
orientation_count                                      float64
groove_count                                           float64
total_component_weight                                 float64
component_max_length                                   float64
component_max_overall_length                           float64
component_max_bolt_pattern_wide                        float64
component_max_bolt_pattern_long                        float64
component_max_thickness                                float64
component_min_thread_pitch                             float64
component_min_thread_size                              float64
supplier XXX_other                                     float64
supplier S-0042                                        float64
supplier S-0005                                        float64
supplier S-0026                                        float64
supplier S-0027                                        float64
supplier S-0072                                        float64
supplier S-0062                                        float64
supplier S-0064                                        float64
supplier S-0043                                        float64
supplier S-0066                                        float64
supplier S-0041                                        float64
supplier S-0105                                        float64
supplier S-0080                                        float64
supplier S-0081                                        float64
supplier S-0104                                        float64
supplier S-0013                                        float64
supplier S-0014                                        float64
supplier S-0070                                        float64
supplier S-0031                                        float64
supplier S-0030                                        float64
supplier S-0058                                        float64
supplier S-0054                                        float64
material_id XXX_other                                  float64
material_id nan                                        float64
material_id SP-0046                                    float64
material_id SP-0041                                    float64
material_id SP-0033                                    float64
material_id SP-0048                                    float64
material_id SP-0034                                    float64
material_id SP-0035                                    float64
material_id SP-0036                                    float64
material_id SP-0037                                    float64
material_id SP-0030                                    float64
material_id SP-0019                                    float64
material_id SP-0008                                    float64
material_id SP-0038                                    float64
material_id SP-0039                                    float64
material_id SP-0029                                    float64
material_id SP-0028                                    float64
end_a XXX_other                                        float64
end_a EF-005                                           float64
end_a NONE                                             float64
end_a EF-002                                           float64
end_a EF-003                                           float64
end_a EF-008                                           float64
end_a EF-009                                           float64
end_a EF-023                                           float64
end_a EF-021                                           float64
end_a EF-013                                           float64
end_a EF-012                                           float64
end_a EF-017                                           float64
end_a EF-016                                           float64
end_a EF-015                                           float64
end_a EF-019                                           float64
end_a EF-018                                           float64
end_x XXX_other                                        float64
end_x NONE                                             float64
end_x EF-002                                           float64
end_x EF-003                                           float64
end_x EF-008                                           float64
end_x EF-009                                           float64
end_x EF-023                                           float64
end_x EF-021                                           float64
end_x EF-006                                           float64
end_x EF-013                                           float64
end_x EF-012                                           float64
end_x EF-010                                           float64
end_x EF-017                                           float64
end_x EF-016                                           float64
end_x EF-015                                           float64
end_x EF-019                                           float64
end_x EF-018                                           float64
specs XXX_other                                        float64
specs SP-0065                                          float64
specs SP-0002                                          float64
specs SP-0050                                          float64
specs SP-0051                                          float64
specs SP-0057                                          float64
specs SP-0025                                          float64
specs SP-0058                                          float64
specs SP-0079                                          float64
specs SP-0024                                          float64
specs SP-0070                                          float64
specs SP-0017                                          float64
specs SP-0072                                          float64
specs SP-0016                                          float64
specs SP-0012                                          float64
specs SP-0013                                          float64
specs SP-0076                                          float64
specs SP-0022                                          float64
specs SP-0021                                          float64
specs SP-0063                                          float64
specs SP-0071                                          float64
specs SP-0088                                          float64
specs SP-0080                                          float64
specs SP-0082                                          float64
specs SP-0062                                          float64
specs SP-0010                                          float64
specs SP-0075                                          float64
specs SP-0026                                          float64
specs SP-0069                                          float64
specs SP-0068                                          float64
specs SP-0005                                          float64
specs SP-0004                                          float64
specs SP-0007                                          float64
specs SP-0009                                          float64
specs SP-0061                                          float64
specs SP-0067                                          float64
specs SP-0029                                          float64
components XXX_other                                   float64
components C-1653                                      float64
components C-0218                                      float64
components C-0217                                      float64
components C-0215                                      float64
components C-0214                                      float64
components C-0211                                      float64
components C-0210                                      float64
components C-1867                                      float64
components C-1860                                      float64
components C-1869                                      float64
components C-0063                                      float64
components C-1229                                      float64
components C-1677                                      float64
components C-1898                                      float64
components C-1355                                      float64
components C-1354                                      float64
components C-1352                                      float64
components C-1670                                      float64
components C-0122                                      float64
components C-0071                                      float64
components C-1017                                      float64
components C-0250                                      float64
components C-0318                                      float64
components C-1779                                      float64
components C-0422                                      float64
components C-1821                                      float64
components C-0855                                      float64
components C-0550                                      float64
components C-0494                                      float64
components C-0228                                      float64
components C-0051                                      float64
components C-1914                                      float64
components C-1910                                      float64
components C-1672                                      float64
components C-1850                                      float64
components C-1313                                      float64
components C-1312                                      float64
components C-1619                                      float64
components C-0577                                      float64
components C-1533                                      float64
components C-1536                                      float64
components C-0133                                      float64
components C-0134                                      float64
components C-1405                                      float64
components C-1625                                      float64
components C-1624                                      float64
components C-1627                                      float64
components C-1626                                      float64
components C-1621                                      float64
components C-1620                                      float64
components C-1623                                      float64
components C-1622                                      float64
components C-1743                                      float64
components C-1629                                      float64
components C-1628                                      float64
components C-1745                                      float64
components C-2030                                      float64
components C-0048                                      float64
components C-1650                                      float64
components C-0045                                      float64
components C-1663                                      float64
components C-0599                                      float64
components C-0616                                      float64
components C-1817                                      float64
components C-1956                                      float64
components C-1954                                      float64
components C-1218                                      float64
components C-1889                                      float64
components C-1445                                      float64
components C-1881                                      float64
components C-1880                                      float64
components C-1885                                      float64
components C-1369                                      float64
components C-1541                                      float64
components C-1547                                      float64
components C-1661                                      float64
components C-1660                                      float64
components C-0095                                      float64
components C-0002                                      float64
components C-0003                                      float64
components C-0001                                      float64
components C-0007                                      float64
components C-0004                                      float64
components C-0165                                      float64
components C-1714                                      float64
components C-1716                                      float64
components C-1711                                      float64
components C-1718                                      float64
components C-1866                                      float64
components C-2043                                      float64
components C-1781                                      float64
components C-1715                                      float64
components C-0434                                      float64
components C-0539                                      float64
components C-0544                                      float64
components C-0548                                      float64
components C-1963                                      float64
components C-1848                                      float64
components C-1845                                      float64
components C-1846                                      float64
components C-1244                                      float64
components C-1243                                      float64
components C-1242                                      float64
components C-0579                                      float64
components C-0102                                      float64
components C-1459                                      float64
components C-0120                                      float64
components C-1183                                      float64
components C-1439                                      float64
components C-1430                                      float64
components C-1434                                      float64
components C-1435                                      float64
components C-1758                                      float64
components C-1614                                      float64
components C-1615                                      float64
components C-1750                                      float64
components C-2008                                      float64
components C-2004                                      float64
components C-2005                                      float64
components C-2006                                      float64
components C-2001                                      float64
components C-2002                                      float64
components C-2003                                      float64
components C-0699                                      float64
components C-0751                                      float64
components C-1505                                      float64
components C-2032                                      float64
components C-0058                                      float64
components C-1502                                      float64
components C-0057                                      float64
components C-0199                                      float64
components C-0052                                      float64
components C-0674                                      float64
components C-1577                                      float64
components C-0826                                      float64
components C-0823                                      float64
components C-1873                                      float64
components C-0580                                      float64
components C-0401                                      float64
components C-0275                                      float64
components C-1808                                      float64
components C-0409                                      float64
components C-1209                                      float64
components C-1208                                      float64
components C-1203                                      float64
components C-1200                                      float64
components C-1206                                      float64
components C-1205                                      float64
components C-0473                                      float64
components C-1386                                      float64
components C-1385                                      float64
components C-1936                                      float64
components C-1375                                      float64
components C-1374                                      float64
components C-1373                                      float64
components C-1475                                      float64
components C-1476                                      float64
components C-1477                                      float64
components C-1555                                      float64
components C-1554                                      float64
components C-0389                                      float64
components C-0388                                      float64
components C-1724                                      float64
components C-1725                                      float64
components C-1727                                      float64
components C-1728                                      float64
components C-0333                                      float64
components C-1654                                      float64
components C-1655                                      float64
components C-1651                                      float64
components C-1652                                      float64
components C-1658                                      float64
components C-1659                                      float64
components C-0703                                      float64
components C-0448                                      float64
components C-0449                                      float64
components C-0520                                      float64
components C-0444                                      float64
components C-0445                                      float64
components C-0208                                      float64
components C-0209                                      float64
components C-1970                                      float64
components C-1976                                      float64
components C-1877                                      float64
components C-1233                                      float64
components C-1230                                      float64
components C-1231                                      float64
components C-1235                                      float64
components C-1332                                      float64
components C-0227                                      float64
components C-1425                                      float64
components C-1421                                      float64
components C-1420                                      float64
components C-1586                                      float64
components C-1428                                      float64
components C-1348                                      float64
components C-1349                                      float64
components C-1344                                      float64
components C-1345                                      float64
components C-1343                                      float64
components C-1565                                      float64
components C-2019                                      float64
components C-2017                                      float64
components C-1630                                      float64
components C-0062                                      float64
components C-0562                                      float64
components C-0244                                      float64
components C-1761                                      float64
components C-0369                                      float64
components C-1768                                      float64
components C-0844                                      float64
components C-1841                                      float64
components C-1398                                      float64
components C-1908                                      float64
components C-1909                                      float64
components C-1901                                      float64
components C-1906                                      float64
components C-1907                                      float64
components C-1998                                      float64
components C-1995                                      float64
components C-1994                                      float64
components C-1417                                      float64
components C-1411                                      float64
components C-1637                                      float64
components C-1635                                      float64
components C-1632                                      float64
components C-1633                                      float64
components C-1739                                      float64
components C-1631                                      float64
components C-1638                                      float64
components C-1639                                      float64
components C-2026                                      float64
components C-2027                                      float64
components C-2028                                      float64
components C-2029                                      float64
components C-1643                                      float64
components C-1642                                      float64
components C-1641                                      float64
components C-1640                                      float64
components C-1647                                      float64
components C-1646                                      float64
components C-1645                                      float64
components C-1644                                      float64
components C-1649                                      float64
components C-1648                                      float64
bracketing_pattern XXX_other                           float64
bracketing_pattern (10, 15, 20, 25, 30)                float64
bracketing_pattern (5, 10, 20, 50, 100)                float64
bracketing_pattern (25, 50, 75, 100, 290, 325, 350)    float64
bracketing_pattern (1, 2, 3, 5, 10, 20, 50)            float64
bracketing_pattern (1, 3, 5, 7, 10)                    float64
bracketing_pattern (1, 2, 3, 4, 6)                     float64
bracketing_pattern (5, 19, 20)                         float64
bracketing_pattern (1, 3, 5, 7, 9)                     float64
bracketing_pattern (1, 15)                             float64
bracketing_pattern (10, 15, 20, 30)                    float64
bracketing_pattern (1, 3, 5, 10, 25)                   float64
bracketing_pattern (30, 60, 90, 120)                   float64
bracketing_pattern (5, 10)                             float64
bracketing_pattern (1, 2, 4, 9, 19)                    float64
bracketing_pattern (5, 20)                             float64
bracketing_pattern (1, 3, 5, 10, 20)                   float64
bracketing_pattern (2, 3, 4, 6)                        float64
bracketing_pattern (15, 25, 35)                        float64
bracketing_pattern (1, 5, 10, 20)                      float64
bracketing_pattern (1, 5, 10, 20, 50)                  float64
bracketing_pattern (1, 3, 5, 10, 15, 25)               float64
bracketing_pattern (25, 50, 75, 100)                   float64
bracketing_pattern (3, 5, 7, 9)                        float64
bracketing_pattern (1, 2, 5, 10, 25, 50, 100, 250)     float64
bracketing_pattern (1, 6)                              float64
bracketing_pattern (2, 5)                              float64
bracketing_pattern (1, 2, 3, 5, 10)                    float64
bracketing_pattern (1, 3, 5)                           float64
bracketing_pattern (1, 2, 5, 10)                       float64
bracketing_pattern (1, 10)                             float64
bracketing_pattern (1, 2, 3, 5, 7)                     float64
bracketing_pattern (1, 3)                              float64
bracketing_pattern (5, 10, 15)                         float64
bracketing_pattern (6, 12, 18, 24)                     float64
bracketing_pattern (10, 15, 20)                        float64
bracketing_pattern (1, 4)                              float64
bracketing_pattern ()                                  float64
bracketing_pattern (1, 3, 5, 10, 15)                   float64
bracketing_pattern (1, 8)                              float64
bracketing_pattern (10, 20, 30, 40)                    float64
bracketing_pattern (5, 10, 15, 20, 25)                 float64
bracketing_pattern (10, 15, 20, 25)                    float64
bracketing_pattern (50, 50)                            float64
bracketing_pattern (20, 40, 60, 80)                    float64
bracketing_pattern (4, 10)                             float64
bracketing_pattern (1, 2, 4)                           float64
bracketing_pattern (10, 25, 40, 55, 70)                float64
bracketing_pattern (5, 10, 25)                         float64
bracketing_pattern (1, 2, 3, 4, 5)                     float64
bracketing_pattern (2, 10, 25, 50, 100)                float64
bracketing_pattern (1, 20, 50)                         float64
bracketing_pattern (1, 2, 3, 5, 10, 20)                float64
bracketing_pattern (3, 5, 10)                          float64
bracketing_pattern (1, 2, 4, 8, 16)                    float64
bracketing_pattern (2, 3, 4, 5)                        float64
bracketing_pattern (1, 5, 10, 15, 20)                  float64
bracketing_pattern (1, 2, 5)                           float64
bracketing_pattern (8, 16, 24, 32)                     float64
bracketing_pattern (2, 4, 6, 8)                        float64
bracketing_pattern (1, 2, 5, 10, 25, 50)               float64
bracketing_pattern (1, 2, 3, 4)                        float64
bracketing_pattern (5, 10, 15, 20)                     float64
bracketing_pattern (4, 6, 8, 10)                       float64
bracketing_pattern (1, 12)                             float64
bracketing_pattern (4, 15)                             float64
bracketing_pattern (2, 4, 6, 8, 10)                    float64
bracketing_pattern (2, 3, 4)                           float64
bracketing_pattern (1, 3, 5, 10)                       float64
bracketing_pattern (8, 16, 24, 32, 48)                 float64
bracketing_pattern (1, 5)                              float64
bracketing_pattern (3, 4, 5, 6)                        float64
bracketing_pattern (1, 3, 5, 10, 20, 30)               float64
bracketing_pattern (1, 2, 5, 10, 25)                   float64
bracketing_pattern (1, 30)                             float64
bracketing_pattern (1, 2, 5, 10, 25, 50, 100)          float64
bracketing_pattern (1, 6, 20)                          float64
bracketing_pattern (3, 6, 9, 12)                       float64
bracketing_pattern (1, 2, 3, 5, 10, 20, 50, 100)       float64
bracketing_pattern (3, 5, 10, 20)                      float64
bracketing_pattern (10, 20, 30)                        float64
bracketing_pattern (1, 2, 5, 25, 35)                   float64
bracketing_pattern (1, 2)                              float64
bracketing_pattern (5, 10, 20)                         float64
bracketing_pattern (4, 8, 12, 16)                      float64
bracketing_pattern (10, 15, 25)                        float64
ends XXX_other                                         float64
ends EF-005                                            float64
ends NONE                                              float64
ends EF-001                                            float64
ends EF-002                                            float64
ends EF-003                                            float64
ends EF-008                                            float64
ends EF-009                                            float64
ends EF-023                                            float64
ends EF-021                                            float64
ends EF-006                                            float64
ends EF-013                                            float64
ends EF-012                                            float64
ends EF-011                                            float64
ends EF-010                                            float64
ends EF-017                                            float64
ends EF-016                                            float64
ends EF-015                                            float64
ends EF-019                                            float64
ends EF-018                                            float64
component_groups XXX_other                             float64
component_groups threaded                              float64
component_groups sleeve                                float64
component_groups adaptor                               float64
component_groups nut                                   float64
component_groups float                                 float64
component_groups boss                                  float64
component_groups other                                 float64
component_groups hfl                                   float64
component_groups elbow                                 float64
component_groups straight                              float64
component_types XXX_other                              float64
component_types CP-004                                 float64
component_types CP-006                                 float64
component_types CP-007                                 float64
component_types CP-001                                 float64
component_types CP-002                                 float64
component_types CP-003                                 float64
component_types CP-008                                 float64
component_types CP-009                                 float64
component_types CP-028                                 float64
component_types CP-022                                 float64
component_types CP-023                                 float64
component_types CP-026                                 float64
component_types CP-027                                 float64
component_types CP-024                                 float64
component_types CP-025                                 float64
component_types other                                  float64
component_types CP-012                                 float64
component_types CP-011                                 float64
component_types CP-010                                 float64
component_types CP-016                                 float64
component_types CP-015                                 float64
component_types CP-014                                 float64
component_types CP-019                                 float64
component_types CP-018                                 float64
component_end_forms XXX_other                          float64
component_end_forms A-007                              float64
component_end_forms A-006                              float64
component_end_forms A-005                              float64
component_end_forms A-004                              float64
component_end_forms A-003                              float64
component_end_forms A-002                              float64
component_end_forms A-001                              float64
component_end_forms 9999                               float64
component_connection_types XXX_other                   float64
component_connection_types 9999                        float64
component_connection_types B-012                       float64
component_connection_types B-011                       float64
component_connection_types B-004                       float64
component_connection_types B-005                       float64
component_connection_types B-006                       float64
component_connection_types B-007                       float64
component_connection_types B-001                       float64
component_connection_types B-002                       float64
component_part_names XXX_other                         float64
component_part_names ADAPTER                           float64
component_part_names LINK                              float64
component_part_names ELBOW                             float64
component_part_names CONNECTOR-WELD                    float64
component_part_names SLEEVE-CRIMP                      float64
component_part_names FITTING-NUT                       float64
component_part_names HEAD-FLANGED                      float64
component_part_names WASHER-FUEL INJ                   float64
component_part_names SLEEVE-FLARED                     float64
component_part_names CONNECTOR-BHD                     float64
component_part_names BOSS                              float64
component_part_names NUT-ORFS                          float64
component_part_names CAP-A/C                           float64
component_part_names PLATE                             float64
component_part_names NUT-A/C                           float64
component_part_names ADAPTER-OIL LIN                   float64
component_part_names NUT-FUEL LINE                     float64
component_part_names FLANGE                            float64
component_part_names WASHER-FUEL LIN                   float64
component_part_names BLOCK                             float64
component_part_names NUT-FUEL INJ                      float64
component_part_names TUBE                              float64
component_part_names NUT-FLARED                        float64
component_part_names STUD-WELD                         float64
component_part_names SEAL-O-RING-ORFS                  float64
component_part_names CLIP                              float64
component_part_names BRACKET                           float64
component_part_names VALVE AS.-A/C                     float64
component_part_names LUG                               float64
component_part_names NUT                               float64
component_part_names PIPE                              float64
component_part_names FITTING                           float64
component_part_names NUT-FITTING                       float64
component_part_names ADAPTER-A/C                       float64
component_part_names NUT-SWIVEL                        float64
component_part_names NUT-INJ LINE                      float64
component_part_names ADAPTER-EXH PIP                   float64
component_part_names COLLAR                            float64
component_part_names SLEEVE-FITTING                    float64
component_part_names SLEEVE                            float64
component_part_names TUBE AS                           float64
component_part_names NUT-WELD                          float64
component_part_names SEAL-O-RING                       float64
component_part_names WASHER                            float64
dtypes: bool(8), float64(580), int64(11)
memory usage: 123.4 MB
CPU times: user 256 ms, sys: 312 ms, total: 568 ms
Wall time: 610 ms
CPU times: user 24 ms, sys: 0 ns, total: 24 ms
Wall time: 24.6 ms

In [19]:
regressors = [
    # DummyRegressor(strategy='constant', constant=0.0),
    # DummyRegressor(strategy='mean'),
    # RandomForestRegressor(n_estimators=20),
    # RandomForestRegressor(n_estimators=100, max_features=0.4),
    # RandomForestRegressor(n_estimators=100),
    ExtraTreesRegressor(n_estimators=100),
]

for reg in regressors:
    %time train_rmsle, test_rmsle = eval_regressor(reg, X_train_np, y_train_np, X_test_np, y_test_np)
    print "{}:".format(reg)
    print "    train RMSLE {}".format(train_rmsle)
    print "    test RMSLE {}".format(test_rmsle)
    print


CPU times: user 7min 26s, sys: 608 ms, total: 7min 27s
Wall time: 7min 28s
ExtraTreesRegressor(bootstrap=False, criterion='mse', max_depth=None,
          max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
          min_samples_split=2, min_weight_fraction_leaf=0.0,
          n_estimators=100, n_jobs=1, oob_score=False, random_state=None,
          verbose=0, warm_start=False):
    train RMSLE 0.00978763787673
    test RMSLE 0.260257047736


In [236]:
print_feature_importances(X_train_feats, reg);


adj_quantity 0.445669887953
total_component_weight 0.129993589942
annual_usage 0.0690799475355
diameter 0.0552006095061
min_order_quantity 0.033580347257
quantity 0.0319416827581
length 0.0265664541496
quote_age 0.0231251850538
bend_radius 0.0138240302887
component_groups threaded 0.0115562530966
components XXX_other 0.00988552095144
orientation_count 0.00820385925362
supplier S-0041 0.00761775022894
supplier S-0072 0.00726716360845
wall_thickness 0.007091839631
supplier S-0054 0.00646627057058
ends EF-003 0.00618274011057
supplier S-0026 0.00595234159366
num_bends 0.00581594864658
supplier S-0064 0.003764934154
bracketing_pattern (1, 3, 5, 7, 9) 0.0037302183229
bracketing_pattern (1, 2, 5, 10, 25, 50, 100, 250) 0.0036370632174
component_types CP-014 0.00307274905
supplier S-0066 0.00295439535234
bracketing_pattern (1, 6, 20) 0.00255389340859
bracketing_pattern () 0.00246319428133
end_2x_count 0.00231656186072
components C-0063 0.00214565288374
bracketing_pattern (1, 2, 3, 4, 5) 0.0017749989302
end_a EF-003 0.00158911897425
bracketing_pattern (5, 19, 20) 0.0015875510113
ends EF-012 0.00148512198588
end_forming_count 0.00147952706007
component_groups straight 0.00138844820558
bracketing_pattern XXX_other 0.00134946131696
components C-1727 0.00118391744879
material_id SP-0038 0.00114986905479
supplier S-0058 0.00108059718185
end_x_2x 0.0010645946194
adj_bracketing 0.00105846154923
material_id SP-0029 0.00102584043988
component_types CP-026 0.00102371363503
unique_feature_count 0.00101628167104
supplier S-0013 0.000988803757284
material_id SP-0008 0.000929772701772
material_id SP-0028 0.000883573874581
component_groups other 0.000865137170779
num_boss 0.000843330043807
specs SP-0080 0.000821682728451
end_x EF-009 0.000793449335683
bracket_pricing 0.000737434668668
supplier S-0062 0.000718437489413
component_types OTHER 0.000707153542157
specs SP-0058 0.00069266963423
specs SP-0070 0.0006902356587
end_a_2x 0.000669674893803
component_groups nut 0.000658371396798
end_x EF-003 0.000648245229808
end_1x_count 0.000617332590151
component_types CP-008 0.000616786768867
component_groups sleeve 0.000600935563491
ends EF-017 0.000596603108998
component_types CP-024 0.000586401074915
bracketing_pattern (1, 2, 3, 4) 0.000583177995305
ends EF-018 0.000581579833981
material_id SP-0035 0.000562048151469
component_groups boss 0.000551718826655
end_a_forming 0.000543852486388
components C-1475 0.00053852740729
ends EF-009 0.000532983550914
component_types CP-025 0.000524119346142
specs SP-0004 0.000484964866687
specs SP-0012 0.000475727193595
specs SP-0069 0.000475217220298
end_x NONE 0.000472200372962
end_x_forming 0.000469192921589
end_a_1x 0.000426969481655
ends NONE 0.00042138805886
specs SP-0026 0.000405004023068
end_x_1x 0.000384578858696
components C-1621 0.000378814589587
supplier XXX_other 0.00037030056523
specs SP-0007 0.00035574417851
component_types CP-004 0.000350733773593
components C-0494 0.000348861488913
ends EF-023 0.000335424419078
bracketing_pattern (5, 10, 15, 20) 0.000322368708267
components C-0211 0.000319064723164
specs SP-0063 0.000318849160018
specs XXX_other 0.000314746093678
component_groups float 0.000310926831613
components C-0388 0.000302278406629
components C-1660 0.000301596569586
components C-1841 0.000292925823786
material_id SP-0037 0.000285887774193
components C-1420 0.000279613221122
component_types CP-022 0.00025549801054
end_a EF-009 0.000252238007571
specs SP-0009 0.000245710348514
component_types CP-006 0.000241945593798
components C-1623 0.00023667153738
num_other 0.000236171443118
components C-1352 0.000234926250705
specs SP-0082 0.000233135750895
specs SP-0068 0.000230188500353
bracketing_pattern (1, 3, 5, 10) 0.000228629101789
end_a NONE 0.000225440109426
specs SP-0024 0.000225079091768
components C-1421 0.000224909925009
ends EF-015 0.000224893601183
supplier S-0030 0.000221546827393
ends EF-021 0.000217283682829
end_a EF-018 0.000216713673082
end_a EF-023 0.000215192396623
end_x EF-017 0.000212236964845
component_types CP-018 0.000210014683481
components C-1624 0.000207964221124
specs SP-0067 0.000205438133653
components C-1477 0.000202425697757
component_types CP-015 0.00020091370904
end_x EF-018 0.000197402454039
end_a EF-017 0.00019734695511
components C-1630 0.000194171285957
supplier S-0070 0.000184619548642
components C-1629 0.000184151890598
components C-0318 0.000179677659614
components C-1628 0.000178318698203
components C-1846 0.000174960195123
components C-1620 0.000174621607547
components C-1622 0.000167417472555
specs SP-0057 0.000165852446567
components C-1631 0.000160765476401
components C-1243 0.000158186336866
supplier S-0081 0.000157078170046
component_types CP-002 0.000157069474538
end_a EF-021 0.000153133839288
bracketing_pattern (1, 2, 3, 5, 10, 20) 0.000150080674443
specs SP-0016 0.000149213289195
components C-1627 0.000149202442291
component_types CP-003 0.000147526465488
bracketing_pattern (3, 5, 7, 9) 0.000147156131163
components C-1332 0.000144051838362
components C-1661 0.000143578629324
specs SP-0061 0.000143299983178
end_a EF-015 0.000141683337143
components C-1344 0.000139285044509
specs SP-0022 0.000133241984543
material_id SP-0019 0.000128828541049
components C-2005 0.000126543787982
bracketing_pattern (3, 6, 9, 12) 0.000126161186173
components C-0218 0.000125239689218
end_x EF-012 0.000124740070055
components C-1369 0.000124726553744
components C-1641 0.000123351286297
end_x EF-023 0.000122732454987
material_id nan 0.000122412940539
components C-1625 0.000122132961337
supplier S-0104 0.000121832481046
end_a EF-012 0.000121610238732
components C-1761 0.000121038707027
components C-1374 0.000118811999634
components C-1206 0.000117423321264
component_groups elbow 0.000114112084562
components C-1244 0.000112151867102
components C-1781 0.000111932618304
components C-1845 0.000111623984398
bracketing_pattern (1, 2) 0.000110656911661
components C-1743 0.000110066235116
supplier S-0014 0.00010947081804
num_bracket 0.000106723321456
components C-2028 0.000106337913465
bracketing_pattern (1, 3, 5, 10, 15) 0.000102278671221
ends EF-002 0.000101515878433
ends EF-019 0.00010046899181
components C-1906 0.000100275675672
material_id SP-0048 9.98245027806e-05
material_id SP-0041 9.16355646926e-05
components C-1873 8.88076977229e-05
components C-0449 8.80252279275e-05
components C-1655 8.74049592549e-05
material_id SP-0046 8.66593449846e-05
components C-0448 8.62551503146e-05
components C-1632 8.45001118816e-05
components C-0001 8.35301791216e-05
bracketing_pattern (1, 3, 5, 7, 10) 8.0705988663e-05
components C-2030 8.05666686674e-05
bracketing_pattern (3, 5, 10) 7.78528423667e-05
bracketing_pattern (10, 15, 20, 25, 30) 7.78194589155e-05
components C-0444 7.66681560261e-05
end_a EF-002 7.61449754127e-05
specs SP-0079 7.37692527884e-05
components C-1654 7.23672534558e-05
bracketing_pattern (1, 3, 5) 7.13336410784e-05
bracketing_pattern (5, 10, 15) 6.98010563729e-05
ends EF-010 6.84411586774e-05
bracketing_pattern (10, 25, 40, 55, 70) 6.81958088958e-05
specs SP-0013 6.81585211016e-05
components C-1860 6.81552331803e-05
components C-1649 6.78224483628e-05
bracketing_pattern (1, 6) 6.77584812443e-05
bracketing_pattern (1, 2, 3, 5, 10) 6.75769474194e-05
ends EF-008 6.74735523004e-05
specs SP-0021 6.63862586721e-05
bracketing_pattern (1, 3, 5, 10, 20, 30) 6.59810251604e-05
bracketing_pattern (1, 2, 5, 10, 25, 50, 100) 6.59412342324e-05
components C-1848 6.59134312896e-05
components C-1638 6.57260692215e-05
end_x EF-015 6.51793111003e-05
components C-0227 6.49307119815e-05
bracketing_pattern (4, 8, 12, 16) 6.4625572095e-05
components C-1639 6.34625628477e-05
bracketing_pattern (1, 5, 10, 15, 20) 6.30275074496e-05
components C-1205 6.25079690917e-05
components C-0409 6.23639541786e-05
components C-1643 6.19386330228e-05
components C-1640 6.13637271779e-05
bracketing_pattern (2, 5) 6.06917499177e-05
components C-1312 6.04549303952e-05
bracketing_pattern (2, 4, 6, 8) 5.9704906945e-05
component_groups adaptor 5.91878098136e-05
material_id SP-0033 5.91578147664e-05
component_types CP-027 5.79498688457e-05
components C-1728 5.70956745968e-05
end_a XXX_other 5.69844108862e-05
component_types CP-010 5.6914224535e-05
components C-1646 5.67361901848e-05
components C-1642 5.52233426553e-05
specs SP-0017 5.47705872318e-05
components C-0562 5.41599956672e-05
specs SP-0005 5.36119663976e-05
components C-2043 5.32189754561e-05
components C-1375 5.26578914515e-05
bracketing_pattern (2, 3, 4, 5) 5.17117471216e-05
component_types CP-028 5.16714611672e-05
components C-0210 4.96615735391e-05
supplier S-0027 4.92960625473e-05
components C-1434 4.92401588998e-05
end_x EF-008 4.79653899644e-05
supplier S-0105 4.78478459061e-05
bracketing_pattern (1, 30) 4.77197416834e-05
end_a EF-008 4.71755994897e-05
components C-0823 4.68311204977e-05
supplier S-0005 4.65959608236e-05
end_x EF-021 4.51880034761e-05
components C-1637 4.42636838435e-05
components C-1476 4.39019661298e-05
bracketing_pattern (1, 3) 4.38449804946e-05
bracketing_pattern (10, 20, 30) 4.26666610726e-05
bracketing_pattern (1, 2, 3, 5, 10, 20, 50) 4.18971983243e-05
components C-1994 4.16140681201e-05
components C-1716 4.12398445971e-05
ends EF-005 3.98406873526e-05
components C-0579 3.76880167459e-05
components C-1645 3.70674971802e-05
bracketing_pattern (2, 4, 6, 8, 10) 3.69935659299e-05
bracketing_pattern (1, 5) 3.69419380658e-05
components C-0580 3.61783664231e-05
components C-1644 3.61336573655e-05
components C-1914 3.59233600435e-05
components C-1714 3.57689452143e-05
components C-1439 3.51261277719e-05
components C-1885 3.41602795397e-05
components C-1635 3.40494843054e-05
components C-1411 3.40337617048e-05
bracketing_pattern (10, 15, 20, 25) 3.36146221918e-05
component_types CP-007 3.34232928364e-05
components C-0007 3.3219241815e-05
components C-1633 3.31345082134e-05
specs SP-0050 3.26372226167e-05
components C-1650 3.25429692057e-05
components C-1866 3.22612030971e-05
specs SP-0029 3.1969773083e-05
components C-1425 3.13253481451e-05
material_id SP-0039 3.08920273007e-05
bracketing_pattern (1, 20, 50) 2.95282167946e-05
components C-1715 2.95140785679e-05
end_x EF-010 2.87600553945e-05
bracketing_pattern (1, 2, 4) 2.84927664615e-05
components C-1647 2.78896247343e-05
specs SP-0025 2.76718036019e-05
component_types CP-016 2.74808984204e-05
bracketing_pattern (1, 3, 5, 10, 20) 2.73253504975e-05
specs SP-0076 2.69730565731e-05
bracketing_pattern (1, 2, 5) 2.67505215525e-05
components C-1229 2.67019187217e-05
ends EF-013 2.66679086365e-05
components C-0228 2.66610374587e-05
components C-1908 2.66106158807e-05
bracketing_pattern (1, 10) 2.64582736945e-05
components C-0445 2.63140297114e-05
components C-1821 2.58221514342e-05
material_id XXX_other 2.56635724412e-05
ends XXX_other 2.54715837113e-05
components C-1209 2.52463521878e-05
specs SP-0088 2.5198598288e-05
bracketing_pattern (5, 10, 20) 2.51546377071e-05
components C-1555 2.468217296e-05
component_types CP-012 2.42581425772e-05
components C-0122 2.41358398817e-05
components C-0048 2.27629717756e-05
component_types CP-001 2.25380533392e-05
components C-1758 2.19335577904e-05
components C-1398 2.13271141308e-05
components C-0004 2.1099415625e-05
components C-0002 2.0867365055e-05
components C-0214 2.08251205393e-05
bracketing_pattern (5, 10, 25) 2.06767912519e-05
bracketing_pattern (1, 4) 2.04724367565e-05
bracketing_pattern (5, 20) 1.98323783791e-05
components C-0199 1.97383075672e-05
components C-0244 1.87982649164e-05
components C-1718 1.87288657612e-05
components C-1907 1.86362141201e-05
bracketing_pattern (1, 2, 5, 10, 25) 1.83260549657e-05
bracketing_pattern (1, 2, 5, 25, 35) 1.8269094164e-05
components C-1445 1.71056483256e-05
components C-1817 1.69585358271e-05
specs SP-0051 1.69404156167e-05
supplier S-0043 1.69304909649e-05
end_a EF-013 1.68044034864e-05
component_types CP-023 1.6785194587e-05
components C-1373 1.66789011576e-05
components C-1910 1.61015029809e-05
bracketing_pattern (5, 10) 1.5877449527e-05
bracketing_pattern (1, 2, 4, 8, 16) 1.54474091609e-05
end_x XXX_other 1.54004606185e-05
bracketing_pattern (1, 2, 3, 5, 7) 1.50464045378e-05
bracketing_pattern (10, 15, 25) 1.47172088736e-05
bracketing_pattern (1, 2, 5, 10, 25, 50) 1.43043446026e-05
components C-1652 1.42203723511e-05
material_id SP-0034 1.4059549659e-05
bracketing_pattern (10, 20, 30, 40) 1.40522811266e-05
components C-1850 1.36417533668e-05
bracketing_pattern (2, 10, 25, 50, 100) 1.34598777078e-05
components C-0751 1.33967612039e-05
components C-0599 1.33865522771e-05
bracketing_pattern (1, 12) 1.3165845777e-05
components C-2004 1.30782815485e-05
bracketing_pattern (5, 10, 15, 20, 25) 1.27823321131e-05
specs SP-0075 1.27190428573e-05
components C-1672 1.26988235051e-05
components C-2027 1.25432801895e-05
bracketing_pattern (1, 3, 5, 10, 15, 25) 1.25365112202e-05
components C-1354 1.2427974104e-05
components C-0550 1.23301683238e-05
component_groups XXX_other 1.22619347135e-05
specs SP-0065 1.22511688427e-05
components C-1355 1.20704158602e-05
components C-1218 1.17079010619e-05
components C-0674 1.13707969193e-05
bracketing_pattern (20, 40, 60, 80) 1.13707013241e-05
components C-1348 1.04493797229e-05
components C-1626 1.03271908662e-05
components C-1417 1.03232750996e-05
component_types CP-009 1.02697965219e-05
components C-1867 1.02450483439e-05
bracketing_pattern (10, 15, 20) 1.01447621441e-05
bracketing_pattern (10, 15, 20, 30) 1.01241157971e-05
components C-1877 1.01061364401e-05
ends EF-011 9.99978593796e-06
components C-1505 9.75158987263e-06
components C-0520 9.68734430525e-06
components C-1619 9.57617580088e-06
specs SP-0062 9.51276762214e-06
components C-2001 9.32808626692e-06
components C-2032 9.27870852225e-06
end_a EF-016 9.22560456961e-06
specs SP-0002 8.93333982699e-06
components C-1677 8.82204399857e-06
bracketing_pattern (4, 10) 8.75373438531e-06
components C-1881 8.64284100586e-06
components C-0539 8.58019785365e-06
bracketing_pattern (1, 3, 5, 10, 25) 8.54966400711e-06
components C-1233 8.51077832382e-06
components C-1235 8.40985698755e-06
components C-1653 8.39388268416e-06
components C-1203 8.35274636061e-06
components C-1577 8.34486399671e-06
bracketing_pattern (8, 16, 24, 32, 48) 8.26359412459e-06
components C-0422 8.21169051539e-06
end_a EF-019 8.11135879727e-06
components C-2006 7.92717020306e-06
components C-0062 7.92145426339e-06
components C-2029 7.91611183633e-06
components C-1711 7.56277397214e-06
bracketing_pattern (3, 4, 5, 6) 7.27805027407e-06
end_x EF-002 7.18876310032e-06
components C-1343 7.15906261445e-06
components C-0401 6.89428540374e-06
end_a EF-005 6.85562009538e-06
components C-0165 6.78825689733e-06
bracketing_pattern (1, 2, 5, 10) 6.778094396e-06
components C-1386 6.62576767073e-06
component_types CP-011 6.53815148452e-06
components C-1385 6.50572258956e-06
components C-1909 6.25020583616e-06
components C-1231 6.04841733843e-06
components C-0102 5.99490483439e-06
component_types XXX_other 5.97279272698e-06
components C-1889 5.91862183882e-06
components C-1648 5.75165182322e-06
bracketing_pattern (6, 12, 18, 24) 5.64250688919e-06
bracketing_pattern (25, 50, 75, 100) 5.62471640837e-06
bracketing_pattern (1, 8) 5.62371586614e-06
components C-1663 5.58949667706e-06
supplier S-0042 5.43757871768e-06
components C-0209 5.37695619636e-06
bracketing_pattern (1, 2, 3, 4, 6) 5.36489054993e-06
components C-1659 5.18456667804e-06
components C-0003 5.08859937885e-06
components C-1658 5.08537351715e-06
components C-1750 4.96944650118e-06
bracketing_pattern (1, 2, 3, 5, 10, 20, 50, 100) 4.96714917057e-06
component_types CP-019 4.8724329727e-06
components C-1435 4.8710757468e-06
supplier S-0080 4.83971910841e-06
specs SP-0010 4.78729290872e-06
components C-1936 4.76115385044e-06
end_x EF-016 4.75760731378e-06
ends EF-016 4.6964228908e-06
components C-2003 4.66937473994e-06
components C-0215 4.64093594543e-06
components C-0133 4.63177556745e-06
ends EF-001 4.5776952742e-06
components C-2002 4.52195155952e-06
components C-0389 4.51805357423e-06
components C-0826 4.24607982843e-06
material_id SP-0036 4.23621410707e-06
specs SP-0072 4.19693038141e-06
bracketing_pattern (8, 16, 24, 32) 4.05716647957e-06
components C-1615 3.96908139709e-06
components C-0052 3.94183801325e-06
components C-1898 3.81253545581e-06
components C-0275 3.79783807643e-06
components C-1745 3.71041108598e-06
components C-0208 3.68745135759e-06
components C-1954 3.57120469616e-06
supplier S-0031 3.43775764052e-06
components C-1880 3.41074715248e-06
bracketing_pattern (1, 5, 10, 20) 3.39875597325e-06
components C-1242 3.37713074278e-06
components C-1779 3.18681945168e-06
bracketing_pattern (3, 5, 10, 20) 3.17301046442e-06
bracketing_pattern (50, 50) 3.15558297937e-06
components C-1670 3.15147739535e-06
components C-1345 3.13155808556e-06
components C-1724 3.00997535693e-06
components C-1554 2.9372211182e-06
components C-1536 2.83792726925e-06
components C-0250 2.72616310495e-06
specs SP-0071 2.56963970547e-06
components C-1970 2.53058124816e-06
components C-1547 2.52951687681e-06
components C-1313 2.51052864946e-06
bracketing_pattern (2, 3, 4, 6) 2.50747293013e-06
end_x EF-006 2.4327474239e-06
components C-1995 2.20049675715e-06
bracketing_pattern (15, 25, 35) 2.01320998601e-06
components C-0616 1.97257090863e-06
bracketing_pattern (5, 10, 20, 50, 100) 1.93054763385e-06
ends EF-006 1.90161104972e-06
components C-1739 1.89264686974e-06
components C-1349 1.81155548825e-06
components C-1901 1.81029091704e-06
components C-1565 1.78699748823e-06
bracketing_pattern (4, 6, 8, 10) 1.76608557563e-06
end_x EF-019 1.62703897381e-06
material_id SP-0030 1.62529958459e-06
components C-0333 1.6158719998e-06
components C-1208 1.4621328468e-06
components C-0051 1.40066871802e-06
components C-1963 1.32719465938e-06
components C-1869 1.31909512111e-06
components C-1586 1.31595010516e-06
bracketing_pattern (1, 5, 10, 20, 50) 1.27424580508e-06
component_groups hfl 1.23668688538e-06
components C-2008 1.2029247874e-06
components C-0473 1.15378375234e-06
components C-0071 1.14807133696e-06
components C-2026 1.13411584041e-06
components C-1405 1.12120590629e-06
components C-1017 1.11198821162e-06
components C-0045 1.05552220215e-06
end_x EF-013 1.03127088356e-06
components C-0120 1.0065910816e-06
components C-1430 9.20207877068e-07
components C-0703 9.11274508229e-07
components C-1502 8.96585932467e-07
bracketing_pattern (4, 15) 8.44596942868e-07
components C-0369 8.44476865999e-07
components C-1533 8.24030991611e-07
components C-0134 7.10079639155e-07
components C-1459 6.46251194746e-07
bracketing_pattern (2, 3, 4) 6.43517156386e-07
components C-2019 6.33373335128e-07
components C-1230 5.92438499702e-07
bracketing_pattern (25, 50, 75, 100, 290, 325, 350) 5.23895151422e-07
components C-1200 4.77132052577e-07
components C-1614 4.70891711452e-07
bracketing_pattern (1, 2, 4, 9, 19) 4.32796095079e-07
bracketing_pattern (1, 15) 4.02967113652e-07
components C-0095 3.67201891047e-07
components C-1428 3.28002742163e-07
components C-0577 3.17016715166e-07
components C-0217 2.33048239881e-07
components C-1956 2.27639437564e-07
components C-1768 2.20931635616e-07
components C-1808 2.20021084155e-07
bracketing_pattern (30, 60, 90, 120) 1.64480313901e-07
components C-1998 1.51853876196e-07
components C-0434 1.31564039263e-07
components C-0548 1.09294312076e-07
components C-1651 8.11810472781e-08
components C-0058 7.46247038508e-08
components C-1725 5.88220384433e-08
components C-1976 4.30401428608e-08
components C-1183 2.88954807013e-08
components C-0844 1.19714040755e-08
components C-0544 3.39478619444e-09
components C-0699 2.4604254084e-09
components C-0855 1.15644465546e-09
components C-0057 6.6522980638e-11
components C-1541 0.0
components C-2017 0.0

In [41]:
print_feature_importances(X_train_feats, reg.estimators_[0])


adj_quantity 0.383636015766
diameter 0.11265538548
quantity 0.102695954927
annual_usage 0.0695016293123
min_order_quantity 0.0562030720638
length 0.0381054124711
components other 0.0315695194441
quote_date_days_since_1900 0.0254373647876
end_a EF-003 0.0230288972112
supplier S-0054 0.0135606124729
bend_radius 0.0120611480668
num_bends 0.00732055109556
wall_thickness 0.00693447963585
bracketing (1, 3, 5, 7, 9) 0.00655966182841
supplier S-0026 0.00653500089742
supplier S-0041 0.00649436623324
end_x EF-003 0.00597924355396
supplier S-0066 0.00425901069624
bracketing other 0.00362096411591
supplier S-0072 0.00351878035727
end_a_2x 0.00322958809483
bracketing (1, 6, 20) 0.00272968076017
supplier S-0064 0.00271985531768
components C-1727 0.00266456998671
specs SP-0061 0.00201049974401
end_x EF-023 0.0019811121209
end_x_2x 0.0018908052284
bracketing (1, 2, 3, 4, 5) 0.00179992430602
components C-1629 0.00175922658999
bracketing (5, 19, 20) 0.00175655502861
components C-1445 0.00169516258566
material_id SP-0038 0.00163143076106
components C-1620 0.00155105265457
components C-1628 0.00151325950589
end_x_1x 0.00149259819424
supplier S-0058 0.00148633126539
specs SP-0080 0.00141176718609
bracketing (1, 2, 5, 10, 25, 50, 100, 250) 0.00138330011763
components C-1631 0.00135927604582
components C-1625 0.00117895468146
components C-1630 0.00117411707906
num_boss 0.00117243981416
material_id SP-0008 0.00116095880197
components C-0494 0.00114248312637
components C-1632 0.00109589686566
specs SP-0013 0.00108569527849
material_id SP-0029 0.00103764825364
components C-1621 0.00102050301895
material_id SP-0035 0.000811347907018
material_id SP-0028 0.000750775056743
end_x EF-009 0.000716770235415
supplier other 0.000708245282395
num_other 0.000677569478728
end_x NONE 0.000670026892566
supplier S-0070 0.000667505136866
end_x EF-017 0.000632267774812
components C-0388 0.0005921997119
components C-0211 0.000571766546675
bracketing (3, 6, 9, 12) 0.000548738341382
specs SP-0004 0.000547858607559
components C-0318 0.000532190637419
bracket_pricing 0.000524308375894
components C-1374 0.000521688947929
components C-1420 0.000516932776341
specs SP-0026 0.000507378840347
components C-1348 0.000505770085087
end_a_1x 0.00049439351786
components C-1642 0.000488114677672
components C-1624 0.000482120119519
components C-1627 0.000473911779081
end_a EF-012 0.000465729893854
specs SP-0012 0.000465232397152
bracketing (5, 10, 15, 20) 0.000463203262157
material_id nan 0.000441544155973
end_x EF-015 0.000429855492134
specs SP-0024 0.000428366074738
material_id SP-0030 0.000413055334099
bracketing (1, 2) 0.000405244200577
end_x EF-018 0.00038643375626
specs SP-0070 0.000369730895964
components C-0214 0.000369159864048
components C-1622 0.000342248013436
components C-1728 0.000339032633253
components C-2005 0.000336874329654
components C-1206 0.000334466267271
end_x EF-010 0.000331669412969
supplier S-0013 0.000329021057539
components C-0007 0.000328949646538
specs SP-0058 0.000319889437619
components C-1633 0.000317508709672
components C-1538 0.000300508463423
components C-1344 0.000298549046394
supplier S-0081 0.000291926723492
components C-1623 0.000291712701129
supplier S-0027 0.000284070729856
specs SP-0069 0.000275461405875
components C-0444 0.000270851172738
specs SP-0082 0.000266782158924
components C-1244 0.000259197043884
components C-1715 0.000246427408757
components C-1994 0.000233236817211
adj_bracketing 0.000230158677171
end_a EF-018 0.000227300393389
components C-1647 0.000222678761935
specs SP-0063 0.000219077848609
components C-1845 0.0002166661071
end_a NONE 0.000211561268169
bracketing (10, 15, 25) 0.000209025800261
components C-1476 0.000204997500582
components C-1761 0.00020241719235
components C-1312 0.000197951714539
end_a EF-017 0.000197072988464
components C-1641 0.000190618686701
specs SP-0007 0.000188878614156
components C-0448 0.000181812230456
components C-0218 0.000181782237602
specs SP-0072 0.000180886449167
bracketing (5, 10, 15) 0.000175899339024
components C-0699 0.000157119091731
material_id SP-0037 0.000151328909491
components C-1988 0.000148460742909
components C-1637 0.000143957561893
components C-0449 0.000138582283306
supplier S-0062 0.000138044091798
components C-1644 0.00013529158223
components C-0199 0.000135042254606
components C-2028 0.000133854275112
bracketing (4, 8, 12, 16) 0.000131422757824
supplier S-0104 0.000116556269951
components C-1332 0.000111880235614
components C-1578 0.000111663633782
components C-2004 0.000110424940103
components C-1714 0.000107088456679
end_a EF-023 0.000106034166815
specs SP-0002 0.000104916105956
components C-1758 0.00010472121398
specs SP-0068 0.000102611421903
components C-1638 0.000101823221063
components C-1649 0.000100476611072
components C-0562 9.92544215329e-05
components C-1639 9.69995245315e-05
components C-0001 9.59954712466e-05
specs SP-0022 9.34276477631e-05
components C-1643 9.18138295022e-05
components C-1475 9.08724576333e-05
components C-1243 8.784350496e-05
components C-1385 8.75250637801e-05
components C-1645 8.63750320936e-05
components C-1846 8.48626085218e-05
components C-1229 8.18437632439e-05
components C-0215 7.78873782512e-05
bracketing (2, 4, 6, 8) 7.64379159596e-05
components C-1640 6.89054470563e-05
bracketing (1, 2, 3, 4) 6.77419289155e-05
components C-1652 6.74438869018e-05
supplier S-0030 6.63294594847e-05
bracketing (1, 1) 6.62146954212e-05
components C-2030 6.5977869423e-05
num_bracket 6.23285184964e-05
components C-0401 6.01990487372e-05
components C-1386 5.90394717556e-05
bracketing (5, 20) 5.86368175499e-05
components C-0369 5.77981712087e-05
components C-0209 5.76716838244e-05
components C-2008 5.7364530528e-05
components C-0228 5.71560273149e-05
components C-0823 5.62869206668e-05
components C-0210 5.22349887708e-05
bracketing (1, 2, 3, 5, 10, 20) 5.12315032196e-05
components C-1421 5.10993528026e-05
components C-1654 5.05708755733e-05
components C-1910 4.98112136935e-05
components C-0616 4.90234617458e-05
components C-1428 4.82279513977e-05
specs SP-0067 4.62225000829e-05
components C-0002 4.59296154156e-05
end_a EF-009 4.57196762653e-05
specs other 4.54241477038e-05
material_id SP-0019 4.29684988937e-05
material_id SP-0033 4.26299145413e-05
material_id SP-0048 4.17607772931e-05
components C-0703 4.16021438175e-05
components C-1209 4.15561108514e-05
components C-0826 4.0761928162e-05
components C-0751 3.97016634044e-05
components C-1411 3.96028499753e-05
components C-1653 3.94804611359e-05
components C-1661 3.82966975231e-05
components C-0674 3.79040818256e-05
components C-1743 3.73130733148e-05
end_x EF-008 3.60616827439e-05
supplier S-0014 3.53314339139e-05
components C-1660 3.42220208744e-05
specs SP-0017 3.40170770736e-05
components C-0208 3.35926198656e-05
components C-1739 3.13372266334e-05
components C-2032 2.98302041225e-05
specs SP-0071 2.98082497328e-05
components C-0389 2.97119256948e-05
supplier S-0042 2.89996188934e-05
components C-1936 2.86199437474e-05
end_a EF-005 2.74271490009e-05
bracketing (1, 2, 5, 10, 25, 50, 100) 2.72839215646e-05
end_x EF-012 2.71983164177e-05
bracketing (10, 20, 30, 40) 2.57954162805e-05
components C-1218 2.55690602847e-05
components C-1375 2.55666826596e-05
components C-1655 2.50971210066e-05
end_x other 2.50834403973e-05
components C-1646 2.42271107274e-05
bracketing (5, 10, 20) 2.38957548547e-05
end_a EF-002 2.38020288622e-05
components C-1547 2.35451379929e-05
bracketing (1, 20, 50) 2.33661768344e-05
components C-1711 2.32432683637e-05
specs SP-0029 2.28986789597e-05
components C-0579 2.16116956201e-05
components C-1821 2.12694962824e-05
bracketing (1, 2, 3, 5, 10) 2.09338083093e-05
components C-1369 2.04286477172e-05
bracketing (10, 20, 30) 2.04147350776e-05
components C-1354 2.00438288676e-05
bracketing (2, 3, 4, 5) 1.90384844659e-05
components C-1848 1.72630540276e-05
components C-1648 1.71875318313e-05
components C-1349 1.70841117094e-05
components C-1841 1.70611766983e-05
components C-0599 1.69053358801e-05
end_a EF-008 1.6396332103e-05
components C-1860 1.6190525044e-05
specs SP-0062 1.59255513044e-05
material_id SP-0039 1.56349077215e-05
bracketing (2, 4, 6, 8, 10) 1.53668312728e-05
components C-1425 1.53451528214e-05
components C-1345 1.53310210595e-05
components C-1718 1.49635114696e-05
end_a other 1.45764598148e-05
supplier S-0105 1.43780989265e-05
supplier S-0031 1.35059395344e-05
components C-1373 1.30821925073e-05
bracketing (1, 2, 5, 10, 25, 50) 1.26868813621e-05
components C-1352 1.22936513956e-05
components C-1355 1.19354887843e-05
specs SP-0088 1.19077441236e-05
components C-1817 1.18295832541e-05
components C-0071 1.16808312004e-05
components C-1907 1.00175013924e-05
end_a EF-015 9.0446731003e-06
components C-2026 8.9964923745e-06
components C-1889 8.92583580494e-06
components C-0520 8.63346069474e-06
material_id SP-0046 8.52064383904e-06
components C-0133 8.32951752599e-06
components C-1635 7.98100532983e-06
components C-1555 7.6699999422e-06
components C-1435 7.46719595893e-06
components C-0004 7.4368328476e-06
specs SP-0075 7.29707174103e-06
components C-1808 6.68963056967e-06
components C-1909 6.58630733314e-06
components C-1502 6.44834882603e-06
components C-0445 6.2963898138e-06
components C-1577 6.14850242092e-06
components C-1716 6.1108696049e-06
components C-1781 6.07760955754e-06
components C-1343 6.0621257779e-06
components C-1200 5.60838120741e-06
components C-1619 5.3414653536e-06
components C-1203 5.33771728596e-06
components C-0120 5.08578872808e-06
specs SP-0016 5.00750718903e-06
bracketing (1, 2, 3, 4, 6) 4.81395754809e-06
components C-0051 4.70336003006e-06
end_a EF-019 4.69028078163e-06
end_x EF-021 4.61472574765e-06
components C-2002 4.52927159263e-06
components C-0580 4.37367008084e-06
components C-2006 4.37249878699e-06
components C-0165 4.18563490484e-06
components C-1998 4.1552887729e-06
components C-1359 3.94586583282e-06
bracketing (1, 3, 5, 10, 20, 30) 3.88733815672e-06
components C-0539 3.34396154182e-06
components C-1205 3.30239416478e-06
components C-1651 2.98401014072e-06
components C-0003 2.72275087761e-06
components C-1995 2.30832995648e-06
components C-1885 2.30266879945e-06
components C-2029 2.1909166846e-06
end_a EF-021 1.99546862239e-06
material_id SP-0036 1.99144350476e-06
components C-1866 1.76173124487e-06
components C-0052 1.56267948326e-06
components C-1906 1.47574336792e-06
components C-1880 1.27404592074e-06
specs SP-0021 1.2637868979e-06
specs SP-0050 1.14342565426e-06
end_a EF-016 1.13547056713e-06
end_x EF-006 1.13302762778e-06
components C-2001 1.13053793398e-06
components C-1658 1.05280596547e-06
end_x EF-005 9.7616460776e-07
components C-1439 9.30722729541e-07
bracketing (10, 25, 40, 55, 70) 9.26520744767e-07
components C-1908 9.01901089776e-07
components C-1745 8.31579829721e-07
components C-0244 7.66989407518e-07
specs SP-0079 7.39455301378e-07
components C-1313 7.09511113469e-07
components C-1430 6.58127795915e-07
end_x EF-002 6.48692433138e-07
components C-0544 5.48127738543e-07
specs SP-0009 5.45082839386e-07
components C-1230 2.99891004885e-07
bracketing (5, 10) 2.7844640635e-07
components C-0250 2.70879707541e-07
components C-2027 2.65268140311e-07
components C-0095 1.74853153489e-07
components C-1233 1.69612173499e-07
components C-0679 1.69026275083e-07
bracketing (1, 3, 5) 1.49497883727e-07
components C-1626 1.35488180424e-07
specs SP-0057 1.34866505179e-07
components C-1873 1.2875652428e-07
components C-1533 1.13859136542e-07
supplier S-0043 1.00844925296e-07
components C-1867 6.49978607155e-08
components C-0409 6.02359440954e-08
components C-0134 3.71968832268e-08
specs SP-0025 1.10235814372e-08
components C-2003 5.99359438464e-09
bracketing (4, 10) 2.56866652346e-10
supplier S-0005 0.0
supplier S-0080 0.0
supplier S-0092 0.0
material_id other 0.0
material_id SP-0041 0.0
material_id SP-0034 0.0
end_x EF-019 0.0
specs SP-0065 0.0
specs SP-0076 0.0
specs SP-0010 0.0
specs SP-0005 0.0
components C-0217 0.0
components C-1869 0.0
components C-1898 0.0
components C-0102 0.0
components C-0122 0.0
components C-1779 0.0
components C-0422 0.0
components C-0227 0.0
components C-0550 0.0
components C-1850 0.0
components C-0577 0.0
components C-1536 0.0
components C-1405 0.0
components C-1663 0.0
components C-1954 0.0
components C-1881 0.0
components C-0434 0.0
components C-1963 0.0
components C-1242 0.0
components C-1902 0.0
components C-1183 0.0
components C-1614 0.0
components C-1615 0.0
components C-1750 0.0
components C-0275 0.0
components C-1723 0.0
components C-0333 0.0
components C-1650 0.0
components C-1970 0.0
components C-1235 0.0
components C-1565 0.0
components C-1672 0.0
Out[41]:
[('adj_quantity', 0.38363601576561601),
 ('diameter', 0.11265538548010036),
 ('quantity', 0.10269595492660942),
 ('annual_usage', 0.06950162931234817),
 ('min_order_quantity', 0.056203072063815751),
 ('length', 0.03810541247114544),
 ('components other', 0.031569519444086327),
 ('quote_date_days_since_1900', 0.02543736478764887),
 ('end_a EF-003', 0.0230288972111805),
 ('supplier S-0054', 0.013560612472883808),
 ('bend_radius', 0.01206114806682235),
 ('num_bends', 0.0073205510955589494),
 ('wall_thickness', 0.0069344796358468477),
 ('bracketing (1, 3, 5, 7, 9)', 0.0065596618284128761),
 ('supplier S-0026', 0.0065350008974187574),
 ('supplier S-0041', 0.0064943662332401595),
 ('end_x EF-003', 0.0059792435539641866),
 ('supplier S-0066', 0.004259010696238426),
 ('bracketing other', 0.0036209641159054905),
 ('supplier S-0072', 0.0035187803572677184),
 ('end_a_2x', 0.0032295880948316454),
 ('bracketing (1, 6, 20)', 0.0027296807601701685),
 ('supplier S-0064', 0.0027198553176849168),
 ('components C-1727', 0.0026645699867110899),
 ('specs SP-0061', 0.0020104997440077992),
 ('end_x EF-023', 0.0019811121208955433),
 ('end_x_2x', 0.001890805228403195),
 ('bracketing (1, 2, 3, 4, 5)', 0.0017999243060214489),
 ('components C-1629', 0.0017592265899859208),
 ('bracketing (5, 19, 20)', 0.0017565550286144176),
 ('components C-1445', 0.0016951625856636871),
 ('material_id SP-0038', 0.0016314307610550136),
 ('components C-1620', 0.0015510526545683026),
 ('components C-1628', 0.0015132595058863689),
 ('end_x_1x', 0.001492598194244137),
 ('supplier S-0058', 0.0014863312653891935),
 ('specs SP-0080', 0.0014117671860884029),
 ('bracketing (1, 2, 5, 10, 25, 50, 100, 250)', 0.0013833001176273923),
 ('components C-1631', 0.001359276045822396),
 ('components C-1625', 0.0011789546814565536),
 ('components C-1630', 0.0011741170790557848),
 ('num_boss', 0.0011724398141590591),
 ('material_id SP-0008', 0.0011609588019658511),
 ('components C-0494', 0.0011424831263704492),
 ('components C-1632', 0.0010958968656606582),
 ('specs SP-0013', 0.001085695278486161),
 ('material_id SP-0029', 0.0010376482536365375),
 ('components C-1621', 0.0010205030189489029),
 ('material_id SP-0035', 0.00081134790701762965),
 ('material_id SP-0028', 0.0007507750567432701),
 ('end_x EF-009', 0.00071677023541510108),
 ('supplier other', 0.00070824528239519255),
 ('num_other', 0.00067756947872756284),
 ('end_x NONE', 0.00067002689256649803),
 ('supplier S-0070', 0.00066750513686611695),
 ('end_x EF-017', 0.00063226777481218495),
 ('components C-0388', 0.00059219971189985034),
 ('components C-0211', 0.00057176654667459787),
 ('bracketing (3, 6, 9, 12)', 0.0005487383413816377),
 ('specs SP-0004', 0.00054785860755876851),
 ('components C-0318', 0.00053219063741945325),
 ('bracket_pricing', 0.00052430837589377002),
 ('components C-1374', 0.00052168894792893128),
 ('components C-1420', 0.000516932776341325),
 ('specs SP-0026', 0.00050737884034682333),
 ('components C-1348', 0.00050577008508694888),
 ('end_a_1x', 0.00049439351786015209),
 ('components C-1642', 0.00048811467767163224),
 ('components C-1624', 0.00048212011951929669),
 ('components C-1627', 0.00047391177908093706),
 ('end_a EF-012', 0.00046572989385357396),
 ('specs SP-0012', 0.00046523239715237392),
 ('bracketing (5, 10, 15, 20)', 0.00046320326215673656),
 ('material_id nan', 0.00044154415597349192),
 ('end_x EF-015', 0.00042985549213350461),
 ('specs SP-0024', 0.00042836607473802416),
 ('material_id SP-0030', 0.00041305533409860599),
 ('bracketing (1, 2)', 0.00040524420057737694),
 ('end_x EF-018', 0.00038643375625969348),
 ('specs SP-0070', 0.00036973089596437401),
 ('components C-0214', 0.00036915986404806902),
 ('components C-1622', 0.00034224801343574787),
 ('components C-1728', 0.00033903263325254818),
 ('components C-2005', 0.00033687432965375593),
 ('components C-1206', 0.00033446626727052153),
 ('end_x EF-010', 0.00033166941296939044),
 ('supplier S-0013', 0.00032902105753861637),
 ('components C-0007', 0.00032894964653796746),
 ('specs SP-0058', 0.00031988943761883273),
 ('components C-1633', 0.00031750870967241983),
 ('components C-1538', 0.00030050846342315673),
 ('components C-1344', 0.00029854904639384824),
 ('supplier S-0081', 0.00029192672349205123),
 ('components C-1623', 0.00029171270112901765),
 ('supplier S-0027', 0.00028407072985561435),
 ('specs SP-0069', 0.00027546140587531557),
 ('components C-0444', 0.00027085117273753783),
 ('specs SP-0082', 0.000266782158924063),
 ('components C-1244', 0.00025919704388446287),
 ('components C-1715', 0.00024642740875693045),
 ('components C-1994', 0.00023323681721065486),
 ('adj_bracketing', 0.00023015867717103363),
 ('end_a EF-018', 0.00022730039338910861),
 ('components C-1647', 0.0002226787619345109),
 ('specs SP-0063', 0.00021907784860865947),
 ('components C-1845', 0.00021666610710039126),
 ('end_a NONE', 0.00021156126816906468),
 ('bracketing (10, 15, 25)', 0.00020902580026056791),
 ('components C-1476', 0.00020499750058238625),
 ('components C-1761', 0.00020241719235002841),
 ('components C-1312', 0.00019795171453905819),
 ('end_a EF-017', 0.00019707298846416597),
 ('components C-1641', 0.00019061868670103741),
 ('specs SP-0007', 0.00018887861415579739),
 ('components C-0448', 0.00018181223045589742),
 ('components C-0218', 0.00018178223760177958),
 ('specs SP-0072', 0.00018088644916745448),
 ('bracketing (5, 10, 15)', 0.00017589933902448471),
 ('components C-0699', 0.00015711909173146376),
 ('material_id SP-0037', 0.00015132890949106444),
 ('components C-1988', 0.00014846074290853441),
 ('components C-1637', 0.00014395756189296884),
 ('components C-0449', 0.00013858228330635821),
 ('supplier S-0062', 0.00013804409179775961),
 ('components C-1644', 0.0001352915822297109),
 ('components C-0199', 0.00013504225460629372),
 ('components C-2028', 0.00013385427511210623),
 ('bracketing (4, 8, 12, 16)', 0.0001314227578237464),
 ('supplier S-0104', 0.00011655626995126669),
 ('components C-1332', 0.00011188023561388369),
 ('components C-1578', 0.00011166363378198718),
 ('components C-2004', 0.00011042494010274742),
 ('components C-1714', 0.00010708845667857778),
 ('end_a EF-023', 0.00010603416681452122),
 ('specs SP-0002', 0.00010491610595568139),
 ('components C-1758', 0.0001047212139798022),
 ('specs SP-0068', 0.00010261142190343071),
 ('components C-1638', 0.00010182322106275377),
 ('components C-1649', 0.00010047661107218406),
 ('components C-0562', 9.9254421532945928e-05),
 ('components C-1639', 9.6999524531546938e-05),
 ('components C-0001', 9.5995471246596885e-05),
 ('specs SP-0022', 9.3427647763082348e-05),
 ('components C-1643', 9.1813829502182705e-05),
 ('components C-1475', 9.0872457633317397e-05),
 ('components C-1243', 8.7843504959966262e-05),
 ('components C-1385', 8.7525063780126633e-05),
 ('components C-1645', 8.6375032093622531e-05),
 ('components C-1846', 8.4862608521835462e-05),
 ('components C-1229', 8.1843763243920101e-05),
 ('components C-0215', 7.7887378251209605e-05),
 ('bracketing (2, 4, 6, 8)', 7.6437915959587718e-05),
 ('components C-1640', 6.8905447056347404e-05),
 ('bracketing (1, 2, 3, 4)', 6.7741928915451355e-05),
 ('components C-1652', 6.7443886901812663e-05),
 ('supplier S-0030', 6.6329459484680957e-05),
 ('bracketing (1, 1)', 6.6214695421222737e-05),
 ('components C-2030', 6.5977869423018316e-05),
 ('num_bracket', 6.2328518496429728e-05),
 ('components C-0401', 6.0199048737159793e-05),
 ('components C-1386', 5.9039471755595263e-05),
 ('bracketing (5, 20)', 5.863681754990062e-05),
 ('components C-0369', 5.7798171208651739e-05),
 ('components C-0209', 5.7671683824353138e-05),
 ('components C-2008', 5.7364530527953927e-05),
 ('components C-0228', 5.7156027314945498e-05),
 ('components C-0823', 5.6286920666834521e-05),
 ('components C-0210', 5.2234988770796883e-05),
 ('bracketing (1, 2, 3, 5, 10, 20)', 5.123150321964883e-05),
 ('components C-1421', 5.1099352802577456e-05),
 ('components C-1654', 5.0570875573319772e-05),
 ('components C-1910', 4.9811213693465316e-05),
 ('components C-0616', 4.9023461745767274e-05),
 ('components C-1428', 4.8227951397670014e-05),
 ('specs SP-0067', 4.6222500082881652e-05),
 ('components C-0002', 4.5929615415566347e-05),
 ('end_a EF-009', 4.5719676265270576e-05),
 ('specs other', 4.5424147703838558e-05),
 ('material_id SP-0019', 4.2968498893669546e-05),
 ('material_id SP-0033', 4.2629914541344416e-05),
 ('material_id SP-0048', 4.1760777293056824e-05),
 ('components C-0703', 4.1602143817510593e-05),
 ('components C-1209', 4.1556110851442574e-05),
 ('components C-0826', 4.0761928161967502e-05),
 ('components C-0751', 3.9701663404402049e-05),
 ('components C-1411', 3.9602849975349284e-05),
 ('components C-1653', 3.9480461135871229e-05),
 ('components C-1661', 3.8296697523131059e-05),
 ('components C-0674', 3.7904081825617037e-05),
 ('components C-1743', 3.7313073314827171e-05),
 ('end_x EF-008', 3.6061682743945512e-05),
 ('supplier S-0014', 3.5331433913911906e-05),
 ('components C-1660', 3.4222020874365969e-05),
 ('specs SP-0017', 3.4017077073567963e-05),
 ('components C-0208', 3.3592619865611666e-05),
 ('components C-1739', 3.1337226633445844e-05),
 ('components C-2032', 2.9830204122516084e-05),
 ('specs SP-0071', 2.9808249732775237e-05),
 ('components C-0389', 2.9711925694797658e-05),
 ('supplier S-0042', 2.8999618893412895e-05),
 ('components C-1936', 2.8619943747400447e-05),
 ('end_a EF-005', 2.7427149000859729e-05),
 ('bracketing (1, 2, 5, 10, 25, 50, 100)', 2.7283921564600774e-05),
 ('end_x EF-012', 2.7198316417671747e-05),
 ('bracketing (10, 20, 30, 40)', 2.5795416280484644e-05),
 ('components C-1218', 2.5569060284738155e-05),
 ('components C-1375', 2.5566682659585367e-05),
 ('components C-1655', 2.5097121006605537e-05),
 ('end_x other', 2.508344039725272e-05),
 ('components C-1646', 2.4227110727400143e-05),
 ('bracketing (5, 10, 20)', 2.3895754854671795e-05),
 ('end_a EF-002', 2.3802028862154762e-05),
 ('components C-1547', 2.3545137992938845e-05),
 ('bracketing (1, 20, 50)', 2.336617683436327e-05),
 ('components C-1711', 2.3243268363688979e-05),
 ('specs SP-0029', 2.2898678959665925e-05),
 ('components C-0579', 2.1611695620057748e-05),
 ('components C-1821', 2.1269496282435841e-05),
 ('bracketing (1, 2, 3, 5, 10)', 2.0933808309277324e-05),
 ('components C-1369', 2.0428647717195669e-05),
 ('bracketing (10, 20, 30)', 2.0414735077576392e-05),
 ('components C-1354', 2.0043828867604565e-05),
 ('bracketing (2, 3, 4, 5)', 1.9038484465894237e-05),
 ('components C-1848', 1.7263054027566445e-05),
 ('components C-1648', 1.7187531831338436e-05),
 ('components C-1349', 1.7084111709426927e-05),
 ('components C-1841', 1.7061176698274984e-05),
 ('components C-0599', 1.6905335880127749e-05),
 ('end_a EF-008', 1.6396332103048729e-05),
 ('components C-1860', 1.6190525043995357e-05),
 ('specs SP-0062', 1.592555130437708e-05),
 ('material_id SP-0039', 1.5634907721467473e-05),
 ('bracketing (2, 4, 6, 8, 10)', 1.5366831272796879e-05),
 ('components C-1425', 1.5345152821372032e-05),
 ('components C-1345', 1.5331021059451152e-05),
 ('components C-1718', 1.496351146964932e-05),
 ('end_a other', 1.4576459814811637e-05),
 ('supplier S-0105', 1.4378098926481268e-05),
 ('supplier S-0031', 1.3505939534414885e-05),
 ('components C-1373', 1.3082192507318994e-05),
 ('bracketing (1, 2, 5, 10, 25, 50)', 1.2686881362126799e-05),
 ('components C-1352', 1.2293651395585172e-05),
 ('components C-1355', 1.1935488784326575e-05),
 ('specs SP-0088', 1.190774412361568e-05),
 ('components C-1817', 1.1829583254057329e-05),
 ('components C-0071', 1.1680831200447311e-05),
 ('components C-1907', 1.0017501392399161e-05),
 ('end_a EF-015', 9.0446731003030746e-06),
 ('components C-2026', 8.9964923744964179e-06),
 ('components C-1889', 8.9258358049411519e-06),
 ('components C-0520', 8.6334606947359238e-06),
 ('material_id SP-0046', 8.5206438390382949e-06),
 ('components C-0133', 8.3295175259919886e-06),
 ('components C-1635', 7.9810053298269579e-06),
 ('components C-1555', 7.6699999422009563e-06),
 ('components C-1435', 7.4671959589332027e-06),
 ('components C-0004', 7.436832847595492e-06),
 ('specs SP-0075', 7.2970717410274319e-06),
 ('components C-1808', 6.6896305696710039e-06),
 ('components C-1909', 6.5863073331379632e-06),
 ('components C-1502', 6.4483488260255326e-06),
 ('components C-0445', 6.2963898137993625e-06),
 ('components C-1577', 6.1485024209218179e-06),
 ('components C-1716', 6.1108696048994063e-06),
 ('components C-1781', 6.077609557539631e-06),
 ('components C-1343', 6.0621257778970703e-06),
 ('components C-1200', 5.6083812074147587e-06),
 ('components C-1619', 5.3414653536029719e-06),
 ('components C-1203', 5.3377172859598218e-06),
 ('components C-0120', 5.0857887280759995e-06),
 ('specs SP-0016', 5.0075071890349423e-06),
 ('bracketing (1, 2, 3, 4, 6)', 4.8139575480933123e-06),
 ('components C-0051', 4.7033600300590812e-06),
 ('end_a EF-019', 4.6902807816288858e-06),
 ('end_x EF-021', 4.6147257476458062e-06),
 ('components C-2002', 4.5292715926343441e-06),
 ('components C-0580', 4.3736700808449129e-06),
 ('components C-2006', 4.372498786987314e-06),
 ('components C-0165', 4.185634904838714e-06),
 ('components C-1998', 4.1552887728990096e-06),
 ('components C-1359', 3.9458658328242837e-06),
 ('bracketing (1, 3, 5, 10, 20, 30)', 3.8873381567206547e-06),
 ('components C-0539', 3.3439615418217404e-06),
 ('components C-1205', 3.3023941647830721e-06),
 ('components C-1651', 2.9840101407192924e-06),
 ('components C-0003', 2.7227508776080257e-06),
 ('components C-1995', 2.3083299564772169e-06),
 ('components C-1885', 2.3026687994495322e-06),
 ('components C-2029', 2.1909166846021083e-06),
 ('end_a EF-021', 1.995468622385381e-06),
 ('material_id SP-0036', 1.9914435047555165e-06),
 ('components C-1866', 1.7617312448668904e-06),
 ('components C-0052', 1.5626794832614705e-06),
 ('components C-1906', 1.4757433679219562e-06),
 ('components C-1880', 1.2740459207425666e-06),
 ('specs SP-0021', 1.2637868979024802e-06),
 ('specs SP-0050', 1.1434256542606045e-06),
 ('end_a EF-016', 1.1354705671299537e-06),
 ('end_x EF-006', 1.1330276277826296e-06),
 ('components C-2001', 1.1305379339795579e-06),
 ('components C-1658', 1.0528059654701323e-06),
 ('end_x EF-005', 9.7616460775972684e-07),
 ('components C-1439', 9.3072272954114681e-07),
 ('bracketing (10, 25, 40, 55, 70)', 9.2652074476708618e-07),
 ('components C-1908', 9.0190108977616808e-07),
 ('components C-1745', 8.3157982972105035e-07),
 ('components C-0244', 7.6698940751803596e-07),
 ('specs SP-0079', 7.3945530137802046e-07),
 ('components C-1313', 7.0951111346868505e-07),
 ('components C-1430', 6.5812779591509811e-07),
 ('end_x EF-002', 6.4869243313769082e-07),
 ('components C-0544', 5.4812773854314667e-07),
 ('specs SP-0009', 5.4508283938590375e-07),
 ('components C-1230', 2.9989100488473757e-07),
 ('bracketing (5, 10)', 2.7844640635017077e-07),
 ('components C-0250', 2.7087970754067563e-07),
 ('components C-2027', 2.6526814031147468e-07),
 ('components C-0095', 1.7485315348910783e-07),
 ('components C-1233', 1.6961217349867145e-07),
 ('components C-0679', 1.6902627508296353e-07),
 ('bracketing (1, 3, 5)', 1.4949788372660477e-07),
 ('components C-1626', 1.3548818042421604e-07),
 ('specs SP-0057', 1.3486650517932968e-07),
 ('components C-1873', 1.2875652428046763e-07),
 ('components C-1533', 1.1385913654231195e-07),
 ('supplier S-0043', 1.008449252956597e-07),
 ('components C-1867', 6.4997860715485083e-08),
 ('components C-0409', 6.0235944095357083e-08),
 ('components C-0134', 3.7196883226819314e-08),
 ('specs SP-0025', 1.1023581437243009e-08),
 ('components C-2003', 5.9935943846444304e-09),
 ('bracketing (4, 10)', 2.5686665234648127e-10),
 ('supplier S-0005', 0.0),
 ('supplier S-0080', 0.0),
 ('supplier S-0092', 0.0),
 ('material_id other', 0.0),
 ('material_id SP-0041', 0.0),
 ('material_id SP-0034', 0.0),
 ('end_x EF-019', 0.0),
 ('specs SP-0065', 0.0),
 ('specs SP-0076', 0.0),
 ('specs SP-0010', 0.0),
 ('specs SP-0005', 0.0),
 ('components C-0217', 0.0),
 ('components C-1869', 0.0),
 ('components C-1898', 0.0),
 ('components C-0102', 0.0),
 ('components C-0122', 0.0),
 ('components C-1779', 0.0),
 ('components C-0422', 0.0),
 ('components C-0227', 0.0),
 ('components C-0550', 0.0),
 ('components C-1850', 0.0),
 ('components C-0577', 0.0),
 ('components C-1536', 0.0),
 ('components C-1405', 0.0),
 ('components C-1663', 0.0),
 ('components C-1954', 0.0),
 ('components C-1881', 0.0),
 ('components C-0434', 0.0),
 ('components C-1963', 0.0),
 ('components C-1242', 0.0),
 ('components C-1902', 0.0),
 ('components C-1183', 0.0),
 ('components C-1614', 0.0),
 ('components C-1615', 0.0),
 ('components C-1750', 0.0),
 ('components C-0275', 0.0),
 ('components C-1723', 0.0),
 ('components C-0333', 0.0),
 ('components C-1650', 0.0),
 ('components C-1970', 0.0),
 ('components C-1235', 0.0),
 ('components C-1565', 0.0),
 ('components C-1672', 0.0)]

In [42]:
dump_decision_tree("tree0.pdf", X_train_feats, reg.estimators_[0], max_depth=5)

In [64]:
y_train.hist(bins=100)
y_train.shape


Out[64]:
(27270,)

In [65]:
y_train_pred = reg.predict(X_train_np)
hist(y_train_pred, bins=100)
y_train_pred.shape


Out[65]:
(27270,)

In [66]:
y_test.hist(bins=100)
y_test.shape


Out[66]:
(2943,)

In [67]:
y_test_pred = reg.predict(X_test_np)
hist(y_test_pred, bins=100);
y_test_pred.shape


Out[67]:
(2943,)

In [122]:
df = X_test.copy()
df['log_cost'] = y_test
df['pred_log_cost'] = y_test_pred
df['err2'] = (df['log_cost'] - df['pred_log_cost']) ** 2
df[7:15]


Out[122]:
tube_assembly_id supplier quote_date annual_usage min_order_quantity bracket_pricing quantity material_id diameter wall_thickness length num_bends bend_radius end_a_1x end_a_2x end_x_1x end_x_2x end_a end_x num_boss num_bracket num_other specs components quote_age adj_quantity adj_bracketing bracketing_pattern log_cost pred_log_cost err2
7 TA-00093 S-0066 2013-06-01 0 0 True 1 SP-0039 6.35 0.71 132 4 19.05 False False False False EF-008 EF-008 0 0 0 [] [C-1312, C-1312] 41424 1 True (1, 2, 5, 10, 25, 50, 100, 250) 3.117823 3.117429 1.552754e-07
8 TA-00093 S-0066 2013-06-01 0 0 True 2 SP-0039 6.35 0.71 132 4 19.05 False False False False EF-008 EF-008 0 0 0 [] [C-1312, C-1312] 41424 2 True (1, 2, 5, 10, 25, 50, 100, 250) 2.567439 2.567596 2.467026e-08
9 TA-00093 S-0066 2013-06-01 0 0 True 5 SP-0039 6.35 0.71 132 4 19.05 False False False False EF-008 EF-008 0 0 0 [] [C-1312, C-1312] 41424 5 True (1, 2, 5, 10, 25, 50, 100, 250) 1.986917 1.984847 4.286038e-06
10 TA-00093 S-0066 2013-06-01 0 0 True 10 SP-0039 6.35 0.71 132 4 19.05 False False False False EF-008 EF-008 0 0 0 [] [C-1312, C-1312] 41424 10 True (1, 2, 5, 10, 25, 50, 100, 250) 1.682495 1.679511 8.904909e-06
11 TA-00093 S-0066 2013-06-01 0 0 True 25 SP-0039 6.35 0.71 132 4 19.05 False False False False EF-008 EF-008 0 0 0 [] [C-1312, C-1312] 41424 25 True (1, 2, 5, 10, 25, 50, 100, 250) 1.442853 1.439277 1.278186e-05
12 TA-00093 S-0066 2013-06-01 0 0 True 50 SP-0039 6.35 0.71 132 4 19.05 False False False False EF-008 EF-008 0 0 0 [] [C-1312, C-1312] 41424 50 True (1, 2, 5, 10, 25, 50, 100, 250) 1.364968 1.361431 1.250912e-05
13 TA-00093 S-0066 2013-06-01 0 0 True 100 SP-0039 6.35 0.71 132 4 19.05 False False False False EF-008 EF-008 0 0 0 [] [C-1312, C-1312] 41424 100 True (1, 2, 5, 10, 25, 50, 100, 250) 1.328059 1.322067 3.590655e-05
14 TA-00093 S-0066 2013-06-01 0 0 True 250 SP-0039 6.35 0.71 132 4 19.05 False False False False EF-008 EF-008 0 0 0 [] [C-1312, C-1312] 41424 250 True (1, 2, 5, 10, 25, 50, 100, 250) 1.305695 1.304968 5.279536e-07

In [43]:
taids = ['TA-00084', 'TA-00093', 'TA-04118', 'TA-04375', 'TA-20290']
pd.set_option('display.max_columns', None)
aug_train_set[aug_train_set.tube_assembly_id.isin(taids) & (aug_train_set.quantity == 1)]


Out[43]:
tube_assembly_id supplier quote_date annual_usage min_order_quantity bracket_pricing quantity log_cost material_id diameter wall_thickness length num_bends bend_radius end_a_1x end_a_2x end_x_1x end_x_2x end_a end_x num_boss num_bracket num_other specs components quote_age adj_quantity adj_bracketing bracketing_pattern dev_fold
160 TA-00084 S-0066 2013-08-11 0 0 True 1 3.117823 SP-0039 6.35 0.71 132 4 19.05 False False False False EF-008 EF-008 0 0 0 [] [C-1312, C-1312] 41495 1 True (1, 2, 5, 10, 25, 50, 100, 250) 5
209 TA-00093 S-0066 2013-06-01 0 0 True 1 3.117823 SP-0039 6.35 0.71 132 4 19.05 False False False False EF-008 EF-008 0 0 0 [] [C-1312, C-1312] 41424 1 True (1, 2, 5, 10, 25, 50, 100, 250) 0
7181 TA-04118 S-0066 2013-06-01 0 0 True 1 3.117823 SP-0028 9.52 0.89 56 3 19.05 False False False False EF-003 EF-003 0 0 0 [SP-0007, SP-0024, SP-0026, SP-0069, SP-0080, ... [C-1621, C-1621, C-1628, C-1628] 41424 1 True (1, 2, 5, 10, 25, 50, 100, 250) 1
7730 TA-04375 S-0066 2014-02-01 1 0 True 1 3.117823 SP-0029 19.05 2.11 130 4 50.80 False False False False NONE EF-003 0 0 0 [] [C-1624, C-1631] 41669 1 True (1, 2, 5, 10, 25, 50, 100) 6
27791 TA-20290 S-0066 2013-07-21 0 0 True 1 3.117823 SP-0019 6.35 0.71 116 6 19.05 False False False False EF-008 EF-008 0 0 0 [] [C-1312, C-1312] 41474 1 True (1, 2, 5, 10, 25, 50, 100, 250) 1

In [90]:
df.err2.hist(bins=100)


Out[90]:
<matplotlib.axes._subplots.AxesSubplot at 0xcf97710>

In [123]:
df['orig_index'] = df.index
df.sort('err2', ascending=False, inplace=True)
df.reset_index(drop=True, inplace=True)
df['cum_err2'] = df.err2.cumsum()
df['cum_err2_frac'] = df.cum_err2 / df.err2.sum()
df[:5]


Out[123]:
tube_assembly_id supplier quote_date annual_usage min_order_quantity bracket_pricing quantity material_id diameter wall_thickness length num_bends bend_radius end_a_1x end_a_2x end_x_1x end_x_2x end_a end_x num_boss num_bracket num_other specs components quote_age adj_quantity adj_bracketing bracketing_pattern log_cost pred_log_cost err2 orig_index cum_err2 cum_err2_frac
0 TA-12043 S-0058 2014-08-20 1 50 False 1 SP-0029 9.52 0.89 55 3 31.75 False False False False EF-003 EF-003 0 0 0 [SP-0007, SP-0012, SP-0024, SP-0026, SP-0080, ... [C-1621, C-1621, C-1628, C-1628] 41869 50 False () 3.964544 1.508582 6.031747 1836 6.031747 0.025769
1 TA-05245 S-0090 2005-02-23 0 1 True 1 SP-0035 19.05 1.24 25 1 44.45 False False False False EF-003 EF-017 0 0 0 [] [C-0443, C-1486] 38404 1 False () 0.909402 3.057529 4.614448 1024 10.646195 0.045484
2 TA-01630 S-0072 2013-04-30 5 1 True 1 SP-0035 12.70 1.65 66 4 38.10 False False False False EF-018 EF-018 0 0 0 [] [C-0448, C-0448, C-0449, C-0449] 41392 1 False () 0.800605 2.596843 3.226470 298 13.872665 0.059268
3 TA-20766 S-0066 2013-11-02 1 0 True 250 SP-0029 12.70 0.89 34 3 25.40 False False False True EF-017 EF-003 0 0 0 [] [C-1475, C-1476] 41578 250 True (1, 2, 5, 10, 25, 50, 100, 250) 2.979539 1.185228 3.219551 2853 17.092216 0.073023
4 TA-20766 S-0066 2013-11-02 1 0 True 100 SP-0029 12.70 0.89 34 3 25.40 False False False True EF-017 EF-003 0 0 0 [] [C-1475, C-1476] 41578 100 True (1, 2, 5, 10, 25, 50, 100, 250) 2.985178 1.193087 3.211590 2852 20.303806 0.086744

In [97]:
df.cum_err2_frac.plot()
df.cum_err2_frac[500]


Out[97]:
0.92429426948043092

In [99]:
500. / len(X_test)


Out[99]:
0.16989466530750935

In [104]:
leaves = reg.apply(X_test_np)
print leaves.shape
leaves[:3, :]


(2943, 20)
Out[104]:
array([[11852, 11175,  6808, 11443, 11766, 11186, 11463, 11363, 11621,
        11278, 12062, 11921, 11061, 11304, 11812, 11866, 11277, 10961,
        11139, 11297],
       [30515, 30383, 30573, 28863, 27159, 27289, 27696, 30797, 30418,
        28828, 28775, 30492, 30543, 30556, 27405, 30269, 27471, 27666,
        27768, 27291],
       [ 6300,  6467,  3794,  6338,  3901,  3763,  6504,  6492,  6256,
         6307,  6352,  6333,  6315,  6373,  6198,  6235,  6467,  6384,
         6206,  6165]])

In [111]:
tree0 = reg.estimators_[0]
tree0.tree_.value[11852]


Out[111]:
array([[ 2.61841793]])

In [128]:
# An example with a good prediction:
sorted_index = 2000
taid = df.tube_assembly_id[sorted_index]
orig_index = df.orig_index[sorted_index]
print taid, orig_index
X = X_test_np[orig_index, :]
df[sorted_index:sorted_index+1]


TA-02848 533
Out[128]:
tube_assembly_id supplier quote_date annual_usage min_order_quantity bracket_pricing quantity material_id diameter wall_thickness length num_bends bend_radius end_a_1x end_a_2x end_x_1x end_x_2x end_a end_x num_boss num_bracket num_other specs components quote_age adj_quantity adj_bracketing bracketing_pattern log_cost pred_log_cost err2 orig_index cum_err2 cum_err2_frac
2000 TA-02848 S-0066 2013-06-16 0 0 True 1 SP-0029 12.7 2.11 194 3 38.1 False False False False EF-003 EF-003 0 0 0 [] [C-1622, C-1622, C-1629, C-1629] 41439 1 True (1, 2, 5, 10, 25, 50, 100, 250) 3.185876 3.198435 0.000158 533 234.038015 0.999876

In [135]:
preds = [tree.predict([X])[0] for tree in reg.estimators_]
plt.hist(preds, bins=20)
print "true value:", df.log_cost[sorted_index]
print "predicted value: ", df.pred_log_cost[sorted_index]
print "mean prediction:", np.mean(preds)
print "std prediction:", np.std(preds)
print "median prediction:", np.median(preds)
list(sorted(preds))


true value: 3.18587575272
predicted value:  3.19843466946
mean prediction: 3.19843466946
std prediction: 0.0525703175091
median prediction: 3.19090993474
Out[135]:
[3.1402231616168312,
 3.1669501882068678,
 3.1669501882068678,
 3.1697593609672392,
 3.1730267862956474,
 3.1730267862956474,
 3.1730267862956474,
 3.1730267862956474,
 3.1730655913324717,
 3.1732015107511389,
 3.208618358723752,
 3.208618358723752,
 3.208618358723752,
 3.208618358723752,
 3.208618358723752,
 3.208618358723752,
 3.208618358723752,
 3.208618358723752,
 3.208618358723752,
 3.4088710144764689]

In [199]:
# An example with a bad prediction:
sorted_index = 3
taid = df.tube_assembly_id[sorted_index]
orig_index = df.orig_index[sorted_index]
print taid, orig_index
X = X_test_np[orig_index, :]
df[sorted_index:sorted_index+1]


TA-20766 2853
Out[199]:
tube_assembly_id supplier quote_date annual_usage min_order_quantity bracket_pricing quantity material_id diameter wall_thickness length num_bends bend_radius end_a_1x end_a_2x end_x_1x end_x_2x end_a end_x num_boss num_bracket num_other specs components quote_age adj_quantity adj_bracketing bracketing_pattern log_cost pred_log_cost err2 orig_index cum_err2 cum_err2_frac
3 TA-20766 S-0066 2013-11-02 1 0 True 250 SP-0029 12.7 0.89 34 3 25.4 False False False True EF-017 EF-003 0 0 0 [] [C-1475, C-1476] 41578 250 True (1, 2, 5, 10, 25, 50, 100, 250) 2.979539 1.185228 3.219551 2853 17.092216 0.073023

In [200]:
preds = [tree.predict([X])[0] for tree in reg.estimators_]
plt.hist(preds, bins=20)
print "true value:", df.log_cost[sorted_index]
print "predicted value: ", df.pred_log_cost[sorted_index]
print "mean prediction:", np.mean(preds)
print "std prediction:", np.std(preds)
print "median prediction:", np.median(preds)
list(sorted(preds))


true value: 2.97953909449
predicted value:  1.18522842511
mean prediction: 1.18522842511
std prediction: 0.168485067327
median prediction: 1.15848801051
Out[200]:
[1.0085303733153852,
 1.0256343351377077,
 1.0325929872389481,
 1.0325929872389481,
 1.0325929872389481,
 1.0325929872389481,
 1.040486887399783,
 1.040486887399783,
 1.1152653617836521,
 1.1152653617836521,
 1.2017106592374496,
 1.2017106592374496,
 1.2657028961062973,
 1.2882028483323655,
 1.2882028483323655,
 1.3019179132978025,
 1.3079536358293984,
 1.3243664520578047,
 1.3513764578455651,
 1.6973829760988586]

In [165]:
# Error by supplier:
tmp = df[['supplier', 'err2']]
supplier_errs = {}
for supplier, indices in tmp.groupby('supplier').groups.iteritems():
    supplier_errs[supplier] = tmp.err2[indices].values

tmp2 = pd.DataFrame(tmp.supplier.value_counts(), columns=['test_count'])
tmp2['train_count'] = aug_train_set.supplier.value_counts()
tmp2['avg_err'] = [np.mean(supplier_errs[s]) for s in tmp2.index]
tmp2['std_err'] = [np.std(supplier_errs[s]) for s in tmp2.index]

tmp2


Out[165]:
test_count train_count avg_err std_err
S-0066 1963 20553 0.052389 0.231572
S-0041 331 3323 0.035265 0.100797
S-0072 235 2317 0.103101 0.281039
S-0054 89 838 0.186193 0.540001
S-0026 66 727 0.327643 0.423543
S-0013 56 554 0.116684 0.236197
S-0058 52 516 0.276922 0.905321
S-0064 49 446 0.191596 0.323623
S-0062 23 264 0.200803 0.210129
S-0014 16 139 0.107703 0.121820
S-0030 14 111 0.321913 0.377729
S-0104 9 90 0.058832 0.094781
S-0081 8 64 0.282328 0.285143
S-0105 4 33 0.161739 0.208492
S-0005 3 24 0.154184 0.126919
S-0027 3 20 0.257985 0.364597
S-0090 3 3 1.580293 2.145723
S-0042 3 19 0.820279 1.081185
S-0031 3 21 0.087334 0.089615
S-0018 2 10 0.011786 0.003316
S-0070 2 18 0.031476 0.030841
S-0043 2 14 0.100296 0.055897
S-0060 1 3 0.020633 0.000000
S-0108 1 1 0.827935 0.000000
S-0092 1 10 1.022800 0.000000
S-0059 1 4 0.040013 0.000000
S-0111 1 2 0.221840 0.000000
S-0009 1 4 1.404222 0.000000
S-0107 1 2 0.000239 0.000000

In [163]:
tmp2.avg_err.plot()


Out[163]:
<matplotlib.axes._subplots.AxesSubplot at 0xed5a5d0>

In [189]:
# Error by bracketing_pattern:
tmp = df[['bracketing_pattern', 'err2']]
supplier_errs = {}
for supplier, indices in tmp.groupby('bracketing_pattern').groups.iteritems():
    supplier_errs[supplier] = tmp.err2[indices].values

tmp2 = pd.DataFrame(tmp.bracketing_pattern.value_counts(), columns=['test_count'])
tmp2['train_count'] = aug_train_set.bracketing_pattern.value_counts()
tmp2['avg_err'] = [np.mean(supplier_errs[s]) for s in tmp2.index]
tmp2['std_err'] = [np.std(supplier_errs[s]) for s in tmp2.index]

tmp2


Out[189]:
test_count train_count avg_err std_err
(1, 2, 5, 10, 25, 50, 100, 250) 1648 17640 0.022776 0.174391
() 493 4742 0.228562 0.511073
(1, 6, 20) 210 2022 0.019721 0.079115
(1, 2, 3, 5, 10, 20) 54 516 0.029540 0.046008
(1, 2, 5, 10, 25, 50) 30 186 0.006778 0.017881
(1, 3, 5, 7, 9) 30 175 0.099165 0.152343
(5, 19, 20) 30 330 0.000445 0.001012
(1, 2, 3, 4) 20 120 0.104339 0.173503
(5, 10, 15, 20) 20 136 0.072327 0.101721
(1, 3, 5, 10, 20, 30) 18 60 0.256981 0.387375
(8, 16, 24, 32) 16 28 0.013001 0.016008
(1, 2, 5, 10, 25, 50, 100) 14 497 0.000206 0.000215
(4, 10) 10 36 0.017218 0.014437
(1, 5) 10 38 0.085447 0.090503
(2, 5, 10, 15, 20) 10 10 0.553939 0.400946
(1, 2, 3, 5, 10) 10 100 0.012244 0.014006
(10, 15, 25) 9 63 0.016103 0.022121
(10, 20, 30, 40) 8 44 0.048567 0.052577
(4, 8, 12, 16) 8 84 0.021947 0.020074
(15, 30, 50, 60, 100, 200, 300, 400) 8 8 0.157400 0.127607
(4, 6, 8, 12) 8 12 0.006613 0.007463
(2, 4, 10, 20, 30, 40, 60, 120) 8 8 0.020654 0.014236
(2, 3, 4, 5) 8 108 0.045536 0.029376
(1, 2) 8 32 0.124233 0.199902
(2, 4, 6, 8) 8 140 0.087071 0.065799
(1, 2, 3, 5, 10, 20, 50) 7 63 1.903072 0.671554
(2, 3, 5, 10, 20, 50, 100) 7 14 0.083622 0.069043
(5, 20) 6 40 0.425893 0.884674
(1, 3, 5, 10, 15, 20) 6 6 0.236029 0.314758
(1, 3, 5) 6 39 0.775450 0.814073
... ... ... ... ...
(24, 36, 48) 3 6 0.452870 0.098456
(1, 20, 50) 3 51 0.062258 0.045777
(5, 8, 25) 3 3 0.111653 0.064003
(2, 5, 10) 3 3 0.733939 0.356436
(10, 15, 20) 3 27 0.006270 0.008671
(150, 200, 250) 3 6 0.018716 0.010475
(10, 25, 50) 3 6 0.092934 0.128661
(12, 34) 2 2 0.014541 0.002379
(4, 20) 2 8 0.201888 0.183252
(50, 50) 2 14 0.055851 0.019788
(5, 5) 2 2 0.155732 0.022023
(2, 10) 2 4 0.062378 0.035518
(50, 100) 2 2 0.063215 0.040215
(1, 46) 2 2 0.140241 0.020219
(1, 236) 2 2 0.130659 0.071168
(10, 100) 2 2 0.000176 0.000175
(1, 7) 2 8 0.010982 0.001652
(1, 18) 2 4 0.202517 0.091270
(2, 12) 2 2 0.020890 0.012808
(75, 100) 2 2 0.059882 0.046640
(1, 33) 2 4 0.095490 0.050908
(1, 16) 2 2 0.137253 0.012813
(1, 70) 2 4 0.076254 0.076161
(25, 205) 2 2 0.127125 0.057401
(5, 10) 2 30 0.020345 0.003985
(2, 50) 2 2 0.545023 0.478044
(2, 20) 2 4 0.018351 0.006631
(1, 47) 2 4 0.058113 0.009561
(1, 30) 2 18 0.251660 0.156888
(1, 6) 2 20 0.072902 0.025474

94 rows × 4 columns


In [195]:
xs = np.arange(len(tmp2))
ys = tmp2.avg_err.values
ybars = tmp2.std_err.values
plt.errorbar(xs, ys, yerr=ybars)
plt.ylim(-1, 1)
plt.xlim(-1, 10)


Out[195]:
(-1, 10)

In [198]:
brapa = (1, 2, 5, 10, 25, 50, 100, 250)
hist(supplier_errs[brapa], bins=100, log=True);



In [ ]:


In [181]:
X_actual_test_feats = featurizer.transform(aug_test_set)

In [226]:
col_name = 'component_types CP-015'
print "train:"
print X_train_feats[col_name].value_counts()
print X_train_feats[col_name].value_counts(normalize=True)
print
print "test:"
print X_test_feats[col_name].value_counts()
print X_test_feats[col_name].value_counts(normalize=True)
print
print "actual test:"
print X_actual_test_feats[col_name].value_counts()
print X_actual_test_feats[col_name].value_counts(normalize=True)


train:
0    26355
1      856
2       59
dtype: int64
0    0.966447
1    0.031390
2    0.002164
dtype: float64

test:
0    2884
1      59
dtype: int64
0    0.979952
1    0.020048
dtype: float64

actual test:
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-226-d1347d4a8df4> in <module>()
      9 print
     10 print "actual test:"
---> 11 print X_actual_test_feats[col_name].value_counts()
     12 print X_actual_test_feats[col_name].value_counts(normalize=True)

/home/cberzan/src/virtualenvs/kaggle/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
   1795             return self._getitem_multilevel(key)
   1796         else:
-> 1797             return self._getitem_column(key)
   1798 
   1799     def _getitem_column(self, key):

/home/cberzan/src/virtualenvs/kaggle/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_column(self, key)
   1802         # get column
   1803         if self.columns.is_unique:
-> 1804             return self._get_item_cache(key)
   1805 
   1806         # duplicate columns & possible reduce dimensionaility

/home/cberzan/src/virtualenvs/kaggle/lib/python2.7/site-packages/pandas/core/generic.pyc in _get_item_cache(self, item)
   1082         res = cache.get(item)
   1083         if res is None:
-> 1084             values = self._data.get(item)
   1085             res = self._box_item_values(item, values)
   1086             cache[item] = res

/home/cberzan/src/virtualenvs/kaggle/lib/python2.7/site-packages/pandas/core/internals.pyc in get(self, item, fastpath)
   2849 
   2850             if not isnull(item):
-> 2851                 loc = self.items.get_loc(item)
   2852             else:
   2853                 indexer = np.arange(len(self.items))[isnull(self.items)]

/home/cberzan/src/virtualenvs/kaggle/lib/python2.7/site-packages/pandas/core/index.pyc in get_loc(self, key, method)
   1570         """
   1571         if method is None:
-> 1572             return self._engine.get_loc(_values_from_object(key))
   1573 
   1574         indexer = self.get_indexer([key], method=method)

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3824)()

pandas/index.pyx in pandas.index.IndexEngine.get_loc (pandas/index.c:3704)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12280)()

pandas/hashtable.pyx in pandas.hashtable.PyObjectHashTable.get_item (pandas/hashtable.c:12231)()

KeyError: 'component_types CP-015'

In [ ]: