In [1]:
%pylab inline

from collections import Counter
from collections import defaultdict
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error
import pandas as pd

from soln.dataset import AllCategoricalsFeaturizer
from soln.dataset import generate_xv_splits
from soln.dataset import get_augmented_train_and_test_set
from soln.dataset import inverse_log_transform_y
from soln.dataset import log_transform_y
from soln.bracket import brapa
from soln.bracket import fc_vals
from soln.bracket import generate_bracket_csv
from soln.utils import eval_regressor
from soln.utils import print_brackets
from soln.utils import print_feature_importances


Populating the interactive namespace from numpy and matplotlib

In [3]:
# Build the two augmented frames. aug_train_set feeds the bracket CSV and the
# cross-validation splits in the cells below.
# NOTE(review): presumably this loads and joins the raw competition tables --
# confirm against soln.dataset.get_augmented_train_and_test_set.
%time aug_train_set, aug_test_set = get_augmented_train_and_test_set()


CPU times: user 6.48 s, sys: 100 ms, total: 6.58 s
Wall time: 6.66 s

In [4]:
# Derive the per-tube bracket table from the augmented training set.
# NOTE(review): presumably this writes 'bracket.csv' to the working directory
# (the next cell reads a file of that name) -- confirm against soln.bracket.
%time generate_bracket_csv(aug_train_set)


CPU times: user 6.43 s, sys: 16 ms, total: 6.44 s
Wall time: 6.47 s

In [6]:
bracket = pd.read_csv('bracket.csv')
print bracket.shape
bracket[:5]


(2205, 4)
Out[6]:
tube_assembly_id fixed_cost_class fixed_cost var_cost
0 TA-18908 1 19.043385 2.764790
1 TA-18906 1 19.043385 2.868769
2 TA-18907 1 19.043385 3.430745
3 TA-18902 1 19.043385 1.759423
4 TA-18903 1 19.043385 1.825497

In [60]:
# Check that if we know the true fixed_cost and var_cost,
# we can recover the total cost with tiny error.
print aug_train_set.shape
df = aug_train_set[aug_train_set.bracketing_pattern == brapa]
print df.bracketing_pattern.value_counts()
print df.shape
df = df.merge(bracket, on='tube_assembly_id')
print df.shape
df['pred_cost'] = df['fixed_cost'] / df['adj_quantity'] + df['var_cost']
df['pred_log_cost'] = log_transform_y(df['pred_cost'])
print np.sqrt(mean_squared_error(df.log_cost, df.pred_log_cost))
df['err2'] = (df.log_cost - df.pred_log_cost) ** 2
print df.err2.describe()
df.sort('err2', ascending=False, inplace=True)
df[:10]


(30213, 30)
(1, 2, 5, 10, 25, 50, 100, 250)    17640
dtype: int64
(17640, 30)
(17640, 33)
0.0103660178673
count    1.764000e+04
mean     1.074543e-04
std      1.710715e-04
min      4.519863e-09
25%      1.856582e-06
50%      3.797760e-05
75%      1.439752e-04
max      1.383129e-03
Name: err2, dtype: float64
Out[60]:
tube_assembly_id supplier quote_date annual_usage min_order_quantity bracket_pricing quantity log_cost material_id diameter ... adj_quantity adj_bracketing bracketing_pattern dev_fold fixed_cost_class fixed_cost var_cost pred_cost pred_log_cost err2
16167 TA-20557 S-0066 2013-12-02 1 0 True 250 0.864718 SP-0029 19.05 ... 250 True (1, 2, 5, 10, 25, 50, 100, 250) 8 0 15.704975 1.224836 1.287656 0.827528 0.001383
16351 TA-20621 S-0066 2013-12-02 1 0 True 250 0.849369 SP-0029 19.05 ... 250 True (1, 2, 5, 10, 25, 50, 100, 250) 4 0 15.704975 1.190408 1.253228 0.812364 0.001369
759 TA-00688 S-0066 2013-07-28 0 0 True 250 0.898134 SP-0028 6.35 ... 250 True (1, 2, 5, 10, 25, 50, 100, 250) 8 1 19.043385 1.291690 1.367863 0.861988 0.001307
15319 TA-20246 S-0066 2013-08-01 0 0 True 250 0.884442 SP-0039 9.52 ... 250 True (1, 2, 5, 10, 25, 50, 100, 250) 6 1 19.043385 1.260739 1.336913 0.848831 0.001268
13071 TA-18947 S-0066 2013-04-14 0 0 True 250 0.917215 SP-0039 12.70 ... 250 True (1, 2, 5, 10, 25, 50, 100, 250) 4 1 19.043385 1.338985 1.415158 0.881765 0.001257
17343 TA-20992 S-0066 2013-09-01 0 0 True 250 0.921652 SP-0019 4.76 ... 250 True (1, 2, 5, 10, 25, 50, 100, 250) 6 1 19.043385 1.350113 1.426286 0.886362 0.001245
17607 TA-21085 S-0066 2013-08-04 0 0 True 250 0.901528 SP-0029 6.35 ... 250 True (1, 2, 5, 10, 25, 50, 100, 250) 0 1 19.043385 1.301775 1.377948 0.866238 0.001245
16175 TA-20558 S-0066 2013-12-02 1 0 True 250 0.903784 SP-0029 19.05 ... 250 True (1, 2, 5, 10, 25, 50, 100, 250) 2 0 15.704975 1.321165 1.383985 0.868774 0.001226
9407 TA-08661 S-0066 2013-12-02 1 0 True 250 0.906035 SP-0029 22.22 ... 250 True (1, 2, 5, 10, 25, 50, 100, 250) 7 0 15.704975 1.327077 1.389897 0.871250 0.001210
17567 TA-21056 S-0066 2013-06-30 0 0 True 250 0.935939 SP-0019 6.35 ... 250 True (1, 2, 5, 10, 25, 50, 100, 250) 7 1 19.043385 1.386280 1.462453 0.901158 0.001210

10 rows × 36 columns


In [39]:
# Get train and test set only for the well-behaved bracket.

X_train, y_train, X_test, y_test = next(generate_xv_splits(aug_train_set))
X_train['log_cost'] = y_train
X_test['log_cost'] = y_test
print X_train.shape, X_test.shape

X_train = X_train[X_train.bracketing_pattern == brapa]
X_test = X_test[X_test.bracketing_pattern == brapa]
print X_train.shape, X_test.shape

y_train = X_train.pop('log_cost')
y_test = X_test.pop('log_cost')

print X_train.bracketing_pattern.value_counts()
print X_test.bracketing_pattern.value_counts()
print X_train.supplier.value_counts()
print X_test.supplier.value_counts()


(27270, 29) (2943, 29)
(15992, 29) (1648, 29)
(1, 2, 5, 10, 25, 50, 100, 250)    15992
dtype: int64
(1, 2, 5, 10, 25, 50, 100, 250)    1648
dtype: int64
S-0066    15992
dtype: int64
S-0066    1648
dtype: int64

In [40]:
# Evaluate original RF on the well-behaved bracket.

featurizer = AllCategoricalsFeaturizer()
%time featurizer.fit(X_train)
%time X_train_feats = featurizer.transform(X_train)
%time X_test_feats = featurizer.transform(X_test)
X_train_feats.info(verbose=True)

%time X_train_np = X_train_feats.astype(np.float).values
y_train_np = y_train.values
%time X_test_np = X_test_feats.astype(np.float).values
y_test_np = y_test.values

reg = RandomForestRegressor(n_estimators=20)
%time train_rmsle, test_rmsle = eval_regressor(reg, X_train_np, y_train_np, X_test_np, y_test_np)
print "{}:".format(reg)
print "    train RMSLE {}".format(train_rmsle)
print "    test RMSLE {}".format(test_rmsle)
print


CPU times: user 256 ms, sys: 0 ns, total: 256 ms
Wall time: 268 ms
CPU times: user 316 ms, sys: 0 ns, total: 316 ms
Wall time: 316 ms
CPU times: user 52 ms, sys: 0 ns, total: 52 ms
Wall time: 50.7 ms
<class 'pandas.core.frame.DataFrame'>
Int64Index: 15992 entries, 0 to 27223
Data columns (total 181 columns):
annual_usage                                          int64
min_order_quantity                                    int64
bracket_pricing                                       bool
quantity                                              int64
diameter                                              float64
wall_thickness                                        float64
length                                                float64
num_bends                                             int64
bend_radius                                           float64
end_a_1x                                              bool
end_a_2x                                              bool
end_x_1x                                              bool
end_x_2x                                              bool
num_boss                                              int64
num_bracket                                           int64
num_other                                             int64
quote_age                                             float64
adj_quantity                                          int64
adj_bracketing                                        bool
supplier other                                        float64
supplier S-0066                                       float64
material_id other                                     float64
material_id nan                                       float64
material_id SP-0035                                   float64
material_id SP-0036                                   float64
material_id SP-0019                                   float64
material_id SP-0039                                   float64
material_id SP-0029                                   float64
material_id SP-0028                                   float64
end_a other                                           float64
end_a EF-013                                          float64
end_a NONE                                            float64
end_a EF-017                                          float64
end_a EF-003                                          float64
end_a EF-018                                          float64
end_a EF-008                                          float64
end_a EF-009                                          float64
end_a EF-023                                          float64
end_x other                                           float64
end_x EF-013                                          float64
end_x NONE                                            float64
end_x EF-017                                          float64
end_x EF-003                                          float64
end_x EF-018                                          float64
end_x EF-008                                          float64
end_x EF-006                                          float64
end_x EF-023                                          float64
specs other                                           float64
specs SP-0088                                         float64
specs SP-0067                                         float64
specs SP-0080                                         float64
specs SP-0082                                         float64
specs SP-0025                                         float64
specs SP-0026                                         float64
specs SP-0069                                         float64
specs SP-0024                                         float64
specs SP-0029                                         float64
specs SP-0007                                         float64
specs SP-0068                                         float64
specs SP-0063                                         float64
specs SP-0012                                         float64
specs SP-0076                                         float64
specs SP-0070                                         float64
components other                                      float64
components C-1625                                     float64
components C-1624                                     float64
components C-1627                                     float64
components C-1621                                     float64
components C-1620                                     float64
components C-1623                                     float64
components C-1622                                     float64
components C-1743                                     float64
components C-1629                                     float64
components C-1628                                     float64
components C-2030                                     float64
components C-2032                                     float64
components C-1230                                     float64
components C-1435                                     float64
components C-0211                                     float64
components C-1758                                     float64
components C-1869                                     float64
components C-2008                                     float64
components C-2004                                     float64
components C-2005                                     float64
components C-2006                                     float64
components C-2001                                     float64
components C-2002                                     float64
components C-2003                                     float64
components C-1421                                     float64
components C-1420                                     float64
components C-1229                                     float64
components C-1428                                     float64
components C-0616                                     float64
components C-1502                                     float64
components C-1349                                     float64
components C-1344                                     float64
components C-1345                                     float64
components C-0051                                     float64
components C-1343                                     float64
components C-1639                                     float64
components C-0826                                     float64
components C-0449                                     float64
components C-0823                                     float64
components C-1218                                     float64
components C-1630                                     float64
components C-0102                                     float64
components C-0401                                     float64
components C-1808                                     float64
components C-0409                                     float64
components C-0001                                     float64
components C-1638                                     float64
components C-1536                                     float64
components C-1889                                     float64
components C-1445                                     float64
components C-1200                                     float64
components C-1885                                     float64
components C-1206                                     float64
components C-1205                                     float64
components C-1761                                     float64
components C-1848                                     float64
components C-1547                                     float64
components C-1661                                     float64
components C-1660                                     float64
components C-1936                                     float64
components C-0002                                     float64
components C-2029                                     float64
components C-0007                                     float64
components C-0004                                     float64
components C-1727                                     float64
components C-1715                                     float64
components C-1714                                     float64
components C-1716                                     float64
components C-1711                                     float64
components C-1821                                     float64
components C-1718                                     float64
components C-1908                                     float64
components C-1909                                     float64
components C-1970                                     float64
components C-0388                                     float64
components C-1907                                     float64
components C-0494                                     float64
components C-1910                                     float64
components C-1728                                     float64
components C-0434                                     float64
components C-0539                                     float64
components C-1313                                     float64
components C-1312                                     float64
components C-1998                                     float64
components C-1995                                     float64
components C-1654                                     float64
components C-1655                                     float64
components C-1651                                     float64
components C-1652                                     float64
components C-1658                                     float64
components C-0703                                     float64
components C-1637                                     float64
components C-0448                                     float64
components C-1635                                     float64
components C-1632                                     float64
components C-1645                                     float64
components C-1631                                     float64
components C-0444                                     float64
components C-0445                                     float64
components C-2026                                     float64
components C-2027                                     float64
components C-1845                                     float64
components C-2028                                     float64
components C-1846                                     float64
components C-1841                                     float64
components C-1244                                     float64
components C-1243                                     float64
components C-1642                                     float64
components C-1641                                     float64
components C-1640                                     float64
components C-1647                                     float64
components C-1646                                     float64
components C-1644                                     float64
components C-1649                                     float64
components C-1648                                     float64
bracketing_pattern other                              float64
bracketing_pattern (1, 2, 5, 10, 25, 50, 100, 250)    float64
dtypes: bool(6), float64(167), int64(8)
memory usage: 21.6 MB
CPU times: user 32 ms, sys: 4 ms, total: 36 ms
Wall time: 42.3 ms
CPU times: user 8 ms, sys: 0 ns, total: 8 ms
Wall time: 9.77 ms
CPU times: user 12.9 s, sys: 12 ms, total: 12.9 s
Wall time: 13.1 s
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=20, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False):
    train RMSLE 0.032424983953
    test RMSLE 0.1371261352


In [81]:
print bracket.var_cost.describe()
np.log(bracket.var_cost + 1).hist(bins=100)


count    2205.000000
mean        3.945455
std         3.970139
min         1.190408
25%         2.418076
50%         3.125414
75%         4.701353
max       121.339874
Name: var_cost, dtype: float64
Out[81]:
<matplotlib.axes._subplots.AxesSubplot at 0x8f740d0>

In [90]:
# Get training set only for well-behaved bracket, and only for adj_quantity=250.

X_train, y_train, X_test, y_test = next(generate_xv_splits(aug_train_set))
X_train['log_cost'] = y_train
X_test['log_cost'] = y_test
print X_train.shape, X_test.shape

X_train = X_train[(X_train.bracketing_pattern == brapa) & (X_train.adj_quantity == 250)]
X_test = X_test[(X_test.bracketing_pattern == brapa) & (X_test.adj_quantity == 250)]
print X_train.shape, X_test.shape

y_train = X_train.pop('log_cost')
y_test = X_test.pop('log_cost')

print X_train.adj_quantity.value_counts()
print X_test.adj_quantity.value_counts()
print X_train.bracketing_pattern.value_counts()
print X_test.bracketing_pattern.value_counts()
print X_train.supplier.value_counts()
print X_test.supplier.value_counts()


(27270, 29) (2943, 29)
(1999, 29) (206, 29)
250    1999
dtype: int64
250    206
dtype: int64
(1, 2, 5, 10, 25, 50, 100, 250)    1999
dtype: int64
(1, 2, 5, 10, 25, 50, 100, 250)    206
dtype: int64
S-0066    1999
dtype: int64
S-0066    206
dtype: int64

In [94]:
# Evaluate RF only on examples with adj_quantity=250.

featurizer = AllCategoricalsFeaturizer()
%time featurizer.fit(X_train)
%time X_train_feats = featurizer.transform(X_train)
%time X_test_feats = featurizer.transform(X_test)
X_train_feats.info(verbose=True)

%time X_train_np = X_train_feats.astype(np.float).values
y_train_np = y_train.values
%time X_test_np = X_test_feats.astype(np.float).values
y_test_np = y_test.values

reg = RandomForestRegressor(n_estimators=20)
%time train_rmsle, test_rmsle = eval_regressor(reg, X_train_np, y_train_np, X_test_np, y_test_np)
print "{}:".format(reg)
print "    train RMSLE {}".format(train_rmsle)
print "    test RMSLE {}".format(test_rmsle)
print


CPU times: user 32 ms, sys: 4 ms, total: 36 ms
Wall time: 37.6 ms
CPU times: user 56 ms, sys: 0 ns, total: 56 ms
Wall time: 55.4 ms
CPU times: user 24 ms, sys: 0 ns, total: 24 ms
Wall time: 22.3 ms
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1999 entries, 7 to 27223
Data columns (total 101 columns):
annual_usage                                          int64
min_order_quantity                                    int64
bracket_pricing                                       bool
quantity                                              int64
diameter                                              float64
wall_thickness                                        float64
length                                                float64
num_bends                                             int64
bend_radius                                           float64
end_a_1x                                              bool
end_a_2x                                              bool
end_x_1x                                              bool
end_x_2x                                              bool
num_boss                                              int64
num_bracket                                           int64
num_other                                             int64
quote_age                                             float64
adj_quantity                                          int64
adj_bracketing                                        bool
supplier other                                        float64
supplier S-0066                                       float64
material_id other                                     float64
material_id SP-0035                                   float64
material_id SP-0019                                   float64
material_id SP-0039                                   float64
material_id SP-0029                                   float64
material_id SP-0028                                   float64
end_a other                                           float64
end_a NONE                                            float64
end_a EF-017                                          float64
end_a EF-003                                          float64
end_a EF-018                                          float64
end_a EF-008                                          float64
end_a EF-023                                          float64
end_x other                                           float64
end_x NONE                                            float64
end_x EF-017                                          float64
end_x EF-003                                          float64
end_x EF-018                                          float64
end_x EF-008                                          float64
end_x EF-023                                          float64
specs other                                           float64
specs SP-0080                                         float64
specs SP-0082                                         float64
specs SP-0026                                         float64
specs SP-0024                                         float64
specs SP-0007                                         float64
specs SP-0063                                         float64
specs SP-0012                                         float64
components other                                      float64
components C-1625                                     float64
components C-1624                                     float64
components C-1627                                     float64
components C-1621                                     float64
components C-1620                                     float64
components C-1623                                     float64
components C-1622                                     float64
components C-1629                                     float64
components C-1628                                     float64
components C-1435                                     float64
components C-2005                                     float64
components C-1421                                     float64
components C-1420                                     float64
components C-1229                                     float64
components C-1639                                     float64
components C-0449                                     float64
components C-0823                                     float64
components C-1630                                     float64
components C-0001                                     float64
components C-1638                                     float64
components C-1889                                     float64
components C-1848                                     float64
components C-1547                                     float64
components C-1660                                     float64
components C-0002                                     float64
components C-0004                                     float64
components C-1313                                     float64
components C-1312                                     float64
components C-1654                                     float64
components C-1652                                     float64
components C-1637                                     float64
components C-0448                                     float64
components C-1635                                     float64
components C-1632                                     float64
components C-1645                                     float64
components C-1631                                     float64
components C-0444                                     float64
components C-0445                                     float64
components C-1845                                     float64
components C-1846                                     float64
components C-1244                                     float64
components C-1243                                     float64
components C-1642                                     float64
components C-1641                                     float64
components C-1640                                     float64
components C-1647                                     float64
components C-1646                                     float64
components C-1649                                     float64
components C-1648                                     float64
bracketing_pattern other                              float64
bracketing_pattern (1, 2, 5, 10, 25, 50, 100, 250)    float64
dtypes: bool(6), float64(87), int64(8)
memory usage: 1.5 MB
CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 3.05 ms
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 1.94 ms
CPU times: user 720 ms, sys: 0 ns, total: 720 ms
Wall time: 725 ms
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=20, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False):
    train RMSLE 0.0733755436104
    test RMSLE 0.148445379091


In [68]:
# Extract pred_var_cost from y_test_pred, which is pred_log_cost for qty=1.

y_test_pred = reg.predict(X_test_np)
print np.sqrt(mean_squared_error(y_test_np, y_test_pred))
df = pd.DataFrame()
df['tube_assembly_id'] = X_test.tube_assembly_id
df['pred_cost_for_qty_1'] = inverse_log_transform_y(y_test_pred)
print df.shape
df = df.merge(bracket)
df['pred_var_cost'] = df.pred_cost_for_qty_1 - df.fixed_cost
print df.shape
df[:5]


0.0676371395344
(206, 2)
(206, 6)
Out[68]:
tube_assembly_id pred_cost_for_qty_1 fixed_cost_class fixed_cost var_cost pred_var_cost
0 TA-00093 21.573154 1 19.043385 2.528315 2.529769
1 TA-00125 28.501491 3 23.633726 4.912866 4.867765
2 TA-00173 27.700158 2 20.295284 4.370288 7.404874
3 TA-00264 21.422820 1 19.043385 2.396515 2.379435
4 TA-00334 22.291537 1 19.043385 2.396515 3.248152

In [112]:
# Try to predict log(var_cost) directly.

X_train, y_train, X_test, y_test = next(generate_xv_splits(aug_train_set))
X_train['log_cost'] = y_train
X_test['log_cost'] = y_test
print X_train.shape, X_test.shape

X_train = X_train[(X_train.bracketing_pattern == brapa) & (X_train.adj_quantity == 1)]
X_test = X_test[(X_test.bracketing_pattern == brapa) & (X_test.adj_quantity == 1)]
log_cost_train = X_train.pop('log_cost')
log_cost_test = X_test.pop('log_cost')
print X_train.shape, X_test.shape

X_train = X_train.merge(bracket, on='tube_assembly_id')
X_test = X_test.merge(bracket, on='tube_assembly_id')
y_train = log_transform_y(X_train.pop('var_cost'))
y_test = log_transform_y(X_test.pop('var_cost'))
print X_train.shape, X_test.shape, y_train.shape, y_test.shape

featurizer = AllCategoricalsFeaturizer()
%time featurizer.fit(X_train)
%time X_train_feats = featurizer.transform(X_train)
%time X_test_feats = featurizer.transform(X_test)
X_train_feats.info(verbose=True)

%time X_train_np = X_train_feats.astype(np.float).values
y_train_np = y_train.values
%time X_test_np = X_test_feats.astype(np.float).values
y_test_np = y_test.values

reg = RandomForestRegressor(n_estimators=20)
%time train_rmsle, test_rmsle = eval_regressor(reg, X_train_np, y_train_np, X_test_np, y_test_np)
print "{}:".format(reg)
print "    train RMSLE_vc {}".format(train_rmsle)
print "    test RMSLE_vc {}".format(test_rmsle)
print


(27270, 29) (2943, 29)
(1999, 28) (206, 28)
(1999, 30) (206, 30) (1999,) (206,)
CPU times: user 36 ms, sys: 0 ns, total: 36 ms
Wall time: 37.1 ms
CPU times: user 60 ms, sys: 0 ns, total: 60 ms
Wall time: 65 ms
CPU times: user 24 ms, sys: 0 ns, total: 24 ms
Wall time: 24.9 ms
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1999 entries, 0 to 1998
Data columns (total 103 columns):
annual_usage                                          int64
min_order_quantity                                    int64
bracket_pricing                                       bool
quantity                                              int64
diameter                                              float64
wall_thickness                                        float64
length                                                float64
num_bends                                             int64
bend_radius                                           float64
end_a_1x                                              bool
end_a_2x                                              bool
end_x_1x                                              bool
end_x_2x                                              bool
num_boss                                              int64
num_bracket                                           int64
num_other                                             int64
quote_age                                             float64
adj_quantity                                          int64
adj_bracketing                                        bool
fixed_cost_class                                      int64
fixed_cost                                            float64
supplier other                                        float64
supplier S-0066                                       float64
material_id other                                     float64
material_id SP-0035                                   float64
material_id SP-0019                                   float64
material_id SP-0039                                   float64
material_id SP-0029                                   float64
material_id SP-0028                                   float64
end_a other                                           float64
end_a NONE                                            float64
end_a EF-017                                          float64
end_a EF-003                                          float64
end_a EF-018                                          float64
end_a EF-008                                          float64
end_a EF-023                                          float64
end_x other                                           float64
end_x NONE                                            float64
end_x EF-017                                          float64
end_x EF-003                                          float64
end_x EF-018                                          float64
end_x EF-008                                          float64
end_x EF-023                                          float64
specs other                                           float64
specs SP-0080                                         float64
specs SP-0082                                         float64
specs SP-0026                                         float64
specs SP-0024                                         float64
specs SP-0007                                         float64
specs SP-0063                                         float64
specs SP-0012                                         float64
components other                                      float64
components C-1625                                     float64
components C-1624                                     float64
components C-1627                                     float64
components C-1621                                     float64
components C-1620                                     float64
components C-1623                                     float64
components C-1622                                     float64
components C-1629                                     float64
components C-1628                                     float64
components C-1435                                     float64
components C-2005                                     float64
components C-1421                                     float64
components C-1420                                     float64
components C-1229                                     float64
components C-1639                                     float64
components C-0449                                     float64
components C-0823                                     float64
components C-1630                                     float64
components C-0001                                     float64
components C-1638                                     float64
components C-1889                                     float64
components C-1848                                     float64
components C-1547                                     float64
components C-1660                                     float64
components C-0002                                     float64
components C-0004                                     float64
components C-1313                                     float64
components C-1312                                     float64
components C-1654                                     float64
components C-1652                                     float64
components C-1637                                     float64
components C-0448                                     float64
components C-1635                                     float64
components C-1632                                     float64
components C-1645                                     float64
components C-1631                                     float64
components C-0444                                     float64
components C-0445                                     float64
components C-1845                                     float64
components C-1846                                     float64
components C-1244                                     float64
components C-1243                                     float64
components C-1642                                     float64
components C-1641                                     float64
components C-1640                                     float64
components C-1647                                     float64
components C-1646                                     float64
components C-1649                                     float64
components C-1648                                     float64
bracketing_pattern other                              float64
bracketing_pattern (1, 2, 5, 10, 25, 50, 100, 250)    float64
dtypes: bool(6), float64(88), int64(9)
memory usage: 1.5 MB
CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 3.55 ms
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 1.82 ms
CPU times: user 680 ms, sys: 0 ns, total: 680 ms
Wall time: 686 ms
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=20, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False):
    train RMSLE_vc 0.0576763792055
    test RMSLE_vc 0.127771671493


In [116]:
# Extract pred_var_cost for each taid in X_test.

y_test_pred = reg.predict(X_test_np)
print np.sqrt(mean_squared_error(y_test_np, y_test_pred))
df = pd.DataFrame()
df['tube_assembly_id'] = X_test.tube_assembly_id
df = df.merge(bracket, on='tube_assembly_id')
df['pred_var_cost'] = inverse_log_transform_y(y_test_pred)
print df.shape
df[:5]


0.127771671493
(206, 5)
Out[116]:
tube_assembly_id fixed_cost_class fixed_cost var_cost pred_var_cost
0 TA-00093 1 19.043385 2.528315 2.492828
1 TA-00125 3 23.633726 4.912866 4.783798
2 TA-00173 2 20.295284 4.370288 4.378908
3 TA-00264 1 19.043385 2.396515 2.358751
4 TA-00334 1 19.043385 2.396515 2.393478

In [118]:
# Extrapolate predicted cost for other quantities, assuming true fixed_cost observed.

_, _, X_test_full, y_test_full = next(generate_xv_splits(aug_train_set))
X_test_full['log_cost'] = y_test_full
print X_test_full.shape

X_test_full = X_test_full[X_test_full.bracketing_pattern == brapa]
print X_test_full.shape

X_test_full = X_test_full.merge(df, on='tube_assembly_id')
print X_test_full.shape

X_test_full['pred_cost'] = X_test_full.fixed_cost / X_test_full.adj_quantity + X_test_full.pred_var_cost
X_test_full['pred_log_cost'] = log_transform_y(X_test_full.pred_cost)
X_test_full['err2'] = (X_test_full.log_cost.values - X_test_full.pred_log_cost.values) ** 2
X_test_full.sort('err2', ascending=False, inplace=True)
print X_test_full.shape

print X_test_full.err2.describe()
print np.sqrt(mean_squared_error(X_test_full.log_cost.values, X_test_full.pred_log_cost.values))

X_test_full[:10]


(2943, 29)
(1648, 29)
(1648, 33)
(1648, 36)
count    1.648000e+03
mean     1.062761e-02
std      7.361506e-02
min      9.391643e-11
25%      1.747167e-05
50%      1.855751e-04
75%      1.118387e-03
max      1.266037e+00
Name: err2, dtype: float64
0.10309032305
Out[118]:
tube_assembly_id supplier quote_date annual_usage min_order_quantity bracket_pricing quantity material_id diameter wall_thickness ... adj_bracketing bracketing_pattern log_cost fixed_cost_class fixed_cost var_cost pred_var_cost pred_cost pred_log_cost err2
375 TA-03006 S-0066 2013-09-01 0 0 True 250 SP-0029 19.05 1.65 ... True (1, 2, 5, 10, 25, 50, 100, 250) 3.085035 2 20.295284 20.70167 6.017092 6.098273 1.959852 1.266037
374 TA-03006 S-0066 2013-09-01 0 0 True 100 SP-0029 19.05 1.65 ... True (1, 2, 5, 10, 25, 50, 100, 250) 3.089224 2 20.295284 20.70167 6.017092 6.220045 1.976861 1.237351
373 TA-03006 S-0066 2013-09-01 0 0 True 50 SP-0029 19.05 1.65 ... True (1, 2, 5, 10, 25, 50, 100, 250) 3.096294 2 20.295284 20.70167 6.017092 6.422998 2.004583 1.191832
372 TA-03006 S-0066 2013-09-01 0 0 True 25 SP-0029 19.05 1.65 ... True (1, 2, 5, 10, 25, 50, 100, 250) 3.111524 2 20.295284 20.70167 6.017092 6.828904 2.057822 1.110287
371 TA-03006 S-0066 2013-09-01 0 0 True 10 SP-0029 19.05 1.65 ... True (1, 2, 5, 10, 25, 50, 100, 250) 3.164603 2 20.295284 20.70167 6.017092 8.046621 2.202391 0.925852
370 TA-03006 S-0066 2013-09-01 0 0 True 5 SP-0029 19.05 1.65 ... True (1, 2, 5, 10, 25, 50, 100, 250) 3.247106 2 20.295284 20.70167 6.017092 10.076149 2.404794 0.709490
1567 TA-20766 S-0066 2013-11-02 1 0 True 250 SP-0029 12.70 0.89 ... True (1, 2, 5, 10, 25, 50, 100, 250) 2.979539 3 23.633726 18.49766 8.426227 8.520762 2.253475 0.527169
1566 TA-20766 S-0066 2013-11-02 1 0 True 100 SP-0029 12.70 0.89 ... True (1, 2, 5, 10, 25, 50, 100, 250) 2.985178 3 23.633726 18.49766 8.426227 8.662564 2.268259 0.513973
1565 TA-20766 S-0066 2013-11-02 1 0 True 50 SP-0029 12.70 0.89 ... True (1, 2, 5, 10, 25, 50, 100, 250) 2.994553 3 23.633726 18.49766 8.426227 8.898901 2.292424 0.492985
1564 TA-20766 S-0066 2013-11-02 1 0 True 25 SP-0029 12.70 0.89 ... True (1, 2, 5, 10, 25, 50, 100, 250) 3.014818 3 23.633726 18.49766 8.426227 9.371576 2.339069 0.456637

10 rows × 36 columns


In [126]:
# Try to predict fixed_cost_class and log(var_cost) independently, then combine the two.

X_train, y_train, X_test, y_test = next(generate_xv_splits(aug_train_set))
X_train['log_cost'] = y_train
X_test['log_cost'] = y_test
print X_train.shape, X_test.shape

X_train = X_train[(X_train.bracketing_pattern == brapa) & (X_train.adj_quantity == 1)]
X_test = X_test[(X_test.bracketing_pattern == brapa) & (X_test.adj_quantity == 1)]
log_cost_train = X_train.pop('log_cost')
log_cost_test = X_test.pop('log_cost')
print X_train.shape, X_test.shape

X_train = X_train.merge(bracket, on='tube_assembly_id')
X_test = X_test.merge(bracket, on='tube_assembly_id')
X_train.pop('fixed_cost')
X_test.pop('fixed_cost')
log_var_cost_train = log_transform_y(X_train.pop('var_cost'))
log_var_cost_test = log_transform_y(X_test.pop('var_cost'))
fcc_train = X_train.pop('fixed_cost_class')
fcc_test = X_test.pop('fixed_cost_class')
print X_train.shape, X_test.shape

featurizer = AllCategoricalsFeaturizer()
%time featurizer.fit(X_train)
%time X_train_feats = featurizer.transform(X_train)
%time X_test_feats = featurizer.transform(X_test)
X_train_feats.info(verbose=True)

%time X_train_np = X_train_feats.astype(np.float).values
%time X_test_np = X_test_feats.astype(np.float).values


(27270, 29) (2943, 29)
(1999, 28) (206, 28)
(1999, 28) (206, 28)
CPU times: user 36 ms, sys: 0 ns, total: 36 ms
Wall time: 35.6 ms
CPU times: user 60 ms, sys: 0 ns, total: 60 ms
Wall time: 91.5 ms
CPU times: user 24 ms, sys: 0 ns, total: 24 ms
Wall time: 30.2 ms
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1999 entries, 0 to 1998
Data columns (total 101 columns):
annual_usage                                          int64
min_order_quantity                                    int64
bracket_pricing                                       bool
quantity                                              int64
diameter                                              float64
wall_thickness                                        float64
length                                                float64
num_bends                                             int64
bend_radius                                           float64
end_a_1x                                              bool
end_a_2x                                              bool
end_x_1x                                              bool
end_x_2x                                              bool
num_boss                                              int64
num_bracket                                           int64
num_other                                             int64
quote_age                                             float64
adj_quantity                                          int64
adj_bracketing                                        bool
supplier other                                        float64
supplier S-0066                                       float64
material_id other                                     float64
material_id SP-0035                                   float64
material_id SP-0019                                   float64
material_id SP-0039                                   float64
material_id SP-0029                                   float64
material_id SP-0028                                   float64
end_a other                                           float64
end_a NONE                                            float64
end_a EF-017                                          float64
end_a EF-003                                          float64
end_a EF-018                                          float64
end_a EF-008                                          float64
end_a EF-023                                          float64
end_x other                                           float64
end_x NONE                                            float64
end_x EF-017                                          float64
end_x EF-003                                          float64
end_x EF-018                                          float64
end_x EF-008                                          float64
end_x EF-023                                          float64
specs other                                           float64
specs SP-0080                                         float64
specs SP-0082                                         float64
specs SP-0026                                         float64
specs SP-0024                                         float64
specs SP-0007                                         float64
specs SP-0063                                         float64
specs SP-0012                                         float64
components other                                      float64
components C-1625                                     float64
components C-1624                                     float64
components C-1627                                     float64
components C-1621                                     float64
components C-1620                                     float64
components C-1623                                     float64
components C-1622                                     float64
components C-1629                                     float64
components C-1628                                     float64
components C-1435                                     float64
components C-2005                                     float64
components C-1421                                     float64
components C-1420                                     float64
components C-1229                                     float64
components C-1639                                     float64
components C-0449                                     float64
components C-0823                                     float64
components C-1630                                     float64
components C-0001                                     float64
components C-1638                                     float64
components C-1889                                     float64
components C-1848                                     float64
components C-1547                                     float64
components C-1660                                     float64
components C-0002                                     float64
components C-0004                                     float64
components C-1313                                     float64
components C-1312                                     float64
components C-1654                                     float64
components C-1652                                     float64
components C-1637                                     float64
components C-0448                                     float64
components C-1635                                     float64
components C-1632                                     float64
components C-1645                                     float64
components C-1631                                     float64
components C-0444                                     float64
components C-0445                                     float64
components C-1845                                     float64
components C-1846                                     float64
components C-1244                                     float64
components C-1243                                     float64
components C-1642                                     float64
components C-1641                                     float64
components C-1640                                     float64
components C-1647                                     float64
components C-1646                                     float64
components C-1649                                     float64
components C-1648                                     float64
bracketing_pattern other                              float64
bracketing_pattern (1, 2, 5, 10, 25, 50, 100, 250)    float64
dtypes: bool(6), float64(87), int64(8)
memory usage: 1.5 MB
CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 3.38 ms
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 1.66 ms

In [136]:
# The classification part: predict fixed_cost_class.

y_train = fcc_train
y_test = fcc_test
y_train_np = y_train.values
y_test_np = y_test.values

clf = RandomForestClassifier(n_estimators=100)
%time clf.fit(X_train_np, y_train_np)
y_train_pred = clf.predict(X_train_np)
print "on train:"
print clf.score(X_train_np, y_train_np)
print confusion_matrix(y_train_np, y_train_pred)

y_test_pred = clf.predict(X_test_np)
pred_fixed_cost_class_test = y_test_pred

print
print "on test:"
print clf.score(X_test_np, y_test_np)
print confusion_matrix(y_test_np, y_test_pred)

print
print "feature importances:"
print_feature_importances(X_train_feats, clf);


CPU times: user 484 ms, sys: 0 ns, total: 484 ms
Wall time: 516 ms
on train:
0.99899949975
[[  30    0    0    0]
 [   0 1179    0    0]
 [   0    0  227    1]
 [   0    1    0  561]]

on test:
0.917475728155
[[  2   1   0   0]
 [  0 136   0   4]
 [  0   2  22   5]
 [  0   2   3  29]]

feature importances:
length 0.0894820200844
wall_thickness 0.0697350846985
end_x EF-003 0.0500884798817
diameter 0.0464542880704
end_a EF-003 0.0437021136247
material_id SP-0029 0.0436367980116
quote_age 0.0428914689403
num_bends 0.0407594604288
components other 0.0405959715093
bend_radius 0.0363907329378
components C-1631 0.0272862255114
components C-1624 0.0256983858577
components C-1621 0.025370912373
end_x EF-008 0.022712159098
components C-1629 0.0226766530461
end_a EF-008 0.022641447701
components C-1632 0.0201046578161
end_a_2x 0.0196720573343
components C-1622 0.0194165958429
components C-1623 0.0184972111659
components C-1630 0.0178326423712
end_x_2x 0.0154716345103
components C-1638 0.0151851971509
components C-1628 0.014950420584
components C-1625 0.0141126205941
components C-1637 0.0132473789827
components C-1620 0.0108832927474
components C-1641 0.0101983146713
components C-1639 0.00995314836196
material_id SP-0035 0.00890401180135
end_a EF-018 0.00879484819154
components C-1627 0.00867073292584
end_x EF-018 0.00824143399708
components C-1647 0.00678800228522
components C-1640 0.00659397546969
end_x NONE 0.00608271095385
end_x EF-017 0.00561659846826
components C-1642 0.00545547879187
components C-1312 0.00525490158531
material_id SP-0039 0.00512160426996
annual_usage 0.00422368890381
material_id SP-0019 0.0042052012646
end_a NONE 0.0038290728192
end_a_1x 0.00314417741058
components C-1635 0.00306709242811
end_a EF-017 0.00288572382327
material_id SP-0028 0.00276506669105
components C-1646 0.0027200893958
num_boss 0.00268150940662
components C-0445 0.00259756291309
end_x EF-023 0.00250923052179
specs SP-0082 0.00244824154577
components C-1660 0.00243243857595
specs SP-0012 0.00230558309236
components C-0444 0.00229367021646
specs SP-0007 0.00208171576958
specs SP-0080 0.00197787317796
specs SP-0026 0.00191534877311
specs SP-0024 0.00179881575213
components C-1649 0.00161807442508
components C-1889 0.00154974391783
end_a EF-023 0.00146946775251
components C-1652 0.00139101038952
components C-0001 0.00124748957131
components C-1645 0.00119548097541
components C-2005 0.00118271367997
components C-1243 0.0011133063422
components C-1654 0.00110235435258
components C-1244 0.000884397632869
components C-1648 0.000876186862792
components C-0449 0.00083845276469
components C-0448 0.000833773810707
components C-1547 0.000807509175368
components C-1848 0.000789624249138
specs other 0.000750832052449
components C-1846 0.000660621901206
end_a other 0.00065958773985
end_x_1x 0.000612775563502
components C-1435 0.000575417588826
material_id other 0.000551153891352
num_other 0.00043711579742
components C-1421 0.00043525091493
end_x other 0.000415390535435
components C-1420 0.000380184328862
specs SP-0063 0.000165789248886
components C-1845 0.000139345239229
components C-1229 0.000128137786955
num_bracket 8.38552162966e-05
components C-0823 4.52746325461e-05
components C-0004 4.14140867515e-06
components C-0002 1.68765199829e-06
components C-1313 7.74707191897e-08
min_order_quantity 0.0
bracket_pricing 0.0
quantity 0.0
adj_quantity 0.0
adj_bracketing 0.0
supplier other 0.0
supplier S-0066 0.0
bracketing_pattern other 0.0
bracketing_pattern (1, 2, 5, 10, 25, 50, 100, 250) 0.0

In [139]:
# The regression part: predict var_cost.

y_train = log_var_cost_train
y_test = log_var_cost_test
y_train_np = y_train.values
y_test_np = y_test.values

reg = RandomForestRegressor(n_estimators=20)
%time train_rmsle, test_rmsle = eval_regressor(reg, X_train_np, y_train_np, X_test_np, y_test_np)
print "{}:".format(reg)
print "    train RMSLE_vc {}".format(train_rmsle)
print "    test RMSLE_vc {}".format(test_rmsle)
print

y_test_pred = reg.predict(X_test_np)
pred_log_var_cost_test = y_test_pred


CPU times: user 708 ms, sys: 4 ms, total: 712 ms
Wall time: 723 ms
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=20, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False):
    train RMSLE_vc 0.0690357381268
    test RMSLE_vc 0.150964391798


In [141]:
# Inspect the regressor's raw test predictions. They are in the log_transform_y
# space of var_cost, one value per row of X_test_np.
pred_log_var_cost_test


Out[141]:
array([ 1.25510732,  1.76479342,  1.67127429,  1.21585232,  1.22183107,
        1.28498987,  1.26808052,  1.19901911,  1.44166051,  1.35271189,
        1.82371569,  1.27878921,  1.5325906 ,  1.45323163,  1.19430352,
        1.13373326,  1.09149838,  1.08866807,  1.36625882,  1.29316202,
        1.19370247,  1.22671212,  1.30525959,  1.64416158,  1.46805841,
        1.72779512,  2.0982482 ,  1.3060855 ,  1.25877572,  1.49017955,
        1.49569148,  1.68164557,  2.02136207,  0.95520805,  1.71225647,
        1.82119967,  1.50905174,  1.67468686,  1.88742237,  1.30053819,
        1.19429464,  1.67102208,  1.28709567,  1.33989942,  1.2350144 ,
        1.55749239,  1.95859896,  1.50328213,  1.64466444,  1.31170212,
        2.38244723,  1.79726144,  1.50311547,  1.2373287 ,  1.18563642,
        1.26045989,  1.40602467,  1.30840396,  1.30769779,  1.46625918,
        2.13475421,  1.88321349,  1.64075684,  1.18231747,  1.3286897 ,
        1.73044906,  1.30728962,  1.31184127,  1.26312332,  1.28742251,
        1.42718765,  1.1925154 ,  1.66808118,  1.19173735,  1.49170726,
        1.78863292,  1.69412509,  1.99057678,  1.89862368,  1.56879059,
        1.74989344,  0.92774361,  1.17516144,  1.17360909,  1.23101397,
        0.91991377,  1.15705926,  1.09968941,  1.69170811,  1.87997131,
        1.44719739,  1.47224435,  1.74458355,  1.18647823,  1.91864205,
        1.96198041,  1.44517995,  1.86956096,  2.03904829,  1.31790013,
        1.7369954 ,  1.69729192,  1.20327645,  1.3165195 ,  1.82198557,
        1.33673654,  1.87008504,  2.11333726,  1.63909836,  1.39699674,
        1.26278365,  1.78110973,  1.33120404,  1.26101583,  1.25718316,
        1.4026163 ,  1.16878508,  1.34912331,  1.81317263,  1.39780575,
        1.20344678,  1.50779916,  1.76756085,  1.51473131,  1.36979402,
        1.48319406,  1.81733359,  1.55057349,  1.25574819,  1.16844518,
        1.49337688,  1.17782333,  1.08363719,  1.34556589,  1.01391836,
        1.89372132,  1.86472433,  1.87490206,  1.35285312,  1.6399697 ,
        1.31306597,  1.80835779,  0.94271461,  1.68153454,  1.36884525,
        1.91420974,  1.28808134,  1.14689623,  1.04123019,  1.04526413,
        1.41582767,  1.14148667,  0.99584103,  1.6450458 ,  1.33228041,
        2.19394924,  1.24245636,  1.26655232,  1.19911172,  1.22092063,
        1.35598399,  1.3540703 ,  1.08258878,  1.1594278 ,  1.40464137,
        0.9359364 ,  1.74111442,  1.79333434,  1.35591084,  1.2026918 ,
        1.60365119,  1.14988812,  1.18050587,  2.7703757 ,  2.12019583,
        1.18326073,  1.62667036,  1.08987375,  1.19490501,  0.99036722,
        1.18234074,  1.52675936,  1.52433677,  1.48093824,  1.45817616,
        1.48365837,  1.00567243,  1.58086761,  1.70585588,  2.07368081,
        1.98926897,  1.01452373,  1.15530035,  1.18145815,  1.63998283,
        2.17578225,  1.23425052,  1.11684326,  1.26298705,  1.41773171,
        1.57447571,  1.19390458,  1.11241176,  1.46407132,  1.34827112,
        0.85762585])

In [148]:
# Combine predictions.

print accuracy_score(fcc_test, pred_fixed_cost_class_test)
print np.sqrt(mean_squared_error(log_var_cost_test, pred_log_var_cost_test))
df = pd.DataFrame()
df['tube_assembly_id'] = X_test.tube_assembly_id
df['pred_fixed_cost_class'] = pred_fixed_cost_class_test
df['pred_fixed_cost'] = np.array(fc_vals)[df.pred_fixed_cost_class]
df['pred_var_cost'] = inverse_log_transform_y(pred_log_var_cost_test)
print df.shape
df[:5]


0.917475728155
0.150964391798
(206, 4)
Out[148]:
tube_assembly_id pred_fixed_cost_class pred_fixed_cost pred_var_cost
0 TA-00093 1 19.043385 2.508215
1 TA-00125 3 23.633726 4.840366
2 TA-00173 2 20.295284 4.318941
3 TA-00264 1 19.043385 2.373168
4 TA-00334 1 19.043385 2.393396

In [150]:
# Evaluate on all quantities.

_, _, X_test_full, y_test_full = next(generate_xv_splits(aug_train_set))
X_test_full['log_cost'] = y_test_full
print X_test_full.shape

X_test_full = X_test_full[X_test_full.bracketing_pattern == brapa]
print X_test_full.shape

X_test_full = X_test_full.merge(bracket, on='tube_assembly_id')
X_test_full = X_test_full.merge(df, on='tube_assembly_id')
print X_test_full.shape

X_test_full['pred_cost'] = X_test_full.pred_fixed_cost / X_test_full.adj_quantity + X_test_full.pred_var_cost
X_test_full['pred_log_cost'] = log_transform_y(X_test_full.pred_cost)
X_test_full['err2'] = (X_test_full.log_cost.values - X_test_full.pred_log_cost.values) ** 2
X_test_full.sort('err2', ascending=False, inplace=True)
print X_test_full.shape

print X_test_full.err2.describe()
print np.sqrt(mean_squared_error(X_test_full.log_cost.values, X_test_full.pred_log_cost.values))

X_test_full[:10]


(2943, 29)
(1648, 29)
(1648, 35)
(1648, 38)
count    1.648000e+03
mean     1.534164e-02
std      7.813045e-02
min      9.197638e-12
25%      3.764836e-05
50%      3.229354e-04
75%      2.959388e-03
max      1.243336e+00
Name: err2, dtype: float64
0.123861365099
Out[150]:
tube_assembly_id supplier quote_date annual_usage min_order_quantity bracket_pricing quantity material_id diameter wall_thickness ... log_cost fixed_cost_class fixed_cost var_cost pred_fixed_cost_class pred_fixed_cost pred_var_cost pred_cost pred_log_cost err2
375 TA-03006 S-0066 2013-09-01 0 0 True 250 SP-0029 19.05 1.65 ... 3.085035 2 20.295284 20.70167 2 20.295284 6.089388 6.170569 1.969985 1.243336
374 TA-03006 S-0066 2013-09-01 0 0 True 100 SP-0029 19.05 1.65 ... 3.089224 2 20.295284 20.70167 2 20.295284 6.089388 6.292340 1.986825 1.215285
373 TA-03006 S-0066 2013-09-01 0 0 True 50 SP-0029 19.05 1.65 ... 3.096294 2 20.295284 20.70167 2 20.295284 6.089388 6.495293 2.014275 1.170764
372 TA-03006 S-0066 2013-09-01 0 0 True 25 SP-0029 19.05 1.65 ... 3.111524 2 20.295284 20.70167 2 20.295284 6.089388 6.901199 2.067015 1.091000
371 TA-03006 S-0066 2013-09-01 0 0 True 10 SP-0029 19.05 1.65 ... 3.164603 2 20.295284 20.70167 2 20.295284 6.089388 8.118916 2.210351 0.910597
370 TA-03006 S-0066 2013-09-01 0 0 True 5 SP-0029 19.05 1.65 ... 3.247106 2 20.295284 20.70167 2 20.295284 6.089388 10.148444 2.411300 0.698572
1567 TA-20766 S-0066 2013-11-02 1 0 True 250 SP-0029 12.70 0.89 ... 2.979539 3 23.633726 18.49766 2 20.295284 7.809073 7.890254 2.184956 0.631363
1566 TA-20766 S-0066 2013-11-02 1 0 True 100 SP-0029 12.70 0.89 ... 2.985178 3 23.633726 18.49766 2 20.295284 7.809073 8.012026 2.198560 0.618768
1565 TA-20766 S-0066 2013-11-02 1 0 True 50 SP-0029 12.70 0.89 ... 2.994553 3 23.633726 18.49766 2 20.295284 7.809073 8.214979 2.220830 0.598647
1564 TA-20766 S-0066 2013-11-02 1 0 True 25 SP-0029 12.70 0.89 ... 3.014818 3 23.633726 18.49766 2 20.295284 7.809073 8.620885 2.263936 0.563824

10 rows × 38 columns


In [ ]: