In [1]:
import os
import pandas as pd
import numpy as np
from stackregression import stack_regression_step1, stack_regression_step2, print_prediction_report
from utils import encode_numeric_zscore_list, encode_numeric_zscore_all, to_xy
from sklearn import preprocessing
from sklearn.cross_validation import train_test_split
from scipy.sparse import csr_matrix
from xgboost import XGBRegressor
from random import randint
import xgboost as xgb


/home/arvc/anaconda3/envs/tensorflow/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)
Using Theano backend.

In [2]:
#Read Input CSV file
path = "./data/self"
inputFilePath = os.path.join(path, "TestRegression.csv")
#df = pd.read_csv(inputFilePath, compression="zip", header=0, na_values=['NULL'])
df = pd.read_csv(inputFilePath, header=0, na_values=['NULL'])
x,y = to_xy(df, "Label")
x_train, x_valid, y_train, y_valid = train_test_split(x,y, test_size=0.20, random_state=43)


float64

In [3]:
xgtrain = xgb.DMatrix(x_train, label=y_train)
xgvalid = xgb.DMatrix(x_valid, label=y_valid)

In [4]:
#best params on 11/1 for 85% train data: {'subsample': 1.0, 'n_estimators': 174.0, 'eta': 0.1, 
#'colsample_bytree': 0.4, 'gamma': 0.2, 'min_child_weight': 1.0, 'max_depth': 3}

RANDOM_STATE = randint(1,429496)
params = {
    'min_child_weight': 1,
    'eta': 0.1,
    'colsample_bytree': 0.5,
    'max_depth': 12,
    'subsample': 0.8,
    'alpha': 1,
    'gamma': 1,
    'silent': 1,
    'verbose_eval': False,
    'seed': RANDOM_STATE,
    'eval_metric': 'mae',
    'objective': 'reg:linear',
}
watchlist = [(xgtrain, 'train'), (xgvalid, 'eval')]   
model = xgb.train(params, xgtrain, 100000, watchlist, early_stopping_rounds=300, verbose_eval=100)


[0]	train-mae:330.862	eval-mae:320.329
Multiple eval metrics have been passed: 'eval-mae' will be used for early stopping.

Will train until eval-mae hasn't improved in 300 rounds.
[100]	train-mae:18.4196	eval-mae:38.3938
[200]	train-mae:11.4753	eval-mae:32.9095
[300]	train-mae:8.46997	eval-mae:30.1869
[400]	train-mae:6.98616	eval-mae:28.7684
[500]	train-mae:6.18352	eval-mae:27.7971
[600]	train-mae:5.57777	eval-mae:26.8002
[700]	train-mae:5.04152	eval-mae:26.0911
[800]	train-mae:4.64248	eval-mae:25.4434
[900]	train-mae:4.27276	eval-mae:24.7759
[1000]	train-mae:4.01328	eval-mae:24.3684
[1100]	train-mae:3.74071	eval-mae:23.9558
[1200]	train-mae:3.55198	eval-mae:23.4874
[1300]	train-mae:3.40277	eval-mae:23.1407
[1400]	train-mae:3.22334	eval-mae:22.7592
[1500]	train-mae:3.09924	eval-mae:22.5023
[1600]	train-mae:2.98631	eval-mae:22.352
[1700]	train-mae:2.88477	eval-mae:22.0925
[1800]	train-mae:2.79072	eval-mae:21.8207
[1900]	train-mae:2.70766	eval-mae:21.6404
[2000]	train-mae:2.64193	eval-mae:21.433
[2100]	train-mae:2.56033	eval-mae:21.2355
[2200]	train-mae:2.4815	eval-mae:21.0381
[2300]	train-mae:2.41969	eval-mae:20.7908
[2400]	train-mae:2.36412	eval-mae:20.6192
[2500]	train-mae:2.31244	eval-mae:20.4495
[2600]	train-mae:2.25416	eval-mae:20.3376
[2700]	train-mae:2.20451	eval-mae:20.2045
[2800]	train-mae:2.15003	eval-mae:20.0585
[2900]	train-mae:2.10654	eval-mae:19.9406
[3000]	train-mae:2.07279	eval-mae:19.7966
[3100]	train-mae:2.04157	eval-mae:19.735
[3200]	train-mae:2.00424	eval-mae:19.5904
[3300]	train-mae:1.97485	eval-mae:19.4521
[3400]	train-mae:1.93954	eval-mae:19.3189
[3500]	train-mae:1.90959	eval-mae:19.2379
[3600]	train-mae:1.87261	eval-mae:19.0967
[3700]	train-mae:1.84384	eval-mae:18.9818
[3800]	train-mae:1.82237	eval-mae:18.8973
[3900]	train-mae:1.79347	eval-mae:18.8163
[4000]	train-mae:1.76728	eval-mae:18.6974
[4100]	train-mae:1.75186	eval-mae:18.6552
[4200]	train-mae:1.727	eval-mae:18.5817
[4300]	train-mae:1.70918	eval-mae:18.4878
[4400]	train-mae:1.68425	eval-mae:18.4466
[4500]	train-mae:1.66878	eval-mae:18.3413
[4600]	train-mae:1.64949	eval-mae:18.2869
[4700]	train-mae:1.63777	eval-mae:18.2091
[4800]	train-mae:1.62592	eval-mae:18.0965
[4900]	train-mae:1.60936	eval-mae:18.0649
[5000]	train-mae:1.59235	eval-mae:17.9938
[5100]	train-mae:1.58001	eval-mae:17.9559
[5200]	train-mae:1.56148	eval-mae:17.8862
[5300]	train-mae:1.5491	eval-mae:17.818
[5400]	train-mae:1.53787	eval-mae:17.7613
[5500]	train-mae:1.52296	eval-mae:17.7235
[5600]	train-mae:1.51306	eval-mae:17.6683
[5700]	train-mae:1.49577	eval-mae:17.6283
[5800]	train-mae:1.48558	eval-mae:17.5701
[5900]	train-mae:1.47289	eval-mae:17.5153
[6000]	train-mae:1.46753	eval-mae:17.4811
[6100]	train-mae:1.44965	eval-mae:17.42
[6200]	train-mae:1.45278	eval-mae:17.3574
[6300]	train-mae:1.43583	eval-mae:17.2912
[6400]	train-mae:1.42247	eval-mae:17.2776
[6500]	train-mae:1.41467	eval-mae:17.2326
[6600]	train-mae:1.40436	eval-mae:17.2075
[6700]	train-mae:1.39712	eval-mae:17.1516
[6800]	train-mae:1.38904	eval-mae:17.1181
[6900]	train-mae:1.3844	eval-mae:17.0678
[7000]	train-mae:1.37791	eval-mae:17.05
[7100]	train-mae:1.3749	eval-mae:17.0076
[7200]	train-mae:1.36579	eval-mae:16.9812
[7300]	train-mae:1.35762	eval-mae:16.9597
[7400]	train-mae:1.355	eval-mae:16.9184
[7500]	train-mae:1.3448	eval-mae:16.887
[7600]	train-mae:1.33218	eval-mae:16.8593
[7700]	train-mae:1.32267	eval-mae:16.8394
[7800]	train-mae:1.32019	eval-mae:16.8128
[7900]	train-mae:1.31367	eval-mae:16.7731
[8000]	train-mae:1.30591	eval-mae:16.7461
[8100]	train-mae:1.29897	eval-mae:16.7238
[8200]	train-mae:1.29512	eval-mae:16.703
[8300]	train-mae:1.29778	eval-mae:16.7011
[8400]	train-mae:1.29383	eval-mae:16.6727
[8500]	train-mae:1.28542	eval-mae:16.6512
[8600]	train-mae:1.27887	eval-mae:16.6179
[8700]	train-mae:1.26996	eval-mae:16.5811
[8800]	train-mae:1.27092	eval-mae:16.5651
[8900]	train-mae:1.26194	eval-mae:16.5493
[9000]	train-mae:1.26051	eval-mae:16.5304
[9100]	train-mae:1.25363	eval-mae:16.5068
[9200]	train-mae:1.25219	eval-mae:16.4457
[9300]	train-mae:1.24853	eval-mae:16.4356
[9400]	train-mae:1.24319	eval-mae:16.4151
[9500]	train-mae:1.23446	eval-mae:16.3893
[9600]	train-mae:1.2283	eval-mae:16.3663
[9700]	train-mae:1.22605	eval-mae:16.3496
[9800]	train-mae:1.22309	eval-mae:16.3293
[9900]	train-mae:1.21835	eval-mae:16.2947
[10000]	train-mae:1.21231	eval-mae:16.2741
[10100]	train-mae:1.20788	eval-mae:16.2705
[10200]	train-mae:1.19868	eval-mae:16.2704
[10300]	train-mae:1.20029	eval-mae:16.2282
[10400]	train-mae:1.19644	eval-mae:16.2241
[10500]	train-mae:1.19506	eval-mae:16.2056
[10600]	train-mae:1.18691	eval-mae:16.1769
[10700]	train-mae:1.19271	eval-mae:16.1589
[10800]	train-mae:1.18333	eval-mae:16.1569
[10900]	train-mae:1.17937	eval-mae:16.1286
[11000]	train-mae:1.17536	eval-mae:16.1079
[11100]	train-mae:1.17333	eval-mae:16.0792
[11200]	train-mae:1.16813	eval-mae:16.0687
[11300]	train-mae:1.17126	eval-mae:16.0326
[11400]	train-mae:1.16629	eval-mae:16.012
[11500]	train-mae:1.16364	eval-mae:15.9817
[11600]	train-mae:1.16077	eval-mae:15.9607
[11700]	train-mae:1.15466	eval-mae:15.9483
[11800]	train-mae:1.15599	eval-mae:15.9416
[11900]	train-mae:1.15226	eval-mae:15.9289
[12000]	train-mae:1.15285	eval-mae:15.9211
[12100]	train-mae:1.14802	eval-mae:15.9124
[12200]	train-mae:1.14897	eval-mae:15.9042
[12300]	train-mae:1.14479	eval-mae:15.8709
[12400]	train-mae:1.13873	eval-mae:15.8644
[12500]	train-mae:1.13077	eval-mae:15.8667
[12600]	train-mae:1.12543	eval-mae:15.8635
[12700]	train-mae:1.12197	eval-mae:15.8417
[12800]	train-mae:1.12733	eval-mae:15.8265
[12900]	train-mae:1.12129	eval-mae:15.8296
[13000]	train-mae:1.11824	eval-mae:15.7952
[13100]	train-mae:1.11669	eval-mae:15.7982
[13200]	train-mae:1.11259	eval-mae:15.7732
[13300]	train-mae:1.11229	eval-mae:15.7802
[13400]	train-mae:1.10996	eval-mae:15.7576
[13500]	train-mae:1.10593	eval-mae:15.7424
[13600]	train-mae:1.10436	eval-mae:15.7344
[13700]	train-mae:1.10301	eval-mae:15.7238
[13800]	train-mae:1.1024	eval-mae:15.7139
[13900]	train-mae:1.09904	eval-mae:15.7109
[14000]	train-mae:1.09762	eval-mae:15.705
[14100]	train-mae:1.0948	eval-mae:15.6912
[14200]	train-mae:1.09781	eval-mae:15.6749
[14300]	train-mae:1.09591	eval-mae:15.6714
[14400]	train-mae:1.09607	eval-mae:15.6458
[14500]	train-mae:1.09003	eval-mae:15.6389
[14600]	train-mae:1.08695	eval-mae:15.6279
[14700]	train-mae:1.08182	eval-mae:15.6195
[14800]	train-mae:1.07616	eval-mae:15.5987
[14900]	train-mae:1.07475	eval-mae:15.5982
[15000]	train-mae:1.077	eval-mae:15.5888
[15100]	train-mae:1.07834	eval-mae:15.5812
[15200]	train-mae:1.0763	eval-mae:15.5543
[15300]	train-mae:1.07144	eval-mae:15.5496
[15400]	train-mae:1.07176	eval-mae:15.5447
[15500]	train-mae:1.06593	eval-mae:15.5465
[15600]	train-mae:1.06289	eval-mae:15.5275
[15700]	train-mae:1.06543	eval-mae:15.5158
[15800]	train-mae:1.06188	eval-mae:15.4986
[15900]	train-mae:1.06028	eval-mae:15.4871
[16000]	train-mae:1.06274	eval-mae:15.469
[16100]	train-mae:1.0606	eval-mae:15.4665
[16200]	train-mae:1.06256	eval-mae:15.4567
[16300]	train-mae:1.06055	eval-mae:15.4431
[16400]	train-mae:1.05947	eval-mae:15.4148
[16500]	train-mae:1.05308	eval-mae:15.3981
[16600]	train-mae:1.05293	eval-mae:15.3958
[16700]	train-mae:1.04969	eval-mae:15.3984
[16800]	train-mae:1.04747	eval-mae:15.405
[16900]	train-mae:1.04687	eval-mae:15.3697
[17000]	train-mae:1.04572	eval-mae:15.364
[17100]	train-mae:1.04276	eval-mae:15.3649
[17200]	train-mae:1.04312	eval-mae:15.3475
[17300]	train-mae:1.03962	eval-mae:15.346
[17400]	train-mae:1.03546	eval-mae:15.3496
[17500]	train-mae:1.03498	eval-mae:15.3421
[17600]	train-mae:1.03382	eval-mae:15.3241
[17700]	train-mae:1.03076	eval-mae:15.3078
[17800]	train-mae:1.02632	eval-mae:15.3068
[17900]	train-mae:1.02872	eval-mae:15.312
[18000]	train-mae:1.02651	eval-mae:15.2916
[18100]	train-mae:1.02819	eval-mae:15.2783
[18200]	train-mae:1.02348	eval-mae:15.2845
[18300]	train-mae:1.02069	eval-mae:15.2834
[18400]	train-mae:1.01832	eval-mae:15.2814
Stopping. Best iteration:
[18103]	train-mae:1.02843	eval-mae:15.2744


In [7]:
predictions = model.predict(xgvalid)
predictions


Out[7]:
array([  2.33756088e+02,   8.23087585e+02,   2.93459137e+02,
         2.23369522e+02,   1.87254257e+02,   3.07151520e+02,
         7.10015137e+02,   6.74835999e+02,   8.53519516e+01,
         1.93848095e+01,   8.35177231e+01,   3.87295410e+02,
         5.10970879e+01,   2.13156372e+02,   7.95487213e+01,
         9.04965019e+00,   1.39839645e+02,   4.96125214e+02,
         8.45960815e+02,   1.65965347e+02,   3.85474365e+02,
         1.12080812e+01,   4.69380707e+02,   2.08623459e+02,
         3.11982117e+02,   4.78376556e+02,   7.49822632e+02,
         1.69424438e+02,   4.20627960e+02,   4.77631256e+02,
         8.30738297e+01,   4.79252319e+02,   1.81699966e+02,
         1.20412308e+02,   4.88348312e+01,   4.75990509e+02,
         1.25487732e+03,   1.22208153e+02,   1.00871262e+02,
         1.21528015e+02,   7.33333557e+02,   6.71861328e+02,
         1.20052393e+03,   1.78649261e+02,   3.08227081e+02,
         4.40458099e+02,   5.43160889e+02,   2.77315636e+01,
         2.82393494e+02,   3.04090271e+02,   4.40558838e+02,
         1.32072769e+02,   7.73418701e+02,   2.24183090e+02,
         1.48509033e+02,   8.68036072e+02,   2.60176910e+02,
         1.74552322e+02,   1.43694906e+01,   8.19280472e+01,
         1.25253799e+02,   2.50419418e+02,   6.72724128e-01,
        -4.38738525e-01,   5.75327271e+02,   5.17810097e+01,
         1.12647606e+02,   1.74183395e+02,   7.31893738e+02,
         6.27613831e+02,   6.76478333e+02,   2.33475723e+02,
         5.00212288e+01,   3.27363181e+00,   5.76054077e+02,
         6.82208710e+01,   4.25552795e+02,   2.30682098e+02,
         3.75539337e+02,   7.68183899e+02,   4.69972961e+02,
         1.27276581e+02,   5.97261047e+02,   5.09331398e+01,
         3.03293579e+02,   1.35753433e+02,   4.34963531e+01,
         5.45053635e+01,   3.07755768e+02,   8.23204193e+01,
         4.45279388e+02,   7.17905396e+02,   7.94817734e+01,
         3.84621796e+02,   1.29147491e+02,   9.02195435e+02,
        -1.22408554e-01,   5.05892426e+02,   9.90257324e+02,
         6.45039673e+02,   4.82600983e+02,   3.18642639e+02,
         8.23635742e+02,   2.79669380e+01,  -2.12537840e-01,
         8.59199646e+02,   1.27379326e+02,   4.65877838e+02,
         3.93590088e+02,  -2.12537840e-01,   7.71966797e+02,
         3.27363181e+00,   2.20641022e+02,   2.72350521e+01,
         7.88050598e+02,   3.89647430e+02,   9.27356567e+02,
         5.51916275e+01,   2.33244034e+02,   1.16333103e+01,
         5.10332336e+01,   2.80811081e+01,   5.55861694e+02,
         2.47134614e+00,   1.22318581e+02,   2.76556915e+02,
         3.78163025e+02,   5.29078197e+00,   8.47032547e+01,
         4.87565033e+02,   1.10156822e+02,   7.83492661e+01,
         8.57125397e+01,   5.88282532e+02,   1.48509033e+02,
         1.11045464e+02,   4.95621490e+01,   8.40123535e+02,
         5.10137634e+01,   5.43374329e+01,   1.78722519e+02,
         2.75398102e+02,   1.81493500e+02,   3.44278679e+01,
         9.33501587e+01,   3.06918488e+02,   7.62037201e+01,
         1.11069617e+03,   8.43654175e+02,   3.93432098e+02,
        -6.94304705e-01,   4.88696381e+02,   4.43637466e+00,
         1.00802551e+03,   1.22910301e+02,   1.83032578e+02,
         1.17504285e+03,   8.62962799e+01,   1.54892932e+03,
         1.78534973e+02,   1.86977783e+02,   5.91770813e+02,
         9.82467102e+02,   6.69084656e+02,   3.08156250e+02,
         1.70105225e+02,   3.32948578e+02,   1.83032578e+02,
         1.70707188e+01,   1.73445068e+02,   2.32942017e+02,
         3.53618652e+02,   3.00979736e+02,   3.09609375e+02,
         7.83492661e+01,   3.34050079e+02,   1.24050708e+03,
         8.35916519e+01,   8.24856384e+02,   4.86031952e+02,
         1.22223535e+03,   4.86031952e+02,   6.85141174e+02,
         4.81113983e+02,   1.19573921e+02,   2.31587387e+02,
         1.78722519e+02,   4.16482277e+01,   1.10311060e+03,
        -1.22408554e-01,   6.85454163e+02,   6.21658211e+01,
         9.29158508e+02,   8.19855728e+01,   2.30942505e+02,
         4.72834808e+02,   3.12377350e+02,   3.04666016e+02,
         8.47111389e+02,   7.00644165e+02], dtype=float32)

In [8]:
from utils import chart_regression
chart_regression(predictions, y_valid)



In [ ]: