In [1]:
import os
import pandas as pd
import numpy as np
from stackregression import stack_regression_step1, stack_regression_step2, print_prediction_report
from utils import encode_numeric_zscore_list, encode_numeric_zscore_all, to_xy
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from scipy.sparse import csr_matrix
from xgboost import XGBRegressor
from random import randint
import xgboost as xgb
Using Theano backend.
In [2]:
# Read input CSV file
path = "./data/self"
inputFilePath = os.path.join(path, "TestRegression.csv")
#df = pd.read_csv(inputFilePath, compression="zip", header=0, na_values=['NULL'])
df = pd.read_csv(inputFilePath, header=0, na_values=['NULL'])
x, y = to_xy(df, "Label")
x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.20, random_state=43)
float64
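to_xy comes from this repo's local utils module, which is not shown here. Judging by the printed float64 (the dtype of the Label column) and how its return values are used, it splits the DataFrame into a feature matrix and a target vector. A minimal sketch, assuming that behavior; the real helper may also handle encoding details:

def to_xy(df, target):
    # Hypothetical reconstruction of utils.to_xy: every column except the
    # target becomes a feature; the target keeps its own dtype, which is
    # printed (hence the "float64" above).
    feature_cols = [c for c in df.columns if c != target]
    target_dtype = df[target].dtype
    print(target_dtype)
    x = df[feature_cols].values.astype(np.float32)
    y = df[target].values.astype(target_dtype)
    return x, y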
In [3]:
xgtrain = xgb.DMatrix(x_train, label=y_train)
xgvalid = xgb.DMatrix(x_valid, label=y_valid)
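DMatrix is XGBoost's internal, memory-optimized data structure; building it once up front avoids re-converting the NumPy arrays on every boosting round. If the same split is reused across sessions, the matrices can also be cached to disk with the standard DMatrix API (the filename below is just an example):

xgtrain.save_binary(os.path.join(path, "train.buffer"))   # example path
xgtrain_reloaded = xgb.DMatrix(os.path.join(path, "train.buffer"))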
In [4]:
# Best params on 11/1 for 85% train data: {'subsample': 1.0, 'n_estimators': 174.0, 'eta': 0.1,
# 'colsample_bytree': 0.4, 'gamma': 0.2, 'min_child_weight': 1.0, 'max_depth': 3}
RANDOM_STATE = randint(1, 429496)
params = {
    'min_child_weight': 1,
    'eta': 0.1,
    'colsample_bytree': 0.5,
    'max_depth': 12,
    'subsample': 0.8,
    'alpha': 1,
    'gamma': 1,
    'silent': 1,
    'seed': RANDOM_STATE,
    'eval_metric': 'mae',
    # note: verbose_eval is an argument to xgb.train below, not a booster param;
    # 'reg:linear' is renamed 'reg:squarederror' in newer XGBoost releases
    'objective': 'reg:linear',
}
watchlist = [(xgtrain, 'train'), (xgvalid, 'eval')]
model = xgb.train(params, xgtrain, 100000, watchlist, early_stopping_rounds=300, verbose_eval=100)
[0] train-mae:330.862 eval-mae:320.329
Multiple eval metrics have been passed: 'eval-mae' will be used for early stopping.
Will train until eval-mae hasn't improved in 300 rounds.
[100] train-mae:18.4196 eval-mae:38.3938
[200] train-mae:11.4753 eval-mae:32.9095
[300] train-mae:8.46997 eval-mae:30.1869
[400] train-mae:6.98616 eval-mae:28.7684
[500] train-mae:6.18352 eval-mae:27.7971
[600] train-mae:5.57777 eval-mae:26.8002
[700] train-mae:5.04152 eval-mae:26.0911
[800] train-mae:4.64248 eval-mae:25.4434
[900] train-mae:4.27276 eval-mae:24.7759
[1000] train-mae:4.01328 eval-mae:24.3684
[1100] train-mae:3.74071 eval-mae:23.9558
[1200] train-mae:3.55198 eval-mae:23.4874
[1300] train-mae:3.40277 eval-mae:23.1407
[1400] train-mae:3.22334 eval-mae:22.7592
[1500] train-mae:3.09924 eval-mae:22.5023
[1600] train-mae:2.98631 eval-mae:22.352
[1700] train-mae:2.88477 eval-mae:22.0925
[1800] train-mae:2.79072 eval-mae:21.8207
[1900] train-mae:2.70766 eval-mae:21.6404
[2000] train-mae:2.64193 eval-mae:21.433
[2100] train-mae:2.56033 eval-mae:21.2355
[2200] train-mae:2.4815 eval-mae:21.0381
[2300] train-mae:2.41969 eval-mae:20.7908
[2400] train-mae:2.36412 eval-mae:20.6192
[2500] train-mae:2.31244 eval-mae:20.4495
[2600] train-mae:2.25416 eval-mae:20.3376
[2700] train-mae:2.20451 eval-mae:20.2045
[2800] train-mae:2.15003 eval-mae:20.0585
[2900] train-mae:2.10654 eval-mae:19.9406
[3000] train-mae:2.07279 eval-mae:19.7966
[3100] train-mae:2.04157 eval-mae:19.735
[3200] train-mae:2.00424 eval-mae:19.5904
[3300] train-mae:1.97485 eval-mae:19.4521
[3400] train-mae:1.93954 eval-mae:19.3189
[3500] train-mae:1.90959 eval-mae:19.2379
[3600] train-mae:1.87261 eval-mae:19.0967
[3700] train-mae:1.84384 eval-mae:18.9818
[3800] train-mae:1.82237 eval-mae:18.8973
[3900] train-mae:1.79347 eval-mae:18.8163
[4000] train-mae:1.76728 eval-mae:18.6974
[4100] train-mae:1.75186 eval-mae:18.6552
[4200] train-mae:1.727 eval-mae:18.5817
[4300] train-mae:1.70918 eval-mae:18.4878
[4400] train-mae:1.68425 eval-mae:18.4466
[4500] train-mae:1.66878 eval-mae:18.3413
[4600] train-mae:1.64949 eval-mae:18.2869
[4700] train-mae:1.63777 eval-mae:18.2091
[4800] train-mae:1.62592 eval-mae:18.0965
[4900] train-mae:1.60936 eval-mae:18.0649
[5000] train-mae:1.59235 eval-mae:17.9938
[5100] train-mae:1.58001 eval-mae:17.9559
[5200] train-mae:1.56148 eval-mae:17.8862
[5300] train-mae:1.5491 eval-mae:17.818
[5400] train-mae:1.53787 eval-mae:17.7613
[5500] train-mae:1.52296 eval-mae:17.7235
[5600] train-mae:1.51306 eval-mae:17.6683
[5700] train-mae:1.49577 eval-mae:17.6283
[5800] train-mae:1.48558 eval-mae:17.5701
[5900] train-mae:1.47289 eval-mae:17.5153
[6000] train-mae:1.46753 eval-mae:17.4811
[6100] train-mae:1.44965 eval-mae:17.42
[6200] train-mae:1.45278 eval-mae:17.3574
[6300] train-mae:1.43583 eval-mae:17.2912
[6400] train-mae:1.42247 eval-mae:17.2776
[6500] train-mae:1.41467 eval-mae:17.2326
[6600] train-mae:1.40436 eval-mae:17.2075
[6700] train-mae:1.39712 eval-mae:17.1516
[6800] train-mae:1.38904 eval-mae:17.1181
[6900] train-mae:1.3844 eval-mae:17.0678
[7000] train-mae:1.37791 eval-mae:17.05
[7100] train-mae:1.3749 eval-mae:17.0076
[7200] train-mae:1.36579 eval-mae:16.9812
[7300] train-mae:1.35762 eval-mae:16.9597
[7400] train-mae:1.355 eval-mae:16.9184
[7500] train-mae:1.3448 eval-mae:16.887
[7600] train-mae:1.33218 eval-mae:16.8593
[7700] train-mae:1.32267 eval-mae:16.8394
[7800] train-mae:1.32019 eval-mae:16.8128
[7900] train-mae:1.31367 eval-mae:16.7731
[8000] train-mae:1.30591 eval-mae:16.7461
[8100] train-mae:1.29897 eval-mae:16.7238
[8200] train-mae:1.29512 eval-mae:16.703
[8300] train-mae:1.29778 eval-mae:16.7011
[8400] train-mae:1.29383 eval-mae:16.6727
[8500] train-mae:1.28542 eval-mae:16.6512
[8600] train-mae:1.27887 eval-mae:16.6179
[8700] train-mae:1.26996 eval-mae:16.5811
[8800] train-mae:1.27092 eval-mae:16.5651
[8900] train-mae:1.26194 eval-mae:16.5493
[9000] train-mae:1.26051 eval-mae:16.5304
[9100] train-mae:1.25363 eval-mae:16.5068
[9200] train-mae:1.25219 eval-mae:16.4457
[9300] train-mae:1.24853 eval-mae:16.4356
[9400] train-mae:1.24319 eval-mae:16.4151
[9500] train-mae:1.23446 eval-mae:16.3893
[9600] train-mae:1.2283 eval-mae:16.3663
[9700] train-mae:1.22605 eval-mae:16.3496
[9800] train-mae:1.22309 eval-mae:16.3293
[9900] train-mae:1.21835 eval-mae:16.2947
[10000] train-mae:1.21231 eval-mae:16.2741
[10100] train-mae:1.20788 eval-mae:16.2705
[10200] train-mae:1.19868 eval-mae:16.2704
[10300] train-mae:1.20029 eval-mae:16.2282
[10400] train-mae:1.19644 eval-mae:16.2241
[10500] train-mae:1.19506 eval-mae:16.2056
[10600] train-mae:1.18691 eval-mae:16.1769
[10700] train-mae:1.19271 eval-mae:16.1589
[10800] train-mae:1.18333 eval-mae:16.1569
[10900] train-mae:1.17937 eval-mae:16.1286
[11000] train-mae:1.17536 eval-mae:16.1079
[11100] train-mae:1.17333 eval-mae:16.0792
[11200] train-mae:1.16813 eval-mae:16.0687
[11300] train-mae:1.17126 eval-mae:16.0326
[11400] train-mae:1.16629 eval-mae:16.012
[11500] train-mae:1.16364 eval-mae:15.9817
[11600] train-mae:1.16077 eval-mae:15.9607
[11700] train-mae:1.15466 eval-mae:15.9483
[11800] train-mae:1.15599 eval-mae:15.9416
[11900] train-mae:1.15226 eval-mae:15.9289
[12000] train-mae:1.15285 eval-mae:15.9211
[12100] train-mae:1.14802 eval-mae:15.9124
[12200] train-mae:1.14897 eval-mae:15.9042
[12300] train-mae:1.14479 eval-mae:15.8709
[12400] train-mae:1.13873 eval-mae:15.8644
[12500] train-mae:1.13077 eval-mae:15.8667
[12600] train-mae:1.12543 eval-mae:15.8635
[12700] train-mae:1.12197 eval-mae:15.8417
[12800] train-mae:1.12733 eval-mae:15.8265
[12900] train-mae:1.12129 eval-mae:15.8296
[13000] train-mae:1.11824 eval-mae:15.7952
[13100] train-mae:1.11669 eval-mae:15.7982
[13200] train-mae:1.11259 eval-mae:15.7732
[13300] train-mae:1.11229 eval-mae:15.7802
[13400] train-mae:1.10996 eval-mae:15.7576
[13500] train-mae:1.10593 eval-mae:15.7424
[13600] train-mae:1.10436 eval-mae:15.7344
[13700] train-mae:1.10301 eval-mae:15.7238
[13800] train-mae:1.1024 eval-mae:15.7139
[13900] train-mae:1.09904 eval-mae:15.7109
[14000] train-mae:1.09762 eval-mae:15.705
[14100] train-mae:1.0948 eval-mae:15.6912
[14200] train-mae:1.09781 eval-mae:15.6749
[14300] train-mae:1.09591 eval-mae:15.6714
[14400] train-mae:1.09607 eval-mae:15.6458
[14500] train-mae:1.09003 eval-mae:15.6389
[14600] train-mae:1.08695 eval-mae:15.6279
[14700] train-mae:1.08182 eval-mae:15.6195
[14800] train-mae:1.07616 eval-mae:15.5987
[14900] train-mae:1.07475 eval-mae:15.5982
[15000] train-mae:1.077 eval-mae:15.5888
[15100] train-mae:1.07834 eval-mae:15.5812
[15200] train-mae:1.0763 eval-mae:15.5543
[15300] train-mae:1.07144 eval-mae:15.5496
[15400] train-mae:1.07176 eval-mae:15.5447
[15500] train-mae:1.06593 eval-mae:15.5465
[15600] train-mae:1.06289 eval-mae:15.5275
[15700] train-mae:1.06543 eval-mae:15.5158
[15800] train-mae:1.06188 eval-mae:15.4986
[15900] train-mae:1.06028 eval-mae:15.4871
[16000] train-mae:1.06274 eval-mae:15.469
[16100] train-mae:1.0606 eval-mae:15.4665
[16200] train-mae:1.06256 eval-mae:15.4567
[16300] train-mae:1.06055 eval-mae:15.4431
[16400] train-mae:1.05947 eval-mae:15.4148
[16500] train-mae:1.05308 eval-mae:15.3981
[16600] train-mae:1.05293 eval-mae:15.3958
[16700] train-mae:1.04969 eval-mae:15.3984
[16800] train-mae:1.04747 eval-mae:15.405
[16900] train-mae:1.04687 eval-mae:15.3697
[17000] train-mae:1.04572 eval-mae:15.364
[17100] train-mae:1.04276 eval-mae:15.3649
[17200] train-mae:1.04312 eval-mae:15.3475
[17300] train-mae:1.03962 eval-mae:15.346
[17400] train-mae:1.03546 eval-mae:15.3496
[17500] train-mae:1.03498 eval-mae:15.3421
[17600] train-mae:1.03382 eval-mae:15.3241
[17700] train-mae:1.03076 eval-mae:15.3078
[17800] train-mae:1.02632 eval-mae:15.3068
[17900] train-mae:1.02872 eval-mae:15.312
[18000] train-mae:1.02651 eval-mae:15.2916
[18100] train-mae:1.02819 eval-mae:15.2783
[18200] train-mae:1.02348 eval-mae:15.2845
[18300] train-mae:1.02069 eval-mae:15.2834
[18400] train-mae:1.01832 eval-mae:15.2814
Stopping. Best iteration:
[18103] train-mae:1.02843 eval-mae:15.2744
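Early stopping halted training after 300 rounds without improvement and recorded round 18103 as the best iteration. Note that model.predict uses all trees by default; with the pre-1.0 XGBoost API used here, a booster trained with early stopping exposes best_ntree_limit, so the validation set can be scored using only the trees up to the optimum:

# Predict with only the trees up to the early-stopping optimum
# (best_ntree_limit is set by xgb.train when early stopping fires).
predictions = model.predict(xgvalid, ntree_limit=model.best_ntree_limit)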
In [7]:
predictions = model.predict(xgvalid)
predictions
Out[7]:
array([ 2.33756088e+02, 8.23087585e+02, 2.93459137e+02,
2.23369522e+02, 1.87254257e+02, 3.07151520e+02,
7.10015137e+02, 6.74835999e+02, 8.53519516e+01,
1.93848095e+01, 8.35177231e+01, 3.87295410e+02,
5.10970879e+01, 2.13156372e+02, 7.95487213e+01,
9.04965019e+00, 1.39839645e+02, 4.96125214e+02,
8.45960815e+02, 1.65965347e+02, 3.85474365e+02,
1.12080812e+01, 4.69380707e+02, 2.08623459e+02,
3.11982117e+02, 4.78376556e+02, 7.49822632e+02,
1.69424438e+02, 4.20627960e+02, 4.77631256e+02,
8.30738297e+01, 4.79252319e+02, 1.81699966e+02,
1.20412308e+02, 4.88348312e+01, 4.75990509e+02,
1.25487732e+03, 1.22208153e+02, 1.00871262e+02,
1.21528015e+02, 7.33333557e+02, 6.71861328e+02,
1.20052393e+03, 1.78649261e+02, 3.08227081e+02,
4.40458099e+02, 5.43160889e+02, 2.77315636e+01,
2.82393494e+02, 3.04090271e+02, 4.40558838e+02,
1.32072769e+02, 7.73418701e+02, 2.24183090e+02,
1.48509033e+02, 8.68036072e+02, 2.60176910e+02,
1.74552322e+02, 1.43694906e+01, 8.19280472e+01,
1.25253799e+02, 2.50419418e+02, 6.72724128e-01,
-4.38738525e-01, 5.75327271e+02, 5.17810097e+01,
1.12647606e+02, 1.74183395e+02, 7.31893738e+02,
6.27613831e+02, 6.76478333e+02, 2.33475723e+02,
5.00212288e+01, 3.27363181e+00, 5.76054077e+02,
6.82208710e+01, 4.25552795e+02, 2.30682098e+02,
3.75539337e+02, 7.68183899e+02, 4.69972961e+02,
1.27276581e+02, 5.97261047e+02, 5.09331398e+01,
3.03293579e+02, 1.35753433e+02, 4.34963531e+01,
5.45053635e+01, 3.07755768e+02, 8.23204193e+01,
4.45279388e+02, 7.17905396e+02, 7.94817734e+01,
3.84621796e+02, 1.29147491e+02, 9.02195435e+02,
-1.22408554e-01, 5.05892426e+02, 9.90257324e+02,
6.45039673e+02, 4.82600983e+02, 3.18642639e+02,
8.23635742e+02, 2.79669380e+01, -2.12537840e-01,
8.59199646e+02, 1.27379326e+02, 4.65877838e+02,
3.93590088e+02, -2.12537840e-01, 7.71966797e+02,
3.27363181e+00, 2.20641022e+02, 2.72350521e+01,
7.88050598e+02, 3.89647430e+02, 9.27356567e+02,
5.51916275e+01, 2.33244034e+02, 1.16333103e+01,
5.10332336e+01, 2.80811081e+01, 5.55861694e+02,
2.47134614e+00, 1.22318581e+02, 2.76556915e+02,
3.78163025e+02, 5.29078197e+00, 8.47032547e+01,
4.87565033e+02, 1.10156822e+02, 7.83492661e+01,
8.57125397e+01, 5.88282532e+02, 1.48509033e+02,
1.11045464e+02, 4.95621490e+01, 8.40123535e+02,
5.10137634e+01, 5.43374329e+01, 1.78722519e+02,
2.75398102e+02, 1.81493500e+02, 3.44278679e+01,
9.33501587e+01, 3.06918488e+02, 7.62037201e+01,
1.11069617e+03, 8.43654175e+02, 3.93432098e+02,
-6.94304705e-01, 4.88696381e+02, 4.43637466e+00,
1.00802551e+03, 1.22910301e+02, 1.83032578e+02,
1.17504285e+03, 8.62962799e+01, 1.54892932e+03,
1.78534973e+02, 1.86977783e+02, 5.91770813e+02,
9.82467102e+02, 6.69084656e+02, 3.08156250e+02,
1.70105225e+02, 3.32948578e+02, 1.83032578e+02,
1.70707188e+01, 1.73445068e+02, 2.32942017e+02,
3.53618652e+02, 3.00979736e+02, 3.09609375e+02,
7.83492661e+01, 3.34050079e+02, 1.24050708e+03,
8.35916519e+01, 8.24856384e+02, 4.86031952e+02,
1.22223535e+03, 4.86031952e+02, 6.85141174e+02,
4.81113983e+02, 1.19573921e+02, 2.31587387e+02,
1.78722519e+02, 4.16482277e+01, 1.10311060e+03,
-1.22408554e-01, 6.85454163e+02, 6.21658211e+01,
9.29158508e+02, 8.19855728e+01, 2.30942505e+02,
4.72834808e+02, 3.12377350e+02, 3.04666016e+02,
8.47111389e+02, 7.00644165e+02], dtype=float32)
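As a quick sanity check, the mean absolute error computed directly with scikit-learn should roughly match the eval-mae reported at the best iteration in the training log (about 15.27):

from sklearn.metrics import mean_absolute_error
print(mean_absolute_error(y_valid, predictions))   # expected to be near 15.27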
In [8]:
from utils import chart_regression
chart_regression(predictions, y_valid)
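chart_regression is another helper from the local utils module. A minimal sketch of what it plausibly does, using matplotlib (sorting by the actual target value is an assumption; the real helper may differ):

import matplotlib.pyplot as plt

def chart_regression(pred, y):
    # Hypothetical reconstruction: overlay predicted vs. actual values,
    # sorted by the actual target so systematic error is easy to see.
    t = pd.DataFrame({'pred': pred, 'y': y.flatten()})
    t.sort_values(by='y', inplace=True)
    plt.plot(t['y'].tolist(), label='expected')
    plt.plot(t['pred'].tolist(), label='prediction')
    plt.ylabel('output')
    plt.legend()
    plt.show()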