Prediction model


In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR, LinearSVR
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import validation_curve, learning_curve


/Users/Home/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

Import data

Might still need to clean up the files some after import


In [2]:
path = '../Final report'
X_fn = 'x.csv'
y_fn = 'y.csv'
X_path = os.path.join(path, X_fn)
y_path = os.path.join(path, y_fn)

X = pd.read_csv(X_path)
y = pd.read_csv(y_path)

In [7]:
X.head()


Out[7]:
cluster Year nameplate_capacity DATETIME GROSS LOAD (MW) ERCOT Load, MW Total Wind Installed, MW Total Wind Output, MW Wind Output, % of Installed Wind Output, % of Load 1-hr MW change 1-hr % change Net Load (MW) Net Load Change (MW) Month NG Price ($/mcf) All coal Lignite Subbituminous
0 0 2007 13263.0 2007-01-01 00:00:00 670.0 30428.0 2790.0 1074.0 38.494624 3.529644 NaN NaN 29354.0 NaN 1 6.42 25.1475 20.0275 28.115
1 0 2007 13263.0 2007-01-01 01:00:00 492.0 30133.0 2790.0 922.6 33.068100 3.061760 -151.4 -14.096834 29210.4 -143.6 1 6.42 25.1475 20.0275 28.115
2 0 2007 13263.0 2007-01-01 02:00:00 461.0 29941.0 2790.0 849.2 30.437276 2.836245 -73.4 -7.955777 29091.8 -118.6 1 6.42 25.1475 20.0275 28.115
3 0 2007 13263.0 2007-01-01 03:00:00 417.0 29949.0 2790.0 1056.3 37.860215 3.526996 207.1 24.387659 28892.7 -199.1 1 6.42 25.1475 20.0275 28.115
4 0 2007 13263.0 2007-01-01 04:00:00 677.0 30248.0 2790.0 837.1 30.003584 2.767456 -219.2 -20.751680 29410.9 518.2 1 6.42 25.1475 20.0275 28.115

Make fuel price a ratio of the coal price to the natural gas price


In [3]:
for fuel in ['All coal', 'Lignite', 'Subbituminous']:
    X.loc[:,fuel] = X.loc[:,fuel].values/X.loc[:,'NG Price ($/mcf)'].values
    
X.drop('NG Price ($/mcf)', axis=1, inplace=True)

One-hot encoding of the cluster variable

I'm trying to make this easy for using with different numbers of clusters


In [4]:
cluster_ids = X['cluster'].unique()
for cluster in cluster_ids:
    X['cluster_{}'.format(cluster)] = np.eye(len(cluster_ids))[X['cluster'],cluster]

In [5]:
X.head()


Out[5]:
cluster Year nameplate_capacity DATETIME GROSS LOAD (MW) ERCOT Load, MW Total Wind Installed, MW Total Wind Output, MW Wind Output, % of Installed Wind Output, % of Load ... Month All coal Lignite Subbituminous cluster_0 cluster_1 cluster_2 cluster_3 cluster_4 cluster_5
0 0 2007 13263.0 2007-01-01 00:00:00 670.0 30428.0 2790.0 1074.0 38.494624 3.529644 ... 1 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
1 0 2007 13263.0 2007-01-01 01:00:00 492.0 30133.0 2790.0 922.6 33.068100 3.061760 ... 1 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
2 0 2007 13263.0 2007-01-01 02:00:00 461.0 29941.0 2790.0 849.2 30.437276 2.836245 ... 1 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
3 0 2007 13263.0 2007-01-01 03:00:00 417.0 29949.0 2790.0 1056.3 37.860215 3.526996 ... 1 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
4 0 2007 13263.0 2007-01-01 04:00:00 677.0 30248.0 2790.0 837.1 30.003584 2.767456 ... 1 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0

5 rows × 24 columns


In [6]:
X.tail()


Out[6]:
cluster Year nameplate_capacity DATETIME GROSS LOAD (MW) ERCOT Load, MW Total Wind Installed, MW Total Wind Output, MW Wind Output, % of Installed Wind Output, % of Load ... Month All coal Lignite Subbituminous cluster_0 cluster_1 cluster_2 cluster_3 cluster_4 cluster_5
473329 5 2015 10014.8 2015-12-31 19:00:00 3625.0 39908.77734 16170.0 3824.932373 23.654498 9.584188 ... 12 12.793722 10.780269 14.596413 0.0 0.0 0.0 0.0 0.0 1.0
473330 5 2015 10014.8 2015-12-31 20:00:00 3319.0 38736.85938 16170.0 4625.632813 28.606264 11.941166 ... 12 12.793722 10.780269 14.596413 0.0 0.0 0.0 0.0 0.0 1.0
473331 5 2015 10014.8 2015-12-31 21:00:00 3332.0 37587.70313 16170.0 4957.714844 30.659956 13.189725 ... 12 12.793722 10.780269 14.596413 0.0 0.0 0.0 0.0 0.0 1.0
473332 5 2015 10014.8 2015-12-31 22:00:00 3214.0 36356.26172 16170.0 4699.097656 29.060592 12.925140 ... 12 12.793722 10.780269 14.596413 0.0 0.0 0.0 0.0 0.0 1.0
473333 5 2015 10014.8 2015-12-31 23:00:00 3173.0 35150.33984 16170.0 4313.125000 26.673624 12.270507 ... 12 12.793722 10.780269 14.596413 0.0 0.0 0.0 0.0 0.0 1.0

5 rows × 24 columns

Add free capacity of every group for that hour.

Turns out that this doesn't help prediction. It actually makes prediction much worse...


In [46]:
free_cap_dict = {}
for cluster in range(6):
    free_cap_dict[cluster] = X.loc[X['cluster'] == cluster, ['DATETIME', 'nameplate_capacity', 'GROSS LOAD (MW)']]
    col_name = 'cluster_' +  str(cluster) + ' free capacity'
    free_cap_dict[cluster].loc[:,col_name] = (free_cap_dict[cluster].loc[:,'nameplate_capacity'].values - 
                                              free_cap_dict[cluster].loc[:,'GROSS LOAD (MW)'].values)

In [47]:
free_cap_dict[0].head()


Out[47]:
DATETIME nameplate_capacity GROSS LOAD (MW) cluster_0 free capacity
0 2007-01-01 00:00:00 13263.0 670.0 12593.0
1 2007-01-01 01:00:00 13263.0 492.0 12771.0
2 2007-01-01 02:00:00 13263.0 461.0 12802.0
3 2007-01-01 03:00:00 13263.0 417.0 12846.0
4 2007-01-01 04:00:00 13263.0 677.0 12586.0

In [49]:
for cluster in range(6):
    col_name = 'cluster_' +  str(cluster) + ' free capacity'
    X = pd.merge(X, free_cap_dict[cluster].loc[:,['DATETIME', col_name]], on='DATETIME')

In [51]:
X.head(n=10)


Out[51]:
cluster Year nameplate_capacity DATETIME GROSS LOAD (MW) ERCOT Load, MW Total Wind Installed, MW Total Wind Output, MW Wind Output, % of Installed Wind Output, % of Load ... cluster_2 cluster_3 cluster_4 cluster_5 cluster_0 free capacity cluster_1 free capacity cluster_2 free capacity cluster_3 free capacity cluster_4 free capacity cluster_5 free capacity
0 0 2007 13263.0 2007-01-01 00:00:00 670.0 30428.0 2790.0 1074.0 38.494624 3.529644 ... 0.0 0.0 0.0 0.0 12593.0 7318.7 451.6 371.3 1442.0 4904.1
1 1 2007 7442.7 2007-01-01 00:00:00 124.0 30428.0 2790.0 1074.0 38.494624 3.529644 ... 0.0 0.0 0.0 0.0 12593.0 7318.7 451.6 371.3 1442.0 4904.1
2 2 2007 6706.6 2007-01-01 00:00:00 6255.0 30428.0 2790.0 1074.0 38.494624 3.529644 ... 1.0 0.0 0.0 0.0 12593.0 7318.7 451.6 371.3 1442.0 4904.1
3 3 2007 4985.3 2007-01-01 00:00:00 4614.0 30428.0 2790.0 1074.0 38.494624 3.529644 ... 0.0 1.0 0.0 0.0 12593.0 7318.7 451.6 371.3 1442.0 4904.1
4 4 2007 3969.0 2007-01-01 00:00:00 2527.0 30428.0 2790.0 1074.0 38.494624 3.529644 ... 0.0 0.0 1.0 0.0 12593.0 7318.7 451.6 371.3 1442.0 4904.1
5 5 2007 5478.1 2007-01-01 00:00:00 574.0 30428.0 2790.0 1074.0 38.494624 3.529644 ... 0.0 0.0 0.0 1.0 12593.0 7318.7 451.6 371.3 1442.0 4904.1
6 0 2007 13263.0 2007-01-01 01:00:00 492.0 30133.0 2790.0 922.6 33.068100 3.061760 ... 0.0 0.0 0.0 0.0 12771.0 7307.7 460.6 443.3 1472.0 4878.1
7 1 2007 7442.7 2007-01-01 01:00:00 135.0 30133.0 2790.0 922.6 33.068100 3.061760 ... 0.0 0.0 0.0 0.0 12771.0 7307.7 460.6 443.3 1472.0 4878.1
8 2 2007 6706.6 2007-01-01 01:00:00 6246.0 30133.0 2790.0 922.6 33.068100 3.061760 ... 1.0 0.0 0.0 0.0 12771.0 7307.7 460.6 443.3 1472.0 4878.1
9 3 2007 4985.3 2007-01-01 01:00:00 4542.0 30133.0 2790.0 922.6 33.068100 3.061760 ... 0.0 1.0 0.0 0.0 12771.0 7307.7 460.6 443.3 1472.0 4878.1

10 rows × 30 columns


In [ ]:
for idx in X.index:
datetime = X.loc[idx, 'DATETIME']
    for cluster in range(6):
        col_name = 'cluster_' +  cluster + ' free capacity'
        X.loc[idx, col_name] =

In [ ]:


In [ ]:


In [ ]:


In [8]:
y.tail()


Out[8]:
Unnamed: 0 DATETIME cluster_id_6 Gen Change (MW)
473329 473329 2015-12-31 19:00:00 5 -20.0
473330 473330 2015-12-31 20:00:00 5 -964.0
473331 473331 2015-12-31 21:00:00 5 -608.0
473332 473332 2015-12-31 22:00:00 5 -246.0
473333 473333 2015-12-31 23:00:00 5 -333.0

Drop unnecessary columns and replace nan's with 0


In [7]:
X_cols = ['nameplate_capacity', 'GROSS LOAD (MW)', 'ERCOT Load, MW',
          'Total Wind Installed, MW', 'Total Wind Output, MW', 'Net Load Change (MW)',
          'All coal', 'Lignite', 'Subbituminous']
X_cluster_cols = ['cluster_{}'.format(cluster) for cluster in cluster_ids]
# X_cluster_free_cols  = ['cluster_{} free capacity'.format(cluster) for cluster in cluster_ids]

X_clean = X.loc[:,X_cols+X_cluster_cols]#+X_cluster_free_cols]
X_clean.fillna(0, inplace=True)

y_clean = y.loc[:,'Gen Change (MW)']
y_clean.fillna(0, inplace=True)

In [8]:
print X_clean.shape
print y_clean.shape


(473334, 15)
(473334,)

In [9]:
X_clean.head()


Out[9]:
nameplate_capacity GROSS LOAD (MW) ERCOT Load, MW Total Wind Installed, MW Total Wind Output, MW Net Load Change (MW) All coal Lignite Subbituminous cluster_0 cluster_1 cluster_2 cluster_3 cluster_4 cluster_5
0 13263.0 670.0 30428.0 2790.0 1074.0 0.0 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
1 13263.0 492.0 30133.0 2790.0 922.6 -143.6 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
2 13263.0 461.0 29941.0 2790.0 849.2 -118.6 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
3 13263.0 417.0 29949.0 2790.0 1056.3 -199.1 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
4 13263.0 677.0 30248.0 2790.0 837.1 518.2 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0

Split into training, validation, testing


In [10]:
X_train = X_clean.loc[(X['Year']<2012),:]
y_train = y_clean.loc[(X['Year']<2012)]

X_va = X_clean.loc[X['Year'].isin([2012, 2013]),:]
y_va = y_clean.loc[X['Year'].isin([2012, 2013])]

X_test = X_clean.loc[X['Year']>2013,:]
y_test = y_clean.loc[X['Year']>2013]

Need scaled versions of the X data for some of the models


In [11]:
X_train_scaled = StandardScaler().fit_transform(X_train)
X_va_scaled = StandardScaler().fit_transform(X_va)
X_test_scaled = StandardScaler().fit_transform(X_test)

Check size of all arrays


In [21]:
print X_train_scaled.shape, y_train.shape
print X_va_scaled.shape, y_va.shape
print X_test_scaled.shape, y_test.shape


(262944, 16) (262944,)
(105264, 16) (105264,)
(105126, 16) (105126,)

Linear Regression (OLS)


In [59]:
lm = LinearRegression()
lm.fit(X_train_scaled, y_train)


Out[59]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [60]:
lm.score(X_va_scaled, y_va)


Out[60]:
-0.00072877220951217403

In [24]:
y_pr = lm.predict(X_va_scaled)

In [41]:
y_va.values.shape, y_pr.shape, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values.shape


Out[41]:
((105266,), (105264,), (105264,))

In [25]:
y_lm_resids = pd.DataFrame(dict(zip(['Gen Change (MW)', 'y_pr', 'cluster'],
                               [y_va.values, y_pr, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values])))
# y_lm_resids['y_pr'] = y_pr
# y_lm_resids['cluster'] = X.loc[:,'cluster']

In [26]:
y_lm_resids.head()


Out[26]:
Gen Change (MW) cluster y_pr
0 0.0 0 -61.176003
1 1.0 0 -20.166237
2 -1.0 0 -17.027565
3 0.0 0 -14.052956
4 0.0 0 21.281029

In [27]:
y_lm_resids.loc[:,'residuals'] = y_lm_resids.loc[:,'y_pr'] - y_lm_resids.loc[:,'Gen Change (MW)']

In [29]:
g = sns.FacetGrid(y_lm_resids, hue='cluster', col='cluster',
                  col_wrap=3)
g.map(plt.scatter, 'Gen Change (MW)', 'residuals')
g.add_legend()


Out[29]:
<seaborn.axisgrid.FacetGrid at 0x116244110>
Out[29]:
<seaborn.axisgrid.FacetGrid at 0x116244110>

XGBoost


In [12]:
from xgboost import XGBRegressor

Validation curve for n_estimators


In [13]:
param_values = [25, 100, 250, 350]
train_scores, valid_scores = validation_curve(XGBRegressor(), X_train, y_train, "n_estimators", param_values,
                                              n_jobs=-1, verbose=3)


[CV] n_estimators=25 .................................................
[CV] n_estimators=100 ................................................
[CV] n_estimators=250 ................................................
[CV] n_estimators=350 ................................................
[CV] ........................ n_estimators=25, score=0.436578 -   0.1s
[CV] n_estimators=25 .................................................
[CV] ........................ n_estimators=25, score=0.411833 -   0.1s
[CV] n_estimators=100 ................................................
[CV] ....................... n_estimators=100, score=0.489780 -   0.5s
[CV] n_estimators=250 ................................................
[Parallel(n_jobs=-1)]: Done   3 out of  12 | elapsed:   23.4s remaining:  1.2min
[CV] ....................... n_estimators=100, score=0.472213 -   0.4s
[CV] n_estimators=350 ................................................
[CV] ....................... n_estimators=250, score=0.523162 -   1.2s
[CV] n_estimators=25 .................................................
[CV] ........................ n_estimators=25, score=0.432120 -   0.2s
[CV] n_estimators=100 ................................................
[CV] ....................... n_estimators=350, score=0.528431 -   1.4s
[CV] n_estimators=250 ................................................
[CV] ....................... n_estimators=250, score=0.493032 -   1.0s
[CV] n_estimators=350 ................................................
[Parallel(n_jobs=-1)]: Done   8 out of  12 | elapsed:  1.4min remaining:   41.7s
[CV] ....................... n_estimators=100, score=0.483753 -   0.5s
[CV] ....................... n_estimators=350, score=0.492202 -   1.2s
[CV] ....................... n_estimators=250, score=0.466626 -   0.8s
[CV] ....................... n_estimators=350, score=0.446004 -   0.9s
[Parallel(n_jobs=-1)]: Done  12 out of  12 | elapsed:  2.3min finished

In [14]:
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)
valid_scores_std = np.std(valid_scores, axis=1)

In [15]:
plt.title("Validation Curve with XGBoost", size=15)
plt.xlabel("n_estimators", size=15)
plt.ylabel("Score", size=15)
plt.ylim(0.0, 1.1)
lw = 2
plt.plot(param_values, train_scores_mean, label="Training score",
             color="darkorange", lw=lw)
plt.fill_between(param_values, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2,
                 color="darkorange", lw=lw)
plt.plot(param_values, valid_scores_mean, label="Cross-validation score",
             color="navy", lw=lw)
plt.fill_between(param_values, valid_scores_mean - valid_scores_std,
                 valid_scores_mean + valid_scores_std, alpha=0.2,
                 color="navy", lw=lw)
plt.legend(loc="best")
plt.savefig('XGBoost n_estimators validation curve.pdf', bbox_inches='tight')


Out[15]:
<matplotlib.text.Text at 0x103a17c90>
Out[15]:
<matplotlib.text.Text at 0x1150befd0>
Out[15]:
<matplotlib.text.Text at 0x103aa40d0>
Out[15]:
(0.0, 1.1)
Out[15]:
[<matplotlib.lines.Line2D at 0x1150a7110>]
Out[15]:
<matplotlib.collections.PolyCollection at 0x103a93a10>
Out[15]:
[<matplotlib.lines.Line2D at 0x103abfe50>]
Out[15]:
<matplotlib.collections.PolyCollection at 0x103a93950>
Out[15]:
<matplotlib.legend.Legend at 0x103adb8d0>

Validation curve for n_estimators


In [16]:
param_values = [1,3,5,9,15]
train_scores, valid_scores = validation_curve(XGBRegressor(n_estimators=250), X_train, y_train, "max_depth", param_values,
                                              n_jobs=-1, verbose=3)


[CV] max_depth=1 .....................................................
[CV] max_depth=3 .....................................................
[CV] max_depth=5 .....................................................
[CV] max_depth=9 .....................................................
[CV] ............................ max_depth=1, score=0.189391 -   0.7s
[CV] max_depth=15 ....................................................
[CV] ............................ max_depth=3, score=0.523162 -   0.9s
[CV] max_depth=1 .....................................................
[CV] ............................ max_depth=1, score=0.221639 -   0.4s
[CV] max_depth=3 .....................................................
[CV] ............................ max_depth=5, score=0.526783 -   1.5s
[CV] max_depth=5 .....................................................
[Parallel(n_jobs=-1)]: Done   4 out of  15 | elapsed:  1.6min remaining:  4.3min
[CV] ............................ max_depth=3, score=0.493032 -   0.8s
[CV] max_depth=9 .....................................................
[CV] ............................ max_depth=9, score=0.499568 -   3.8s
[CV] max_depth=15 ....................................................
[CV] ............................ max_depth=5, score=0.479847 -   1.5s
[CV] max_depth=1 .....................................................
[CV] ............................ max_depth=1, score=0.208100 -   0.4s
[CV] max_depth=3 .....................................................
[CV] ............................ max_depth=3, score=0.466626 -   1.0s
[CV] max_depth=5 .....................................................
[CV] ............................ max_depth=9, score=0.414931 -   4.1s
[CV] max_depth=9 .....................................................
[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed:  4.7min remaining:  2.3min
[CV] ........................... max_depth=15, score=0.438524 -  10.4s
[CV] max_depth=15 ....................................................
[CV] ............................ max_depth=5, score=0.373544 -   1.7s
[CV] ............................ max_depth=9, score=0.334710 -   3.9s
[CV] ........................... max_depth=15, score=0.364308 -   9.7s
[CV] ........................... max_depth=15, score=0.343634 -   6.6s
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  8.0min finished

In [17]:
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)
valid_scores_std = np.std(valid_scores, axis=1)

In [18]:
plt.title("Validation Curve with XGBoost", size=15)
plt.xlabel("max_depth", size=15)
plt.ylabel("Score", size=15)
plt.ylim(0.0, 1.1)
lw = 2
plt.plot(param_values, train_scores_mean, label="Training score",
             color="darkorange", lw=lw)
plt.fill_between(param_values, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2,
                 color="darkorange", lw=lw)
plt.plot(param_values, valid_scores_mean, label="Cross-validation score",
             color="navy", lw=lw)
plt.fill_between(param_values, valid_scores_mean - valid_scores_std,
                 valid_scores_mean + valid_scores_std, alpha=0.2,
                 color="navy", lw=lw)
plt.legend(loc="best")
plt.savefig('XGBoost max_depth validation curve.pdf', bbox_inches='tight')


Out[18]:
<matplotlib.text.Text at 0x115da7bd0>
Out[18]:
<matplotlib.text.Text at 0x115c17590>
Out[18]:
<matplotlib.text.Text at 0x115ad7810>
Out[18]:
(0.0, 1.1)
Out[18]:
[<matplotlib.lines.Line2D at 0x115c019d0>]
Out[18]:
<matplotlib.collections.PolyCollection at 0x115c14dd0>
Out[18]:
[<matplotlib.lines.Line2D at 0x117510890>]
Out[18]:
<matplotlib.collections.PolyCollection at 0x103a8e650>
Out[18]:
<matplotlib.legend.Legend at 0x117524650>

Validation curve for reg_alpha


In [68]:
param_values = np.logspace(-5, 1, 7)
train_scores, valid_scores = validation_curve(XGBRegressor(n_estimators=250), X_train, y_train, "reg_alpha", param_values,
                                              n_jobs=-1, verbose=3)


[CV] reg_alpha=1e-05 .................................................
[CV] reg_alpha=0.0001 ................................................
[CV] reg_alpha=0.001 .................................................
[CV] reg_alpha=0.01 ..................................................
[CV] ........................ reg_alpha=1e-05, score=0.523162 -   0.9s
[CV] reg_alpha=0.1 ...................................................
[CV] ........................ reg_alpha=0.001, score=0.523162 -   0.9s
[CV] reg_alpha=1.0 ...................................................
[CV] ......................... reg_alpha=0.01, score=0.523162 -   1.0s
[CV] reg_alpha=10.0 ..................................................
[CV] ....................... reg_alpha=0.0001, score=0.523162 -   1.0s
[CV] reg_alpha=1e-05 .................................................
[CV] .......................... reg_alpha=1.0, score=0.523163 -   1.2s
[CV] reg_alpha=0.0001 ................................................
[CV] ........................ reg_alpha=1e-05, score=0.493032 -   1.3s
[CV] reg_alpha=0.001 .................................................
[CV] .......................... reg_alpha=0.1, score=0.523162 -   1.2s
[CV] reg_alpha=0.01 ..................................................
[Parallel(n_jobs=-1)]: Done   6 out of  21 | elapsed:  1.7min remaining:  4.3min
[CV] ......................... reg_alpha=10.0, score=0.523173 -   1.0s
[CV] reg_alpha=0.1 ...................................................
[CV] ....................... reg_alpha=0.0001, score=0.493032 -   0.9s
[CV] reg_alpha=1.0 ...................................................
[CV] ......................... reg_alpha=0.01, score=0.493032 -   1.0s
[CV] reg_alpha=10.0 ..................................................
[CV] ........................ reg_alpha=0.001, score=0.493032 -   1.0s
[CV] reg_alpha=1e-05 .................................................
[CV] .......................... reg_alpha=0.1, score=0.493032 -   1.0s
[CV] reg_alpha=0.0001 ................................................
[CV] ......................... reg_alpha=10.0, score=0.493033 -   1.0s
[CV] reg_alpha=0.001 .................................................
[CV] .......................... reg_alpha=1.0, score=0.493032 -   1.0s
[CV] reg_alpha=0.01 ..................................................
[Parallel(n_jobs=-1)]: Done  14 out of  21 | elapsed:  3.5min remaining:  1.7min
[CV] ........................ reg_alpha=1e-05, score=0.466626 -   1.1s
[CV] reg_alpha=0.1 ...................................................
[CV] ....................... reg_alpha=0.0001, score=0.466626 -   1.2s
[CV] reg_alpha=1.0 ...................................................
[CV] ........................ reg_alpha=0.001, score=0.466626 -   1.0s
[CV] reg_alpha=10.0 ..................................................
[CV] ......................... reg_alpha=0.01, score=0.466626 -   1.0s
[CV] .......................... reg_alpha=1.0, score=0.466634 -   1.0s
[CV] .......................... reg_alpha=0.1, score=0.466627 -   1.1s
[CV] ......................... reg_alpha=10.0, score=0.466572 -   0.6s
[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:  4.8min finished

In [69]:
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)
valid_scores_std = np.std(valid_scores, axis=1)

In [71]:
plt.title("Validation Curve with XGBoost")
plt.xlabel("reg_alpha")
plt.ylabel("Score")
plt.ylim(0.0, 1.1)
lw = 2
plt.semilogx(param_values, train_scores_mean, label="Training score",
             color="darkorange", lw=lw)
plt.fill_between(param_values, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2,
                 color="darkorange", lw=lw)
plt.semilogx(param_values, valid_scores_mean, label="Cross-validation score",
             color="navy", lw=lw)
plt.fill_between(param_values, valid_scores_mean - valid_scores_std,
                 valid_scores_mean + valid_scores_std, alpha=0.2,
                 color="navy", lw=lw)
plt.legend(loc="best")


Out[71]:
<matplotlib.text.Text at 0x1180158d0>
Out[71]:
<matplotlib.text.Text at 0x1150d4d10>
Out[71]:
<matplotlib.text.Text at 0x117fe4ad0>
Out[71]:
(0.0, 1.1)
Out[71]:
[<matplotlib.lines.Line2D at 0x115bf2410>]
Out[71]:
<matplotlib.collections.PolyCollection at 0x117a32510>
Out[71]:
[<matplotlib.lines.Line2D at 0x118a08e90>]
Out[71]:
<matplotlib.collections.PolyCollection at 0x118a1a850>
Out[71]:
<matplotlib.legend.Legend at 0x118a1af90>

Learning curve for n_estimators=250 and max_depth=3


In [37]:
param_values = [1,3,5,9,15]
train_sizes, train_scores, valid_scores = learning_curve(XGBRegressor(n_estimators=250), X_train, y_train,
                                              n_jobs=-1, verbose=3)


[learning_curve] Training set sizes: [ 17529  56971  96412 135854 175296]
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV] ....................................... , score=0.499820 -   1.1s
[CV]  ................................................................
[CV] ....................................... , score=0.534452 -   0.8s
[CV]  ................................................................
[CV] ....................................... , score=0.063444 -   0.9s
[CV]  ................................................................
[CV] ....................................... , score=0.519911 -   0.9s
[CV]  ................................................................
[Parallel(n_jobs=-1)]: Done   4 out of  15 | elapsed:   26.6s remaining:  1.2min
[CV] ....................................... , score=0.379781 -   1.0s
[CV]  ................................................................
[CV] ....................................... , score=0.524756 -   0.9s
[CV]  ................................................................
[CV] ....................................... , score=0.456198 -   0.9s
[CV]  ................................................................
[CV] ....................................... , score=0.523162 -   0.9s
[CV]  ................................................................
[CV] ...................................... , score=-0.076204 -   0.9s
[CV]  ................................................................
[CV] ....................................... , score=0.040880 -   0.9s
[CV]  ................................................................
[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed:  1.1min remaining:   34.5s
[CV] ....................................... , score=0.479948 -   0.9s
[CV]  ................................................................
[CV] ....................................... , score=0.307569 -   0.9s
[CV] ....................................... , score=0.493032 -   0.9s
[CV] ....................................... , score=0.327817 -   0.7s
[CV] ....................................... , score=0.466626 -   0.7s
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  1.8min finished

In [38]:
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)
valid_scores_std = np.std(valid_scores, axis=1)

In [39]:
plt.title("Learning Curve with XGBoost", size=15)
plt.xlabel("Sample size", size=15)
plt.ylabel("Score", size=15)
plt.ylim(0.0, 1.1)
lw = 2
plt.plot(train_sizes, train_scores_mean, label="Training score",
             color="darkorange", lw=lw)
plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2,
                 color="darkorange", lw=lw)
plt.plot(train_sizes, valid_scores_mean, label="Cross-validation score",
             color="navy", lw=lw)
plt.fill_between(train_sizes, valid_scores_mean - valid_scores_std,
                 valid_scores_mean + valid_scores_std, alpha=0.2,
                 color="navy", lw=lw)
plt.legend(loc="best")
plt.savefig('XGBoost learning curve.pdf', bbox_inches='tight')


Out[39]:
<matplotlib.text.Text at 0x118172190>
Out[39]:
<matplotlib.text.Text at 0x115b7a310>
Out[39]:
<matplotlib.text.Text at 0x116143190>
Out[39]:
(0.0, 1.1)
Out[39]:
[<matplotlib.lines.Line2D at 0x1150aa450>]
Out[39]:
<matplotlib.collections.PolyCollection at 0x115bd6b50>
Out[39]:
[<matplotlib.lines.Line2D at 0x117c4ead0>]
Out[39]:
<matplotlib.collections.PolyCollection at 0x11818dd90>
Out[39]:
<matplotlib.legend.Legend at 0x115a8ea10>

In [22]:
xgbr = XGBRegressor(n_estimators=250)

In [23]:
xgbr.fit(X_train, y_train)


Out[23]:
XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0,
       learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=250, nthread=-1,
       objective='reg:linear', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [25]:
y_pr = xgbr.predict(X_va)
y_xgbr_resids = pd.DataFrame(dict(zip(['Gen Change (MW)', 'y_pr', 'cluster'],
                               [y_va.values, y_pr, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values])))

In [27]:
y_xgbr_resids.loc[:,'residuals'] = y_xgbr_resids.loc[:,'y_pr'] - y_xgbr_resids.loc[:,'Gen Change (MW)']

In [ ]:
plt.scatter()

In [39]:
with sns.axes_style('whitegrid'):
    g = sns.FacetGrid(y_xgbr_resids, hue='cluster', col='cluster',
                      col_wrap=3)
    g.map(plt.scatter, 'y_pr', 'residuals', s=5, alpha=.3)
    g.set_xlabels(size=15)
    g.set_ylabels(size=15)
    plt.savefig('XGBR residuals.pdf')


Out[39]:
<seaborn.axisgrid.FacetGrid at 0x11d961750>
Out[39]:
<seaborn.axisgrid.FacetGrid at 0x11d961750>
Out[39]:
<seaborn.axisgrid.FacetGrid at 0x11d961750>

In [ ]:


In [ ]:


In [15]:
model = XGBRegressor()

In [16]:
subsample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
param_grid = dict(subsample=subsample)

In [18]:
grid_search = GridSearchCV(model, param_grid, n_jobs=-1, verbose=3)

In [19]:
result = grid_search.fit(X_train_scaled, y_train)


Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] subsample=0.1 ...................................................
[CV] subsample=0.1 ...................................................
[CV] subsample=0.1 ...................................................
[CV] subsample=0.2 ...................................................
[CV] .......................... subsample=0.1, score=0.484954 -   0.4s
[CV] subsample=0.2 ...................................................
[CV] .......................... subsample=0.1, score=0.457812 -   0.4s
[CV] subsample=0.2 ...................................................
[CV] .......................... subsample=0.1, score=0.432054 -   0.4s
[CV] subsample=0.3 ...................................................
[CV] .......................... subsample=0.2, score=0.489922 -   0.4s
[CV] subsample=0.3 ...................................................
[CV] .......................... subsample=0.2, score=0.405688 -   0.3s
[CV] subsample=0.3 ...................................................
[CV] .......................... subsample=0.2, score=0.462352 -   0.3s
[CV] subsample=0.4 ...................................................
[CV] .......................... subsample=0.3, score=0.494859 -   0.3s
[CV] subsample=0.4 ...................................................
[CV] .......................... subsample=0.3, score=0.461317 -   0.3s
[CV] subsample=0.4 ...................................................
[CV] .......................... subsample=0.3, score=0.459056 -   0.3s
[CV] subsample=0.5 ...................................................
[CV] .......................... subsample=0.4, score=0.496053 -   0.3s
[CV] subsample=0.5 ...................................................
[CV] .......................... subsample=0.4, score=0.463809 -   0.3s
[CV] subsample=0.5 ...................................................
[CV] .......................... subsample=0.4, score=0.449886 -   0.3s
[CV] subsample=0.6 ...................................................
[CV] .......................... subsample=0.5, score=0.499908 -   0.5s
[CV] subsample=0.6 ...................................................
[CV] .......................... subsample=0.5, score=0.462957 -   0.3s
[CV] subsample=0.6 ...................................................
[CV] .......................... subsample=0.5, score=0.447146 -   0.3s
[CV] subsample=0.7 ...................................................
[CV] .......................... subsample=0.6, score=0.500445 -   0.5s
[CV] subsample=0.7 ...................................................
[CV] .......................... subsample=0.6, score=0.463235 -   0.3s
[CV] subsample=0.7 ...................................................
[CV] .......................... subsample=0.6, score=0.424141 -   0.4s
[CV] subsample=0.8 ...................................................
[CV] .......................... subsample=0.7, score=0.498927 -   0.3s
[CV] subsample=0.8 ...................................................
[CV] .......................... subsample=0.7, score=0.465330 -   0.3s
[CV] subsample=0.8 ...................................................
[CV] .......................... subsample=0.7, score=0.467672 -   0.3s
[CV] subsample=1.0 ...................................................
[CV] .......................... subsample=0.8, score=0.496447 -   0.3s
[CV] subsample=1.0 ...................................................
[CV] .......................... subsample=0.8, score=0.461902 -   0.4s
[CV] subsample=1.0 ...................................................
[CV] .......................... subsample=0.8, score=0.462582 -   0.5s
[CV] .......................... subsample=1.0, score=0.498483 -   0.3s
[CV] .......................... subsample=1.0, score=0.463337 -   0.2s
[CV] .......................... subsample=1.0, score=0.473275 -   0.2s
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:  2.4min finished

In [20]:
result.cv_results_


Out[20]:
{'mean_fit_time': array([ 17.29887025,  17.27616898,  17.9309593 ,  20.16705402,
         21.57488139,  21.970059  ,  20.77520935,  20.17156092,  15.8298982 ]),
 'mean_score_time': array([ 0.37530239,  0.33570194,  0.31040764,  0.32522202,  0.35875694,
         0.3899494 ,  0.2991906 ,  0.38670103,  0.22519708]),
 'mean_test_score': array([ 0.4582734 ,  0.45265404,  0.47174388,  0.46991596,  0.47000372,
         0.46260688,  0.47730961,  0.47364352,  0.47836477]),
 'mean_train_score': array([ 0.52659795,  0.53019258,  0.53201711,  0.53158389,  0.53369591,
         0.53187077,  0.53222026,  0.53103841,  0.53230937]),
 'param_subsample': masked_array(data = [0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 1.0],
              mask = [False False False False False False False False False],
        fill_value = ?),
 'params': ({'subsample': 0.1},
  {'subsample': 0.2},
  {'subsample': 0.3},
  {'subsample': 0.4},
  {'subsample': 0.5},
  {'subsample': 0.6},
  {'subsample': 0.7},
  {'subsample': 0.8},
  {'subsample': 1.0}),
 'rank_test_score': array([8, 9, 4, 6, 5, 7, 2, 3, 1], dtype=int32),
 'split0_test_score': array([ 0.48495396,  0.48992193,  0.49485867,  0.49605334,  0.49990795,
         0.50044486,  0.49892667,  0.49644705,  0.49848254]),
 'split0_train_score': array([ 0.52960193,  0.53432657,  0.53327846,  0.53291754,  0.53559476,
         0.53532652,  0.53562279,  0.5357719 ,  0.53546957]),
 'split1_test_score': array([ 0.45781181,  0.46235239,  0.46131677,  0.46380884,  0.46295738,
         0.46323497,  0.46533023,  0.4619017 ,  0.46333653]),
 'split1_train_score': array([ 0.54735279,  0.54738404,  0.54435946,  0.54753438,  0.55000352,
         0.54396756,  0.54661591,  0.54567355,  0.54748602]),
 'split2_test_score': array([ 0.43205443,  0.4056878 ,  0.4590562 ,  0.44988569,  0.44714583,
         0.4241408 ,  0.46767192,  0.4625818 ,  0.47327525]),
 'split2_train_score': array([ 0.50283914,  0.50886712,  0.5184134 ,  0.51429974,  0.51548944,
         0.51631822,  0.51442209,  0.51166979,  0.51397251]),
 'std_fit_time': array([ 0.03468956,  1.56806389,  0.17731887,  0.08404964,  0.17640389,
         0.37043793,  0.28607418,  0.18931806,  0.82959389]),
 'std_score_time': array([ 0.01687459,  0.03872609,  0.01886037,  0.01837927,  0.07238271,
         0.07224925,  0.01489875,  0.06787804,  0.02795523]),
 'std_test_score': array([ 0.02159861,  0.03506556,  0.01637066,  0.01933625,  0.0221088 ,
         0.03115417,  0.01531544,  0.01612693,  0.01479274]),
 'std_train_score': array([ 0.01829634,  0.01599387,  0.01062992,  0.01360072,  0.01415414,
         0.01154927,  0.01336147,  0.01427978,  0.01386312])}

In [ ]:


In [22]:
model = XGBRegressor()

In [23]:
colsample_bytree = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
param_grid = dict(colsample_bytree=colsample_bytree)

In [24]:
grid_search = GridSearchCV(model, param_grid, n_jobs=-1, verbose=3)

In [25]:
result = grid_search.fit(X_train_scaled, y_train)


Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] colsample_bytree=0.1 ............................................
[CV] colsample_bytree=0.1 ............................................
[CV] colsample_bytree=0.1 ............................................
[CV] colsample_bytree=0.2 ............................................
[CV] ................... colsample_bytree=0.1, score=0.164646 -   0.2s
[CV] colsample_bytree=0.2 ............................................
[CV] ................... colsample_bytree=0.1, score=0.163248 -   0.2s
[CV] colsample_bytree=0.2 ............................................
[CV] ................... colsample_bytree=0.1, score=0.149747 -   0.2s
[CV] colsample_bytree=0.3 ............................................
[CV] ................... colsample_bytree=0.2, score=0.270653 -   0.3s
[CV] colsample_bytree=0.3 ............................................
[CV] ................... colsample_bytree=0.2, score=0.277469 -   0.4s
[CV] colsample_bytree=0.3 ............................................
[CV] ................... colsample_bytree=0.2, score=0.241283 -   0.4s
[CV] colsample_bytree=0.4 ............................................
[CV] ................... colsample_bytree=0.3, score=0.361171 -   0.3s
[CV] colsample_bytree=0.4 ............................................
[CV] ................... colsample_bytree=0.3, score=0.343253 -   0.3s
[CV] colsample_bytree=0.4 ............................................
[CV] ................... colsample_bytree=0.3, score=0.332786 -   0.3s
[CV] colsample_bytree=0.5 ............................................
[CV] ................... colsample_bytree=0.4, score=0.432744 -   0.3s
[CV] colsample_bytree=0.5 ............................................
[CV] ................... colsample_bytree=0.4, score=0.404372 -   0.4s
[CV] colsample_bytree=0.5 ............................................
[CV] ................... colsample_bytree=0.4, score=0.394815 -   0.3s
[CV] colsample_bytree=0.6 ............................................
[CV] ................... colsample_bytree=0.5, score=0.461426 -   0.3s
[CV] colsample_bytree=0.6 ............................................
[CV] ................... colsample_bytree=0.5, score=0.426662 -   0.4s
[CV] colsample_bytree=0.6 ............................................
[CV] ................... colsample_bytree=0.5, score=0.428052 -   0.3s
[CV] colsample_bytree=0.7 ............................................
[CV] ................... colsample_bytree=0.6, score=0.484498 -   0.4s
[CV] colsample_bytree=0.7 ............................................
[CV] ................... colsample_bytree=0.6, score=0.435976 -   0.4s
[CV] colsample_bytree=0.7 ............................................
[CV] ................... colsample_bytree=0.6, score=0.432028 -   0.4s
[CV] colsample_bytree=0.8 ............................................
[CV] ................... colsample_bytree=0.7, score=0.481932 -   0.4s
[CV] colsample_bytree=0.8 ............................................
[CV] ................... colsample_bytree=0.7, score=0.449476 -   0.3s
[CV] colsample_bytree=0.8 ............................................
[CV] ................... colsample_bytree=0.7, score=0.445217 -   0.4s
[CV] colsample_bytree=1.0 ............................................
[CV] ................... colsample_bytree=0.8, score=0.477589 -   0.3s
[CV] colsample_bytree=1.0 ............................................
[CV] ................... colsample_bytree=0.8, score=0.455795 -   0.3s
[CV] colsample_bytree=1.0 ............................................
[CV] ................... colsample_bytree=0.8, score=0.440667 -   0.3s
[CV] ................... colsample_bytree=1.0, score=0.498483 -   0.3s
[CV] ................... colsample_bytree=1.0, score=0.463337 -   0.2s
[CV] ................... colsample_bytree=1.0, score=0.473275 -   0.2s
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:  1.5min finished

In [26]:
result.cv_results_


Out[26]:
{'mean_fit_time': array([  4.56124123,   6.81195768,   8.10874836,   9.60875932,
         12.16170565,  14.600293  ,  17.21607868,  16.53451133,  15.66608071]),
 'mean_score_time': array([ 0.22303112,  0.37348668,  0.32521661,  0.35028831,  0.35460496,
         0.38649503,  0.35863702,  0.32337038,  0.23110159]),
 'mean_test_score': array([ 0.15921352,  0.26313523,  0.3457365 ,  0.41064376,  0.43871338,
         0.45083418,  0.45887504,  0.45801704,  0.47836477]),
 'mean_train_score': array([ 0.16892206,  0.31116065,  0.39683733,  0.46549971,  0.49976069,
         0.51085103,  0.51879221,  0.52348525,  0.53230937]),
 'param_colsample_bytree': masked_array(data = [0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 1.0],
              mask = [False False False False False False False False False],
        fill_value = ?),
 'params': ({'colsample_bytree': 0.1},
  {'colsample_bytree': 0.2},
  {'colsample_bytree': 0.3},
  {'colsample_bytree': 0.4},
  {'colsample_bytree': 0.5},
  {'colsample_bytree': 0.6},
  {'colsample_bytree': 0.7},
  {'colsample_bytree': 0.8},
  {'colsample_bytree': 1.0}),
 'rank_test_score': array([9, 8, 7, 6, 5, 4, 2, 3, 1], dtype=int32),
 'split0_test_score': array([ 0.16464645,  0.27065299,  0.36117083,  0.43274381,  0.46142571,
         0.48449781,  0.48193165,  0.47758942,  0.49848254]),
 'split0_train_score': array([ 0.17610519,  0.30573853,  0.38943222,  0.4613343 ,  0.49598943,
         0.51501353,  0.5193987 ,  0.51984055,  0.53546957]),
 'split1_test_score': array([ 0.16324756,  0.2774694 ,  0.34325273,  0.40437222,  0.42666201,
         0.43597646,  0.44947615,  0.45579473,  0.46333653]),
 'split1_train_score': array([ 0.17075628,  0.3177841 ,  0.40282043,  0.48186734,  0.51148631,
         0.51749275,  0.53201472,  0.53787768,  0.54748602]),
 'split2_test_score': array([ 0.14974656,  0.24128329,  0.33278595,  0.39481524,  0.42805241,
         0.43202828,  0.44521732,  0.44066697,  0.47327525]),
 'split2_train_score': array([ 0.15990471,  0.30995931,  0.39825933,  0.4532975 ,  0.49180633,
         0.50004681,  0.50496321,  0.51273751,  0.51397251]),
 'std_fit_time': array([ 0.04071576,  0.34999139,  0.31733372,  0.03549674,  0.1198805 ,
         0.91900495,  0.88420138,  0.37777915,  0.98321903]),
 'std_score_time': array([ 0.00639099,  0.05793678,  0.00767493,  0.0210572 ,  0.03189375,
         0.02170566,  0.03632257,  0.00240637,  0.01842607]),
 'std_test_score': array([ 0.00671847,  0.01570024,  0.01172041,  0.0161068 ,  0.01607007,
         0.02385829,  0.01639593,  0.01515522,  0.01479274]),
 'std_train_score': array([ 0.00673979,  0.00499042,  0.00555743,  0.01202974,  0.00846531,
         0.00770649,  0.01105205,  0.01058206,  0.01386312])}

In [ ]:


In [27]:
model = XGBRegressor()

In [28]:
colsample_bylevel = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
param_grid = dict(colsample_bylevel=colsample_bylevel)

In [29]:
grid_search = GridSearchCV(model, param_grid, n_jobs=-1, verbose=3)

In [30]:
result = grid_search.fit(X_train_scaled, y_train)


Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] colsample_bylevel=0.1 ...........................................
[CV] colsample_bylevel=0.1 ...........................................
[CV] colsample_bylevel=0.1 ...........................................
[CV] colsample_bylevel=0.2 ...........................................
[CV] .................. colsample_bylevel=0.1, score=0.236454 -   0.2s
[CV] colsample_bylevel=0.2 ...........................................
[CV] .................. colsample_bylevel=0.1, score=0.230266 -   0.3s
[CV] colsample_bylevel=0.2 ...........................................
[CV] .................. colsample_bylevel=0.1, score=0.221978 -   0.3s
[CV] colsample_bylevel=0.3 ...........................................
[CV] .................. colsample_bylevel=0.2, score=0.389605 -   0.3s
[CV] colsample_bylevel=0.3 ...........................................
[CV] .................. colsample_bylevel=0.2, score=0.357623 -   0.3s
[CV] .................. colsample_bylevel=0.2, score=0.361486 -   0.3s
[CV] colsample_bylevel=0.3 ...........................................
[CV] colsample_bylevel=0.4 ...........................................
[CV] .................. colsample_bylevel=0.3, score=0.443983 -   0.4s
[CV] colsample_bylevel=0.4 ...........................................
[CV] .................. colsample_bylevel=0.3, score=0.396302 -   0.3s
[CV] colsample_bylevel=0.4 ...........................................
[CV] .................. colsample_bylevel=0.3, score=0.425530 -   0.3s
[CV] colsample_bylevel=0.5 ...........................................
[CV] .................. colsample_bylevel=0.4, score=0.433810 -   0.4s
[CV] colsample_bylevel=0.5 ...........................................
[CV] .................. colsample_bylevel=0.4, score=0.429944 -   0.6s
[CV] colsample_bylevel=0.5 ...........................................
[CV] .................. colsample_bylevel=0.4, score=0.450593 -   0.4s
[CV] colsample_bylevel=0.6 ...........................................
[CV] .................. colsample_bylevel=0.5, score=0.461982 -   0.3s
[CV] colsample_bylevel=0.6 ...........................................
[CV] .................. colsample_bylevel=0.5, score=0.436827 -   0.3s
[CV] colsample_bylevel=0.6 ...........................................
[CV] .................. colsample_bylevel=0.5, score=0.443715 -   0.4s
[CV] colsample_bylevel=0.7 ...........................................
[CV] .................. colsample_bylevel=0.6, score=0.466433 -   0.6s
[CV] colsample_bylevel=0.7 ...........................................
[CV] .................. colsample_bylevel=0.6, score=0.438975 -   0.3s
[CV] colsample_bylevel=0.7 ...........................................
[CV] .................. colsample_bylevel=0.6, score=0.445031 -   0.6s
[CV] colsample_bylevel=0.8 ...........................................
[CV] .................. colsample_bylevel=0.7, score=0.483877 -   0.3s
[CV] colsample_bylevel=0.8 ...........................................
[CV] .................. colsample_bylevel=0.7, score=0.455740 -   0.5s
[CV] colsample_bylevel=0.8 ...........................................
[CV] .................. colsample_bylevel=0.7, score=0.430706 -   0.3s
[CV] colsample_bylevel=1.0 ...........................................
[CV] .................. colsample_bylevel=0.8, score=0.484522 -   0.3s
[CV] colsample_bylevel=1.0 ...........................................
[CV] .................. colsample_bylevel=0.8, score=0.447451 -   0.3s
[CV] colsample_bylevel=1.0 ...........................................
[CV] .................. colsample_bylevel=0.8, score=0.482551 -   0.6s
[CV] .................. colsample_bylevel=1.0, score=0.498483 -   0.3s
[CV] .................. colsample_bylevel=1.0, score=0.463337 -   0.4s
[CV] .................. colsample_bylevel=1.0, score=0.473275 -   0.2s
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:  1.6min finished

In [31]:
result.cv_results_


Out[31]:
{'mean_fit_time': array([  5.58680964,   7.96834898,   9.22394466,  11.17805529,
         14.44551174,  14.79537845,  16.52017864,  16.49024232,  16.76054573]),
 'mean_score_time': array([ 0.27904503,  0.30942973,  0.3542637 ,  0.47181129,  0.36044025,
         0.49709463,  0.39499768,  0.40493266,  0.27615031]),
 'mean_test_score': array([ 0.22956596,  0.36957139,  0.42193826,  0.43811555,  0.44750804,
         0.45014643,  0.45677407,  0.47150803,  0.47836477]),
 'mean_train_score': array([ 0.2551972 ,  0.4250602 ,  0.46792848,  0.49801298,  0.50912125,
         0.51348689,  0.52657848,  0.52464538,  0.53230937]),
 'param_colsample_bylevel': masked_array(data = [0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 1.0],
              mask = [False False False False False False False False False],
        fill_value = ?),
 'params': ({'colsample_bylevel': 0.1},
  {'colsample_bylevel': 0.2},
  {'colsample_bylevel': 0.3},
  {'colsample_bylevel': 0.4},
  {'colsample_bylevel': 0.5},
  {'colsample_bylevel': 0.6},
  {'colsample_bylevel': 0.7},
  {'colsample_bylevel': 0.8},
  {'colsample_bylevel': 1.0}),
 'rank_test_score': array([9, 8, 7, 6, 5, 4, 3, 2, 1], dtype=int32),
 'split0_test_score': array([ 0.23645415,  0.38960514,  0.44398314,  0.43380956,  0.46198182,
         0.46643296,  0.48387676,  0.48452161,  0.49848254]),
 'split0_train_score': array([ 0.25873623,  0.43174816,  0.47813957,  0.48825062,  0.50763914,
         0.51478784,  0.52760406,  0.52697517,  0.53546957]),
 'split1_test_score': array([ 0.23026568,  0.35762309,  0.39630159,  0.42994417,  0.43682736,
         0.43897526,  0.45573985,  0.44745105,  0.46333653]),
 'split1_train_score': array([ 0.26557786,  0.43169105,  0.47092311,  0.51763234,  0.52470075,
         0.52806783,  0.54517381,  0.53490598,  0.54748602]),
 'split2_test_score': array([ 0.22197804,  0.36148594,  0.42553005,  0.45059293,  0.44371494,
         0.44503108,  0.43070559,  0.48255143,  0.47327525]),
 'split2_train_score': array([ 0.2412775 ,  0.41174138,  0.45472278,  0.48815597,  0.49502386,
         0.497605  ,  0.50695756,  0.512055  ,  0.51397251]),
 'std_fit_time': array([ 0.01258047,  0.33746784,  0.59310873,  0.53371143,  0.60921623,
         0.19267317,  0.29740686,  0.5224749 ,  0.81394888]),
 'std_score_time': array([ 0.0229224 ,  0.02305377,  0.05598837,  0.11034201,  0.04290427,
         0.11777629,  0.10116711,  0.11118439,  0.06571458]),
 'std_test_score': array([ 0.00593052,  0.01425351,  0.0196309 ,  0.00896285,  0.01061374,
         0.01177869,  0.02171936,  0.01702986,  0.01479274]),
 'std_train_score': array([ 0.01023134,  0.00941785,  0.00979157,  0.01387304,  0.01216078,
         0.01247037,  0.01561856,  0.00947321,  0.01386312])}

In [ ]:


In [32]:
model = XGBRegressor()

In [33]:
max_depth = [3, 6, 9]
n_estimators = [100, 250, 500]
reg_alpha = [1e-5, 1e-3, 0.1]
reg_lambda = [1e-3, 0.1, 1]
param_grid = dict(max_depth=max_depth, n_estimators=n_estimators,
                  reg_alpha=reg_alpha, reg_lambda=reg_lambda)

In [35]:
grid_search = GridSearchCV(model, param_grid, n_jobs=-1, verbose=2)

In [36]:
result = grid_search.fit(X_train_scaled, y_train)


Fitting 3 folds for each of 81 candidates, totalling 243 fits
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.6s
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.7s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.3s
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.4s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   0.5s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   0.5s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   0.5s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.4s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.4s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.4s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.4s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.3s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.3s
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.3s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.7s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  4.1min
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   1.0s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.7s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.9s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.7s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.8s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.9s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   1.9s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   1.9s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   2.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   2.2s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   2.1s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   2.2s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   2.6s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   3.5s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   2.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   2.4s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   2.7s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   2.1s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   2.6s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   2.6s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   1.9s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   2.1s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   2.1s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   1.9s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   2.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   2.1s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   1.1s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   0.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   0.6s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   0.8s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   1.3s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   2.5s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   1.9s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   3.0s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   1.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   1.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   2.2s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   1.8s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   3.1s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   2.3s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   2.1s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   2.1s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   2.7s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   1.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   2.2s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   2.3s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   1.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   2.2s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   2.2s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   1.8s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   2.1s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   2.1s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   1.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   1.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   2.6s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   2.0s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   3.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   2.2s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   5.5s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   4.2s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   5.4s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   4.7s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   4.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   4.7s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   5.3s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   4.3s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -  12.7s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   6.3s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   7.0s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   7.1s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   5.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   4.5s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   4.9s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   4.6s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   5.4s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   4.5s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   4.6s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed: 56.3min
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   4.6s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   4.3s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   4.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   4.1s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   4.5s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   4.8s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   1.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   1.4s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   4.4s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   5.2s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   1.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   1.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   1.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   1.5s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   2.1s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   1.4s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   1.4s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   2.0s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   1.1s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   1.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   1.1s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   1.3s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   1.1s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   1.1s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   1.2s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   4.1s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   3.6s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   3.6s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   3.6s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   4.1s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   3.6s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   3.8s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   3.6s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   4.2s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   3.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   4.0s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   3.7s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   4.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   5.1s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -  11.6s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -  11.8s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -  14.8s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -  11.7s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -  12.9s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -  11.8s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -  10.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -  14.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -  16.6s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -  14.7s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -  19.2s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -  17.1s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -  15.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -  18.7s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -  21.7s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 -  19.0s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 -  17.6s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 -  21.5s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -  16.9s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -  17.1s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -  17.2s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -  13.5s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -  19.1s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -  21.3s
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 -  10.8s
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   9.0s
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   8.6s
[Parallel(n_jobs=-1)]: Done 243 out of 243 | elapsed: 134.4min finished

In [37]:
import cPickle as pickle

In [38]:
pickle.dump((grid_search, result), open( "xgb gridsearch and results.pkl", "wb" ) )

In [39]:
result.cv_results_


Out[39]:
{'mean_fit_time': array([  22.47146734,   19.85001024,   18.88750442,   19.05822746,
          18.78651031,   20.00944002,   21.43022664,   23.52011832,
          18.54295969,   45.00260838,   45.99241924,   47.29698006,
          47.67425768,   43.60707927,   44.10489106,   44.84957798,
          43.919686  ,   43.47668465,   86.81721862,   86.67804432,
          91.35499477,  102.640498  ,   95.7173396 ,   96.57667494,
         102.3701551 ,   95.73338199,   94.18298801,   39.80882366,
          43.65931567,   42.42765705,   38.12971965,   38.45218102,
          38.24791106,   37.63723095,   38.28630408,   40.42925262,
         105.61946464,  108.43322961,  107.24544835,  107.80645768,
         103.35916114,   97.95467575,   97.98183091,   99.43998098,
         107.10949628,  202.54374401,  194.41516336,  204.30298233,
         313.67145197,  223.47959693,  188.01607633,  189.07257533,
         183.10826127,  187.89160872,   62.74625071,   61.75910044,
          62.5911816 ,   63.4838237 ,   64.60604366,   59.78916963,
          60.54867752,   56.1776793 ,   58.63835327,  139.06640402,
         135.61382731,  134.05349803,  134.35580373,  134.53863064,
         135.34126663,  137.74004738,  144.99287208,  143.55949601,
         298.56969301,  310.15868433,  327.98374923,  380.83669599,
         389.13507271,  444.9323643 ,  465.68078033,  399.23097968,
         318.15350103]),
 'mean_score_time': array([  0.64510202,   0.42531403,   0.36831458,   0.47510425,
          0.334831  ,   0.32499361,   0.36263768,   0.40617736,
          0.33033705,   0.83359599,   0.85212811,   0.82079935,
          0.87318301,   0.79149508,   0.81061602,   0.85486698,
          0.83743834,   0.79407867,   1.96986628,   1.9699014 ,
          2.13690694,   2.23936931,   2.51991105,   2.35435534,
          2.4416492 ,   2.04840358,   2.08888197,   0.61286894,
          0.816492  ,   0.64597535,   0.64961934,   0.62140663,
          0.62591966,   0.62220828,   0.62799899,   0.91073902,
          2.45118427,   1.96037833,   2.40257279,   2.28467162,
          2.11549822,   2.02359533,   1.99199939,   2.09874129,
          2.43012071,   5.05041567,   4.47006869,   7.42688624,
          6.80764429,   4.78142881,   4.81631398,   4.48680139,
          4.20406866,   4.8163929 ,   1.28546071,   1.23436419,
          1.23773265,   1.26887202,   1.57145294,   1.5407354 ,
          1.21871765,   1.18149567,   1.13583446,   3.95044796,
          3.75426229,   3.82978996,   3.87594604,   3.78032033,
          3.78483701,   3.86818258,   3.87522388,   4.31407698,
         12.73754597,  12.12339306,  13.60442074,  17.01043169,
         18.49543595,  19.34253867,  17.0617253 ,  17.94042699,   9.46461463]),
 'mean_test_score': array([ 0.47476509,  0.47481771,  0.47836477,  0.47476509,  0.47481771,
         0.47836477,  0.47476534,  0.47481795,  0.47836479,  0.47250483,
         0.47744062,  0.48390166,  0.47250484,  0.47744063,  0.48390166,
         0.47895337,  0.47744106,  0.48390186,  0.45305078,  0.47485295,
         0.47797076,  0.45305083,  0.47485296,  0.47797077,  0.4681252 ,
         0.47485382,  0.4779718 ,  0.43881415,  0.46167842,  0.48331064,
         0.43881423,  0.46167846,  0.48331065,  0.44369675,  0.46168283,
         0.48331197,  0.35605434,  0.38107234,  0.44572734,  0.35605463,
         0.38107256,  0.4457274 ,  0.37149349,  0.38264133,  0.4445941 ,
         0.26132655,  0.26890554,  0.3989032 ,  0.261327  ,  0.26890589,
         0.3989033 ,  0.30566842,  0.29996746,  0.39139257,  0.36714381,
         0.39205064,  0.40212936,  0.36714405,  0.39206942,  0.40212902,
         0.37245284,  0.38867676,  0.40326716,  0.27551112,  0.31712893,
         0.35460872,  0.27551179,  0.32270191,  0.35407602,  0.28001021,
         0.30735607,  0.35042325,  0.21114584,  0.23148839,  0.31036296,
         0.20403695,  0.23413726,  0.30455753,  0.20892431,  0.22948024,
         0.30888958]),
 'mean_train_score': array([ 0.53234346,  0.53233605,  0.53230937,  0.53234346,  0.53233605,
         0.53230937,  0.53234342,  0.53233601,  0.53230933,  0.57047365,
         0.57112058,  0.57142654,  0.57047365,  0.57112057,  0.57142654,
         0.57053486,  0.57112051,  0.57142648,  0.59332666,  0.59350791,
         0.59318737,  0.59332666,  0.5935079 ,  0.59318737,  0.59334044,
         0.59350778,  0.59318726,  0.63510804,  0.6338578 ,  0.63299249,
         0.63510803,  0.6338578 ,  0.63299248,  0.63527208,  0.6338571 ,
         0.63299202,  0.66823407,  0.66803645,  0.6642309 ,  0.66823405,
         0.66803643,  0.66423089,  0.66835504,  0.66824809,  0.66412709,
         0.70165232,  0.70107866,  0.69507789,  0.70165228,  0.70107863,
         0.69507787,  0.7013474 ,  0.70088766,  0.69564498,  0.73353895,
         0.73017286,  0.72482406,  0.7335389 ,  0.73017308,  0.72482403,
         0.73325362,  0.72999212,  0.72706969,  0.77799831,  0.77652584,
         0.76795271,  0.7779982 ,  0.77764112,  0.76779032,  0.78021514,
         0.77756907,  0.76924628,  0.82742815,  0.82474962,  0.81420268,
         0.82730749,  0.82604533,  0.8141921 ,  0.82947992,  0.8257738 ,
         0.81282365]),
 'param_max_depth': masked_array(data = [3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 6 6 6 6 6 6 6 6 6 6
  6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
  9 9 9 9 9 9 9],
              mask = [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False],
        fill_value = ?),
 'param_n_estimators': masked_array(data = [100 100 100 100 100 100 100 100 100 250 250 250 250 250 250 250 250 250
  500 500 500 500 500 500 500 500 500 100 100 100 100 100 100 100 100 100
  250 250 250 250 250 250 250 250 250 500 500 500 500 500 500 500 500 500
  100 100 100 100 100 100 100 100 100 250 250 250 250 250 250 250 250 250
  500 500 500 500 500 500 500 500 500],
              mask = [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False],
        fill_value = ?),
 'param_reg_alpha': masked_array(data = [1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001
  0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1
  1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001
  0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1
  1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001
  0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1],
              mask = [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False],
        fill_value = ?),
 'param_reg_lambda': masked_array(data = [0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1
  0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1
  0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1
  0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1
  0.001 0.1 1 0.001 0.1 1 0.001 0.1 1],
              mask = [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False],
        fill_value = ?),
 'params': ({'max_depth': 3,
   'n_estimators': 100,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 3,
   'n_estimators': 100,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 3,
   'n_estimators': 250,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 3,
   'n_estimators': 250,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 3,
   'n_estimators': 500,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 3,
   'n_estimators': 500,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 100,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 100,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 250,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 250,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 500,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 500,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 100,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 100,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 250,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 250,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 500,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 500,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 1}),
 'rank_test_score': array([25, 22, 10, 24, 21,  9, 23, 20,  8, 27, 16,  3, 26, 15,  2,  7, 14,
         1, 33, 19, 13, 32, 18, 12, 28, 17, 11, 39, 31,  6, 38, 30,  5, 37,
        29,  4, 57, 51, 35, 56, 50, 34, 53, 49, 36, 75, 73, 44, 74, 72, 43,
        66, 68, 47, 55, 46, 41, 54, 45, 42, 52, 48, 40, 71, 62, 58, 70, 61,
        59, 69, 65, 60, 79, 77, 63, 81, 76, 67, 80, 78, 64], dtype=int32),
 'split0_test_score': array([ 0.49474562,  0.4947435 ,  0.49848254,  0.49474562,  0.4947435 ,
         0.49848254,  0.49474561,  0.49474349,  0.49848253,  0.51155364,
         0.51519969,  0.51491556,  0.51155365,  0.5151997 ,  0.51491556,
         0.51155414,  0.51520025,  0.51491582,  0.51140193,  0.51334442,
         0.51074804,  0.51140195,  0.51334443,  0.51074807,  0.51140374,
         0.51334641,  0.51075093,  0.49165984,  0.49945349,  0.50725681,
         0.49165994,  0.49945353,  0.50725682,  0.49167024,  0.49945839,
         0.50725825,  0.45872446,  0.45993189,  0.48671092,  0.4587247 ,
         0.45993212,  0.48671099,  0.4587478 ,  0.45995494,  0.48671808,
         0.36925464,  0.39665942,  0.45589877,  0.36925507,  0.39665972,
         0.4558989 ,  0.39324021,  0.41973453,  0.45148489,  0.43334863,
         0.44487705,  0.44699823,  0.43334879,  0.44487712,  0.44699828,
         0.43662048,  0.45247234,  0.45897372,  0.33860778,  0.39140448,
         0.42587786,  0.33860837,  0.39157033,  0.4231349 ,  0.37165177,
         0.39551687,  0.43416741,  0.28393352,  0.2923014 ,  0.40590884,
         0.26260459,  0.29192227,  0.38489991,  0.29430021,  0.33147511,
         0.39738151]),
 'split0_train_score': array([ 0.53443315,  0.53442782,  0.53546957,  0.53443315,  0.53442781,
         0.53546957,  0.53443311,  0.53442778,  0.53546954,  0.57421078,
         0.57615405,  0.57593423,  0.57421078,  0.57615405,  0.57593423,
         0.57421072,  0.576154  ,  0.57593419,  0.59912142,  0.59948991,
         0.59893569,  0.59912141,  0.59948991,  0.59893569,  0.59912131,
         0.59948982,  0.5989356 ,  0.64202307,  0.64048347,  0.63969532,
         0.64202307,  0.64048346,  0.63969532,  0.6420224 ,  0.64048288,
         0.63969489,  0.6761729 ,  0.6750221 ,  0.67121003,  0.67617287,
         0.67502208,  0.67121002,  0.67617103,  0.67502044,  0.67120888,
         0.70817042,  0.70789133,  0.70282594,  0.70817038,  0.7078913 ,
         0.70282591,  0.70872647,  0.70776006,  0.70405758,  0.7386827 ,
         0.73612474,  0.7316606 ,  0.73868265,  0.73612469,  0.73166057,
         0.73821941,  0.7360215 ,  0.73365953,  0.78451879,  0.78412555,
         0.77380276,  0.78451868,  0.78434793,  0.77427044,  0.78843261,
         0.78302777,  0.77361169,  0.83273399,  0.83139538,  0.8192197 ,
         0.83237237,  0.83112358,  0.81774209,  0.83618019,  0.82865958,
         0.81730153]),
 'split1_test_score': array([ 0.46470655,  0.46470555,  0.46333653,  0.46470655,  0.46470555,
         0.46333653,  0.46470654,  0.46470553,  0.46333652,  0.48478241,
         0.48386656,  0.48454846,  0.48478241,  0.48386656,  0.48454846,
         0.4847824 ,  0.48386655,  0.48454846,  0.48402567,  0.48087675,
         0.48347694,  0.48402567,  0.48087675,  0.48347694,  0.48402571,
         0.48087679,  0.48347698,  0.48519849,  0.48725037,  0.48585777,
         0.48519849,  0.48725038,  0.48585777,  0.48519893,  0.48725115,
         0.48585804,  0.46137887,  0.46710904,  0.46621289,  0.46137889,
         0.46710906,  0.4662129 ,  0.45924851,  0.46864208,  0.46621381,
         0.4339561 ,  0.44320544,  0.4411906 ,  0.43395615,  0.44320549,
         0.44119063,  0.43586745,  0.43973808,  0.44418094,  0.43863514,
         0.44714133,  0.45598225,  0.43863526,  0.44714138,  0.45598106,
         0.43312991,  0.44692783,  0.45469225,  0.41004252,  0.41883592,
         0.4313145 ,  0.41004272,  0.41883603,  0.43245912,  0.40508357,
         0.41363066,  0.43391615,  0.38284684,  0.39217874,  0.40497338,
         0.3828471 ,  0.39271342,  0.40856566,  0.38132134,  0.39065795,
         0.41280208]),
 'split1_train_score': array([ 0.54778305,  0.54777428,  0.54748602,  0.54778305,  0.54777428,
         0.54748602,  0.54778302,  0.54777425,  0.54748599,  0.58982963,
         0.5894682 ,  0.59121695,  0.58982963,  0.5894682 ,  0.59121695,
         0.58982957,  0.58946814,  0.5912169 ,  0.61638981,  0.61647412,
         0.61606183,  0.6163898 ,  0.61647412,  0.61606182,  0.61638968,
         0.616474  ,  0.61606172,  0.65788031,  0.65564431,  0.65611787,
         0.6578803 ,  0.6556443 ,  0.65611786,  0.65787959,  0.65564363,
         0.65611746,  0.6912664 ,  0.69166357,  0.68768399,  0.69126638,
         0.69166355,  0.68768397,  0.69057456,  0.69154044,  0.68768289,
         0.72473959,  0.72251788,  0.71870908,  0.72473955,  0.72251785,
         0.71870906,  0.72355782,  0.72394925,  0.71750637,  0.75349928,
         0.75059864,  0.74411467,  0.75349922,  0.75059859,  0.74411464,
         0.75337096,  0.74877678,  0.7494593 ,  0.79506945,  0.79360631,
         0.78468385,  0.79506936,  0.79360623,  0.78372906,  0.79546223,
         0.79504327,  0.79013929,  0.84016911,  0.83991614,  0.82806248,
         0.84016893,  0.83964441,  0.82950846,  0.84232836,  0.84010113,
         0.83015236]),
 'split2_test_score': array([ 0.46484309,  0.46500409,  0.47327525,  0.46484309,  0.46500409,
         0.47327525,  0.46484387,  0.46500484,  0.47327532,  0.42117845,
         0.43325562,  0.45224095,  0.42117846,  0.43325563,  0.45224095,
         0.44052359,  0.43325637,  0.4522413 ,  0.36372474,  0.4303377 ,
         0.43968731,  0.36372487,  0.43033771,  0.43968731,  0.40894615,
         0.43033827,  0.4396875 ,  0.33958412,  0.39833139,  0.45681733,
         0.33958424,  0.39833147,  0.45681735,  0.35422108,  0.39833894,
         0.45681961,  0.14805969,  0.2161761 ,  0.3842582 ,  0.1480603 ,
         0.21617651,  0.3842583 ,  0.19648415,  0.21932699,  0.38085041,
        -0.01923108, -0.03314824,  0.29962022, -0.01923021, -0.03314753,
         0.29962036,  0.0878976 ,  0.04042977,  0.27851188,  0.22944764,
         0.28413355,  0.30340759,  0.22944811,  0.28418975,  0.30340771,
         0.24760812,  0.26663011,  0.2961355 ,  0.07788307,  0.1411464 ,
         0.20663378,  0.07788428,  0.15769938,  0.20663405,  0.06329527,
         0.11292069,  0.18318618, -0.03334283,  0.00998503,  0.12020665,
        -0.03334083,  0.0177761 ,  0.12020701, -0.04884863, -0.03369236,
         0.11648514]),
 'split2_train_score': array([ 0.51481418,  0.51480605,  0.51397251,  0.51481418,  0.51480605,
         0.51397251,  0.51481414,  0.51480601,  0.51397247,  0.54738053,
         0.54773948,  0.54712843,  0.54738053,  0.54773948,  0.54712843,
         0.54756428,  0.5477394 ,  0.54712836,  0.56446877,  0.56455968,
         0.5645646 ,  0.56446876,  0.56455968,  0.5645646 ,  0.56451033,
         0.56455951,  0.56456445,  0.60542074,  0.60544563,  0.60316427,
         0.60542073,  0.60544562,  0.60316426,  0.60591425,  0.6054448 ,
         0.6031637 ,  0.6372629 ,  0.63742369,  0.63379867,  0.63726288,
         0.63742366,  0.63379866,  0.63831952,  0.63818338,  0.63348951,
         0.67204694,  0.67282678,  0.66369865,  0.6720469 ,  0.67282674,
         0.66369862,  0.6717579 ,  0.67095368,  0.665371  ,  0.70843488,
         0.7037952 ,  0.6986969 ,  0.70843481,  0.70379597,  0.69869687,
         0.70817048,  0.70517808,  0.69809023,  0.75440669,  0.75184564,
         0.74537152,  0.75440657,  0.7549692 ,  0.74537145,  0.75675059,
         0.75463619,  0.74398786,  0.80938134,  0.80293733,  0.79532585,
         0.80938117,  0.80736799,  0.79532575,  0.80993121,  0.80856069,
         0.79101705]),
 'std_fit_time': array([  2.99713593e-01,   1.45161885e+00,   8.23305642e-02,
          4.77228656e-02,   1.14599278e-02,   9.10339186e-01,
          1.30985559e+00,   1.28078482e-01,   5.80239798e-02,
          1.49698147e-01,   5.09727484e-01,   1.18158731e+00,
          2.06953935e-01,   2.91895303e-01,   4.73679049e-01,
          1.50929430e-01,   1.85554288e-01,   1.67375845e-01,
          2.09170227e-01,   8.60335677e-01,   4.42216335e+00,
          6.36690827e-01,   1.27684095e+00,   1.18541026e+00,
          5.02413311e-01,   1.13041931e+00,   8.17649112e-01,
          5.49341966e-01,   1.81196674e+00,   2.61398226e+00,
          6.54316157e-02,   4.11409483e-01,   2.34180846e-01,
          4.26086167e-01,   7.82664854e-01,   5.28110505e-01,
          4.24322978e-01,   2.43857535e+00,   8.10478530e-01,
          1.44150804e+00,   1.14971375e-01,   3.76105824e+00,
          1.58316693e+00,   3.27741342e-01,   1.30915286e+00,
          2.47151014e+00,   4.26324758e+00,   1.19374073e+01,
          3.49852112e+00,   3.96843910e+01,   3.01803875e+00,
          3.48924140e-01,   1.71577176e+00,   2.06242688e+00,
          1.95720131e+00,   3.63089410e-01,   1.65008413e-01,
          7.98189421e-01,   8.76426975e-01,   3.07840540e+00,
          7.32344973e-01,   5.56713539e-01,   4.02997792e-01,
          1.24116207e+00,   3.11765142e+00,   8.83974596e-01,
          3.59716483e-01,   1.40035883e+00,   1.41651323e-01,
          2.21443961e+00,   1.79963028e-01,   4.87691295e-01,
          3.64960171e+00,   4.10403747e+00,   1.73023177e+01,
          2.37619574e+00,   1.70580339e+00,   3.39207293e+01,
          2.39839741e+01,   9.43313040e-01,   3.71276827e+01]),
 'std_score_time': array([ 0.03477721,  0.13213481,  0.05241081,  0.01453639,  0.00579445,
         0.00553128,  0.04102249,  0.03178402,  0.01755374,  0.07724467,
         0.06059319,  0.02871818,  0.06942251,  0.04319767,  0.06491631,
         0.05446811,  0.06071537,  0.05083732,  0.03220441,  0.02057478,
         0.07976481,  0.25569367,  0.70822144,  0.28879692,  0.21550601,
         0.0779901 ,  0.1608823 ,  0.01388435,  0.22605973,  0.01499108,
         0.04621139,  0.02562523,  0.04154988,  0.05081106,  0.03163774,
         0.2917878 ,  0.47096391,  0.13466506,  0.52484685,  0.28712639,
         0.2180192 ,  0.20107782,  0.14147027,  0.32799384,  0.47030536,
         0.58448209,  0.30056085,  3.72115841,  0.34811232,  0.21413475,
         0.40288007,  0.1144288 ,  0.18721975,  0.31894268,  0.06309075,
         0.06240809,  0.03923366,  0.13834726,  0.37960482,  0.36731932,
         0.05455984,  0.08971731,  0.02544776,  0.1625441 ,  0.17493712,
         0.11906818,  0.15546693,  0.22220849,  0.16679494,  0.24653544,
         0.11245885,  0.57521304,  1.47198121,  0.54999468,  2.68216352,
         1.82000625,  2.74997058,  1.6299797 ,  0.12690978,  3.29420102,
         0.94589519]),
 'std_test_score': array([ 0.01412848,  0.01409019,  0.01479274,  0.01412848,  0.01409018,
         0.01479274,  0.0141283 ,  0.01409001,  0.01479274,  0.03790315,
         0.0337607 ,  0.02559089,  0.03790315,  0.0337607 ,  0.02559089,
         0.02928957,  0.03376058,  0.02559085,  0.06414422,  0.03415399,
         0.02927053,  0.06414416,  0.034154  ,  0.02927054,  0.04331289,
         0.0341545 ,  0.02927152,  0.07021579,  0.04506931,  0.02067045,
         0.07021576,  0.04506928,  0.02067045,  0.06332399,  0.04506728,
         0.02067004,  0.14707842,  0.11663606,  0.04426348,  0.14707819,
         0.11663592,  0.04426346,  0.12375046,  0.11553513,  0.0458443 ,
         0.20013497,  0.21442792,  0.07045999,  0.20013465,  0.21442766,
         0.07045996,  0.15496745,  0.18370247,  0.07987438,  0.09738981,
         0.0763145 ,  0.06990312,  0.09738965,  0.07628804,  0.06990277,
         0.08829004,  0.08632969,  0.07577368,  0.14275472,  0.12494135,
         0.10465762,  0.14275431,  0.11720419,  0.10432669,  0.1538472 ,
         0.13768531,  0.11825451,  0.17753308,  0.16184699,  0.13446136,
         0.17488229,  0.15842733,  0.13071305,  0.18570287,  0.18765307,
         0.13619605]),
 'std_train_score': array([ 0.01354036,  0.01354025,  0.01386312,  0.01354036,  0.01354025,
         0.01386312,  0.01354036,  0.01354025,  0.01386312,  0.01753009,
         0.01740351,  0.01827911,  0.01753009,  0.01740351,  0.01827911,
         0.01744941,  0.01740352,  0.01827912,  0.02158909,  0.02161196,
         0.02141298,  0.02158909,  0.02161196,  0.02141298,  0.02157051,
         0.02161199,  0.021413  ,  0.02196763,  0.02102223,  0.02213168,
         0.02196763,  0.02102223,  0.02213168,  0.0217451 ,  0.02102231,
         0.02213175,  0.02275029,  0.0226876 ,  0.02254533,  0.02275029,
         0.0226876 ,  0.02254533,  0.02203731,  0.0223031 ,  0.02268398,
         0.02199989,  0.02085043,  0.02311653,  0.0219999 ,  0.02085043,
         0.02311653,  0.02178143,  0.02217438,  0.02209982,  0.01875355,
         0.01956543,  0.01916154,  0.01875356,  0.01956507,  0.01916155,
         0.01878413,  0.01830258,  0.02148278,  0.0172289 ,  0.01787559,
         0.01657372,  0.01722891,  0.01647101,  0.01631606,  0.01683831,
         0.01694168,  0.01909243,  0.01311706,  0.01581102,  0.0138275 ,
         0.01306934,  0.01365731,  0.014179  ,  0.01404906,  0.01303702,
         0.01628766])}

In [41]:
result.best_estimator_


Out[41]:
XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0,
       learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=250, nthread=-1,
       objective='reg:linear', reg_alpha=0.1, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [40]:
grid_search.score(X_va_scaled, y_va)


Out[40]:
0.52178018832808326

Try XGBoost on non-scaled data

Turns out this works better


In [42]:
xgb = XGBRegressor(n_estimators=250, reg_alpha=0.1)

In [43]:
xgb.fit(X_train, y_train)


Out[43]:
XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0,
       learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=250, nthread=-1,
       objective='reg:linear', reg_alpha=0.1, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [44]:
xgb.score(X_va, y_va)


Out[44]:
0.54909280212343337

In [46]:
y_pr = xgb.predict(X_va)

In [47]:
y_xgb_resids = pd.DataFrame(dict(zip(['Gen Change (MW)', 'y_pr', 'cluster'],
                               [y_va.values, y_pr, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values])))

In [50]:
y_xgb_resids.loc[:,'residuals'] = y_xgb_resids.loc[:,'y_pr'] - y_xgb_resids.loc[:,'Gen Change (MW)']

In [51]:
g = sns.FacetGrid(y_xgb_resids, hue='cluster', col='cluster',
                  col_wrap=3)
g.map(plt.scatter, 'y_pr', 'residuals')
g.add_legend()


Out[51]:
<seaborn.axisgrid.FacetGrid at 0x115d8ea90>
Out[51]:
<seaborn.axisgrid.FacetGrid at 0x115d8ea90>

In [52]:
y_xgb_resids.describe()


Out[52]:
Gen Change (MW) cluster y_pr residuals
count 105264.000000 105264.000000 105264.000000 105264.000000
mean 0.072684 2.500000 -4.463920 -4.536561
std 252.875892 1.707833 179.048598 169.744599
min -2236.000000 0.000000 -1508.438721 -1661.269684
25% -54.000000 1.000000 -71.290184 -66.515215
50% 0.000000 2.500000 3.031837 -1.428239
75% 55.000000 4.000000 62.890798 58.265282
max 3104.000000 5.000000 1829.738892 1646.675659

Try ratio of fuel prices


In [54]:
X_train.columns


Out[54]:
Index([u'nameplate_capacity', u'GROSS LOAD (MW)', u'ERCOT Load, MW',
       u'Total Wind Installed, MW', u'Total Wind Output, MW',
       u'Net Load Change (MW)', u'NG Price ($/mcf)', u'All coal', u'Lignite',
       u'Subbituminous', u'cluster_0', u'cluster_1', u'cluster_2',
       u'cluster_3', u'cluster_4', u'cluster_5'],
      dtype='object')

In [66]:
X_train_ratio = X_train.copy()
X_va_ratio = X_va.copy()
for fuel in ['All coal', 'Lignite', 'Subbituminous']:
    X_train_ratio.loc[:,fuel] = X_train_ratio.loc[:,fuel].values/X_train_ratio.loc[:,'NG Price ($/mcf)'].values
    X_va_ratio.loc[:,fuel] = X_va.loc[:,fuel]/X_va.loc[:,'NG Price ($/mcf)']
    
X_train_ratio.drop('NG Price ($/mcf)', axis=1, inplace=True)
X_va_ratio.drop('NG Price ($/mcf)', axis=1, inplace=True)

In [67]:
X_train.head()


Out[67]:
nameplate_capacity GROSS LOAD (MW) ERCOT Load, MW Total Wind Installed, MW Total Wind Output, MW Net Load Change (MW) NG Price ($/mcf) All coal Lignite Subbituminous cluster_0 cluster_1 cluster_2 cluster_3 cluster_4 cluster_5
0 5949.0 4596.0 30428.0 2790.0 1074.0 0.0 6.42 25.1475 20.0275 28.115 1.0 0.0 0.0 0.0 0.0 0.0
1 5949.0 4566.0 30133.0 2790.0 922.6 -143.6 6.42 25.1475 20.0275 28.115 1.0 0.0 0.0 0.0 0.0 0.0
2 5949.0 4667.0 29941.0 2790.0 849.2 -118.6 6.42 25.1475 20.0275 28.115 1.0 0.0 0.0 0.0 0.0 0.0
3 5949.0 4668.0 29949.0 2790.0 1056.3 -199.1 6.42 25.1475 20.0275 28.115 1.0 0.0 0.0 0.0 0.0 0.0
4 5949.0 4685.0 30248.0 2790.0 837.1 518.2 6.42 25.1475 20.0275 28.115 1.0 0.0 0.0 0.0 0.0 0.0

In [68]:
X_train_ratio.head()


Out[68]:
nameplate_capacity GROSS LOAD (MW) ERCOT Load, MW Total Wind Installed, MW Total Wind Output, MW Net Load Change (MW) All coal Lignite Subbituminous cluster_0 cluster_1 cluster_2 cluster_3 cluster_4 cluster_5
0 5949.0 4596.0 30428.0 2790.0 1074.0 0.0 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
1 5949.0 4566.0 30133.0 2790.0 922.6 -143.6 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
2 5949.0 4667.0 29941.0 2790.0 849.2 -118.6 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
3 5949.0 4668.0 29949.0 2790.0 1056.3 -199.1 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0
4 5949.0 4685.0 30248.0 2790.0 837.1 518.2 3.917056 3.119548 4.379283 1.0 0.0 0.0 0.0 0.0 0.0

In [71]:
xgb_ratio = XGBRegressor(n_estimators=250, reg_alpha=0.1)

In [72]:
xgb_ratio.fit(X_train_ratio, y_train)


Out[72]:
XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0,
       learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=250, nthread=-1,
       objective='reg:linear', reg_alpha=0.1, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)

In [73]:
xgb_ratio.score(X_va_ratio, y_va)


Out[73]:
0.55063694186589407

In [75]:
y_pr = xgb_ratio.predict(X_va_ratio)

In [76]:
y_xgb_resids = pd.DataFrame(dict(zip(['Gen Change (MW)', 'y_pr', 'cluster'],
                               [y_va.values, y_pr, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values])))

In [77]:
y_xgb_resids.loc[:,'residuals'] = y_xgb_resids.loc[:,'y_pr'] - y_xgb_resids.loc[:,'Gen Change (MW)']

In [78]:
g = sns.FacetGrid(y_xgb_resids, hue='cluster', col='cluster',
                  col_wrap=3)
g.map(plt.scatter, 'y_pr', 'residuals')
g.add_legend()


Out[78]:
<seaborn.axisgrid.FacetGrid at 0x11632a810>
Out[78]:
<seaborn.axisgrid.FacetGrid at 0x11632a810>

In [79]:
y_xgb_resids.describe()


Out[79]:
Gen Change (MW) cluster y_pr residuals
count 105264.000000 105264.000000 105264.000000 105264.000000
mean 0.072684 2.500000 2.518387 2.445684
std 252.875892 1.707833 181.456628 169.496567
min -2236.000000 0.000000 -1515.943848 -1643.027626
25% -54.000000 1.000000 -64.903372 -60.947704
50% 0.000000 2.500000 9.577057 4.558166
75% 55.000000 4.000000 69.535152 64.237963
max 3104.000000 5.000000 1779.647217 1636.885956

In [81]:
from xgboost import plot_importance

In [82]:
plot_importance(xgb_ratio)


Out[82]:
<matplotlib.axes._subplots.AxesSubplot at 0x11b42b790>

In [ ]:

Linear Regression (OLS) with ratio of fuel prices

Slight improvement from first OLS try, but still nowhere near as good as gradient boosting


In [83]:
lm = LinearRegression(normalize=True)
lm.fit(X_train_ratio, y_train)


Out[83]:
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=True)

In [84]:
lm.score(X_va_ratio, y_va)


Out[84]:
0.26965150769180002

In [24]:
y_pr = lm.predict(X_va_scaled)

In [41]:
y_va.values.shape, y_pr.shape, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values.shape


Out[41]:
((105266,), (105264,), (105264,))

In [25]:
y_lm_resids = pd.DataFrame(dict(zip(['Gen Change (MW)', 'y_pr', 'cluster'],
                               [y_va.values, y_pr, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values])))
# y_lm_resids['y_pr'] = y_pr
# y_lm_resids['cluster'] = X.loc[:,'cluster']

In [26]:
y_lm_resids.head()


Out[26]:
Gen Change (MW) cluster y_pr
0 0.0 0 -61.176003
1 1.0 0 -20.166237
2 -1.0 0 -17.027565
3 0.0 0 -14.052956
4 0.0 0 21.281029

In [27]:
y_lm_resids.loc[:,'residuals'] = y_lm_resids.loc[:,'y_pr'] - y_lm_resids.loc[:,'Gen Change (MW)']

In [29]:
g = sns.FacetGrid(y_lm_resids, hue='cluster', col='cluster',
                  col_wrap=3)
g.map(plt.scatter, 'Gen Change (MW)', 'residuals')
g.add_legend()


Out[29]:
<seaborn.axisgrid.FacetGrid at 0x116244110>
Out[29]:
<seaborn.axisgrid.FacetGrid at 0x116244110>

In [ ]: