Prediction model



In [1]:

    
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import os
from xgboost import XGBRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR, LinearSVR
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import validation_curve, learning_curve









    



/Users/Home/anaconda/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.
  warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')

Import data

Might still need to clean up the files some after import



In [2]:

    
path = '../Final report'
X_fn = 'x.csv'
y_fn = 'y.csv'
X_path = os.path.join(path, X_fn)
y_path = os.path.join(path, y_fn)

X = pd.read_csv(X_path)
y = pd.read_csv(y_path)



In [7]:

    
X.head()









    Out[7]:






  
    
      
      cluster
      Year
      nameplate_capacity
      DATETIME
      GROSS LOAD (MW)
      ERCOT Load, MW
      Total Wind Installed, MW
      Total Wind Output, MW
      Wind Output, % of Installed
      Wind Output, % of Load
      1-hr MW change
      1-hr % change
      Net Load (MW)
      Net Load Change (MW)
      Month
      NG Price ($/mcf)
      All coal
      Lignite
      Subbituminous
    
  
  
    
      0
      0
      2007
      13263.0
      2007-01-01 00:00:00
      670.0
      30428.0
      2790.0
      1074.0
      38.494624
      3.529644
      NaN
      NaN
      29354.0
      NaN
      1
      6.42
      25.1475
      20.0275
      28.115
    
    
      1
      0
      2007
      13263.0
      2007-01-01 01:00:00
      492.0
      30133.0
      2790.0
      922.6
      33.068100
      3.061760
      -151.4
      -14.096834
      29210.4
      -143.6
      1
      6.42
      25.1475
      20.0275
      28.115
    
    
      2
      0
      2007
      13263.0
      2007-01-01 02:00:00
      461.0
      29941.0
      2790.0
      849.2
      30.437276
      2.836245
      -73.4
      -7.955777
      29091.8
      -118.6
      1
      6.42
      25.1475
      20.0275
      28.115
    
    
      3
      0
      2007
      13263.0
      2007-01-01 03:00:00
      417.0
      29949.0
      2790.0
      1056.3
      37.860215
      3.526996
      207.1
      24.387659
      28892.7
      -199.1
      1
      6.42
      25.1475
      20.0275
      28.115
    
    
      4
      0
      2007
      13263.0
      2007-01-01 04:00:00
      677.0
      30248.0
      2790.0
      837.1
      30.003584
      2.767456
      -219.2
      -20.751680
      29410.9
      518.2
      1
      6.42
      25.1475
      20.0275
      28.115

Make fuel price a ratio of the coal price to the natural gas price



In [3]:

    
for fuel in ['All coal', 'Lignite', 'Subbituminous']:
    X.loc[:,fuel] = X.loc[:,fuel].values/X.loc[:,'NG Price ($/mcf)'].values
    
X.drop('NG Price ($/mcf)', axis=1, inplace=True)

One-hot encoding of the cluster variable

I'm trying to make this easy for using with different numbers of clusters



In [4]:

    
cluster_ids = X['cluster'].unique()
for cluster in cluster_ids:
    X['cluster_{}'.format(cluster)] = np.eye(len(cluster_ids))[X['cluster'],cluster]



In [5]:

    
X.head()









    Out[5]:






  
    
      
      cluster
      Year
      nameplate_capacity
      DATETIME
      GROSS LOAD (MW)
      ERCOT Load, MW
      Total Wind Installed, MW
      Total Wind Output, MW
      Wind Output, % of Installed
      Wind Output, % of Load
      ...
      Month
      All coal
      Lignite
      Subbituminous
      cluster_0
      cluster_1
      cluster_2
      cluster_3
      cluster_4
      cluster_5
    
  
  
    
      0
      0
      2007
      13263.0
      2007-01-01 00:00:00
      670.0
      30428.0
      2790.0
      1074.0
      38.494624
      3.529644
      ...
      1
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      0
      2007
      13263.0
      2007-01-01 01:00:00
      492.0
      30133.0
      2790.0
      922.6
      33.068100
      3.061760
      ...
      1
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      0
      2007
      13263.0
      2007-01-01 02:00:00
      461.0
      29941.0
      2790.0
      849.2
      30.437276
      2.836245
      ...
      1
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      3
      0
      2007
      13263.0
      2007-01-01 03:00:00
      417.0
      29949.0
      2790.0
      1056.3
      37.860215
      3.526996
      ...
      1
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      4
      0
      2007
      13263.0
      2007-01-01 04:00:00
      677.0
      30248.0
      2790.0
      837.1
      30.003584
      2.767456
      ...
      1
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

5 rows × 24 columns



In [6]:

    
X.tail()









    Out[6]:






  
    
      
      cluster
      Year
      nameplate_capacity
      DATETIME
      GROSS LOAD (MW)
      ERCOT Load, MW
      Total Wind Installed, MW
      Total Wind Output, MW
      Wind Output, % of Installed
      Wind Output, % of Load
      ...
      Month
      All coal
      Lignite
      Subbituminous
      cluster_0
      cluster_1
      cluster_2
      cluster_3
      cluster_4
      cluster_5
    
  
  
    
      473329
      5
      2015
      10014.8
      2015-12-31 19:00:00
      3625.0
      39908.77734
      16170.0
      3824.932373
      23.654498
      9.584188
      ...
      12
      12.793722
      10.780269
      14.596413
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
    
    
      473330
      5
      2015
      10014.8
      2015-12-31 20:00:00
      3319.0
      38736.85938
      16170.0
      4625.632813
      28.606264
      11.941166
      ...
      12
      12.793722
      10.780269
      14.596413
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
    
    
      473331
      5
      2015
      10014.8
      2015-12-31 21:00:00
      3332.0
      37587.70313
      16170.0
      4957.714844
      30.659956
      13.189725
      ...
      12
      12.793722
      10.780269
      14.596413
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
    
    
      473332
      5
      2015
      10014.8
      2015-12-31 22:00:00
      3214.0
      36356.26172
      16170.0
      4699.097656
      29.060592
      12.925140
      ...
      12
      12.793722
      10.780269
      14.596413
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
    
    
      473333
      5
      2015
      10014.8
      2015-12-31 23:00:00
      3173.0
      35150.33984
      16170.0
      4313.125000
      26.673624
      12.270507
      ...
      12
      12.793722
      10.780269
      14.596413
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
    
  

5 rows × 24 columns

Add free capacity of every group for that hour.

Turns out that this doesn't help prediction. It actually makes prediction much worse...



In [46]:

    
free_cap_dict = {}
for cluster in range(6):
    free_cap_dict[cluster] = X.loc[X['cluster'] == cluster, ['DATETIME', 'nameplate_capacity', 'GROSS LOAD (MW)']]
    col_name = 'cluster_' +  str(cluster) + ' free capacity'
    free_cap_dict[cluster].loc[:,col_name] = (free_cap_dict[cluster].loc[:,'nameplate_capacity'].values - 
                                              free_cap_dict[cluster].loc[:,'GROSS LOAD (MW)'].values)



In [47]:

    
free_cap_dict[0].head()









    Out[47]:






  
    
      
      DATETIME
      nameplate_capacity
      GROSS LOAD (MW)
      cluster_0 free capacity
    
  
  
    
      0
      2007-01-01 00:00:00
      13263.0
      670.0
      12593.0
    
    
      1
      2007-01-01 01:00:00
      13263.0
      492.0
      12771.0
    
    
      2
      2007-01-01 02:00:00
      13263.0
      461.0
      12802.0
    
    
      3
      2007-01-01 03:00:00
      13263.0
      417.0
      12846.0
    
    
      4
      2007-01-01 04:00:00
      13263.0
      677.0
      12586.0



In [49]:

    
for cluster in range(6):
    col_name = 'cluster_' +  str(cluster) + ' free capacity'
    X = pd.merge(X, free_cap_dict[cluster].loc[:,['DATETIME', col_name]], on='DATETIME')



In [51]:

    
X.head(n=10)









    Out[51]:






  
    
      
      cluster
      Year
      nameplate_capacity
      DATETIME
      GROSS LOAD (MW)
      ERCOT Load, MW
      Total Wind Installed, MW
      Total Wind Output, MW
      Wind Output, % of Installed
      Wind Output, % of Load
      ...
      cluster_2
      cluster_3
      cluster_4
      cluster_5
      cluster_0 free capacity
      cluster_1 free capacity
      cluster_2 free capacity
      cluster_3 free capacity
      cluster_4 free capacity
      cluster_5 free capacity
    
  
  
    
      0
      0
      2007
      13263.0
      2007-01-01 00:00:00
      670.0
      30428.0
      2790.0
      1074.0
      38.494624
      3.529644
      ...
      0.0
      0.0
      0.0
      0.0
      12593.0
      7318.7
      451.6
      371.3
      1442.0
      4904.1
    
    
      1
      1
      2007
      7442.7
      2007-01-01 00:00:00
      124.0
      30428.0
      2790.0
      1074.0
      38.494624
      3.529644
      ...
      0.0
      0.0
      0.0
      0.0
      12593.0
      7318.7
      451.6
      371.3
      1442.0
      4904.1
    
    
      2
      2
      2007
      6706.6
      2007-01-01 00:00:00
      6255.0
      30428.0
      2790.0
      1074.0
      38.494624
      3.529644
      ...
      1.0
      0.0
      0.0
      0.0
      12593.0
      7318.7
      451.6
      371.3
      1442.0
      4904.1
    
    
      3
      3
      2007
      4985.3
      2007-01-01 00:00:00
      4614.0
      30428.0
      2790.0
      1074.0
      38.494624
      3.529644
      ...
      0.0
      1.0
      0.0
      0.0
      12593.0
      7318.7
      451.6
      371.3
      1442.0
      4904.1
    
    
      4
      4
      2007
      3969.0
      2007-01-01 00:00:00
      2527.0
      30428.0
      2790.0
      1074.0
      38.494624
      3.529644
      ...
      0.0
      0.0
      1.0
      0.0
      12593.0
      7318.7
      451.6
      371.3
      1442.0
      4904.1
    
    
      5
      5
      2007
      5478.1
      2007-01-01 00:00:00
      574.0
      30428.0
      2790.0
      1074.0
      38.494624
      3.529644
      ...
      0.0
      0.0
      0.0
      1.0
      12593.0
      7318.7
      451.6
      371.3
      1442.0
      4904.1
    
    
      6
      0
      2007
      13263.0
      2007-01-01 01:00:00
      492.0
      30133.0
      2790.0
      922.6
      33.068100
      3.061760
      ...
      0.0
      0.0
      0.0
      0.0
      12771.0
      7307.7
      460.6
      443.3
      1472.0
      4878.1
    
    
      7
      1
      2007
      7442.7
      2007-01-01 01:00:00
      135.0
      30133.0
      2790.0
      922.6
      33.068100
      3.061760
      ...
      0.0
      0.0
      0.0
      0.0
      12771.0
      7307.7
      460.6
      443.3
      1472.0
      4878.1
    
    
      8
      2
      2007
      6706.6
      2007-01-01 01:00:00
      6246.0
      30133.0
      2790.0
      922.6
      33.068100
      3.061760
      ...
      1.0
      0.0
      0.0
      0.0
      12771.0
      7307.7
      460.6
      443.3
      1472.0
      4878.1
    
    
      9
      3
      2007
      4985.3
      2007-01-01 01:00:00
      4542.0
      30133.0
      2790.0
      922.6
      33.068100
      3.061760
      ...
      0.0
      1.0
      0.0
      0.0
      12771.0
      7307.7
      460.6
      443.3
      1472.0
      4878.1
    
  

10 rows × 30 columns



In [ ]:

    
for idx in X.index:
datetime = X.loc[idx, 'DATETIME']
    for cluster in range(6):
        col_name = 'cluster_' +  cluster + ' free capacity'
        X.loc[idx, col_name] =



In [ ]:



In [ ]:



In [ ]:



In [8]:

    
y.tail()









    Out[8]:






  
    
      
      Unnamed: 0
      DATETIME
      cluster_id_6
      Gen Change (MW)
    
  
  
    
      473329
      473329
      2015-12-31 19:00:00
      5
      -20.0
    
    
      473330
      473330
      2015-12-31 20:00:00
      5
      -964.0
    
    
      473331
      473331
      2015-12-31 21:00:00
      5
      -608.0
    
    
      473332
      473332
      2015-12-31 22:00:00
      5
      -246.0
    
    
      473333
      473333
      2015-12-31 23:00:00
      5
      -333.0

Drop unnecessary columns and replace nan's with 0



In [7]:

    
X_cols = ['nameplate_capacity', 'GROSS LOAD (MW)', 'ERCOT Load, MW',
          'Total Wind Installed, MW', 'Total Wind Output, MW', 'Net Load Change (MW)',
          'All coal', 'Lignite', 'Subbituminous']
X_cluster_cols = ['cluster_{}'.format(cluster) for cluster in cluster_ids]
# X_cluster_free_cols  = ['cluster_{} free capacity'.format(cluster) for cluster in cluster_ids]

X_clean = X.loc[:,X_cols+X_cluster_cols]#+X_cluster_free_cols]
X_clean.fillna(0, inplace=True)

y_clean = y.loc[:,'Gen Change (MW)']
y_clean.fillna(0, inplace=True)



In [8]:

    
print X_clean.shape
print y_clean.shape









    



(473334, 15)
(473334,)



In [9]:

    
X_clean.head()









    Out[9]:






  
    
      
      nameplate_capacity
      GROSS LOAD (MW)
      ERCOT Load, MW
      Total Wind Installed, MW
      Total Wind Output, MW
      Net Load Change (MW)
      All coal
      Lignite
      Subbituminous
      cluster_0
      cluster_1
      cluster_2
      cluster_3
      cluster_4
      cluster_5
    
  
  
    
      0
      13263.0
      670.0
      30428.0
      2790.0
      1074.0
      0.0
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      13263.0
      492.0
      30133.0
      2790.0
      922.6
      -143.6
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      13263.0
      461.0
      29941.0
      2790.0
      849.2
      -118.6
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      3
      13263.0
      417.0
      29949.0
      2790.0
      1056.3
      -199.1
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      4
      13263.0
      677.0
      30248.0
      2790.0
      837.1
      518.2
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0

Split into training, validation, testing



In [10]:

    
X_train = X_clean.loc[(X['Year']<2012),:]
y_train = y_clean.loc[(X['Year']<2012)]

X_va = X_clean.loc[X['Year'].isin([2012, 2013]),:]
y_va = y_clean.loc[X['Year'].isin([2012, 2013])]

X_test = X_clean.loc[X['Year']>2013,:]
y_test = y_clean.loc[X['Year']>2013]

Need scaled versions of the X data for some of the models



In [11]:

    
X_train_scaled = StandardScaler().fit_transform(X_train)
X_va_scaled = StandardScaler().fit_transform(X_va)
X_test_scaled = StandardScaler().fit_transform(X_test)

Check size of all arrays



In [21]:

    
print X_train_scaled.shape, y_train.shape
print X_va_scaled.shape, y_va.shape
print X_test_scaled.shape, y_test.shape









    



(262944, 16) (262944,)
(105264, 16) (105264,)
(105126, 16) (105126,)

Linear Regression (OLS)



In [59]:

    
lm = LinearRegression()
lm.fit(X_train_scaled, y_train)









    Out[59]:





LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)



In [60]:

    
lm.score(X_va_scaled, y_va)









    Out[60]:





-0.00072877220951217403



In [24]:

    
y_pr = lm.predict(X_va_scaled)



In [41]:

    
y_va.values.shape, y_pr.shape, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values.shape









    Out[41]:





((105266,), (105264,), (105264,))



In [25]:

    
y_lm_resids = pd.DataFrame(dict(zip(['Gen Change (MW)', 'y_pr', 'cluster'],
                               [y_va.values, y_pr, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values])))
# y_lm_resids['y_pr'] = y_pr
# y_lm_resids['cluster'] = X.loc[:,'cluster']



In [26]:

    
y_lm_resids.head()









    Out[26]:






  
    
      
      Gen Change (MW)
      cluster
      y_pr
    
  
  
    
      0
      0.0
      0
      -61.176003
    
    
      1
      1.0
      0
      -20.166237
    
    
      2
      -1.0
      0
      -17.027565
    
    
      3
      0.0
      0
      -14.052956
    
    
      4
      0.0
      0
      21.281029



In [27]:

    
y_lm_resids.loc[:,'residuals'] = y_lm_resids.loc[:,'y_pr'] - y_lm_resids.loc[:,'Gen Change (MW)']



In [29]:

    
g = sns.FacetGrid(y_lm_resids, hue='cluster', col='cluster',
                  col_wrap=3)
g.map(plt.scatter, 'Gen Change (MW)', 'residuals')
g.add_legend()









    Out[29]:





<seaborn.axisgrid.FacetGrid at 0x116244110>






    Out[29]:





<seaborn.axisgrid.FacetGrid at 0x116244110>

XGBoost



In [12]:

    
from xgboost import XGBRegressor

Validation curve for n_estimators



In [13]:

    
param_values = [25, 100, 250, 350]
train_scores, valid_scores = validation_curve(XGBRegressor(), X_train, y_train, "n_estimators", param_values,
                                              n_jobs=-1, verbose=3)









    



[CV] n_estimators=25 .................................................
[CV] n_estimators=100 ................................................
[CV] n_estimators=250 ................................................
[CV] n_estimators=350 ................................................
[CV] ........................ n_estimators=25, score=0.436578 -   0.1s
[CV] n_estimators=25 .................................................
[CV] ........................ n_estimators=25, score=0.411833 -   0.1s
[CV] n_estimators=100 ................................................
[CV] ....................... n_estimators=100, score=0.489780 -   0.5s
[CV] n_estimators=250 ................................................






    



[Parallel(n_jobs=-1)]: Done   3 out of  12 | elapsed:   23.4s remaining:  1.2min






    



[CV] ....................... n_estimators=100, score=0.472213 -   0.4s
[CV] n_estimators=350 ................................................
[CV] ....................... n_estimators=250, score=0.523162 -   1.2s
[CV] n_estimators=25 .................................................
[CV] ........................ n_estimators=25, score=0.432120 -   0.2s
[CV] n_estimators=100 ................................................
[CV] ....................... n_estimators=350, score=0.528431 -   1.4s
[CV] n_estimators=250 ................................................
[CV] ....................... n_estimators=250, score=0.493032 -   1.0s
[CV] n_estimators=350 ................................................






    



[Parallel(n_jobs=-1)]: Done   8 out of  12 | elapsed:  1.4min remaining:   41.7s






    



[CV] ....................... n_estimators=100, score=0.483753 -   0.5s
[CV] ....................... n_estimators=350, score=0.492202 -   1.2s
[CV] ....................... n_estimators=250, score=0.466626 -   0.8s
[CV] ....................... n_estimators=350, score=0.446004 -   0.9s






    



[Parallel(n_jobs=-1)]: Done  12 out of  12 | elapsed:  2.3min finished



In [14]:

    
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)
valid_scores_std = np.std(valid_scores, axis=1)



In [15]:

    
plt.title("Validation Curve with XGBoost", size=15)
plt.xlabel("n_estimators", size=15)
plt.ylabel("Score", size=15)
plt.ylim(0.0, 1.1)
lw = 2
plt.plot(param_values, train_scores_mean, label="Training score",
             color="darkorange", lw=lw)
plt.fill_between(param_values, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2,
                 color="darkorange", lw=lw)
plt.plot(param_values, valid_scores_mean, label="Cross-validation score",
             color="navy", lw=lw)
plt.fill_between(param_values, valid_scores_mean - valid_scores_std,
                 valid_scores_mean + valid_scores_std, alpha=0.2,
                 color="navy", lw=lw)
plt.legend(loc="best")
plt.savefig('XGBoost n_estimators validation curve.pdf', bbox_inches='tight')









    Out[15]:





<matplotlib.text.Text at 0x103a17c90>






    Out[15]:





<matplotlib.text.Text at 0x1150befd0>






    Out[15]:





<matplotlib.text.Text at 0x103aa40d0>






    Out[15]:





(0.0, 1.1)






    Out[15]:





[<matplotlib.lines.Line2D at 0x1150a7110>]






    Out[15]:





<matplotlib.collections.PolyCollection at 0x103a93a10>






    Out[15]:





[<matplotlib.lines.Line2D at 0x103abfe50>]






    Out[15]:





<matplotlib.collections.PolyCollection at 0x103a93950>






    Out[15]:





<matplotlib.legend.Legend at 0x103adb8d0>

Validation curve for n_estimators



In [16]:

    
param_values = [1,3,5,9,15]
train_scores, valid_scores = validation_curve(XGBRegressor(n_estimators=250), X_train, y_train, "max_depth", param_values,
                                              n_jobs=-1, verbose=3)









    



[CV] max_depth=1 .....................................................
[CV] max_depth=3 .....................................................
[CV] max_depth=5 .....................................................
[CV] max_depth=9 .....................................................
[CV] ............................ max_depth=1, score=0.189391 -   0.7s
[CV] max_depth=15 ....................................................
[CV] ............................ max_depth=3, score=0.523162 -   0.9s
[CV] max_depth=1 .....................................................
[CV] ............................ max_depth=1, score=0.221639 -   0.4s
[CV] max_depth=3 .....................................................
[CV] ............................ max_depth=5, score=0.526783 -   1.5s
[CV] max_depth=5 .....................................................






    



[Parallel(n_jobs=-1)]: Done   4 out of  15 | elapsed:  1.6min remaining:  4.3min






    



[CV] ............................ max_depth=3, score=0.493032 -   0.8s
[CV] max_depth=9 .....................................................
[CV] ............................ max_depth=9, score=0.499568 -   3.8s
[CV] max_depth=15 ....................................................
[CV] ............................ max_depth=5, score=0.479847 -   1.5s
[CV] max_depth=1 .....................................................
[CV] ............................ max_depth=1, score=0.208100 -   0.4s
[CV] max_depth=3 .....................................................
[CV] ............................ max_depth=3, score=0.466626 -   1.0s
[CV] max_depth=5 .....................................................
[CV] ............................ max_depth=9, score=0.414931 -   4.1s
[CV] max_depth=9 .....................................................






    



[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed:  4.7min remaining:  2.3min






    



[CV] ........................... max_depth=15, score=0.438524 -  10.4s
[CV] max_depth=15 ....................................................
[CV] ............................ max_depth=5, score=0.373544 -   1.7s
[CV] ............................ max_depth=9, score=0.334710 -   3.9s
[CV] ........................... max_depth=15, score=0.364308 -   9.7s
[CV] ........................... max_depth=15, score=0.343634 -   6.6s






    



[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  8.0min finished



In [17]:

    
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)
valid_scores_std = np.std(valid_scores, axis=1)



In [18]:

    
plt.title("Validation Curve with XGBoost", size=15)
plt.xlabel("max_depth", size=15)
plt.ylabel("Score", size=15)
plt.ylim(0.0, 1.1)
lw = 2
plt.plot(param_values, train_scores_mean, label="Training score",
             color="darkorange", lw=lw)
plt.fill_between(param_values, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2,
                 color="darkorange", lw=lw)
plt.plot(param_values, valid_scores_mean, label="Cross-validation score",
             color="navy", lw=lw)
plt.fill_between(param_values, valid_scores_mean - valid_scores_std,
                 valid_scores_mean + valid_scores_std, alpha=0.2,
                 color="navy", lw=lw)
plt.legend(loc="best")
plt.savefig('XGBoost max_depth validation curve.pdf', bbox_inches='tight')









    Out[18]:





<matplotlib.text.Text at 0x115da7bd0>






    Out[18]:





<matplotlib.text.Text at 0x115c17590>






    Out[18]:





<matplotlib.text.Text at 0x115ad7810>






    Out[18]:





(0.0, 1.1)






    Out[18]:





[<matplotlib.lines.Line2D at 0x115c019d0>]






    Out[18]:





<matplotlib.collections.PolyCollection at 0x115c14dd0>






    Out[18]:





[<matplotlib.lines.Line2D at 0x117510890>]






    Out[18]:





<matplotlib.collections.PolyCollection at 0x103a8e650>






    Out[18]:





<matplotlib.legend.Legend at 0x117524650>

Validation curve for reg_alpha



In [68]:

    
param_values = np.logspace(-5, 1, 7)
train_scores, valid_scores = validation_curve(XGBRegressor(n_estimators=250), X_train, y_train, "reg_alpha", param_values,
                                              n_jobs=-1, verbose=3)









    



[CV] reg_alpha=1e-05 .................................................
[CV] reg_alpha=0.0001 ................................................
[CV] reg_alpha=0.001 .................................................
[CV] reg_alpha=0.01 ..................................................
[CV] ........................ reg_alpha=1e-05, score=0.523162 -   0.9s
[CV] reg_alpha=0.1 ...................................................
[CV] ........................ reg_alpha=0.001, score=0.523162 -   0.9s
[CV] reg_alpha=1.0 ...................................................
[CV] ......................... reg_alpha=0.01, score=0.523162 -   1.0s
[CV] reg_alpha=10.0 ..................................................
[CV] ....................... reg_alpha=0.0001, score=0.523162 -   1.0s
[CV] reg_alpha=1e-05 .................................................
[CV] .......................... reg_alpha=1.0, score=0.523163 -   1.2s
[CV] reg_alpha=0.0001 ................................................
[CV] ........................ reg_alpha=1e-05, score=0.493032 -   1.3s
[CV] reg_alpha=0.001 .................................................
[CV] .......................... reg_alpha=0.1, score=0.523162 -   1.2s
[CV] reg_alpha=0.01 ..................................................






    



[Parallel(n_jobs=-1)]: Done   6 out of  21 | elapsed:  1.7min remaining:  4.3min






    



[CV] ......................... reg_alpha=10.0, score=0.523173 -   1.0s
[CV] reg_alpha=0.1 ...................................................
[CV] ....................... reg_alpha=0.0001, score=0.493032 -   0.9s
[CV] reg_alpha=1.0 ...................................................
[CV] ......................... reg_alpha=0.01, score=0.493032 -   1.0s
[CV] reg_alpha=10.0 ..................................................
[CV] ........................ reg_alpha=0.001, score=0.493032 -   1.0s
[CV] reg_alpha=1e-05 .................................................
[CV] .......................... reg_alpha=0.1, score=0.493032 -   1.0s
[CV] reg_alpha=0.0001 ................................................
[CV] ......................... reg_alpha=10.0, score=0.493033 -   1.0s
[CV] reg_alpha=0.001 .................................................
[CV] .......................... reg_alpha=1.0, score=0.493032 -   1.0s
[CV] reg_alpha=0.01 ..................................................






    



[Parallel(n_jobs=-1)]: Done  14 out of  21 | elapsed:  3.5min remaining:  1.7min






    



[CV] ........................ reg_alpha=1e-05, score=0.466626 -   1.1s
[CV] reg_alpha=0.1 ...................................................
[CV] ....................... reg_alpha=0.0001, score=0.466626 -   1.2s
[CV] reg_alpha=1.0 ...................................................
[CV] ........................ reg_alpha=0.001, score=0.466626 -   1.0s
[CV] reg_alpha=10.0 ..................................................
[CV] ......................... reg_alpha=0.01, score=0.466626 -   1.0s
[CV] .......................... reg_alpha=1.0, score=0.466634 -   1.0s
[CV] .......................... reg_alpha=0.1, score=0.466627 -   1.1s
[CV] ......................... reg_alpha=10.0, score=0.466572 -   0.6s






    



[Parallel(n_jobs=-1)]: Done  21 out of  21 | elapsed:  4.8min finished



In [69]:

    
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)
valid_scores_std = np.std(valid_scores, axis=1)



In [71]:

    
plt.title("Validation Curve with XGBoost")
plt.xlabel("reg_alpha")
plt.ylabel("Score")
plt.ylim(0.0, 1.1)
lw = 2
plt.semilogx(param_values, train_scores_mean, label="Training score",
             color="darkorange", lw=lw)
plt.fill_between(param_values, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2,
                 color="darkorange", lw=lw)
plt.semilogx(param_values, valid_scores_mean, label="Cross-validation score",
             color="navy", lw=lw)
plt.fill_between(param_values, valid_scores_mean - valid_scores_std,
                 valid_scores_mean + valid_scores_std, alpha=0.2,
                 color="navy", lw=lw)
plt.legend(loc="best")









    Out[71]:





<matplotlib.text.Text at 0x1180158d0>






    Out[71]:





<matplotlib.text.Text at 0x1150d4d10>






    Out[71]:





<matplotlib.text.Text at 0x117fe4ad0>






    Out[71]:





(0.0, 1.1)






    Out[71]:





[<matplotlib.lines.Line2D at 0x115bf2410>]






    Out[71]:





<matplotlib.collections.PolyCollection at 0x117a32510>






    Out[71]:





[<matplotlib.lines.Line2D at 0x118a08e90>]






    Out[71]:





<matplotlib.collections.PolyCollection at 0x118a1a850>






    Out[71]:





<matplotlib.legend.Legend at 0x118a1af90>

Learning curve for n_estimators=250 and max_depth=3



In [37]:

    
param_values = [1,3,5,9,15]
train_sizes, train_scores, valid_scores = learning_curve(XGBRegressor(n_estimators=250), X_train, y_train,
                                              n_jobs=-1, verbose=3)









    



[learning_curve] Training set sizes: [ 17529  56971  96412 135854 175296]
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV]  ................................................................
[CV] ....................................... , score=0.499820 -   1.1s
[CV]  ................................................................
[CV] ....................................... , score=0.534452 -   0.8s
[CV]  ................................................................
[CV] ....................................... , score=0.063444 -   0.9s
[CV]  ................................................................
[CV] ....................................... , score=0.519911 -   0.9s
[CV]  ................................................................






    



[Parallel(n_jobs=-1)]: Done   4 out of  15 | elapsed:   26.6s remaining:  1.2min






    



[CV] ....................................... , score=0.379781 -   1.0s
[CV]  ................................................................
[CV] ....................................... , score=0.524756 -   0.9s
[CV]  ................................................................
[CV] ....................................... , score=0.456198 -   0.9s
[CV]  ................................................................
[CV] ....................................... , score=0.523162 -   0.9s
[CV]  ................................................................
[CV] ...................................... , score=-0.076204 -   0.9s
[CV]  ................................................................
[CV] ....................................... , score=0.040880 -   0.9s
[CV]  ................................................................






    



[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed:  1.1min remaining:   34.5s






    



[CV] ....................................... , score=0.479948 -   0.9s
[CV]  ................................................................
[CV] ....................................... , score=0.307569 -   0.9s
[CV] ....................................... , score=0.493032 -   0.9s
[CV] ....................................... , score=0.327817 -   0.7s
[CV] ....................................... , score=0.466626 -   0.7s






    



[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  1.8min finished



In [38]:

    
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
valid_scores_mean = np.mean(valid_scores, axis=1)
valid_scores_std = np.std(valid_scores, axis=1)



In [39]:

    
plt.title("Learning Curve with XGBoost", size=15)
plt.xlabel("Sample size", size=15)
plt.ylabel("Score", size=15)
plt.ylim(0.0, 1.1)
lw = 2
plt.plot(train_sizes, train_scores_mean, label="Training score",
             color="darkorange", lw=lw)
plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                 train_scores_mean + train_scores_std, alpha=0.2,
                 color="darkorange", lw=lw)
plt.plot(train_sizes, valid_scores_mean, label="Cross-validation score",
             color="navy", lw=lw)
plt.fill_between(train_sizes, valid_scores_mean - valid_scores_std,
                 valid_scores_mean + valid_scores_std, alpha=0.2,
                 color="navy", lw=lw)
plt.legend(loc="best")
plt.savefig('XGBoost learning curve.pdf', bbox_inches='tight')









    Out[39]:





<matplotlib.text.Text at 0x118172190>






    Out[39]:





<matplotlib.text.Text at 0x115b7a310>






    Out[39]:





<matplotlib.text.Text at 0x116143190>






    Out[39]:





(0.0, 1.1)






    Out[39]:





[<matplotlib.lines.Line2D at 0x1150aa450>]






    Out[39]:





<matplotlib.collections.PolyCollection at 0x115bd6b50>






    Out[39]:





[<matplotlib.lines.Line2D at 0x117c4ead0>]






    Out[39]:





<matplotlib.collections.PolyCollection at 0x11818dd90>






    Out[39]:





<matplotlib.legend.Legend at 0x115a8ea10>



In [22]:

    
xgbr = XGBRegressor(n_estimators=250)



In [23]:

    
xgbr.fit(X_train, y_train)









    Out[23]:





XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0,
       learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=250, nthread=-1,
       objective='reg:linear', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)



In [25]:

    
y_pr = xgbr.predict(X_va)
y_xgbr_resids = pd.DataFrame(dict(zip(['Gen Change (MW)', 'y_pr', 'cluster'],
                               [y_va.values, y_pr, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values])))



In [27]:

    
y_xgbr_resids.loc[:,'residuals'] = y_xgbr_resids.loc[:,'y_pr'] - y_xgbr_resids.loc[:,'Gen Change (MW)']



In [ ]:

    
plt.scatter()



In [39]:

    
with sns.axes_style('whitegrid'):
    g = sns.FacetGrid(y_xgbr_resids, hue='cluster', col='cluster',
                      col_wrap=3)
    g.map(plt.scatter, 'y_pr', 'residuals', s=5, alpha=.3)
    g.set_xlabels(size=15)
    g.set_ylabels(size=15)
    plt.savefig('XGBR residuals.pdf')









    Out[39]:





<seaborn.axisgrid.FacetGrid at 0x11d961750>






    Out[39]:





<seaborn.axisgrid.FacetGrid at 0x11d961750>






    Out[39]:





<seaborn.axisgrid.FacetGrid at 0x11d961750>



In [ ]:



In [ ]:



In [15]:

    
model = XGBRegressor()



In [16]:

    
subsample = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
param_grid = dict(subsample=subsample)



In [18]:

    
grid_search = GridSearchCV(model, param_grid, n_jobs=-1, verbose=3)



In [19]:

    
result = grid_search.fit(X_train_scaled, y_train)









    



Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] subsample=0.1 ...................................................
[CV] subsample=0.1 ...................................................
[CV] subsample=0.1 ...................................................
[CV] subsample=0.2 ...................................................
[CV] .......................... subsample=0.1, score=0.484954 -   0.4s
[CV] subsample=0.2 ...................................................
[CV] .......................... subsample=0.1, score=0.457812 -   0.4s
[CV] subsample=0.2 ...................................................
[CV] .......................... subsample=0.1, score=0.432054 -   0.4s
[CV] subsample=0.3 ...................................................
[CV] .......................... subsample=0.2, score=0.489922 -   0.4s
[CV] subsample=0.3 ...................................................
[CV] .......................... subsample=0.2, score=0.405688 -   0.3s
[CV] subsample=0.3 ...................................................
[CV] .......................... subsample=0.2, score=0.462352 -   0.3s
[CV] subsample=0.4 ...................................................
[CV] .......................... subsample=0.3, score=0.494859 -   0.3s
[CV] subsample=0.4 ...................................................
[CV] .......................... subsample=0.3, score=0.461317 -   0.3s
[CV] subsample=0.4 ...................................................
[CV] .......................... subsample=0.3, score=0.459056 -   0.3s
[CV] subsample=0.5 ...................................................
[CV] .......................... subsample=0.4, score=0.496053 -   0.3s
[CV] subsample=0.5 ...................................................
[CV] .......................... subsample=0.4, score=0.463809 -   0.3s
[CV] subsample=0.5 ...................................................
[CV] .......................... subsample=0.4, score=0.449886 -   0.3s
[CV] subsample=0.6 ...................................................
[CV] .......................... subsample=0.5, score=0.499908 -   0.5s
[CV] subsample=0.6 ...................................................
[CV] .......................... subsample=0.5, score=0.462957 -   0.3s
[CV] subsample=0.6 ...................................................
[CV] .......................... subsample=0.5, score=0.447146 -   0.3s
[CV] subsample=0.7 ...................................................
[CV] .......................... subsample=0.6, score=0.500445 -   0.5s
[CV] subsample=0.7 ...................................................
[CV] .......................... subsample=0.6, score=0.463235 -   0.3s
[CV] subsample=0.7 ...................................................
[CV] .......................... subsample=0.6, score=0.424141 -   0.4s
[CV] subsample=0.8 ...................................................
[CV] .......................... subsample=0.7, score=0.498927 -   0.3s
[CV] subsample=0.8 ...................................................
[CV] .......................... subsample=0.7, score=0.465330 -   0.3s
[CV] subsample=0.8 ...................................................
[CV] .......................... subsample=0.7, score=0.467672 -   0.3s
[CV] subsample=1.0 ...................................................
[CV] .......................... subsample=0.8, score=0.496447 -   0.3s
[CV] subsample=1.0 ...................................................
[CV] .......................... subsample=0.8, score=0.461902 -   0.4s
[CV] subsample=1.0 ...................................................
[CV] .......................... subsample=0.8, score=0.462582 -   0.5s
[CV] .......................... subsample=1.0, score=0.498483 -   0.3s
[CV] .......................... subsample=1.0, score=0.463337 -   0.2s
[CV] .......................... subsample=1.0, score=0.473275 -   0.2s






    



[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:  2.4min finished



In [20]:

    
result.cv_results_









    Out[20]:





{'mean_fit_time': array([ 17.29887025,  17.27616898,  17.9309593 ,  20.16705402,
         21.57488139,  21.970059  ,  20.77520935,  20.17156092,  15.8298982 ]),
 'mean_score_time': array([ 0.37530239,  0.33570194,  0.31040764,  0.32522202,  0.35875694,
         0.3899494 ,  0.2991906 ,  0.38670103,  0.22519708]),
 'mean_test_score': array([ 0.4582734 ,  0.45265404,  0.47174388,  0.46991596,  0.47000372,
         0.46260688,  0.47730961,  0.47364352,  0.47836477]),
 'mean_train_score': array([ 0.52659795,  0.53019258,  0.53201711,  0.53158389,  0.53369591,
         0.53187077,  0.53222026,  0.53103841,  0.53230937]),
 'param_subsample': masked_array(data = [0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 1.0],
              mask = [False False False False False False False False False],
        fill_value = ?),
 'params': ({'subsample': 0.1},
  {'subsample': 0.2},
  {'subsample': 0.3},
  {'subsample': 0.4},
  {'subsample': 0.5},
  {'subsample': 0.6},
  {'subsample': 0.7},
  {'subsample': 0.8},
  {'subsample': 1.0}),
 'rank_test_score': array([8, 9, 4, 6, 5, 7, 2, 3, 1], dtype=int32),
 'split0_test_score': array([ 0.48495396,  0.48992193,  0.49485867,  0.49605334,  0.49990795,
         0.50044486,  0.49892667,  0.49644705,  0.49848254]),
 'split0_train_score': array([ 0.52960193,  0.53432657,  0.53327846,  0.53291754,  0.53559476,
         0.53532652,  0.53562279,  0.5357719 ,  0.53546957]),
 'split1_test_score': array([ 0.45781181,  0.46235239,  0.46131677,  0.46380884,  0.46295738,
         0.46323497,  0.46533023,  0.4619017 ,  0.46333653]),
 'split1_train_score': array([ 0.54735279,  0.54738404,  0.54435946,  0.54753438,  0.55000352,
         0.54396756,  0.54661591,  0.54567355,  0.54748602]),
 'split2_test_score': array([ 0.43205443,  0.4056878 ,  0.4590562 ,  0.44988569,  0.44714583,
         0.4241408 ,  0.46767192,  0.4625818 ,  0.47327525]),
 'split2_train_score': array([ 0.50283914,  0.50886712,  0.5184134 ,  0.51429974,  0.51548944,
         0.51631822,  0.51442209,  0.51166979,  0.51397251]),
 'std_fit_time': array([ 0.03468956,  1.56806389,  0.17731887,  0.08404964,  0.17640389,
         0.37043793,  0.28607418,  0.18931806,  0.82959389]),
 'std_score_time': array([ 0.01687459,  0.03872609,  0.01886037,  0.01837927,  0.07238271,
         0.07224925,  0.01489875,  0.06787804,  0.02795523]),
 'std_test_score': array([ 0.02159861,  0.03506556,  0.01637066,  0.01933625,  0.0221088 ,
         0.03115417,  0.01531544,  0.01612693,  0.01479274]),
 'std_train_score': array([ 0.01829634,  0.01599387,  0.01062992,  0.01360072,  0.01415414,
         0.01154927,  0.01336147,  0.01427978,  0.01386312])}



In [ ]:



In [22]:

    
model = XGBRegressor()



In [23]:

    
colsample_bytree = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
param_grid = dict(colsample_bytree=colsample_bytree)



In [24]:

    
grid_search = GridSearchCV(model, param_grid, n_jobs=-1, verbose=3)



In [25]:

    
result = grid_search.fit(X_train_scaled, y_train)









    



Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] colsample_bytree=0.1 ............................................
[CV] colsample_bytree=0.1 ............................................
[CV] colsample_bytree=0.1 ............................................
[CV] colsample_bytree=0.2 ............................................
[CV] ................... colsample_bytree=0.1, score=0.164646 -   0.2s
[CV] colsample_bytree=0.2 ............................................
[CV] ................... colsample_bytree=0.1, score=0.163248 -   0.2s
[CV] colsample_bytree=0.2 ............................................
[CV] ................... colsample_bytree=0.1, score=0.149747 -   0.2s
[CV] colsample_bytree=0.3 ............................................
[CV] ................... colsample_bytree=0.2, score=0.270653 -   0.3s
[CV] colsample_bytree=0.3 ............................................
[CV] ................... colsample_bytree=0.2, score=0.277469 -   0.4s
[CV] colsample_bytree=0.3 ............................................
[CV] ................... colsample_bytree=0.2, score=0.241283 -   0.4s
[CV] colsample_bytree=0.4 ............................................
[CV] ................... colsample_bytree=0.3, score=0.361171 -   0.3s
[CV] colsample_bytree=0.4 ............................................
[CV] ................... colsample_bytree=0.3, score=0.343253 -   0.3s
[CV] colsample_bytree=0.4 ............................................
[CV] ................... colsample_bytree=0.3, score=0.332786 -   0.3s
[CV] colsample_bytree=0.5 ............................................
[CV] ................... colsample_bytree=0.4, score=0.432744 -   0.3s
[CV] colsample_bytree=0.5 ............................................
[CV] ................... colsample_bytree=0.4, score=0.404372 -   0.4s
[CV] colsample_bytree=0.5 ............................................
[CV] ................... colsample_bytree=0.4, score=0.394815 -   0.3s
[CV] colsample_bytree=0.6 ............................................
[CV] ................... colsample_bytree=0.5, score=0.461426 -   0.3s
[CV] colsample_bytree=0.6 ............................................
[CV] ................... colsample_bytree=0.5, score=0.426662 -   0.4s
[CV] colsample_bytree=0.6 ............................................
[CV] ................... colsample_bytree=0.5, score=0.428052 -   0.3s
[CV] colsample_bytree=0.7 ............................................
[CV] ................... colsample_bytree=0.6, score=0.484498 -   0.4s
[CV] colsample_bytree=0.7 ............................................
[CV] ................... colsample_bytree=0.6, score=0.435976 -   0.4s
[CV] colsample_bytree=0.7 ............................................
[CV] ................... colsample_bytree=0.6, score=0.432028 -   0.4s
[CV] colsample_bytree=0.8 ............................................
[CV] ................... colsample_bytree=0.7, score=0.481932 -   0.4s
[CV] colsample_bytree=0.8 ............................................
[CV] ................... colsample_bytree=0.7, score=0.449476 -   0.3s
[CV] colsample_bytree=0.8 ............................................
[CV] ................... colsample_bytree=0.7, score=0.445217 -   0.4s
[CV] colsample_bytree=1.0 ............................................
[CV] ................... colsample_bytree=0.8, score=0.477589 -   0.3s
[CV] colsample_bytree=1.0 ............................................
[CV] ................... colsample_bytree=0.8, score=0.455795 -   0.3s
[CV] colsample_bytree=1.0 ............................................
[CV] ................... colsample_bytree=0.8, score=0.440667 -   0.3s
[CV] ................... colsample_bytree=1.0, score=0.498483 -   0.3s
[CV] ................... colsample_bytree=1.0, score=0.463337 -   0.2s
[CV] ................... colsample_bytree=1.0, score=0.473275 -   0.2s






    



[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:  1.5min finished



In [26]:

    
result.cv_results_









    Out[26]:





{'mean_fit_time': array([  4.56124123,   6.81195768,   8.10874836,   9.60875932,
         12.16170565,  14.600293  ,  17.21607868,  16.53451133,  15.66608071]),
 'mean_score_time': array([ 0.22303112,  0.37348668,  0.32521661,  0.35028831,  0.35460496,
         0.38649503,  0.35863702,  0.32337038,  0.23110159]),
 'mean_test_score': array([ 0.15921352,  0.26313523,  0.3457365 ,  0.41064376,  0.43871338,
         0.45083418,  0.45887504,  0.45801704,  0.47836477]),
 'mean_train_score': array([ 0.16892206,  0.31116065,  0.39683733,  0.46549971,  0.49976069,
         0.51085103,  0.51879221,  0.52348525,  0.53230937]),
 'param_colsample_bytree': masked_array(data = [0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 1.0],
              mask = [False False False False False False False False False],
        fill_value = ?),
 'params': ({'colsample_bytree': 0.1},
  {'colsample_bytree': 0.2},
  {'colsample_bytree': 0.3},
  {'colsample_bytree': 0.4},
  {'colsample_bytree': 0.5},
  {'colsample_bytree': 0.6},
  {'colsample_bytree': 0.7},
  {'colsample_bytree': 0.8},
  {'colsample_bytree': 1.0}),
 'rank_test_score': array([9, 8, 7, 6, 5, 4, 2, 3, 1], dtype=int32),
 'split0_test_score': array([ 0.16464645,  0.27065299,  0.36117083,  0.43274381,  0.46142571,
         0.48449781,  0.48193165,  0.47758942,  0.49848254]),
 'split0_train_score': array([ 0.17610519,  0.30573853,  0.38943222,  0.4613343 ,  0.49598943,
         0.51501353,  0.5193987 ,  0.51984055,  0.53546957]),
 'split1_test_score': array([ 0.16324756,  0.2774694 ,  0.34325273,  0.40437222,  0.42666201,
         0.43597646,  0.44947615,  0.45579473,  0.46333653]),
 'split1_train_score': array([ 0.17075628,  0.3177841 ,  0.40282043,  0.48186734,  0.51148631,
         0.51749275,  0.53201472,  0.53787768,  0.54748602]),
 'split2_test_score': array([ 0.14974656,  0.24128329,  0.33278595,  0.39481524,  0.42805241,
         0.43202828,  0.44521732,  0.44066697,  0.47327525]),
 'split2_train_score': array([ 0.15990471,  0.30995931,  0.39825933,  0.4532975 ,  0.49180633,
         0.50004681,  0.50496321,  0.51273751,  0.51397251]),
 'std_fit_time': array([ 0.04071576,  0.34999139,  0.31733372,  0.03549674,  0.1198805 ,
         0.91900495,  0.88420138,  0.37777915,  0.98321903]),
 'std_score_time': array([ 0.00639099,  0.05793678,  0.00767493,  0.0210572 ,  0.03189375,
         0.02170566,  0.03632257,  0.00240637,  0.01842607]),
 'std_test_score': array([ 0.00671847,  0.01570024,  0.01172041,  0.0161068 ,  0.01607007,
         0.02385829,  0.01639593,  0.01515522,  0.01479274]),
 'std_train_score': array([ 0.00673979,  0.00499042,  0.00555743,  0.01202974,  0.00846531,
         0.00770649,  0.01105205,  0.01058206,  0.01386312])}



In [ ]:



In [27]:

    
model = XGBRegressor()



In [28]:

    
colsample_bylevel = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
param_grid = dict(colsample_bylevel=colsample_bylevel)



In [29]:

    
grid_search = GridSearchCV(model, param_grid, n_jobs=-1, verbose=3)



In [30]:

    
result = grid_search.fit(X_train_scaled, y_train)









    



Fitting 3 folds for each of 9 candidates, totalling 27 fits
[CV] colsample_bylevel=0.1 ...........................................
[CV] colsample_bylevel=0.1 ...........................................
[CV] colsample_bylevel=0.1 ...........................................
[CV] colsample_bylevel=0.2 ...........................................
[CV] .................. colsample_bylevel=0.1, score=0.236454 -   0.2s
[CV] colsample_bylevel=0.2 ...........................................
[CV] .................. colsample_bylevel=0.1, score=0.230266 -   0.3s
[CV] colsample_bylevel=0.2 ...........................................
[CV] .................. colsample_bylevel=0.1, score=0.221978 -   0.3s
[CV] colsample_bylevel=0.3 ...........................................
[CV] .................. colsample_bylevel=0.2, score=0.389605 -   0.3s
[CV] colsample_bylevel=0.3 ...........................................
[CV] .................. colsample_bylevel=0.2, score=0.357623 -   0.3s
[CV] .................. colsample_bylevel=0.2, score=0.361486 -   0.3s
[CV] colsample_bylevel=0.3 ...........................................
[CV] colsample_bylevel=0.4 ...........................................
[CV] .................. colsample_bylevel=0.3, score=0.443983 -   0.4s
[CV] colsample_bylevel=0.4 ...........................................
[CV] .................. colsample_bylevel=0.3, score=0.396302 -   0.3s
[CV] colsample_bylevel=0.4 ...........................................
[CV] .................. colsample_bylevel=0.3, score=0.425530 -   0.3s
[CV] colsample_bylevel=0.5 ...........................................
[CV] .................. colsample_bylevel=0.4, score=0.433810 -   0.4s
[CV] colsample_bylevel=0.5 ...........................................
[CV] .................. colsample_bylevel=0.4, score=0.429944 -   0.6s
[CV] colsample_bylevel=0.5 ...........................................
[CV] .................. colsample_bylevel=0.4, score=0.450593 -   0.4s
[CV] colsample_bylevel=0.6 ...........................................
[CV] .................. colsample_bylevel=0.5, score=0.461982 -   0.3s
[CV] colsample_bylevel=0.6 ...........................................
[CV] .................. colsample_bylevel=0.5, score=0.436827 -   0.3s
[CV] colsample_bylevel=0.6 ...........................................
[CV] .................. colsample_bylevel=0.5, score=0.443715 -   0.4s
[CV] colsample_bylevel=0.7 ...........................................
[CV] .................. colsample_bylevel=0.6, score=0.466433 -   0.6s
[CV] colsample_bylevel=0.7 ...........................................
[CV] .................. colsample_bylevel=0.6, score=0.438975 -   0.3s
[CV] colsample_bylevel=0.7 ...........................................
[CV] .................. colsample_bylevel=0.6, score=0.445031 -   0.6s
[CV] colsample_bylevel=0.8 ...........................................
[CV] .................. colsample_bylevel=0.7, score=0.483877 -   0.3s
[CV] colsample_bylevel=0.8 ...........................................
[CV] .................. colsample_bylevel=0.7, score=0.455740 -   0.5s
[CV] colsample_bylevel=0.8 ...........................................
[CV] .................. colsample_bylevel=0.7, score=0.430706 -   0.3s
[CV] colsample_bylevel=1.0 ...........................................
[CV] .................. colsample_bylevel=0.8, score=0.484522 -   0.3s
[CV] colsample_bylevel=1.0 ...........................................
[CV] .................. colsample_bylevel=0.8, score=0.447451 -   0.3s
[CV] colsample_bylevel=1.0 ...........................................
[CV] .................. colsample_bylevel=0.8, score=0.482551 -   0.6s
[CV] .................. colsample_bylevel=1.0, score=0.498483 -   0.3s
[CV] .................. colsample_bylevel=1.0, score=0.463337 -   0.4s
[CV] .................. colsample_bylevel=1.0, score=0.473275 -   0.2s






    



[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:  1.6min finished



In [31]:

    
result.cv_results_









    Out[31]:





{'mean_fit_time': array([  5.58680964,   7.96834898,   9.22394466,  11.17805529,
         14.44551174,  14.79537845,  16.52017864,  16.49024232,  16.76054573]),
 'mean_score_time': array([ 0.27904503,  0.30942973,  0.3542637 ,  0.47181129,  0.36044025,
         0.49709463,  0.39499768,  0.40493266,  0.27615031]),
 'mean_test_score': array([ 0.22956596,  0.36957139,  0.42193826,  0.43811555,  0.44750804,
         0.45014643,  0.45677407,  0.47150803,  0.47836477]),
 'mean_train_score': array([ 0.2551972 ,  0.4250602 ,  0.46792848,  0.49801298,  0.50912125,
         0.51348689,  0.52657848,  0.52464538,  0.53230937]),
 'param_colsample_bylevel': masked_array(data = [0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 1.0],
              mask = [False False False False False False False False False],
        fill_value = ?),
 'params': ({'colsample_bylevel': 0.1},
  {'colsample_bylevel': 0.2},
  {'colsample_bylevel': 0.3},
  {'colsample_bylevel': 0.4},
  {'colsample_bylevel': 0.5},
  {'colsample_bylevel': 0.6},
  {'colsample_bylevel': 0.7},
  {'colsample_bylevel': 0.8},
  {'colsample_bylevel': 1.0}),
 'rank_test_score': array([9, 8, 7, 6, 5, 4, 3, 2, 1], dtype=int32),
 'split0_test_score': array([ 0.23645415,  0.38960514,  0.44398314,  0.43380956,  0.46198182,
         0.46643296,  0.48387676,  0.48452161,  0.49848254]),
 'split0_train_score': array([ 0.25873623,  0.43174816,  0.47813957,  0.48825062,  0.50763914,
         0.51478784,  0.52760406,  0.52697517,  0.53546957]),
 'split1_test_score': array([ 0.23026568,  0.35762309,  0.39630159,  0.42994417,  0.43682736,
         0.43897526,  0.45573985,  0.44745105,  0.46333653]),
 'split1_train_score': array([ 0.26557786,  0.43169105,  0.47092311,  0.51763234,  0.52470075,
         0.52806783,  0.54517381,  0.53490598,  0.54748602]),
 'split2_test_score': array([ 0.22197804,  0.36148594,  0.42553005,  0.45059293,  0.44371494,
         0.44503108,  0.43070559,  0.48255143,  0.47327525]),
 'split2_train_score': array([ 0.2412775 ,  0.41174138,  0.45472278,  0.48815597,  0.49502386,
         0.497605  ,  0.50695756,  0.512055  ,  0.51397251]),
 'std_fit_time': array([ 0.01258047,  0.33746784,  0.59310873,  0.53371143,  0.60921623,
         0.19267317,  0.29740686,  0.5224749 ,  0.81394888]),
 'std_score_time': array([ 0.0229224 ,  0.02305377,  0.05598837,  0.11034201,  0.04290427,
         0.11777629,  0.10116711,  0.11118439,  0.06571458]),
 'std_test_score': array([ 0.00593052,  0.01425351,  0.0196309 ,  0.00896285,  0.01061374,
         0.01177869,  0.02171936,  0.01702986,  0.01479274]),
 'std_train_score': array([ 0.01023134,  0.00941785,  0.00979157,  0.01387304,  0.01216078,
         0.01247037,  0.01561856,  0.00947321,  0.01386312])}



In [ ]:



In [32]:

    
model = XGBRegressor()



In [33]:

    
max_depth = [3, 6, 9]
n_estimators = [100, 250, 500]
reg_alpha = [1e-5, 1e-3, 0.1]
reg_lambda = [1e-3, 0.1, 1]
param_grid = dict(max_depth=max_depth, n_estimators=n_estimators,
                  reg_alpha=reg_alpha, reg_lambda=reg_lambda)



In [35]:

    
grid_search = GridSearchCV(model, param_grid, n_jobs=-1, verbose=2)



In [36]:

    
result = grid_search.fit(X_train_scaled, y_train)









    



Fitting 3 folds for each of 81 candidates, totalling 243 fits
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.6s
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.7s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.3s
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.4s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   0.5s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   0.5s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   0.5s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.3s
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.4s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.4s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.4s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.4s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.3s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.3s
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.3s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.7s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 






    



[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  4.1min






    



[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   1.0s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.7s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   0.9s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.7s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.8s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   0.9s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   1.9s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   1.9s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   2.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   2.2s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=3 -   2.1s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   2.2s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=3 -   2.6s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   3.5s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=3 -   2.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   2.0s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   2.4s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=3 -   2.7s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   2.1s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   2.6s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=3 -   2.6s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   1.9s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   2.1s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=3 -   2.1s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   1.9s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   2.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=3 -   2.1s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   1.1s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   0.7s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   0.6s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   0.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   0.6s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   0.8s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   1.3s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   2.5s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   1.9s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   3.0s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   1.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   1.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   2.2s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   1.8s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   3.1s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   2.3s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   2.1s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   2.1s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   2.7s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   1.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   2.2s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   2.3s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   1.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   2.2s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   2.2s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   1.8s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   2.1s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   2.1s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   1.8s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   1.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   2.6s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   2.0s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   3.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   2.2s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   5.5s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   4.2s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=6 -   5.4s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   4.7s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   4.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=6 -   4.7s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   5.3s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -   4.3s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=6 -  12.7s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   6.3s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   7.0s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=6 -   7.1s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   5.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   4.5s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=6 -   4.9s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   4.6s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   5.4s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=6 -   4.5s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   4.6s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....






    



[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed: 56.3min






    



[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   4.6s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=6 -   4.3s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   4.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   4.1s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=6 -   4.5s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   4.8s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   1.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   1.4s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   4.4s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=6 -   5.2s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   1.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   1.3s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   1.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   1.5s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   2.1s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   1.4s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   1.4s
[CV] n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   2.0s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   1.1s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   1.3s
[CV] n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=100, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   1.1s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   1.2s
[CV] n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=100, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   1.3s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   1.1s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   1.1s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=100, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   1.2s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   4.1s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   3.6s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   3.6s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   3.6s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -   4.1s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   3.6s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.001, max_depth=9 -   4.0s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   3.8s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   3.6s
[CV] n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=250, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -   4.2s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   3.7s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   3.9s
[CV] n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=250, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -   4.0s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   3.7s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   4.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 
[CV]  n_estimators=250, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   5.1s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -  11.6s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -  11.8s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=1e-05, max_depth=9 -  14.8s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -  11.7s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -  12.9s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=1e-05, max_depth=9 -  11.8s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -  10.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -  14.1s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=1e-05, max_depth=9 -  16.6s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -  14.7s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -  19.2s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.001, max_depth=9 -  17.1s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -  15.0s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -  18.7s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.001, max_depth=9 -  21.7s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 -  19.0s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 -  17.6s
[CV] n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 ..
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.001, max_depth=9 -  21.5s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -  16.9s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -  17.1s
[CV] n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 ....
[CV]  n_estimators=500, reg_lambda=0.001, reg_alpha=0.1, max_depth=9 -  17.2s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -  13.5s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -  19.1s
[CV] n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 ......
[CV]  n_estimators=500, reg_lambda=0.1, reg_alpha=0.1, max_depth=9 -  21.3s
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 -  10.8s
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   9.0s
[CV]  n_estimators=500, reg_lambda=1, reg_alpha=0.1, max_depth=9 -   8.6s






    



[Parallel(n_jobs=-1)]: Done 243 out of 243 | elapsed: 134.4min finished



In [37]:

    
import cPickle as pickle



In [38]:

    
pickle.dump((grid_search, result), open( "xgb gridsearch and results.pkl", "wb" ) )



In [39]:

    
result.cv_results_









    Out[39]:





{'mean_fit_time': array([  22.47146734,   19.85001024,   18.88750442,   19.05822746,
          18.78651031,   20.00944002,   21.43022664,   23.52011832,
          18.54295969,   45.00260838,   45.99241924,   47.29698006,
          47.67425768,   43.60707927,   44.10489106,   44.84957798,
          43.919686  ,   43.47668465,   86.81721862,   86.67804432,
          91.35499477,  102.640498  ,   95.7173396 ,   96.57667494,
         102.3701551 ,   95.73338199,   94.18298801,   39.80882366,
          43.65931567,   42.42765705,   38.12971965,   38.45218102,
          38.24791106,   37.63723095,   38.28630408,   40.42925262,
         105.61946464,  108.43322961,  107.24544835,  107.80645768,
         103.35916114,   97.95467575,   97.98183091,   99.43998098,
         107.10949628,  202.54374401,  194.41516336,  204.30298233,
         313.67145197,  223.47959693,  188.01607633,  189.07257533,
         183.10826127,  187.89160872,   62.74625071,   61.75910044,
          62.5911816 ,   63.4838237 ,   64.60604366,   59.78916963,
          60.54867752,   56.1776793 ,   58.63835327,  139.06640402,
         135.61382731,  134.05349803,  134.35580373,  134.53863064,
         135.34126663,  137.74004738,  144.99287208,  143.55949601,
         298.56969301,  310.15868433,  327.98374923,  380.83669599,
         389.13507271,  444.9323643 ,  465.68078033,  399.23097968,
         318.15350103]),
 'mean_score_time': array([  0.64510202,   0.42531403,   0.36831458,   0.47510425,
          0.334831  ,   0.32499361,   0.36263768,   0.40617736,
          0.33033705,   0.83359599,   0.85212811,   0.82079935,
          0.87318301,   0.79149508,   0.81061602,   0.85486698,
          0.83743834,   0.79407867,   1.96986628,   1.9699014 ,
          2.13690694,   2.23936931,   2.51991105,   2.35435534,
          2.4416492 ,   2.04840358,   2.08888197,   0.61286894,
          0.816492  ,   0.64597535,   0.64961934,   0.62140663,
          0.62591966,   0.62220828,   0.62799899,   0.91073902,
          2.45118427,   1.96037833,   2.40257279,   2.28467162,
          2.11549822,   2.02359533,   1.99199939,   2.09874129,
          2.43012071,   5.05041567,   4.47006869,   7.42688624,
          6.80764429,   4.78142881,   4.81631398,   4.48680139,
          4.20406866,   4.8163929 ,   1.28546071,   1.23436419,
          1.23773265,   1.26887202,   1.57145294,   1.5407354 ,
          1.21871765,   1.18149567,   1.13583446,   3.95044796,
          3.75426229,   3.82978996,   3.87594604,   3.78032033,
          3.78483701,   3.86818258,   3.87522388,   4.31407698,
         12.73754597,  12.12339306,  13.60442074,  17.01043169,
         18.49543595,  19.34253867,  17.0617253 ,  17.94042699,   9.46461463]),
 'mean_test_score': array([ 0.47476509,  0.47481771,  0.47836477,  0.47476509,  0.47481771,
         0.47836477,  0.47476534,  0.47481795,  0.47836479,  0.47250483,
         0.47744062,  0.48390166,  0.47250484,  0.47744063,  0.48390166,
         0.47895337,  0.47744106,  0.48390186,  0.45305078,  0.47485295,
         0.47797076,  0.45305083,  0.47485296,  0.47797077,  0.4681252 ,
         0.47485382,  0.4779718 ,  0.43881415,  0.46167842,  0.48331064,
         0.43881423,  0.46167846,  0.48331065,  0.44369675,  0.46168283,
         0.48331197,  0.35605434,  0.38107234,  0.44572734,  0.35605463,
         0.38107256,  0.4457274 ,  0.37149349,  0.38264133,  0.4445941 ,
         0.26132655,  0.26890554,  0.3989032 ,  0.261327  ,  0.26890589,
         0.3989033 ,  0.30566842,  0.29996746,  0.39139257,  0.36714381,
         0.39205064,  0.40212936,  0.36714405,  0.39206942,  0.40212902,
         0.37245284,  0.38867676,  0.40326716,  0.27551112,  0.31712893,
         0.35460872,  0.27551179,  0.32270191,  0.35407602,  0.28001021,
         0.30735607,  0.35042325,  0.21114584,  0.23148839,  0.31036296,
         0.20403695,  0.23413726,  0.30455753,  0.20892431,  0.22948024,
         0.30888958]),
 'mean_train_score': array([ 0.53234346,  0.53233605,  0.53230937,  0.53234346,  0.53233605,
         0.53230937,  0.53234342,  0.53233601,  0.53230933,  0.57047365,
         0.57112058,  0.57142654,  0.57047365,  0.57112057,  0.57142654,
         0.57053486,  0.57112051,  0.57142648,  0.59332666,  0.59350791,
         0.59318737,  0.59332666,  0.5935079 ,  0.59318737,  0.59334044,
         0.59350778,  0.59318726,  0.63510804,  0.6338578 ,  0.63299249,
         0.63510803,  0.6338578 ,  0.63299248,  0.63527208,  0.6338571 ,
         0.63299202,  0.66823407,  0.66803645,  0.6642309 ,  0.66823405,
         0.66803643,  0.66423089,  0.66835504,  0.66824809,  0.66412709,
         0.70165232,  0.70107866,  0.69507789,  0.70165228,  0.70107863,
         0.69507787,  0.7013474 ,  0.70088766,  0.69564498,  0.73353895,
         0.73017286,  0.72482406,  0.7335389 ,  0.73017308,  0.72482403,
         0.73325362,  0.72999212,  0.72706969,  0.77799831,  0.77652584,
         0.76795271,  0.7779982 ,  0.77764112,  0.76779032,  0.78021514,
         0.77756907,  0.76924628,  0.82742815,  0.82474962,  0.81420268,
         0.82730749,  0.82604533,  0.8141921 ,  0.82947992,  0.8257738 ,
         0.81282365]),
 'param_max_depth': masked_array(data = [3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 6 6 6 6 6 6 6 6 6 6
  6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
  9 9 9 9 9 9 9],
              mask = [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False],
        fill_value = ?),
 'param_n_estimators': masked_array(data = [100 100 100 100 100 100 100 100 100 250 250 250 250 250 250 250 250 250
  500 500 500 500 500 500 500 500 500 100 100 100 100 100 100 100 100 100
  250 250 250 250 250 250 250 250 250 500 500 500 500 500 500 500 500 500
  100 100 100 100 100 100 100 100 100 250 250 250 250 250 250 250 250 250
  500 500 500 500 500 500 500 500 500],
              mask = [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False],
        fill_value = ?),
 'param_reg_alpha': masked_array(data = [1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001
  0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1
  1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001
  0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1
  1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001
  0.001 0.001 0.1 0.1 0.1 1e-05 1e-05 1e-05 0.001 0.001 0.001 0.1 0.1 0.1],
              mask = [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False],
        fill_value = ?),
 'param_reg_lambda': masked_array(data = [0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1
  0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1
  0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1
  0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1 0.001 0.1 1
  0.001 0.1 1 0.001 0.1 1 0.001 0.1 1],
              mask = [False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False False False False
  False False False False False False False False False],
        fill_value = ?),
 'params': ({'max_depth': 3,
   'n_estimators': 100,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 3,
   'n_estimators': 100,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 3,
   'n_estimators': 250,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 3,
   'n_estimators': 250,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 3,
   'n_estimators': 500,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 3,
   'n_estimators': 500,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 3, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 100,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 100,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 250,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 250,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 500,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 6,
   'n_estimators': 500,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 6, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 100,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 100,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 100, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 250,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 250,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 250, 'reg_alpha': 0.1, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 500,
   'reg_alpha': 1e-05,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 1e-05, 'reg_lambda': 1},
  {'max_depth': 9,
   'n_estimators': 500,
   'reg_alpha': 0.001,
   'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 0.001, 'reg_lambda': 1},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.001},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 0.1},
  {'max_depth': 9, 'n_estimators': 500, 'reg_alpha': 0.1, 'reg_lambda': 1}),
 'rank_test_score': array([25, 22, 10, 24, 21,  9, 23, 20,  8, 27, 16,  3, 26, 15,  2,  7, 14,
         1, 33, 19, 13, 32, 18, 12, 28, 17, 11, 39, 31,  6, 38, 30,  5, 37,
        29,  4, 57, 51, 35, 56, 50, 34, 53, 49, 36, 75, 73, 44, 74, 72, 43,
        66, 68, 47, 55, 46, 41, 54, 45, 42, 52, 48, 40, 71, 62, 58, 70, 61,
        59, 69, 65, 60, 79, 77, 63, 81, 76, 67, 80, 78, 64], dtype=int32),
 'split0_test_score': array([ 0.49474562,  0.4947435 ,  0.49848254,  0.49474562,  0.4947435 ,
         0.49848254,  0.49474561,  0.49474349,  0.49848253,  0.51155364,
         0.51519969,  0.51491556,  0.51155365,  0.5151997 ,  0.51491556,
         0.51155414,  0.51520025,  0.51491582,  0.51140193,  0.51334442,
         0.51074804,  0.51140195,  0.51334443,  0.51074807,  0.51140374,
         0.51334641,  0.51075093,  0.49165984,  0.49945349,  0.50725681,
         0.49165994,  0.49945353,  0.50725682,  0.49167024,  0.49945839,
         0.50725825,  0.45872446,  0.45993189,  0.48671092,  0.4587247 ,
         0.45993212,  0.48671099,  0.4587478 ,  0.45995494,  0.48671808,
         0.36925464,  0.39665942,  0.45589877,  0.36925507,  0.39665972,
         0.4558989 ,  0.39324021,  0.41973453,  0.45148489,  0.43334863,
         0.44487705,  0.44699823,  0.43334879,  0.44487712,  0.44699828,
         0.43662048,  0.45247234,  0.45897372,  0.33860778,  0.39140448,
         0.42587786,  0.33860837,  0.39157033,  0.4231349 ,  0.37165177,
         0.39551687,  0.43416741,  0.28393352,  0.2923014 ,  0.40590884,
         0.26260459,  0.29192227,  0.38489991,  0.29430021,  0.33147511,
         0.39738151]),
 'split0_train_score': array([ 0.53443315,  0.53442782,  0.53546957,  0.53443315,  0.53442781,
         0.53546957,  0.53443311,  0.53442778,  0.53546954,  0.57421078,
         0.57615405,  0.57593423,  0.57421078,  0.57615405,  0.57593423,
         0.57421072,  0.576154  ,  0.57593419,  0.59912142,  0.59948991,
         0.59893569,  0.59912141,  0.59948991,  0.59893569,  0.59912131,
         0.59948982,  0.5989356 ,  0.64202307,  0.64048347,  0.63969532,
         0.64202307,  0.64048346,  0.63969532,  0.6420224 ,  0.64048288,
         0.63969489,  0.6761729 ,  0.6750221 ,  0.67121003,  0.67617287,
         0.67502208,  0.67121002,  0.67617103,  0.67502044,  0.67120888,
         0.70817042,  0.70789133,  0.70282594,  0.70817038,  0.7078913 ,
         0.70282591,  0.70872647,  0.70776006,  0.70405758,  0.7386827 ,
         0.73612474,  0.7316606 ,  0.73868265,  0.73612469,  0.73166057,
         0.73821941,  0.7360215 ,  0.73365953,  0.78451879,  0.78412555,
         0.77380276,  0.78451868,  0.78434793,  0.77427044,  0.78843261,
         0.78302777,  0.77361169,  0.83273399,  0.83139538,  0.8192197 ,
         0.83237237,  0.83112358,  0.81774209,  0.83618019,  0.82865958,
         0.81730153]),
 'split1_test_score': array([ 0.46470655,  0.46470555,  0.46333653,  0.46470655,  0.46470555,
         0.46333653,  0.46470654,  0.46470553,  0.46333652,  0.48478241,
         0.48386656,  0.48454846,  0.48478241,  0.48386656,  0.48454846,
         0.4847824 ,  0.48386655,  0.48454846,  0.48402567,  0.48087675,
         0.48347694,  0.48402567,  0.48087675,  0.48347694,  0.48402571,
         0.48087679,  0.48347698,  0.48519849,  0.48725037,  0.48585777,
         0.48519849,  0.48725038,  0.48585777,  0.48519893,  0.48725115,
         0.48585804,  0.46137887,  0.46710904,  0.46621289,  0.46137889,
         0.46710906,  0.4662129 ,  0.45924851,  0.46864208,  0.46621381,
         0.4339561 ,  0.44320544,  0.4411906 ,  0.43395615,  0.44320549,
         0.44119063,  0.43586745,  0.43973808,  0.44418094,  0.43863514,
         0.44714133,  0.45598225,  0.43863526,  0.44714138,  0.45598106,
         0.43312991,  0.44692783,  0.45469225,  0.41004252,  0.41883592,
         0.4313145 ,  0.41004272,  0.41883603,  0.43245912,  0.40508357,
         0.41363066,  0.43391615,  0.38284684,  0.39217874,  0.40497338,
         0.3828471 ,  0.39271342,  0.40856566,  0.38132134,  0.39065795,
         0.41280208]),
 'split1_train_score': array([ 0.54778305,  0.54777428,  0.54748602,  0.54778305,  0.54777428,
         0.54748602,  0.54778302,  0.54777425,  0.54748599,  0.58982963,
         0.5894682 ,  0.59121695,  0.58982963,  0.5894682 ,  0.59121695,
         0.58982957,  0.58946814,  0.5912169 ,  0.61638981,  0.61647412,
         0.61606183,  0.6163898 ,  0.61647412,  0.61606182,  0.61638968,
         0.616474  ,  0.61606172,  0.65788031,  0.65564431,  0.65611787,
         0.6578803 ,  0.6556443 ,  0.65611786,  0.65787959,  0.65564363,
         0.65611746,  0.6912664 ,  0.69166357,  0.68768399,  0.69126638,
         0.69166355,  0.68768397,  0.69057456,  0.69154044,  0.68768289,
         0.72473959,  0.72251788,  0.71870908,  0.72473955,  0.72251785,
         0.71870906,  0.72355782,  0.72394925,  0.71750637,  0.75349928,
         0.75059864,  0.74411467,  0.75349922,  0.75059859,  0.74411464,
         0.75337096,  0.74877678,  0.7494593 ,  0.79506945,  0.79360631,
         0.78468385,  0.79506936,  0.79360623,  0.78372906,  0.79546223,
         0.79504327,  0.79013929,  0.84016911,  0.83991614,  0.82806248,
         0.84016893,  0.83964441,  0.82950846,  0.84232836,  0.84010113,
         0.83015236]),
 'split2_test_score': array([ 0.46484309,  0.46500409,  0.47327525,  0.46484309,  0.46500409,
         0.47327525,  0.46484387,  0.46500484,  0.47327532,  0.42117845,
         0.43325562,  0.45224095,  0.42117846,  0.43325563,  0.45224095,
         0.44052359,  0.43325637,  0.4522413 ,  0.36372474,  0.4303377 ,
         0.43968731,  0.36372487,  0.43033771,  0.43968731,  0.40894615,
         0.43033827,  0.4396875 ,  0.33958412,  0.39833139,  0.45681733,
         0.33958424,  0.39833147,  0.45681735,  0.35422108,  0.39833894,
         0.45681961,  0.14805969,  0.2161761 ,  0.3842582 ,  0.1480603 ,
         0.21617651,  0.3842583 ,  0.19648415,  0.21932699,  0.38085041,
        -0.01923108, -0.03314824,  0.29962022, -0.01923021, -0.03314753,
         0.29962036,  0.0878976 ,  0.04042977,  0.27851188,  0.22944764,
         0.28413355,  0.30340759,  0.22944811,  0.28418975,  0.30340771,
         0.24760812,  0.26663011,  0.2961355 ,  0.07788307,  0.1411464 ,
         0.20663378,  0.07788428,  0.15769938,  0.20663405,  0.06329527,
         0.11292069,  0.18318618, -0.03334283,  0.00998503,  0.12020665,
        -0.03334083,  0.0177761 ,  0.12020701, -0.04884863, -0.03369236,
         0.11648514]),
 'split2_train_score': array([ 0.51481418,  0.51480605,  0.51397251,  0.51481418,  0.51480605,
         0.51397251,  0.51481414,  0.51480601,  0.51397247,  0.54738053,
         0.54773948,  0.54712843,  0.54738053,  0.54773948,  0.54712843,
         0.54756428,  0.5477394 ,  0.54712836,  0.56446877,  0.56455968,
         0.5645646 ,  0.56446876,  0.56455968,  0.5645646 ,  0.56451033,
         0.56455951,  0.56456445,  0.60542074,  0.60544563,  0.60316427,
         0.60542073,  0.60544562,  0.60316426,  0.60591425,  0.6054448 ,
         0.6031637 ,  0.6372629 ,  0.63742369,  0.63379867,  0.63726288,
         0.63742366,  0.63379866,  0.63831952,  0.63818338,  0.63348951,
         0.67204694,  0.67282678,  0.66369865,  0.6720469 ,  0.67282674,
         0.66369862,  0.6717579 ,  0.67095368,  0.665371  ,  0.70843488,
         0.7037952 ,  0.6986969 ,  0.70843481,  0.70379597,  0.69869687,
         0.70817048,  0.70517808,  0.69809023,  0.75440669,  0.75184564,
         0.74537152,  0.75440657,  0.7549692 ,  0.74537145,  0.75675059,
         0.75463619,  0.74398786,  0.80938134,  0.80293733,  0.79532585,
         0.80938117,  0.80736799,  0.79532575,  0.80993121,  0.80856069,
         0.79101705]),
 'std_fit_time': array([  2.99713593e-01,   1.45161885e+00,   8.23305642e-02,
          4.77228656e-02,   1.14599278e-02,   9.10339186e-01,
          1.30985559e+00,   1.28078482e-01,   5.80239798e-02,
          1.49698147e-01,   5.09727484e-01,   1.18158731e+00,
          2.06953935e-01,   2.91895303e-01,   4.73679049e-01,
          1.50929430e-01,   1.85554288e-01,   1.67375845e-01,
          2.09170227e-01,   8.60335677e-01,   4.42216335e+00,
          6.36690827e-01,   1.27684095e+00,   1.18541026e+00,
          5.02413311e-01,   1.13041931e+00,   8.17649112e-01,
          5.49341966e-01,   1.81196674e+00,   2.61398226e+00,
          6.54316157e-02,   4.11409483e-01,   2.34180846e-01,
          4.26086167e-01,   7.82664854e-01,   5.28110505e-01,
          4.24322978e-01,   2.43857535e+00,   8.10478530e-01,
          1.44150804e+00,   1.14971375e-01,   3.76105824e+00,
          1.58316693e+00,   3.27741342e-01,   1.30915286e+00,
          2.47151014e+00,   4.26324758e+00,   1.19374073e+01,
          3.49852112e+00,   3.96843910e+01,   3.01803875e+00,
          3.48924140e-01,   1.71577176e+00,   2.06242688e+00,
          1.95720131e+00,   3.63089410e-01,   1.65008413e-01,
          7.98189421e-01,   8.76426975e-01,   3.07840540e+00,
          7.32344973e-01,   5.56713539e-01,   4.02997792e-01,
          1.24116207e+00,   3.11765142e+00,   8.83974596e-01,
          3.59716483e-01,   1.40035883e+00,   1.41651323e-01,
          2.21443961e+00,   1.79963028e-01,   4.87691295e-01,
          3.64960171e+00,   4.10403747e+00,   1.73023177e+01,
          2.37619574e+00,   1.70580339e+00,   3.39207293e+01,
          2.39839741e+01,   9.43313040e-01,   3.71276827e+01]),
 'std_score_time': array([ 0.03477721,  0.13213481,  0.05241081,  0.01453639,  0.00579445,
         0.00553128,  0.04102249,  0.03178402,  0.01755374,  0.07724467,
         0.06059319,  0.02871818,  0.06942251,  0.04319767,  0.06491631,
         0.05446811,  0.06071537,  0.05083732,  0.03220441,  0.02057478,
         0.07976481,  0.25569367,  0.70822144,  0.28879692,  0.21550601,
         0.0779901 ,  0.1608823 ,  0.01388435,  0.22605973,  0.01499108,
         0.04621139,  0.02562523,  0.04154988,  0.05081106,  0.03163774,
         0.2917878 ,  0.47096391,  0.13466506,  0.52484685,  0.28712639,
         0.2180192 ,  0.20107782,  0.14147027,  0.32799384,  0.47030536,
         0.58448209,  0.30056085,  3.72115841,  0.34811232,  0.21413475,
         0.40288007,  0.1144288 ,  0.18721975,  0.31894268,  0.06309075,
         0.06240809,  0.03923366,  0.13834726,  0.37960482,  0.36731932,
         0.05455984,  0.08971731,  0.02544776,  0.1625441 ,  0.17493712,
         0.11906818,  0.15546693,  0.22220849,  0.16679494,  0.24653544,
         0.11245885,  0.57521304,  1.47198121,  0.54999468,  2.68216352,
         1.82000625,  2.74997058,  1.6299797 ,  0.12690978,  3.29420102,
         0.94589519]),
 'std_test_score': array([ 0.01412848,  0.01409019,  0.01479274,  0.01412848,  0.01409018,
         0.01479274,  0.0141283 ,  0.01409001,  0.01479274,  0.03790315,
         0.0337607 ,  0.02559089,  0.03790315,  0.0337607 ,  0.02559089,
         0.02928957,  0.03376058,  0.02559085,  0.06414422,  0.03415399,
         0.02927053,  0.06414416,  0.034154  ,  0.02927054,  0.04331289,
         0.0341545 ,  0.02927152,  0.07021579,  0.04506931,  0.02067045,
         0.07021576,  0.04506928,  0.02067045,  0.06332399,  0.04506728,
         0.02067004,  0.14707842,  0.11663606,  0.04426348,  0.14707819,
         0.11663592,  0.04426346,  0.12375046,  0.11553513,  0.0458443 ,
         0.20013497,  0.21442792,  0.07045999,  0.20013465,  0.21442766,
         0.07045996,  0.15496745,  0.18370247,  0.07987438,  0.09738981,
         0.0763145 ,  0.06990312,  0.09738965,  0.07628804,  0.06990277,
         0.08829004,  0.08632969,  0.07577368,  0.14275472,  0.12494135,
         0.10465762,  0.14275431,  0.11720419,  0.10432669,  0.1538472 ,
         0.13768531,  0.11825451,  0.17753308,  0.16184699,  0.13446136,
         0.17488229,  0.15842733,  0.13071305,  0.18570287,  0.18765307,
         0.13619605]),
 'std_train_score': array([ 0.01354036,  0.01354025,  0.01386312,  0.01354036,  0.01354025,
         0.01386312,  0.01354036,  0.01354025,  0.01386312,  0.01753009,
         0.01740351,  0.01827911,  0.01753009,  0.01740351,  0.01827911,
         0.01744941,  0.01740352,  0.01827912,  0.02158909,  0.02161196,
         0.02141298,  0.02158909,  0.02161196,  0.02141298,  0.02157051,
         0.02161199,  0.021413  ,  0.02196763,  0.02102223,  0.02213168,
         0.02196763,  0.02102223,  0.02213168,  0.0217451 ,  0.02102231,
         0.02213175,  0.02275029,  0.0226876 ,  0.02254533,  0.02275029,
         0.0226876 ,  0.02254533,  0.02203731,  0.0223031 ,  0.02268398,
         0.02199989,  0.02085043,  0.02311653,  0.0219999 ,  0.02085043,
         0.02311653,  0.02178143,  0.02217438,  0.02209982,  0.01875355,
         0.01956543,  0.01916154,  0.01875356,  0.01956507,  0.01916155,
         0.01878413,  0.01830258,  0.02148278,  0.0172289 ,  0.01787559,
         0.01657372,  0.01722891,  0.01647101,  0.01631606,  0.01683831,
         0.01694168,  0.01909243,  0.01311706,  0.01581102,  0.0138275 ,
         0.01306934,  0.01365731,  0.014179  ,  0.01404906,  0.01303702,
         0.01628766])}



In [41]:

    
result.best_estimator_









    Out[41]:





XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0,
       learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=250, nthread=-1,
       objective='reg:linear', reg_alpha=0.1, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)



In [40]:

    
grid_search.score(X_va_scaled, y_va)









    Out[40]:





0.52178018832808326

Try XGBoost on non-scaled data

Turns out this works better



In [42]:

    
xgb = XGBRegressor(n_estimators=250, reg_alpha=0.1)



In [43]:

    
xgb.fit(X_train, y_train)









    Out[43]:





XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0,
       learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=250, nthread=-1,
       objective='reg:linear', reg_alpha=0.1, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)



In [44]:

    
xgb.score(X_va, y_va)









    Out[44]:





0.54909280212343337



In [46]:

    
y_pr = xgb.predict(X_va)



In [47]:

    
y_xgb_resids = pd.DataFrame(dict(zip(['Gen Change (MW)', 'y_pr', 'cluster'],
                               [y_va.values, y_pr, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values])))



In [50]:

    
y_xgb_resids.loc[:,'residuals'] = y_xgb_resids.loc[:,'y_pr'] - y_xgb_resids.loc[:,'Gen Change (MW)']



In [51]:

    
g = sns.FacetGrid(y_xgb_resids, hue='cluster', col='cluster',
                  col_wrap=3)
g.map(plt.scatter, 'y_pr', 'residuals')
g.add_legend()









    Out[51]:





<seaborn.axisgrid.FacetGrid at 0x115d8ea90>






    Out[51]:





<seaborn.axisgrid.FacetGrid at 0x115d8ea90>



In [52]:

    
y_xgb_resids.describe()









    Out[52]:






  
    
      
      Gen Change (MW)
      cluster
      y_pr
      residuals
    
  
  
    
      count
      105264.000000
      105264.000000
      105264.000000
      105264.000000
    
    
      mean
      0.072684
      2.500000
      -4.463920
      -4.536561
    
    
      std
      252.875892
      1.707833
      179.048598
      169.744599
    
    
      min
      -2236.000000
      0.000000
      -1508.438721
      -1661.269684
    
    
      25%
      -54.000000
      1.000000
      -71.290184
      -66.515215
    
    
      50%
      0.000000
      2.500000
      3.031837
      -1.428239
    
    
      75%
      55.000000
      4.000000
      62.890798
      58.265282
    
    
      max
      3104.000000
      5.000000
      1829.738892
      1646.675659

Try ratio of fuel prices



In [54]:

    
X_train.columns









    Out[54]:





Index([u'nameplate_capacity', u'GROSS LOAD (MW)', u'ERCOT Load, MW',
       u'Total Wind Installed, MW', u'Total Wind Output, MW',
       u'Net Load Change (MW)', u'NG Price ($/mcf)', u'All coal', u'Lignite',
       u'Subbituminous', u'cluster_0', u'cluster_1', u'cluster_2',
       u'cluster_3', u'cluster_4', u'cluster_5'],
      dtype='object')



In [66]:

    
X_train_ratio = X_train.copy()
X_va_ratio = X_va.copy()
for fuel in ['All coal', 'Lignite', 'Subbituminous']:
    X_train_ratio.loc[:,fuel] = X_train_ratio.loc[:,fuel].values/X_train_ratio.loc[:,'NG Price ($/mcf)'].values
    X_va_ratio.loc[:,fuel] = X_va.loc[:,fuel]/X_va.loc[:,'NG Price ($/mcf)']
    
X_train_ratio.drop('NG Price ($/mcf)', axis=1, inplace=True)
X_va_ratio.drop('NG Price ($/mcf)', axis=1, inplace=True)



In [67]:

    
X_train.head()









    Out[67]:






  
    
      
      nameplate_capacity
      GROSS LOAD (MW)
      ERCOT Load, MW
      Total Wind Installed, MW
      Total Wind Output, MW
      Net Load Change (MW)
      NG Price ($/mcf)
      All coal
      Lignite
      Subbituminous
      cluster_0
      cluster_1
      cluster_2
      cluster_3
      cluster_4
      cluster_5
    
  
  
    
      0
      5949.0
      4596.0
      30428.0
      2790.0
      1074.0
      0.0
      6.42
      25.1475
      20.0275
      28.115
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      5949.0
      4566.0
      30133.0
      2790.0
      922.6
      -143.6
      6.42
      25.1475
      20.0275
      28.115
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      5949.0
      4667.0
      29941.0
      2790.0
      849.2
      -118.6
      6.42
      25.1475
      20.0275
      28.115
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      3
      5949.0
      4668.0
      29949.0
      2790.0
      1056.3
      -199.1
      6.42
      25.1475
      20.0275
      28.115
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      4
      5949.0
      4685.0
      30248.0
      2790.0
      837.1
      518.2
      6.42
      25.1475
      20.0275
      28.115
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0



In [68]:

    
X_train_ratio.head()









    Out[68]:






  
    
      
      nameplate_capacity
      GROSS LOAD (MW)
      ERCOT Load, MW
      Total Wind Installed, MW
      Total Wind Output, MW
      Net Load Change (MW)
      All coal
      Lignite
      Subbituminous
      cluster_0
      cluster_1
      cluster_2
      cluster_3
      cluster_4
      cluster_5
    
  
  
    
      0
      5949.0
      4596.0
      30428.0
      2790.0
      1074.0
      0.0
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      5949.0
      4566.0
      30133.0
      2790.0
      922.6
      -143.6
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      5949.0
      4667.0
      29941.0
      2790.0
      849.2
      -118.6
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      3
      5949.0
      4668.0
      29949.0
      2790.0
      1056.3
      -199.1
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      4
      5949.0
      4685.0
      30248.0
      2790.0
      837.1
      518.2
      3.917056
      3.119548
      4.379283
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0



In [71]:

    
xgb_ratio = XGBRegressor(n_estimators=250, reg_alpha=0.1)



In [72]:

    
xgb_ratio.fit(X_train_ratio, y_train)









    Out[72]:





XGBRegressor(base_score=0.5, colsample_bylevel=1, colsample_bytree=1, gamma=0,
       learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=250, nthread=-1,
       objective='reg:linear', reg_alpha=0.1, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)



In [73]:

    
xgb_ratio.score(X_va_ratio, y_va)









    Out[73]:





0.55063694186589407



In [75]:

    
y_pr = xgb_ratio.predict(X_va_ratio)



In [76]:

    
y_xgb_resids = pd.DataFrame(dict(zip(['Gen Change (MW)', 'y_pr', 'cluster'],
                               [y_va.values, y_pr, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values])))



In [77]:

    
y_xgb_resids.loc[:,'residuals'] = y_xgb_resids.loc[:,'y_pr'] - y_xgb_resids.loc[:,'Gen Change (MW)']



In [78]:

    
g = sns.FacetGrid(y_xgb_resids, hue='cluster', col='cluster',
                  col_wrap=3)
g.map(plt.scatter, 'y_pr', 'residuals')
g.add_legend()









    Out[78]:





<seaborn.axisgrid.FacetGrid at 0x11632a810>






    Out[78]:





<seaborn.axisgrid.FacetGrid at 0x11632a810>



In [79]:

    
y_xgb_resids.describe()









    Out[79]:






  
    
      
      Gen Change (MW)
      cluster
      y_pr
      residuals
    
  
  
    
      count
      105264.000000
      105264.000000
      105264.000000
      105264.000000
    
    
      mean
      0.072684
      2.500000
      2.518387
      2.445684
    
    
      std
      252.875892
      1.707833
      181.456628
      169.496567
    
    
      min
      -2236.000000
      0.000000
      -1515.943848
      -1643.027626
    
    
      25%
      -54.000000
      1.000000
      -64.903372
      -60.947704
    
    
      50%
      0.000000
      2.500000
      9.577057
      4.558166
    
    
      75%
      55.000000
      4.000000
      69.535152
      64.237963
    
    
      max
      3104.000000
      5.000000
      1779.647217
      1636.885956



In [81]:

    
from xgboost import plot_importance



In [82]:

    
plot_importance(xgb_ratio)









    Out[82]:





<matplotlib.axes._subplots.AxesSubplot at 0x11b42b790>



In [ ]:

Linear Regression (OLS) with ratio of fuel prices

Slight improvement from first OLS try, but still nowhere near as good as gradient boosting



In [83]:

    
lm = LinearRegression(normalize=True)
lm.fit(X_train_ratio, y_train)









    Out[83]:





LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=True)



In [84]:

    
lm.score(X_va_ratio, y_va)









    Out[84]:





0.26965150769180002



In [24]:

    
y_pr = lm.predict(X_va_scaled)



In [41]:

    
y_va.values.shape, y_pr.shape, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values.shape









    Out[41]:





((105266,), (105264,), (105264,))



In [25]:

    
y_lm_resids = pd.DataFrame(dict(zip(['Gen Change (MW)', 'y_pr', 'cluster'],
                               [y_va.values, y_pr, X.loc[X['Year'].isin([2012, 2013]),'cluster'].values])))
# y_lm_resids['y_pr'] = y_pr
# y_lm_resids['cluster'] = X.loc[:,'cluster']



In [26]:

    
y_lm_resids.head()









    Out[26]:






  
    
      
      Gen Change (MW)
      cluster
      y_pr
    
  
  
    
      0
      0.0
      0
      -61.176003
    
    
      1
      1.0
      0
      -20.166237
    
    
      2
      -1.0
      0
      -17.027565
    
    
      3
      0.0
      0
      -14.052956
    
    
      4
      0.0
      0
      21.281029



In [27]:

    
y_lm_resids.loc[:,'residuals'] = y_lm_resids.loc[:,'y_pr'] - y_lm_resids.loc[:,'Gen Change (MW)']



In [29]:

    
g = sns.FacetGrid(y_lm_resids, hue='cluster', col='cluster',
                  col_wrap=3)
g.map(plt.scatter, 'Gen Change (MW)', 'residuals')
g.add_legend()









    Out[29]:





<seaborn.axisgrid.FacetGrid at 0x116244110>






    Out[29]:





<seaborn.axisgrid.FacetGrid at 0x116244110>



In [ ]:

	Year	nameplate_capacity	DATETIME	GROSS LOAD (MW)	ERCOT Load, MW	Total Wind Installed, MW	Total Wind Output, MW	Wind Output, % of Installed	Wind Output, % of Load	1-hr MW change	1-hr % change	Net Load (MW)	Net Load Change (MW)	Month	NG Price ($/mcf)	All coal	Lignite	Subbituminous
0	2007	13263.0	2007-01-01 00:00:00	670.0	30428.0	2790.0	1074.0	38.494624	3.529644	NaN	NaN	29354.0	NaN	1	6.42	25.1475	20.0275	28.115
1	2007	13263.0	2007-01-01 01:00:00	492.0	30133.0	2790.0	922.6	33.068100	3.061760	-151.4	-14.096834	29210.4	-143.6	1	6.42	25.1475	20.0275	28.115
2	2007	13263.0	2007-01-01 02:00:00	461.0	29941.0	2790.0	849.2	30.437276	2.836245	-73.4	-7.955777	29091.8	-118.6	1	6.42	25.1475	20.0275	28.115
3	2007	13263.0	2007-01-01 03:00:00	417.0	29949.0	2790.0	1056.3	37.860215	3.526996	207.1	24.387659	28892.7	-199.1	1	6.42	25.1475	20.0275	28.115
4	2007	13263.0	2007-01-01 04:00:00	677.0	30248.0	2790.0	837.1	30.003584	2.767456	-219.2	-20.751680	29410.9	518.2	1	6.42	25.1475	20.0275	28.115

	cluster	Year	nameplate_capacity	DATETIME	GROSS LOAD (MW)	ERCOT Load, MW	Total Wind Installed, MW	Total Wind Output, MW	Wind Output, % of Installed	Wind Output, % of Load	...	Month	All coal	Lignite	Subbituminous	cluster_5
473329	5	2015	10014.8	2015-12-31 19:00:00	3625.0	39908.77734	16170.0	3824.932373	23.654498	9.584188	...	12	12.793722	10.780269	14.596413	1.0
473330	5	2015	10014.8	2015-12-31 20:00:00	3319.0	38736.85938	16170.0	4625.632813	28.606264	11.941166	...	12	12.793722	10.780269	14.596413	1.0
473331	5	2015	10014.8	2015-12-31 21:00:00	3332.0	37587.70313	16170.0	4957.714844	30.659956	13.189725	...	12	12.793722	10.780269	14.596413	1.0
473332	5	2015	10014.8	2015-12-31 22:00:00	3214.0	36356.26172	16170.0	4699.097656	29.060592	12.925140	...	12	12.793722	10.780269	14.596413	1.0
473333	5	2015	10014.8	2015-12-31 23:00:00	3173.0	35150.33984	16170.0	4313.125000	26.673624	12.270507	...	12	12.793722	10.780269	14.596413	1.0

	Gen Change (MW)	y_pr
0	0.0	-61.176003
1	1.0	-20.166237
2	-1.0	-17.027565
3	0.0	-14.052956
4	0.0	21.281029

	Gen Change (MW)	cluster	y_pr	residuals
count	105264.000000	105264.000000	105264.000000	105264.000000
mean	0.072684	2.500000	-4.463920	-4.536561
std	252.875892	1.707833	179.048598	169.744599
min	-2236.000000	0.000000	-1508.438721	-1661.269684
25%	-54.000000	1.000000	-71.290184	-66.515215
50%	0.000000	2.500000	3.031837	-1.428239
75%	55.000000	4.000000	62.890798	58.265282
max	3104.000000	5.000000	1829.738892	1646.675659

	nameplate_capacity	GROSS LOAD (MW)	ERCOT Load, MW	Total Wind Installed, MW	Total Wind Output, MW	Net Load Change (MW)	NG Price ($/mcf)	All coal	Lignite	Subbituminous	cluster_0
0	5949.0	4596.0	30428.0	2790.0	1074.0	0.0	6.42	25.1475	20.0275	28.115	1.0
1	5949.0	4566.0	30133.0	2790.0	922.6	-143.6	6.42	25.1475	20.0275	28.115	1.0
2	5949.0	4667.0	29941.0	2790.0	849.2	-118.6	6.42	25.1475	20.0275	28.115	1.0
3	5949.0	4668.0	29949.0	2790.0	1056.3	-199.1	6.42	25.1475	20.0275	28.115	1.0
4	5949.0	4685.0	30248.0	2790.0	837.1	518.2	6.42	25.1475	20.0275	28.115	1.0