BayesSearchCV

  • skopt
  • pip3 install scikit-optimize

BayesSearchCV implements a "fit" and a "score" method. It also implements "predict", "predict_proba", "decision_function", "transform" and "inverse_transform" if they are implemented in the estimator used.

The parameters of the estimator used to apply these methods are optimized by cross-validated search over parameter settings.

In contrast to GridSearchCV, not all parameter values are tried out, but rather a fixed number of parameter settings is sampled from the specified distributions. The number of parameter settings that are tried is given by n_iter.(n_iter!)


In [16]:
import pandas as pd
import numpy as np
import xgboost as xgb
import lightgbm as lgb
from skopt import BayesSearchCV
from sklearn.model_selection import StratifiedKFold, KFold

In [2]:
%config InlineBackend.figure_format = 'retina'

In [3]:
ITERATIONS = 10 # 1000
TRAINING_SIZE = 100000 # 20000000
TEST_SIZE = 25000

# Load data
X = pd.read_csv(
    './data/train_sample.csv', 
    nrows=TRAINING_SIZE,
    parse_dates=['click_time']
)

# Split into X and y
y = X['is_attributed']
X = X.drop(['click_time','is_attributed', 'attributed_time'], axis=1)

XGBoost


In [4]:
# Classifier
bayes_cv_tuner = BayesSearchCV(
    estimator = xgb.XGBClassifier(
        n_jobs = 1,
        objective = 'binary:logistic',
        eval_metric = 'auc',
        silent=1,
        tree_method='approx'
    ),
    search_spaces = {
        'learning_rate': (0.01, 1.0, 'log-uniform'),
        'min_child_weight': (0, 10),
        'max_depth': (0, 50),
        'max_delta_step': (0, 20),
        'subsample': (0.01, 1.0, 'uniform'),
        'colsample_bytree': (0.01, 1.0, 'uniform'),
        'colsample_bylevel': (0.01, 1.0, 'uniform'),
        'reg_lambda': (1e-9, 1000, 'log-uniform'),
        'reg_alpha': (1e-9, 1.0, 'log-uniform'),
        'gamma': (1e-9, 0.5, 'log-uniform'),
        'min_child_weight': (0, 5),
        'n_estimators': (50, 100),
        'scale_pos_weight': (1e-6, 500, 'log-uniform')
    },    
    scoring = 'roc_auc',
    cv = StratifiedKFold(
        n_splits=3,
        shuffle=True,
        random_state=42
    ),
    n_jobs = 3,
    n_iter = ITERATIONS,   
    verbose = 0,
    refit = True,
    random_state = 42
)

def status_print(optim_result):
    """Status callback durring bayesian hyperparameter search"""
    
    # Get all the models tested so far in DataFrame format
    all_models = pd.DataFrame(bayes_cv_tuner.cv_results_)    
    
    # Get current parameters and the best parameters    
    best_params = pd.Series(bayes_cv_tuner.best_params_)
    print('Model #{}\nBest ROC-AUC: {}\nBest params: {}\n'.format(
        len(all_models),
        np.round(bayes_cv_tuner.best_score_, 4),
        bayes_cv_tuner.best_params_
    ))
    
    # Save all model results
    clf_name = bayes_cv_tuner.estimator.__class__.__name__
    all_models.to_csv(clf_name+"_cv_results.csv")

In [5]:
xgb_result = bayes_cv_tuner.fit(X.values, y.values, callback=status_print)


Model #1
Best ROC-AUC: 0.5
Best params: {'colsample_bylevel': 0.4160029192647807, 'colsample_bytree': 0.7304484857455519, 'gamma': 0.13031389926541354, 'learning_rate': 0.042815319280763466, 'max_delta_step': 13, 'max_depth': 21, 'min_child_weight': 2, 'n_estimators': 87, 'reg_alpha': 5.497557739289786e-07, 'reg_lambda': 0.05936070635912049, 'scale_pos_weight': 0.060830282487222144, 'subsample': 0.13556548021189216}

Model #2
Best ROC-AUC: 0.9279
Best params: {'colsample_bylevel': 0.8390144719977516, 'colsample_bytree': 0.8844821246070537, 'gamma': 4.358684608480795e-07, 'learning_rate': 0.7988179462781242, 'max_delta_step': 17, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 68, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'scale_pos_weight': 0.3016410771843142, 'subsample': 0.9923710598637134}

Model #3
Best ROC-AUC: 0.9279
Best params: {'colsample_bylevel': 0.8390144719977516, 'colsample_bytree': 0.8844821246070537, 'gamma': 4.358684608480795e-07, 'learning_rate': 0.7988179462781242, 'max_delta_step': 17, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 68, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'scale_pos_weight': 0.3016410771843142, 'subsample': 0.9923710598637134}

Model #4
Best ROC-AUC: 0.9279
Best params: {'colsample_bylevel': 0.8390144719977516, 'colsample_bytree': 0.8844821246070537, 'gamma': 4.358684608480795e-07, 'learning_rate': 0.7988179462781242, 'max_delta_step': 17, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 68, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'scale_pos_weight': 0.3016410771843142, 'subsample': 0.9923710598637134}

Model #5
Best ROC-AUC: 0.9279
Best params: {'colsample_bylevel': 0.8390144719977516, 'colsample_bytree': 0.8844821246070537, 'gamma': 4.358684608480795e-07, 'learning_rate': 0.7988179462781242, 'max_delta_step': 17, 'max_depth': 3, 'min_child_weight': 1, 'n_estimators': 68, 'reg_alpha': 0.0005266983003701547, 'reg_lambda': 276.5424475574225, 'scale_pos_weight': 0.3016410771843142, 'subsample': 0.9923710598637134}

Model #6
Best ROC-AUC: 0.9438
Best params: {'colsample_bylevel': 0.7366877378057127, 'colsample_bytree': 0.9399760402267441, 'gamma': 2.6498051478267012e-08, 'learning_rate': 0.0238149998729586, 'max_delta_step': 16, 'max_depth': 19, 'min_child_weight': 2, 'n_estimators': 77, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536}

Model #7
Best ROC-AUC: 0.9438
Best params: {'colsample_bylevel': 0.7366877378057127, 'colsample_bytree': 0.9399760402267441, 'gamma': 2.6498051478267012e-08, 'learning_rate': 0.0238149998729586, 'max_delta_step': 16, 'max_depth': 19, 'min_child_weight': 2, 'n_estimators': 77, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536}

Model #8
Best ROC-AUC: 0.9438
Best params: {'colsample_bylevel': 0.7366877378057127, 'colsample_bytree': 0.9399760402267441, 'gamma': 2.6498051478267012e-08, 'learning_rate': 0.0238149998729586, 'max_delta_step': 16, 'max_depth': 19, 'min_child_weight': 2, 'n_estimators': 77, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536}

Model #9
Best ROC-AUC: 0.9438
Best params: {'colsample_bylevel': 0.7366877378057127, 'colsample_bytree': 0.9399760402267441, 'gamma': 2.6498051478267012e-08, 'learning_rate': 0.0238149998729586, 'max_delta_step': 16, 'max_depth': 19, 'min_child_weight': 2, 'n_estimators': 77, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536}

Model #10
Best ROC-AUC: 0.9438
Best params: {'colsample_bylevel': 0.7366877378057127, 'colsample_bytree': 0.9399760402267441, 'gamma': 2.6498051478267012e-08, 'learning_rate': 0.0238149998729586, 'max_delta_step': 16, 'max_depth': 19, 'min_child_weight': 2, 'n_estimators': 77, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536}


In [6]:
xgb_result.best_score_


Out[6]:
0.9437820084781707

In [7]:
xgb_result.best_params_


Out[7]:
{'colsample_bylevel': 0.7366877378057127,
 'colsample_bytree': 0.9399760402267441,
 'gamma': 2.6498051478267012e-08,
 'learning_rate': 0.0238149998729586,
 'max_delta_step': 16,
 'max_depth': 19,
 'min_child_weight': 2,
 'n_estimators': 77,
 'reg_alpha': 0.011683028450342707,
 'reg_lambda': 0.0048879464985534336,
 'scale_pos_weight': 0.13267482411031659,
 'subsample': 0.5689543694097536}

In [8]:
xgb_result.best_estimator_


Out[8]:
XGBClassifier(base_score=0.5, booster='gbtree',
       colsample_bylevel=0.7366877378057127,
       colsample_bytree=0.9399760402267441, eval_metric='auc',
       gamma=2.6498051478267012e-08, learning_rate=0.0238149998729586,
       max_delta_step=16, max_depth=19, min_child_weight=2, missing=None,
       n_estimators=77, n_jobs=1, nthread=None,
       objective='binary:logistic', random_state=0,
       reg_alpha=0.011683028450342707, reg_lambda=0.0048879464985534336,
       scale_pos_weight=0.13267482411031659, seed=None, silent=1,
       subsample=0.5689543694097536, tree_method='approx')

In [9]:
new_model = xgb_result.best_estimator_

In [10]:
xgb.plot_importance(new_model);

In [11]:
xgb_result.cv_results_


Out[11]:
defaultdict(list,
            {'split0_test_score': [0.5,
              0.942736769309165,
              0.9036215267557312,
              0.7664768429281755,
              0.9149290554548015,
              0.9508199056182765,
              0.970266156777475,
              0.5,
              0.9619782814423754,
              0.8688657418397158],
             'split1_test_score': [0.5,
              0.9204035989757906,
              0.9202405990169361,
              0.8314109624593686,
              0.8862962532164799,
              0.9419943677975383,
              0.9252757943478577,
              0.5,
              0.9345857822890258,
              0.9155486135508354],
             'split2_test_score': [0.5,
              0.9205941606278379,
              0.8919333273195618,
              0.7698788225035331,
              0.8939457357348328,
              0.9385314369907087,
              0.933384249932345,
              0.5,
              0.9272590231630433,
              0.8850760240951778],
             'mean_test_score': [0.5,
              0.9279116559845781,
              0.9052654176672172,
              0.7892559301647617,
              0.8983904370276193,
              0.9437820084781707,
              0.9429755921755676,
              0.5,
              0.9412746426049309,
              0.889830221577291],
             'std_test_score': [0.0,
              0.0104833840703361,
              0.011614653047009727,
              0.02984089344163704,
              0.012104519222291919,
              0.005173525816300345,
              0.019579488305032023,
              0.0,
              0.014942341537875773,
              0.01935259332214665],
             'rank_test_score': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
             'mean_fit_time': [0.34290297826131183,
              0.754174550374349,
              0.6671676635742188,
              0.40001877148946124,
              0.37077172597249347,
              1.405093749364217,
              1.6076428890228271,
              0.26987171173095703,
              1.0809760093688965,
              0.30988844235738117],
             'std_fit_time': [0.0034053772387067456,
              0.00894768554778718,
              0.010761718229036845,
              0.003188315515353434,
              0.003918948230922813,
              0.01365679822911968,
              0.03164965717137351,
              0.004066227056245113,
              0.05171909487097653,
              0.0179053241838573],
             'mean_score_time': [0.013190587361653646,
              0.03159968058268229,
              0.020743608474731445,
              0.015934228897094727,
              0.014749368031819662,
              0.0346372922261556,
              0.13779433568318686,
              0.009719451268513998,
              0.03555425008138021,
              0.01338354746500651],
             'std_score_time': [0.0013884540400461664,
              0.0005411997531292687,
              0.0013822142703546138,
              0.0005896041167790662,
              0.0003052665613508982,
              0.001916110975315102,
              0.003969088228005105,
              0.00019924950654634242,
              0.003008662813512281,
              0.0006047749486508725],
             'param_colsample_bylevel': [0.4160029192647807,
              0.8390144719977516,
              0.4503841871781403,
              0.8142720284737898,
              0.8015579071911014,
              0.7366877378057127,
              0.6209085649172932,
              0.5479690370134094,
              0.955923206446829,
              0.013594004182195795],
             'param_colsample_bytree': [0.7304484857455519,
              0.8844821246070537,
              0.9195352964526833,
              0.1801528457825951,
              0.44364889457651413,
              0.9399760402267441,
              0.7776107350396038,
              0.9208091341729433,
              0.7036152301751524,
              0.819651719467114],
             'param_gamma': [0.13031389926541354,
              4.358684608480795e-07,
              8.168958221061441e-09,
              0.00015936523535755285,
              3.811128976537413e-05,
              2.6498051478267012e-08,
              1.3277909848852635e-06,
              2.083286323303108e-05,
              0.03823613443879595,
              0.002807995180059625],
             'param_learning_rate': [0.042815319280763466,
              0.7988179462781242,
              0.07356404539935663,
              0.4032083917998946,
              0.2700390206185342,
              0.0238149998729586,
              0.5605967693796124,
              0.4734922490673386,
              0.06786442521779147,
              0.03229300915669146],
             'param_max_delta_step': [13, 17, 4, 10, 18, 16, 12, 6, 8, 12],
             'param_max_depth': [21, 3, 23, 5, 36, 19, 30, 3, 11, 14],
             'param_min_child_weight': [2, 1, 1, 4, 2, 2, 3, 3, 0, 2],
             'param_n_estimators': [87, 68, 88, 94, 83, 77, 71, 51, 69, 58],
             'param_reg_alpha': [5.497557739289786e-07,
              0.0005266983003701547,
              0.00010376808625045426,
              0.1611980387486336,
              1.5057560255472018e-06,
              0.011683028450342707,
              0.004026635957416632,
              2.9618722230360503e-06,
              0.00022356829889037284,
              0.11080071157037095],
             'param_reg_lambda': [0.05936070635912049,
              276.5424475574225,
              476.96194787286544,
              4.3806965488564525e-05,
              0.08186810622382998,
              0.0048879464985534336,
              0.040887904512512056,
              8.153638964242,
              1.2908532337409298e-07,
              5.745523087821567],
             'param_scale_pos_weight': [0.060830282487222144,
              0.3016410771843142,
              1.3165669602830552,
              0.0009365503147654213,
              0.029004593634154585,
              0.13267482411031659,
              109.72255122430063,
              0.0015718563651880596,
              4.73588486119117,
              3.573713830065675],
             'param_subsample': [0.13556548021189216,
              0.9923710598637134,
              0.387658500562527,
              0.8391548832503206,
              0.8835665823899177,
              0.5689543694097536,
              0.6612742297240571,
              0.577028860872224,
              0.4499578015509351,
              0.029649078936835577],
             'params': [{'colsample_bylevel': 0.4160029192647807,
               'colsample_bytree': 0.7304484857455519,
               'gamma': 0.13031389926541354,
               'learning_rate': 0.042815319280763466,
               'max_delta_step': 13,
               'max_depth': 21,
               'min_child_weight': 2,
               'n_estimators': 87,
               'reg_alpha': 5.497557739289786e-07,
               'reg_lambda': 0.05936070635912049,
               'scale_pos_weight': 0.060830282487222144,
               'subsample': 0.13556548021189216},
              {'colsample_bylevel': 0.8390144719977516,
               'colsample_bytree': 0.8844821246070537,
               'gamma': 4.358684608480795e-07,
               'learning_rate': 0.7988179462781242,
               'max_delta_step': 17,
               'max_depth': 3,
               'min_child_weight': 1,
               'n_estimators': 68,
               'reg_alpha': 0.0005266983003701547,
               'reg_lambda': 276.5424475574225,
               'scale_pos_weight': 0.3016410771843142,
               'subsample': 0.9923710598637134},
              {'colsample_bylevel': 0.4503841871781403,
               'colsample_bytree': 0.9195352964526833,
               'gamma': 8.168958221061441e-09,
               'learning_rate': 0.07356404539935663,
               'max_delta_step': 4,
               'max_depth': 23,
               'min_child_weight': 1,
               'n_estimators': 88,
               'reg_alpha': 0.00010376808625045426,
               'reg_lambda': 476.96194787286544,
               'scale_pos_weight': 1.3165669602830552,
               'subsample': 0.387658500562527},
              {'colsample_bylevel': 0.8142720284737898,
               'colsample_bytree': 0.1801528457825951,
               'gamma': 0.00015936523535755285,
               'learning_rate': 0.4032083917998946,
               'max_delta_step': 10,
               'max_depth': 5,
               'min_child_weight': 4,
               'n_estimators': 94,
               'reg_alpha': 0.1611980387486336,
               'reg_lambda': 4.3806965488564525e-05,
               'scale_pos_weight': 0.0009365503147654213,
               'subsample': 0.8391548832503206},
              {'colsample_bylevel': 0.8015579071911014,
               'colsample_bytree': 0.44364889457651413,
               'gamma': 3.811128976537413e-05,
               'learning_rate': 0.2700390206185342,
               'max_delta_step': 18,
               'max_depth': 36,
               'min_child_weight': 2,
               'n_estimators': 83,
               'reg_alpha': 1.5057560255472018e-06,
               'reg_lambda': 0.08186810622382998,
               'scale_pos_weight': 0.029004593634154585,
               'subsample': 0.8835665823899177},
              {'colsample_bylevel': 0.7366877378057127,
               'colsample_bytree': 0.9399760402267441,
               'gamma': 2.6498051478267012e-08,
               'learning_rate': 0.0238149998729586,
               'max_delta_step': 16,
               'max_depth': 19,
               'min_child_weight': 2,
               'n_estimators': 77,
               'reg_alpha': 0.011683028450342707,
               'reg_lambda': 0.0048879464985534336,
               'scale_pos_weight': 0.13267482411031659,
               'subsample': 0.5689543694097536},
              {'colsample_bylevel': 0.6209085649172932,
               'colsample_bytree': 0.7776107350396038,
               'gamma': 1.3277909848852635e-06,
               'learning_rate': 0.5605967693796124,
               'max_delta_step': 12,
               'max_depth': 30,
               'min_child_weight': 3,
               'n_estimators': 71,
               'reg_alpha': 0.004026635957416632,
               'reg_lambda': 0.040887904512512056,
               'scale_pos_weight': 109.72255122430063,
               'subsample': 0.6612742297240571},
              {'colsample_bylevel': 0.5479690370134094,
               'colsample_bytree': 0.9208091341729433,
               'gamma': 2.083286323303108e-05,
               'learning_rate': 0.4734922490673386,
               'max_delta_step': 6,
               'max_depth': 3,
               'min_child_weight': 3,
               'n_estimators': 51,
               'reg_alpha': 2.9618722230360503e-06,
               'reg_lambda': 8.153638964242,
               'scale_pos_weight': 0.0015718563651880596,
               'subsample': 0.577028860872224},
              {'colsample_bylevel': 0.955923206446829,
               'colsample_bytree': 0.7036152301751524,
               'gamma': 0.03823613443879595,
               'learning_rate': 0.06786442521779147,
               'max_delta_step': 8,
               'max_depth': 11,
               'min_child_weight': 0,
               'n_estimators': 69,
               'reg_alpha': 0.00022356829889037284,
               'reg_lambda': 1.2908532337409298e-07,
               'scale_pos_weight': 4.73588486119117,
               'subsample': 0.4499578015509351},
              {'colsample_bylevel': 0.013594004182195795,
               'colsample_bytree': 0.819651719467114,
               'gamma': 0.002807995180059625,
               'learning_rate': 0.03229300915669146,
               'max_delta_step': 12,
               'max_depth': 14,
               'min_child_weight': 2,
               'n_estimators': 58,
               'reg_alpha': 0.11080071157037095,
               'reg_lambda': 5.745523087821567,
               'scale_pos_weight': 3.573713830065675,
               'subsample': 0.029649078936835577}]})

LightGBM


In [12]:
bayes_cv_tuner = BayesSearchCV(
    estimator = lgb.LGBMRegressor(
        objective='binary',
        metric='auc',
        n_jobs=1,
        verbose=0
    ),
    search_spaces = {
        'learning_rate': (0.01, 1.0, 'log-uniform'),
        'num_leaves': (1, 100),      
        'max_depth': (0, 50),
        'min_child_samples': (0, 50),
        'max_bin': (100, 1000),
        'subsample': (0.01, 1.0, 'uniform'),
        'subsample_freq': (0, 10),
        'colsample_bytree': (0.01, 1.0, 'uniform'),
        'min_child_weight': (0, 10),
        'subsample_for_bin': (100000, 500000),
        'reg_lambda': (1e-9, 1000, 'log-uniform'),
        'reg_alpha': (1e-9, 1.0, 'log-uniform'),
        'scale_pos_weight': (1e-6, 500, 'log-uniform'),
        'n_estimators': (50, 100),
    },    
    scoring = 'roc_auc',
    cv = StratifiedKFold(
        n_splits=3,
        shuffle=True,
        random_state=42
    ),
    n_jobs = 3,
    n_iter = ITERATIONS,   
    verbose = 0,
    refit = True,
    random_state = 42
)

# Fit the model
lgbm_result = bayes_cv_tuner.fit(X.values, y.values, callback=status_print)


Model #1
Best ROC-AUC: 0.5
Best params: {'colsample_bytree': 0.4160029192647807, 'learning_rate': 0.28539836866041823, 'max_bin': 940, 'max_depth': 16, 'min_child_samples': 34, 'min_child_weight': 4, 'n_estimators': 68, 'num_leaves': 74, 'reg_alpha': 5.497557739289786e-07, 'reg_lambda': 0.05936070635912049, 'scale_pos_weight': 0.060830282487222144, 'subsample': 0.13556548021189216, 'subsample_for_bin': 171234, 'subsample_freq': 6}

Model #2
Best ROC-AUC: 0.5
Best params: {'colsample_bytree': 0.4160029192647807, 'learning_rate': 0.28539836866041823, 'max_bin': 940, 'max_depth': 16, 'min_child_samples': 34, 'min_child_weight': 4, 'n_estimators': 68, 'num_leaves': 74, 'reg_alpha': 5.497557739289786e-07, 'reg_lambda': 0.05936070635912049, 'scale_pos_weight': 0.060830282487222144, 'subsample': 0.13556548021189216, 'subsample_for_bin': 171234, 'subsample_freq': 6}

Model #3
Best ROC-AUC: 0.5108
Best params: {'colsample_bytree': 0.4503841871781403, 'learning_rate': 0.6877728743793542, 'max_bin': 194, 'max_depth': 22, 'min_child_samples': 9, 'min_child_weight': 5, 'n_estimators': 58, 'num_leaves': 75, 'reg_alpha': 0.00010376808625045426, 'reg_lambda': 476.96194787286544, 'scale_pos_weight': 1.3165669602830552, 'subsample': 0.387658500562527, 'subsample_for_bin': 179142, 'subsample_freq': 5}

Model #4
Best ROC-AUC: 0.5108
Best params: {'colsample_bytree': 0.4503841871781403, 'learning_rate': 0.6877728743793542, 'max_bin': 194, 'max_depth': 22, 'min_child_samples': 9, 'min_child_weight': 5, 'n_estimators': 58, 'num_leaves': 75, 'reg_alpha': 0.00010376808625045426, 'reg_lambda': 476.96194787286544, 'scale_pos_weight': 1.3165669602830552, 'subsample': 0.387658500562527, 'subsample_for_bin': 179142, 'subsample_freq': 5}

Model #5
Best ROC-AUC: 0.5108
Best params: {'colsample_bytree': 0.4503841871781403, 'learning_rate': 0.6877728743793542, 'max_bin': 194, 'max_depth': 22, 'min_child_samples': 9, 'min_child_weight': 5, 'n_estimators': 58, 'num_leaves': 75, 'reg_alpha': 0.00010376808625045426, 'reg_lambda': 476.96194787286544, 'scale_pos_weight': 1.3165669602830552, 'subsample': 0.387658500562527, 'subsample_for_bin': 179142, 'subsample_freq': 5}

Model #6
Best ROC-AUC: 0.9245
Best params: {'colsample_bytree': 0.7366877378057127, 'learning_rate': 0.7563790218678241, 'max_bin': 247, 'max_depth': 9, 'min_child_samples': 40, 'min_child_weight': 4, 'n_estimators': 73, 'num_leaves': 54, 'reg_alpha': 0.011683028450342707, 'reg_lambda': 0.0048879464985534336, 'scale_pos_weight': 0.13267482411031659, 'subsample': 0.5689543694097536, 'subsample_for_bin': 108942, 'subsample_freq': 5}

Model #7
Best ROC-AUC: 0.9455
Best params: {'colsample_bytree': 0.6209085649172932, 'learning_rate': 0.35540927532494104, 'max_bin': 423, 'max_depth': 44, 'min_child_samples': 30, 'min_child_weight': 6, 'n_estimators': 82, 'num_leaves': 43, 'reg_alpha': 0.004026635957416632, 'reg_lambda': 0.040887904512512056, 'scale_pos_weight': 109.72255122430063, 'subsample': 0.6612742297240571, 'subsample_for_bin': 344698, 'subsample_freq': 3}

Model #8
Best ROC-AUC: 0.9455
Best params: {'colsample_bytree': 0.6209085649172932, 'learning_rate': 0.35540927532494104, 'max_bin': 423, 'max_depth': 44, 'min_child_samples': 30, 'min_child_weight': 6, 'n_estimators': 82, 'num_leaves': 43, 'reg_alpha': 0.004026635957416632, 'reg_lambda': 0.040887904512512056, 'scale_pos_weight': 109.72255122430063, 'subsample': 0.6612742297240571, 'subsample_for_bin': 344698, 'subsample_freq': 3}

Model #9
Best ROC-AUC: 0.9455
Best params: {'colsample_bytree': 0.6209085649172932, 'learning_rate': 0.35540927532494104, 'max_bin': 423, 'max_depth': 44, 'min_child_samples': 30, 'min_child_weight': 6, 'n_estimators': 82, 'num_leaves': 43, 'reg_alpha': 0.004026635957416632, 'reg_lambda': 0.040887904512512056, 'scale_pos_weight': 109.72255122430063, 'subsample': 0.6612742297240571, 'subsample_for_bin': 344698, 'subsample_freq': 3}

Model #10
Best ROC-AUC: 0.9455
Best params: {'colsample_bytree': 0.6209085649172932, 'learning_rate': 0.35540927532494104, 'max_bin': 423, 'max_depth': 44, 'min_child_samples': 30, 'min_child_weight': 6, 'n_estimators': 82, 'num_leaves': 43, 'reg_alpha': 0.004026635957416632, 'reg_lambda': 0.040887904512512056, 'scale_pos_weight': 109.72255122430063, 'subsample': 0.6612742297240571, 'subsample_for_bin': 344698, 'subsample_freq': 3}


In [13]:
lgbm_result.best_params_


Out[13]:
{'colsample_bytree': 0.6209085649172932,
 'learning_rate': 0.35540927532494104,
 'max_bin': 423,
 'max_depth': 44,
 'min_child_samples': 30,
 'min_child_weight': 6,
 'n_estimators': 82,
 'num_leaves': 43,
 'reg_alpha': 0.004026635957416632,
 'reg_lambda': 0.040887904512512056,
 'scale_pos_weight': 109.72255122430063,
 'subsample': 0.6612742297240571,
 'subsample_for_bin': 344698,
 'subsample_freq': 3}

In [14]:
lgbm_result.estimator


Out[14]:
LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
       learning_rate=0.1, max_depth=-1, metric='auc', min_child_samples=20,
       min_child_weight=0.001, min_split_gain=0.0, n_estimators=100,
       n_jobs=1, num_leaves=31, objective='binary', random_state=None,
       reg_alpha=0.0, reg_lambda=0.0, silent=True, subsample=1.0,
       subsample_for_bin=200000, subsample_freq=1, verbose=0)

In [21]:
bayes_cv_tuner = BayesSearchCV(
    estimator = lgb.LGBMRegressor(objective='regression', boosting_type='gbdt', subsample=0.6143), #colsample_bytree=0.6453, subsample=0.6143
    search_spaces = {
        'learning_rate': (0.01, 1.0, 'log-uniform'),
        'num_leaves': (10, 100),      
        'max_depth': (0, 50),
        'min_child_samples': (0, 50),
        'max_bin': (100, 1000),
        'subsample_freq': (0, 10),
        'min_child_weight': (0, 10),
        'reg_lambda': (1e-9, 1000, 'log-uniform'),
        'reg_alpha': (1e-9, 1.0, 'log-uniform'),
        'scale_pos_weight': (1e-6, 500, 'log-uniform'),
        'n_estimators': (50, 150),
    },    
    scoring = 'neg_mean_squared_error', #neg_mean_squared_log_error
    cv = KFold(
        n_splits=5,
        shuffle=True,
        random_state=42
    ),
    n_jobs = 1,
    n_iter = 100,   
    verbose = 0,
    refit = True,
    random_state = 42
)


def status_print(optim_result):
    """Status callback durring bayesian hyperparameter search"""
    
    # Get all the models tested so far in DataFrame format
    all_models = pd.DataFrame(bayes_cv_tuner.cv_results_)    
    
    # Get current parameters and the best parameters    
    best_params = pd.Series(bayes_cv_tuner.best_params_)
    print('Model #{}\nBest MSE: {}\nBest params: {}\n'.format(
        len(all_models),
        np.round(bayes_cv_tuner.best_score_, 4),
        bayes_cv_tuner.best_params_
    ))
    
    # Save all model results
    clf_name = bayes_cv_tuner.estimator.__class__.__name__
    all_models.to_csv(clf_name+"_cv_results.csv")

# Fit the model
result = bayes_cv_tuner.fit(X.values, y.values, callback=status_print)


---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
/usr/local/lib/python3.6/site-packages/sklearn/metrics/scorer.py in get_scorer(scoring)
    228         try:
--> 229             scorer = SCORERS[scoring]
    230         except KeyError:

KeyError: 'neg1_mean_squared_error'

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-21-cb4460c089fd> in <module>()
     47 
     48 # Fit the model
---> 49 result = bayes_cv_tuner.fit(X.values, y.values, callback=status_print)

/usr/local/lib/python3.6/site-packages/skopt/searchcv.py in fit(self, X, y, groups, callback)
    652                 optim_result = self._step(
    653                     X, y, search_space, optimizer,
--> 654                     groups=groups, n_points=n_points_adjusted
    655                 )
    656                 n_iter -= n_points

/usr/local/lib/python3.6/site-packages/skopt/searchcv.py in _step(self, X, y, search_space, optimizer, groups, n_points)
    548         refit = self.refit
    549         self.refit = False
--> 550         self._fit(X, y, groups, params_dict)
    551         self.refit = refit
    552 

/usr/local/lib/python3.6/site-packages/skopt/searchcv.py in _fit(self, X, y, groups, parameter_iterable)
    374             self.cv, y, classifier=is_classifier(estimator))
    375         self.scorer_ = check_scoring(
--> 376             self.estimator, scoring=self.scoring)
    377 
    378         X, y, groups = indexable(X, y, groups)

/usr/local/lib/python3.6/site-packages/sklearn/metrics/scorer.py in check_scoring(estimator, scoring, allow_none)
    271                         "'fit' method, %r was passed" % estimator)
    272     if isinstance(scoring, six.string_types):
--> 273         return get_scorer(scoring)
    274     elif callable(scoring):
    275         # Heuristic to ensure user has not passed a metric

/usr/local/lib/python3.6/site-packages/sklearn/metrics/scorer.py in get_scorer(scoring)
    231             raise ValueError('%r is not a valid scoring value. '
    232                              'Use sorted(sklearn.metrics.SCORERS.keys()) '
--> 233                              'to get valid options.' % (scoring))
    234     else:
    235         scorer = scoring

ValueError: 'neg1_mean_squared_error' is not a valid scoring value. Use sorted(sklearn.metrics.SCORERS.keys()) to get valid options.
  • Scoring에서 쓸 수 있는 값은 아래와 같음

In [22]:
import sklearn

In [26]:
keys = sklearn.metrics.SCORERS.keys()

In [32]:
for key in keys:
    print(key)


explained_variance
r2
neg_median_absolute_error
neg_mean_absolute_error
neg_mean_squared_error
neg_mean_squared_log_error
accuracy
roc_auc
balanced_accuracy
average_precision
neg_log_loss
brier_score_loss
adjusted_rand_score
homogeneity_score
completeness_score
v_measure_score
mutual_info_score
adjusted_mutual_info_score
normalized_mutual_info_score
fowlkes_mallows_score
precision
precision_macro
precision_micro
precision_samples
precision_weighted
recall
recall_macro
recall_micro
recall_samples
recall_weighted
f1
f1_macro
f1_micro
f1_samples
f1_weighted

In [ ]: