In [1]:

    
import ensembles as en
import pandas as pd
import numpy as np
import xgboost as xgb
import category_encoders as ce
from sklearn import datasets, linear_model, preprocessing, grid_search
from sklearn.preprocessing import Imputer, PolynomialFeatures, StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_validation import StratifiedKFold, KFold
from sklearn.preprocessing import OneHotEncoder
from sklearn.externals import joblib
from keras.layers import Dense, Activation, Dropout
from keras.models import Sequential
from keras.regularizers import l2, activity_l2
from sklearn.metrics import roc_auc_score, average_precision_score, f1_score, log_loss, accuracy_score, \
mean_absolute_error, mean_squared_error, r2_score
from sklearn.cross_validation import train_test_split
from joblib import Parallel, delayed
from sklearn.pipeline import Pipeline
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials 
from hyperas import optim
from hyperas.distributions import choice, uniform, conditional
from functools import partial
# Seed NumPy's global random number generator with a fixed value so that
# anything drawing from np.random later in the notebook is repeatable
# across kernel restarts.
np.random.seed(1338)









    



/home/prajwal/anaconda3/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)
/home/prajwal/anaconda3/lib/python3.5/site-packages/sklearn/grid_search.py:43: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. This module will be removed in 0.20.
  DeprecationWarning)
Using Theano backend.



In [2]:

    
#Setting the parameters for the Gradient Boosting Model

Example 1



In [3]:

    
# Example 1 -- default values.
# Only eval_metric and objective are passed explicitly; every other key in
# the printed dictionary is filled in by the helper itself.
param_gb = en.parameter_set_gradient_boosting(
    eval_metric=['auc'],
    objective=['binary:logistic']
)
print(param_gb)









    



{'scale_pos_weigth': 0, 'base_score': 0.5, 'sketch_eps': 0.03, 'seed': 0, 'eta': 0.3, 'tree_method': 'auto', 'hyper_parameter_optimisation': False, 'min_child_weight': 1, 'gamma': 0, 'eval_metric': 'auc', 'booster': 'gbtree', 'alpha': 0, 'colsample_bylevel': 1, 'lambda_bias': 0, 'lambda': 1, 'colsample_bytree': 1, 'objective': 'binary:logistic', 'max_depth': 6, 'max_delta_step': 0, 'subsample': 1}

Example 2



In [4]:

    
# Example 2 -- override max_depth and eta.
# Each argument is passed as a one-element list; the remaining settings are
# left for the helper to fill in.
param_gb = en.parameter_set_gradient_boosting(
    eval_metric=['auc'],
    objective=['binary:logistic'],
    max_depth=[10],
    eta=[0.5]
)
print(param_gb)









    



{'scale_pos_weigth': 0, 'base_score': 0.5, 'sketch_eps': 0.03, 'seed': 0, 'eta': 0.5, 'tree_method': 'auto', 'hyper_parameter_optimisation': False, 'min_child_weight': 1, 'gamma': 0, 'eval_metric': 'auc', 'booster': 'gbtree', 'alpha': 0, 'colsample_bylevel': 1, 'lambda_bias': 0, 'lambda': 1, 'colsample_bytree': 1, 'objective': 'binary:logistic', 'max_depth': 10, 'max_delta_step': 0, 'subsample': 1}

Example 3



In [5]:

    
# Example 3 -- hyper parameter optimisation over max_depth and eta.
# With hyper_parameter_optimisation switched on, max_depth and eta are each
# given a list of several candidate values.
param_gb = en.parameter_set_gradient_boosting(
    hyper_parameter_optimisation=True,
    eval_metric=['auc'],
    objective=['binary:logistic'],
    max_depth=[5, 10, 15],
    eta=[0.1, 0.3, 0.5]
)
print(param_gb)









    



{'scale_pos_weigth': [0], 'base_score': [0.5], 'sketch_eps': [0.03], 'seed': 0, 'eta': [0.1, 0.3, 0.5], 'tree_method': ['auto'], 'hyper_parameter_optimisation': True, 'min_child_weight': [1], 'gamma': [0], 'eval_metric': ['auc'], 'booster': ['gbtree'], 'alpha': [0], 'colsample_bylevel': [1], 'lambda_bias': [0], 'lambda': [1], 'colsample_bytree': [1], 'objective': 'binary:logistic', 'max_depth': [5, 10, 15], 'max_delta_step': [0], 'subsample': [1]}

Example 4



In [6]:

    
# Example 4 -- hyper parameter optimisation over gamma and eta.
# gamma gets five candidate values and eta gets two; max_depth is not passed
# here, so it is left to the helper.
param_gb = en.parameter_set_gradient_boosting(
    hyper_parameter_optimisation=True,
    eval_metric=['auc'],
    objective=['binary:logistic'],
    gamma=[0, 1, 3, 5, 7],
    eta=[0.1, 0.3]
)
print(param_gb)









    



{'scale_pos_weigth': [0], 'base_score': [0.5], 'sketch_eps': [0.03], 'seed': 0, 'eta': [0.1, 0.3], 'tree_method': ['auto'], 'hyper_parameter_optimisation': True, 'min_child_weight': [1], 'gamma': [0, 1, 3, 5, 7], 'eval_metric': ['auc'], 'booster': ['gbtree'], 'alpha': [0], 'colsample_bylevel': [1], 'lambda_bias': [0], 'lambda': [1], 'colsample_bytree': [1], 'objective': 'binary:logistic', 'max_depth': [6], 'max_delta_step': [0], 'subsample': [1]}

Example 5



In [7]:

    
# Example 5 -- hyper parameter optimisation over gamma, eta and max_depth,
# with colsample_bylevel pinned to a single value (0.1).
param_gb = en.parameter_set_gradient_boosting(
    hyper_parameter_optimisation=True,
    eval_metric=['auc'],
    objective=['binary:logistic'],
    gamma=[0, 1, 3, 5, 7],
    eta=[0.1, 0.3],
    max_depth=[5, 10, 15],
    colsample_bylevel=[0.1]
)
print(param_gb)









    



{'scale_pos_weigth': [0], 'base_score': [0.5], 'sketch_eps': [0.03], 'seed': 0, 'eta': [0.1, 0.3], 'tree_method': ['auto'], 'hyper_parameter_optimisation': True, 'min_child_weight': [1], 'gamma': [0, 1, 3, 5, 7], 'eval_metric': ['auc'], 'booster': ['gbtree'], 'alpha': [0], 'colsample_bylevel': [0.1], 'lambda_bias': [0], 'lambda': [1], 'colsample_bytree': [1], 'objective': 'binary:logistic', 'max_depth': [5, 10, 15], 'max_delta_step': [0], 'subsample': [1]}