In [1]:
import os

import ensembles as en
import pandas as pd


Using Theano backend.

In [2]:
# Location of the semicolon-delimited input CSV.  The original absolute path is
# kept as the default so existing runs behave exactly as before; set the
# BANK_DATA_CSV environment variable to point the notebook at a copy of the
# file on another machine without editing this cell.
DATA_CSV_PATH = os.environ.get(
    'BANK_DATA_CSV',
    '/home/prajwal/Desktop/bank-additional/bank-additional-full.csv',
)

# The first row of the file holds the column names (header=0).
Data = pd.read_csv(DATA_CSV_PATH, delimiter=';', header=0)

In [3]:
# Register the raw frame with the ensembles package, passing 'y' as the
# label_output argument.  The returned object is the one later handed to
# en.test_models() -- presumably a held-out split; confirm against the
# ensembles documentation.
# NOTE(review): the timed cell below repeats this exact call; confirm whether
# both invocations are needed.
data_test = en.data_import(
    Data,
    label_output='y',
)

In [4]:
%%time

data_test = en.data_import(Data,label_output='y')
en.metric_set('roc_auc_score')
param_gb_1 = en.parameter_set_gradient_boosting(eval_metric = ['auc'], objective = ['binary:logistic'])
param_dt = en.parameter_set_decision_tree(max_depth = [6])
param_rf = en.parameter_set_random_forest()
param_lr = en.parameter_set_linear_regression()
param_l2 = en.parameter_set_logistic_regression()
param_l1 = en.parameter_set_logistic_regression(penalty = ['l1'])
param_gb_2 = en.parameter_set_gradient_boosting(eval_metric = ['auc'], objective = ['binary:logistic'],
                                                booster=['gblinear'], eta = [0.1,0.3,0.5,0.7],
                                               hyper_parameter_optimisation = True)

en.train_base_models(['gradient_boosting','decision_tree',\
                                     'random_forest','linear_regression','logistic_regression',\
                                     'logistic_regression','gradient_boosting'],[param_gb_1, param_dt, param_rf
                                                                                 ,param_lr, param_l2, param_l1,
                                                                                 param_gb_2])

weights = en.assign_weights(weights = 'default', hyper_parameter_optimisation = True)


en.train_ensemble_models(['linear_regression', 'gradient_boosting'], [param_lr, param_gb_1],
                      ['gradient_boosting','logistic_regression'],[param_gb_1,param_l2], 
                      perform_weighted_average = True, weights_list = weights)

en.test_models(data_test)


TRAINING BASE MODELS


TESTING/CROSS VALIDATION BASE MODELS

gradient_boosting 
 0.945466200204
decision_tree 
 0.914565768283
random_forest 
 0.909355076256
linear_regression 
 0.926174465953
logistic_regression 
 0.924882479274
logistic_regression 
 0.925028862713
gradient_boosting 
 0.908089011143

TRAINING ENSEMBLE MODELS

Weighted Average
Weight [8, 6, 5, 7, 0, 1, 0]
Metric Score 0.94422713973

TESTING PHASE


TESTING/CROSS VALIDATION BASE MODELS

gradient_boosting 
 0.943940686406
decision_tree 
 0.917947354697
random_forest 
 0.912696812219
linear_regression 
 0.921872756052
logistic_regression 
 0.924375986027
logistic_regression 
 0.9244818938
gradient_boosting 
 0.909025157109

TESTING ENSEMBLE MODELS

Stacking linear_regression 
 0.94440441373
Stacking gradient_boosting 
 0.943041666011
Blending gradient_boosting 
 0.945733944473
Blending logistic_regression 
 0.942913482554
Weighted Average [8, 6, 5, 7, 0, 1, 0] 
 0.943077765723
CPU times: user 2.69 s, sys: 124 ms, total: 2.81 s
Wall time: 24.1 s

In [ ]: