In [1]:
import ensembles as en
import pandas as pd
In [2]:
# Location of the raw Forest Cover file. This is a machine-specific absolute
# path; point it at your own copy of the data before running.
DATA_PATH = '/home/prajwal/Desktop/forest/Forest_Cover.data'

# Read the file with no header row so pandas assigns integer column names
# (0, 1, 2, ...). header=None is the supported way to say "no header";
# negative header values such as -1 raise ValueError in current pandas.
Data = pd.read_csv(DATA_PATH, header=None)

# label_output has to be a string, so give integer column 54 a string name.
Data = Data.rename(columns={54: 'target_label'})

# Multi-class classification: shift the class labels from 1-7 down to 0-6.
Data['target_label'] = Data['target_label'] - 1
In [3]:
%%time
test_data = en.data_import(Data,label_output='target_label')
en.metric_set('accuracy_score')
param_gb_1 = en.parameter_set_gradient_boosting(eval_metric = ['auc'], objective = ["multi:softmax"],num_class=7)
param_dt = en.parameter_set_decision_tree()
param_rf = en.parameter_set_random_forest()
param_l2 = en.parameter_set_logistic_regression(solver = ['lbfgs'])
param_l1 = en.parameter_set_logistic_regression(penalty = ['l2'], solver = ['lbfgs'])
en.train_base_models(['gradient_boosting','decision_tree',\
'random_forest','logistic_regression',\
'logistic_regression'],[param_gb_1, param_dt, param_rf
, param_l2, param_l1])
en.train_ensemble_models(['logistic_regression', 'gradient_boosting'], [param_l1, param_gb_1],
['gradient_boosting','logistic_regression'],[param_gb_1,param_l2])
en.test_models(test_data)
In [ ]: