# In [10]:
# NOTE: sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import log_loss
from sklearn.metrics import explained_variance_score, r2_score, mean_absolute_error
from sklearn import preprocessing
from random import randint
import numpy as np
import pandas as pd
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, rand
import sys
import xgboost as xgb
import os
from utils import get_allstate_train_valid_test_testids

# Additive shift used by the target transform: scoring below inverts
# predictions with exp(pred) - shift, so targets are presumably stored as
# log(loss + shift) — TODO confirm against get_allstate_train_valid_test_testids.
shift = 200
train, valid, test, testids = get_allstate_train_valid_test_testids(0.15, shift, True)

# Split the "loss" target column away from the feature matrices.
y_train = train["loss"]
x_train = train.drop("loss", axis=1)
y_valid = valid["loss"]
x_valid = valid.drop("loss", axis=1)
#xgtrain = xgb.DMatrix(x_train, label=y_train)
#xgvalid = xgb.DMatrix(x_valid, label=y_valid)
# In [ ]:
def score(params):
    """Hyperopt objective: fit an XGBRegressor and score it on the validation set.

    Parameters
    ----------
    params : dict
        Keyword arguments for ``XGBRegressor``, sampled by hyperopt from the
        search space built in ``optimize``.

    Returns
    -------
    dict
        ``{'loss': mae, 'status': STATUS_OK}`` — the result format ``fmin``
        expects; lower ``loss`` (validation MAE) is better.

    Reads the module-level globals ``x_train``/``y_train``/``x_valid``/
    ``y_valid`` and ``shift``.
    """
    print("Training with params : ")
    print(params)
    base_regressor = XGBRegressor(**params)
    base_regressor.fit(x_train, y_train)
    predictions = base_regressor.predict(x_valid)
    # Undo the target transform (presumably log(loss + shift) — see `shift`)
    # so the MAE is reported on the original loss scale.
    # Renamed from `score` to avoid shadowing this function's own name.
    mae = mean_absolute_error(np.exp(y_valid) - shift, np.exp(predictions) - shift)
    print("\tMAE {0}\n\n".format(mae))
    return {'loss': mae, 'status': STATUS_OK}
def custom_evalfn(preds, dtrain):
    """Custom xgboost eval metric: MAE on the un-shifted loss scale.

    Parameters
    ----------
    preds : array-like
        Model predictions on the transformed (log-shifted) scale.
    dtrain : xgb.DMatrix
        Provides the true labels via ``get_label()``.

    Returns
    -------
    tuple
        ``('error', value)`` — the (name, value) pair xgboost expects from
        a ``feval`` callback. Both inputs are mapped back through
        ``exp(x) - shift`` before the error is computed.
    """
    labels = dtrain.get_label()
    return 'error', mean_absolute_error(np.exp(preds) - shift, np.exp(labels) - shift)
def optimize(trials):
    """Run a 250-evaluation TPE search over XGBRegressor hyperparameters.

    Parameters
    ----------
    trials : hyperopt.Trials
        Bookkeeping object; records every evaluation of ``score`` so the
        search history stays inspectable after the run.

    Side effects: prints the best parameter assignment found by ``fmin``.
    Note that for ``hp.choice`` entries ``fmin`` reports the *index* into
    the choice list, not the value itself.
    """
    space = {
        'max_depth': hp.choice('max_depth', [9, 10, 11, 12, 13, 14, 15]),
        # NOTE(review): the hyperopt label 'eta' differs from the dict key
        # 'learning_rate'; results are reported under the label — confirm
        # this mismatch is intentional.
        'learning_rate': hp.quniform('eta', 0.01, 0.5, 0.01),
        'n_estimators': hp.choice('n_estimators', np.arange(85, 3000, dtype=int)),
        'silent': True,
        'objective': 'reg:linear',
        'nthread': -1,
        'gamma': hp.quniform('gamma', 0.0, 3, 0.2),
        'min_child_weight': hp.quniform('min_child_weight', 1, 3, 1),
        'subsample': hp.quniform('subsample', 0.6, 1.0, 0.1),
        'reg_lambda': hp.quniform('reg_lambda', 0.0, 3, 0.2),
        'base_score': hp.quniform('base_score', 0.0, 3, 0.2),
        'colsample_bytree': hp.quniform('colsample_bytree', 0.4, 0.8, 0.1),
        'reg_alpha': 1,
        # Fresh seed per call so repeated searches don't repeat trajectories.
        'seed': randint(1, 429496)
    }
    best = fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=250)
    print("Best params are:")
    print(best)
# Trials object where the history of the search will be stored; kept at
# module level so per-trial results remain inspectable after the run.
trials = Trials()
# Kick off the 250-evaluation TPE search (runs at cell-execution time).
optimize(trials)
# In [ ]: