In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.metrics import explained_variance_score,r2_score,mean_absolute_error
from sklearn import preprocessing
from random import randint
import numpy as np
import pandas as pd
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, rand
import sys
import xgboost as xgb
import os
from utils import get_allstate_train_valid_test_testids, to_xy
from keras.wrappers.scikit_learn import KerasRegressor
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers.advanced_activations import PReLU
shift = 205
train, valid, test, testids = get_allstate_train_valid_test_testids(0.15, shift, True)
x_train,y_train = to_xy(train, "loss")
x_valid,y_valid = to_xy(valid, "loss")
x_test,y_test = to_xy(test, "loss")
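The helpers get_allstate_train_valid_test_testids and to_xy come from the local utils module and are not shown here; the working assumption is that the loader log-transforms the target as log(loss + shift), which is why the scoring function below inverts predictions with np.exp(...) - shift, and that to_xy returns a numeric feature matrix plus the target column. An optional sanity check of those assumptions:
In [ ]:
# Optional sanity check (not part of the original notebook).
print("train:", x_train.shape, y_train.shape)
print("valid:", x_valid.shape, y_valid.shape)
print("test: ", x_test.shape, y_test.shape)
# If the loader applied log(loss + shift) with shift = 205, the target minimum
# should sit near log(shift), about 5.3, rather than near the raw dollar amounts.
print("y_train range:", y_train.min(), y_train.max())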
In [ ]:
def build_fn_reg(hidden_dims=50):
    # Feed-forward regressor: 400 -> hidden_dims -> 200 -> 1 with PReLU activations and dropout.
    model = Sequential()
    model.add(Dense(400, input_dim=x_train.shape[1]))
    model.add(PReLU())
    model.add(Dense(hidden_dims))
    model.add(Dropout(0.4))
    model.add(Dense(200, kernel_initializer="uniform"))
    model.add(PReLU())
    model.add(Dropout(0.2))
    model.add(Dense(1))
    # MAE matches the competition metric; try 'adam' or 'adadelta' as alternatives to SGD.
    model.compile(loss='mean_absolute_error', optimizer='sgd')
    return model
def score(params):
    # Hyperopt objective: train a fresh model with the sampled hyperparameters
    # and report the validation MAE on the original loss scale.
    print("Training with params : ")
    print(params)
    model = KerasRegressor(build_fn=build_fn_reg,
                           epochs=params["epochs"],
                           verbose=0,
                           batch_size=params["batch_size"],
                           validation_split=params["validation_split"],
                           shuffle='batch',
                           #hidden_dims=params["hidden_dims"]
                           )
    model.fit(x_train, y_train)
    predictions = model.predict(x_valid)
    print(predictions)
    # Targets are log(loss + shift), so undo the transform before scoring.
    score = mean_absolute_error(np.exp(y_valid) - shift, np.exp(predictions) - shift)
    print("\tMAE {0}\n\n".format(score))
    return {'loss': score, 'status': STATUS_OK}
def optimize(trials):
    # Search space: hp.choice draws integer epoch and batch-size values,
    # hp.quniform draws the validation split on a 0.01 grid.
    space = {
        'epochs': hp.choice('epochs', np.arange(3, 3000, dtype=int)),
        'batch_size': hp.choice('batch_size', np.arange(1, 300, dtype=int)),
        'validation_split': hp.quniform('validation_split', 0.01, 0.95, 0.01),
        #'hidden_dims': hp.choice('hidden_dims', np.arange(1, 125, dtype=int))
    }
    best = fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=250)
    # Note: for hp.choice parameters, fmin reports the index into the choice grid, not the value.
    print("Best params are:")
    print(best)
# Trials object in which the search history will be stored
trials = Trials()
optimize(trials)
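Because the epoch and batch-size dimensions use hp.choice, the values recorded in trials are indices into those grids rather than the hyperparameters themselves. The cell below is a minimal follow-up sketch, not part of the original notebook: it assumes testids holds the test-set ids aligned with x_test, rebuilds the same np.arange grids used inside optimize() to decode the best trial, retrains on train plus validation data, and writes an Allstate-style id/loss submission under an illustrative file name.
In [ ]:
# Decode the best trial. Each entry in misc['vals'] is a single-element list;
# for hp.choice it holds the index into the grid, so the grids below must mirror optimize().
best_vals = trials.best_trial['misc']['vals']
best_epochs = int(np.arange(3, 3000)[best_vals['epochs'][0]])
best_batch = int(np.arange(1, 300)[best_vals['batch_size'][0]])
best_split = float(best_vals['validation_split'][0])
print("Decoded best params:", best_epochs, best_batch, best_split)

# Retrain on the combined train + validation data with the decoded hyperparameters.
final_model = KerasRegressor(build_fn=build_fn_reg,
                             epochs=best_epochs,
                             batch_size=best_batch,
                             validation_split=best_split,
                             verbose=0)
x_full = np.vstack([x_train, x_valid])
y_full = np.concatenate([y_train.ravel(), y_valid.ravel()])
final_model.fit(x_full, y_full)

# Predictions are on the log(loss + shift) scale; invert before writing the submission.
test_preds = np.exp(final_model.predict(x_test)) - shift
submission = pd.DataFrame({'id': testids, 'loss': test_preds})
submission.to_csv('allstate_keras_hyperopt_submission.csv', index=False)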
In [ ]: