In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss
from sklearn.metrics import explained_variance_score,r2_score,mean_absolute_error
from sklearn import preprocessing
from random import randint

import numpy as np
import pandas as pd

from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, rand
import sys
import xgboost as xgb
import os
from utils import get_allstate_train_valid_test_testids, to_xy
from keras.wrappers.scikit_learn import KerasRegressor
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers.advanced_activations import PReLU

shift = 205
train, valid, test, testids = get_allstate_train_valid_test_testids(0.15, shift, True)
x_train,y_train = to_xy(train, "loss")
x_valid,y_valid = to_xy(valid, "loss")
x_test,y_test = to_xy(test, "loss")


Using Theano backend.
Train shape is: (188318, 132)
Test shape is: (125546, 131)
Final Train shape is: (160070, 131)
Final Valid shape is: (28248, 131)
Final Test shape is: (125546, 131)
float64
float64
float64

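The target column is modeled on a log scale: judging from score() below, the utils helper trains on log(loss + shift) and the validation MAE is computed after inverting that transform. A minimal sketch of the round trip, assuming that is indeed what get_allstate_train_valid_test_testids does:

In [ ]:
# Hypothetical illustration of the assumed log-shift target transform.
raw_loss = np.array([100.0, 2500.0, 40000.0])
encoded = np.log(raw_loss + shift)     # scale the network is trained on
decoded = np.exp(encoded) - shift      # scale score() reports MAE in
print(np.allclose(raw_loss, decoded))  # True
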
In [ ]:
def build_fn_reg(hidden_dims=50):
    # Feed-forward regressor: input -> 400 -> hidden_dims -> 200 -> 1 (Keras 1.x API).
    # Only the first layer needs input_dim; Keras infers the shapes of the rest.
    model = Sequential()
    model.add(Dense(input_dim=x_train.shape[1], output_dim=400))
    model.add(PReLU())
    model.add(Dense(output_dim=hidden_dims))
    model.add(Dropout(0.4))
    model.add(Dense(output_dim=200, init="uniform"))
    model.add(PReLU())
    model.add(Dropout(0.2))
    model.add(Dense(output_dim=1))
    # MAE matches the competition metric; accuracy is dropped since it is
    # meaningless for regression. Optimizers to try instead of 'sgd': adam, adadelta.
    model.compile(loss='mean_absolute_error', optimizer='sgd')
    return model

def score(params):           
    print("Training with params : ")
    print(params)

    model = KerasRegressor(build_fn=build_fn_reg,
                           nb_epoch=params["nb_epoch"],
                           verbose=0,
                           batch_size=params["batch_size"],
                           validation_split=params["validation_split"], shuffle='batch',
                           #hidden_dims=params["hidden_dims"]
                          )

    model.fit(x_train, y_train)

    predictions = model.predict(x_valid)

    # The network is trained on log(loss + shift), so invert the transform
    # before computing MAE in the original loss units.
    mae = mean_absolute_error(np.exp(y_valid) - shift, np.exp(predictions) - shift)
    print("\tMAE {0}\n\n".format(mae))
    return {'loss': mae, 'status': STATUS_OK}

def optimize(trials):
    space = {
             'nb_epoch' : hp.choice('nb_epoch', np.arange(3, 3000, dtype=int)),
             'batch_size' : hp.choice('batch_size', np.arange(1, 300, dtype=int)),
             'validation_split' : hp.quniform('validation_split', 0.01, 0.95, 0.01),
             #'hidden_dims' : hp.choice('hidden_dims', np.arange(1, 125, dtype=int))
             }

    best = fmin(score, space, algo=tpe.suggest, trials=trials, max_evals=250)

    # Note: for hp.choice parameters, fmin returns the index into the choice
    # list rather than the chosen value itself.
    print("Best params are:")
    print(best)




# Trials object where the search history will be stored
trials = Trials()

optimize(trials)


Training with params : 
{'nb_epoch': 502, 'validation_split': 0.17, 'batch_size': 231}

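Once the search above finishes (or is stopped early), the Trials object retains every evaluation. A minimal sketch of how that history could be inspected, assuming at least one trial has completed; recall that hp.choice parameters are recorded as indices into their choice lists, not as the values themselves:

In [ ]:
# Sketch: inspect the hyperopt search history kept in `trials`.
completed = [t for t in trials.trials if t['result']['status'] == STATUS_OK]
print("Completed evaluations:", len(completed))

# Validation MAE of every completed trial, best first.
losses = sorted(l for l in trials.losses() if l is not None)
print("Best / worst MAE:", losses[0], losses[-1])

# Parameter record of the best trial (hp.choice entries stored as indices).
print(trials.best_trial['misc']['vals'])
print(trials.best_trial['result']['loss'])
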
In [ ]: