In [6]:
# Run `pip install sigopt` to download the python API client
# Set environment variable SIGOPT_API_TOKEN to your client token
# or provide your client token below
import socket
import sigopt
import os
import pandas as pd
import numpy as np
from sklearn.cross_validation import train_test_split
from utils import encode_numeric_zscore_list, encode_numeric_zscore_all, to_xy, encode_text_index_list, encode_numeric_log_all
import xgboost as xgb
from sklearn.metrics import explained_variance_score,r2_score,mean_absolute_error
In [7]:
# Read the SigOpt client token from the environment instead of embedding it in
# the notebook: a token saved with the file is exposed to every reader of the
# notebook. Any token that was previously committed here should be revoked.
client_token = os.environ.get("SIGOPT_API_TOKEN")
if not client_token:
    raise RuntimeError(
        "SIGOPT_API_TOKEN is not set; export your SigOpt client token "
        "before running this notebook"
    )

conn = sigopt.Connection(client_token=client_token)
conn.set_api_url('https://api.sigopt.com')

# Identifier of the existing SigOpt experiment this notebook reports to.
ID = 10594
experiment = conn.experiments(ID)

# Name of the machine running this notebook; attached as metadata to every
# observation submitted to the experiment.
hostname = socket.gethostname()
In [8]:
# Load the training data from the zipped CSV; the literal string 'NULL' is
# parsed as a missing value.
path = "./data/allstate"
inputFilePath = os.path.join(path, "train.csv.zip")
df = pd.read_csv(inputFilePath, compression="zip", header=0, na_values=['NULL'])

# Shuffle the rows with a fixed seed so the order is reproducible, renumber
# the index, and drop the 'id' column so it is not used as a predictor.
np.random.seed(42)
df = df.reindex(np.random.permutation(df.index))
df = df.reset_index(drop=True)
df = df.drop('id', axis=1)
# For quick experiments, uncomment to work on a 1% sample:
# df = df.sample(frac=0.01)

# Column names follow the pattern cat1..cat116 and cont1..cont14.
categorical_columns = ['cat{}'.format(i) for i in range(1, 117)]
continuous_columns = ['cont{}'.format(i) for i in range(1, 15)]

# Encode the categorical columns in place via the project helper.
# NOTE(review): judging by its name, encode_text_index_list maps each label to
# an integer index rather than creating dummy columns -- confirm in utils.
encode_text_index_list(df, categorical_columns)

# Encode the continuous columns to z-scored values (in place, via utils).
encode_numeric_zscore_list(df, continuous_columns)

# Replace any remaining missing values with 0. DataFrame.fillna returns a new
# frame, so the result has to be assigned back; the bare call was a no-op.
df = df.fillna(0)

# Create x (predictors) and y (expected outcome), then hold out 20% for testing.
X, Y = to_xy(df, "loss")
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=42)
In [9]:
def evaluate_metric(params):
    """Train an XGBoost regressor with the given hyperparameters and score it.

    The model is fitted on the module-level ``x_train``/``y_train`` split and
    evaluated on ``x_test``/``y_test``.

    Args:
        params: Mapping-like object indexed by name that provides
            ``max_depth``, ``n_estimators``, ``learning_rate``, ``gamma``,
            ``subsample`` and ``colsample_bytree``.

    Returns:
        The mean absolute error of the predictions on the test split.
    """
    tuned_names = (
        "max_depth",
        "n_estimators",
        "learning_rate",
        "gamma",
        "subsample",
        "colsample_bytree",
    )
    tuned = {name: params[name] for name in tuned_names}

    # Everything that is not tuned is held fixed, including the seed, so that
    # runs with the same hyperparameters use the same configuration.
    model = xgb.XGBRegressor(silent=False, scale_pos_weight=1, seed=65, **tuned)
    model.fit(x_train, y_train)

    return mean_absolute_error(y_test, model.predict(x_test))
def handle_suggestion(suggestion):
    """Evaluate one SigOpt suggestion and submit the result as an observation.

    Prints the suggestion, scores its assignments with ``evaluate_metric`` and
    creates an observation on the module-level ``experiment``.

    Args:
        suggestion: Suggestion object exposing ``assignments`` (the
            hyperparameters to try) and ``id``.
    """
    print("---")
    print(suggestion)

    mae = evaluate_metric(suggestion.assignments)
    print("Value is {}. Submitting observation".format(mae))

    observations = experiment.observations()
    payload = {
        "suggestion": suggestion.id,
        # The error is reported negated.
        # NOTE(review): presumably because the experiment maximises its
        # metric, so a lower MAE must map to a higher value -- confirm.
        "value": float(-1.0 * mae),
        "metadata": {"hostname": hostname},
    }
    observations.create(**payload)
# First work through every suggestion that is still in the "open" state on the
# experiment (fetched page by page) before asking for new ones.
open_suggestions = experiment.suggestions().fetch(state="open")
for pending in open_suggestions.iterate_pages():
    handle_suggestion(pending)

# Then request and evaluate fresh suggestions one at a time.
# range(1, 1000) numbers the attempts 1..999, i.e. 999 new suggestions.
for attempt in range(1, 1000):
    print("creating suggestion #{}".format(attempt))
    handle_suggestion(experiment.suggestions().create())
In [ ]: