notebook.community

Edit and run



In [6]:

    
# Run `pip install sigopt` to download the python API client
# Set environment variable SIGOPT_API_TOKEN to your client token
# or provide your client token below
import socket
import sigopt
import os 
import pandas as pd
import numpy as np
from sklearn.cross_validation import train_test_split
from utils import encode_numeric_zscore_list, encode_numeric_zscore_all, to_xy, encode_text_index_list, encode_numeric_log_all
import xgboost as xgb
from sklearn.metrics import explained_variance_score,r2_score,mean_absolute_error



In [7]:

    
conn = sigopt.Connection(client_token="UAJKINHBEGLJVIYYMGWANLUPRORPFRLTJMESGZKNPTHKOSIW")
conn.set_api_url('https://api.sigopt.com')
ID=10594
experiment = conn.experiments(ID)
hostname = socket.gethostname()



In [8]:

    
path = "./data/allstate"
inputFilePath = os.path.join(path, "train.csv.zip")
df = pd.read_csv(inputFilePath, compression="zip", header=0, na_values=['NULL'])
np.random.seed(42)
df = df.reindex(np.random.permutation(df.index))
df.reset_index(inplace=True, drop=True)
df.drop('id', axis=1, inplace=True)
#df = df.sample(frac=0.01)
#encode categoricals as dummies
encode_text_index_list(df, ['cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9', 'cat10', 'cat11', 'cat12', 'cat13', 'cat14', 'cat15', 'cat16', 'cat17', 'cat18', 'cat19', 'cat20', 'cat21', 'cat22', 'cat23', 'cat24', 'cat25', 'cat26', 'cat27', 'cat28', 'cat29', 'cat30', 'cat31', 'cat32', 'cat33', 'cat34', 'cat35', 'cat36', 'cat37', 'cat38', 'cat39', 'cat40', 'cat41', 'cat42', 'cat43', 'cat44', 'cat45', 'cat46', 'cat47', 'cat48', 'cat49', 'cat50', 'cat51', 'cat52', 'cat53', 'cat54', 'cat55', 'cat56', 'cat57', 'cat58', 'cat59', 'cat60', 'cat61', 'cat62', 'cat63', 'cat64', 'cat65', 'cat66', 'cat67', 'cat68', 'cat69', 'cat70', 'cat71', 'cat72', 'cat73', 'cat74', 'cat75', 'cat76', 'cat77', 'cat78', 'cat79', 'cat80', 'cat81', 'cat82', 'cat83', 'cat84', 'cat85', 'cat86', 'cat87', 'cat88', 'cat89', 'cat90', 'cat91', 'cat92', 'cat93', 'cat94', 'cat95', 'cat96', 'cat97', 'cat98', 'cat99', 'cat100', 'cat101', 'cat102', 'cat103', 'cat104', 'cat105', 'cat106', 'cat107', 'cat108', 'cat109', 'cat110', 'cat111', 'cat112', 'cat113', 'cat114', 'cat115', 'cat116'])
#encode all numeric values to zscored values
encode_numeric_zscore_list(df, ['cont1', 'cont2', 'cont3', 'cont4', 'cont5', 'cont6', 'cont7', 'cont8', 'cont9', 'cont10', 'cont11', 'cont12', 'cont13', 'cont14'])
#discard rows where z-score > 2
df.fillna(0)
# Create x(predictors) and y (expected outcome)
X,Y = to_xy(df, "loss")
x_train, x_test, y_train, y_test = train_test_split(X,Y,test_size=0.20, random_state=42)









    



float64



In [9]:

    
def evaluate_metric(params):
    learner = xgb.XGBRegressor(max_depth=params["max_depth"], n_estimators=params["n_estimators"], 
                               learning_rate=params["learning_rate"], silent=False, 
                               gamma=params["gamma"], subsample=params["subsample"],
                               colsample_bytree=params["colsample_bytree"],scale_pos_weight=1, seed=65)
    learner.fit(x_train, y_train)
    predictions = learner.predict(x_test)
    mae = mean_absolute_error(y_test, predictions)
    return mae

def handle_suggestion(suggestion):
    print("---")
    print(suggestion)
    value = evaluate_metric(suggestion.assignments)
    print("Value is {}. Submitting observation".format(value))
    experiment.observations().create(
            suggestion=suggestion.id,
            value=float(-1.0 * value),
            metadata=dict(hostname=hostname),
        )

suggestions = experiment.suggestions().fetch(state="open")
for suggestion in suggestions.iterate_pages():
    handle_suggestion(suggestion)
    
for counter in range(1,1000):
    print("creating suggestion #{}".format(counter))
    suggestion = experiment.suggestions().create()
    handle_suggestion(suggestion)









    



---
Suggestion({
  "assignments": {
    "colsample_bytree": 0.824346485761,
    "gamma": 0.0561538612817,
    "learning_rate": 0.427426756563,
    "max_depth": 15,
    "min_child_weight": 7,
    "n_estimators": 22,
    "subsample": 1.0
  },
  "created": 1477641582,
  "experiment": "10594",
  "fold": null,
  "fold_index": null,
  "id": "4747196",
  "metadata": null,
  "object": "suggestion",
  "state": "open"
})
Value is 1288.27099609375. Submitting observation






    



---------------------------------------------------------------------------
ApiException                              Traceback (most recent call last)
<ipython-input-9-87dd1766df37> in <module>()
     22 suggestions = experiment.suggestions().fetch(state="open")
     23 for suggestion in suggestions.iterate_pages():
---> 24     handle_suggestion(suggestion)
     25 
     26 for counter in range(1,1000):

<ipython-input-9-87dd1766df37> in handle_suggestion(suggestion)
     17             suggestion=suggestion.id,
     18             value=float(-1.0 * value),
---> 19             metadata=dict(hostname=hostname),
     20         )
     21 

/home/arvc/anaconda3/envs/tensorflow/lib/python3.5/site-packages/sigopt/endpoint.py in __call__(self, **kwargs)
     20       call = conn._delete
     21 
---> 22     raw_response = call(url, kwargs)
     23 
     24     if self._endpoint._response_cls is not None:

/home/arvc/anaconda3/envs/tensorflow/lib/python3.5/site-packages/sigopt/interface.py in _post(self, url, params)
    108     return self.requestor.post(
    109       url,
--> 110       json=request_params,
    111     )
    112 

/home/arvc/anaconda3/envs/tensorflow/lib/python3.5/site-packages/sigopt/requestor.py in post(self, url, params, json, headers)
     17 
     18   def post(self, url, params=None, json=None, headers=None):
---> 19     return self._request('post', url=url, params=params, json=json, headers=headers)
     20 
     21   def put(self, url, params=None, json=None, headers=None):

/home/arvc/anaconda3/envs/tensorflow/lib/python3.5/site-packages/sigopt/requestor.py in _request(self, method, url, params, json, headers)
     44       message.append(str(e))
     45       raise ConnectionException('\n'.join(message))
---> 46     return self._handle_response(response)
     47 
     48   def _with_default_headers(self, headers):

/home/arvc/anaconda3/envs/tensorflow/lib/python3.5/site-packages/sigopt/requestor.py in _handle_response(self, response)
     60       return response_json
     61     else:
---> 62       raise ApiException(response_json, response.status_code)

ApiException: ApiException (403): This report would exceed the number of data points available for your plan.



In [ ]: