In [1]:
# restart your notebook if prompted on Colab
try:
    import verta
except ImportError:
    !pip install verta
This example features:
- scikit-learn's ParameterGrid utility for iterating over a hyperparameter grid (a short illustration follows below)
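As a quick illustration (not one of the original cells), ParameterGrid expands a dict of candidate values into every combination, yielding one hyperparameter dict per run:

from sklearn.model_selection import ParameterGrid

for params in ParameterGrid({'eta': [0.5, 0.7], 'max_depth': [1, 2]}):
    print(params)
# {'eta': 0.5, 'max_depth': 1}
# {'eta': 0.5, 'max_depth': 2}
# {'eta': 0.7, 'max_depth': 1}
# {'eta': 0.7, 'max_depth': 2}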
In [2]:
HOST = "app.verta.ai"
PROJECT_NAME = "Wine Multiclassification"
EXPERIMENT_NAME = "Boosted Trees"
In [3]:
# import os
# os.environ['VERTA_EMAIL'] =
# os.environ['VERTA_DEV_KEY'] =
In [4]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
import itertools
import multiprocessing
import time
import six
import numpy as np
import pandas as pd
import sklearn
from sklearn import datasets
from sklearn import model_selection
import xgboost as xgb
In [5]:
data = datasets.load_wine()
X = data['data']
y = data['target']
dtrain = xgb.DMatrix(X, label=y)
In [6]:
df = pd.DataFrame(np.hstack((X, y.reshape(-1, 1))),
                  columns=data['feature_names'] + ['species'])
df.head()
In [7]:
grid = model_selection.ParameterGrid({
    'eta': [0.5, 0.7],
    'max_depth': [1, 2, 3],
    'num_class': [3],  # the wine dataset has three classes
})
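For reference (assuming the grid defined just above), ParameterGrid supports len(), so the number of runs launched later can be checked up front:

len(grid)  # 2 eta values * 3 max_depth values * 1 num_class value = 6 combinations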
In [8]:
from verta import Client
from verta.utils import ModelAPI
client = Client(HOST)
proj = client.set_project(PROJECT_NAME)
expt = client.set_experiment(EXPERIMENT_NAME)
In [9]:
def run_experiment(hyperparams):
    run = client.set_experiment_run()

    # log training data
    run.log_dataset("train_data", df)

    # log hyperparameters
    run.log_hyperparameters(hyperparams)

    # run cross-validation on hyperparameters
    cv_history = xgb.cv(hyperparams, dtrain,
                        nfold=5,
                        metrics=("merror", "mlogloss"))

    # log observations from each iteration
    for _, iteration in cv_history.iterrows():
        for obs, val in iteration.items():  # .iteritems() was removed in pandas 2.x
            run.log_observation(obs, val)

    # log error from final iteration
    final_val_error = iteration['test-merror-mean']
    run.log_metric("val_error", final_val_error)
    print("{} Mean error: {:.4f}".format(hyperparams, final_val_error))

with multiprocessing.Pool() as pool:
    pool.map(run_experiment, grid)
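Note: multiprocessing.Pool picks up run_experiment and client from the notebook's global scope only under the fork start method; on platforms that default to spawn (macOS, Windows), a plain serial loop over the same grid is a safe fallback:

# serial fallback, equivalent to the pool.map call above
for hyperparams in grid:
    run_experiment(hyperparams)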
In [10]:
best_run = expt.expt_runs.sort("metrics.val_error", descending=False)[0]
print("Validation Error: {:.4f}".format(best_run.get_metric("val_error")))
best_hyperparams = best_run.get_hyperparameters()
print("Hyperparameters: {}".format(best_hyperparams))
In [11]:
model = xgb.XGBClassifier(**best_hyperparams)
model.fit(X, y)
In [12]:
train_acc = model.score(X, y)
best_run.log_metric("train_acc", train_acc)
print("Training accuracy: {:.4f}".format(train_acc))
In [13]:
# create deployment artifacts
# ModelAPI infers the model's input/output schema from sample features and predictions
model_api = ModelAPI(X, model.predict(X))
requirements = ["scikit-learn", "xgboost"]
best_run.log_model(model, model_api=model_api)
best_run.log_requirements(requirements)
In [14]:
best_run
In [15]:
from verta._demo_utils import DeployedModel
deployed_model = DeployedModel(HOST, best_run.id)
In [16]:
# send shuffled training samples to the deployed model indefinitely;
# interrupt the kernel to stop this loop
for x in itertools.cycle(np.random.permutation(X).tolist()):
    print(deployed_model.predict([x]))
    time.sleep(.5)